Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jul 2021 17:19:13 +0000 (10:19 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jul 2021 17:19:13 +0000 (10:19 -0700)
Pull UML updates from Richard Weinberger:

 - Support for optimized routines based on the host CPU

 - Support for PCI via virtio

 - Various fixes
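
For the new PCI-over-virtio support above, a typical experimental
configuration might look like the following; the device ID value is
arbitrary here, since no official virtio device ID has been assigned yet
(see UML_PCI_OVER_VIRTIO_DEVICE_ID further down):

    CONFIG_VIRTIO_UML=y
    CONFIG_UML_PCI_OVER_VIRTIO=y
    CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID=1234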

* tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: remove unneeded semicolon in um_arch.c
  um: Remove the repeated declaration
  um: fix error return code in winch_tramp()
  um: fix error return code in slip_open()
  um: Fix stack pointer alignment
  um: implement flush_cache_vmap/flush_cache_vunmap
  um: add a UML specific futex implementation
  um: enable the use of optimized xor routines in UML
  um: Add support for host CPU flags and alignment
  um: allow not setting extra rpaths in the linux binary
  um: virtio/pci: enable suspend/resume
  um: add PCI over virtio emulation driver
  um: irqs: allow invoking time-travel handler multiple times
  um: time-travel/signals: fix ndelay() in interrupt
  um: expose time-travel mode to userspace side
  um: export signals_enabled directly
  um: remove unused smp_sigio_handler() declaration
  lib: add iomem emulation (logic_iomem)
  um: allow disabling NO_IOMEM

53 files changed:
arch/um/Kconfig
arch/um/Makefile
arch/um/drivers/Kconfig
arch/um/drivers/Makefile
arch/um/drivers/chan_user.c
arch/um/drivers/slip_user.c
arch/um/drivers/ubd_kern.c
arch/um/drivers/virt-pci.c [new file with mode: 0644]
arch/um/drivers/virtio_uml.c
arch/um/include/asm/Kbuild
arch/um/include/asm/cacheflush.h [new file with mode: 0644]
arch/um/include/asm/cpufeature.h [new file with mode: 0644]
arch/um/include/asm/fpu/api.h [new file with mode: 0644]
arch/um/include/asm/futex.h [new file with mode: 0644]
arch/um/include/asm/io.h
arch/um/include/asm/irq.h
arch/um/include/asm/irqflags.h
arch/um/include/asm/msi.h [new file with mode: 0644]
arch/um/include/asm/pci.h [new file with mode: 0644]
arch/um/include/asm/processor-generic.h
arch/um/include/asm/tlb.h
arch/um/include/asm/xor.h
arch/um/include/linux/time-internal.h
arch/um/include/linux/virtio-uml.h [new file with mode: 0644]
arch/um/include/shared/irq_user.h
arch/um/include/shared/kern_util.h
arch/um/include/shared/longjmp.h
arch/um/include/shared/os.h
arch/um/include/shared/timetravel.h [new file with mode: 0644]
arch/um/kernel/Makefile
arch/um/kernel/ioport.c [new file with mode: 0644]
arch/um/kernel/irq.c
arch/um/kernel/ksyms.c
arch/um/kernel/skas/clone.c
arch/um/kernel/skas/uaccess.c
arch/um/kernel/time.c
arch/um/kernel/um_arch.c
arch/um/os-Linux/helper.c
arch/um/os-Linux/signal.c
arch/um/os-Linux/skas/process.c
arch/um/os-Linux/start_up.c
arch/x86/Makefile.um
drivers/input/Kconfig
drivers/input/gameport/Kconfig
drivers/input/joystick/Kconfig
drivers/tty/Kconfig
drivers/video/console/Kconfig
include/asm-generic/logic_io.h [new file with mode: 0644]
include/linux/logic_iomem.h [new file with mode: 0644]
include/uapi/linux/virtio_pcidev.h [new file with mode: 0644]
lib/Kconfig
lib/Makefile
lib/logic_iomem.c [new file with mode: 0644]

index 57cfd9a..0561b73 100644 (file)
@@ -15,7 +15,7 @@ config UML
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DEBUG_BUGVERBOSE
-       select NO_DMA
+       select NO_DMA if !UML_DMA_EMULATION
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
        select HAVE_GCC_PLUGINS
@@ -26,7 +26,22 @@ config MMU
        bool
        default y
 
+config UML_DMA_EMULATION
+       bool
+
 config NO_IOMEM
+       bool "disable IOMEM" if EXPERT
+       depends on !INDIRECT_IOMEM
+       default y
+
+config UML_IOMEM_EMULATION
+       bool
+       select INDIRECT_IOMEM
+       select GENERIC_PCI_IOMAP
+       select GENERIC_IOMAP
+       select NO_GENERIC_PCI_IOPORT_MAP
+
+config NO_IOPORT_MAP
        def_bool y
 
 config ISA
@@ -61,6 +76,9 @@ config NR_CPUS
        range 1 1
        default 1
 
+config ARCH_HAS_CACHE_LINE_SIZE
+       def_bool y
+
 source "arch/$(HEADER_ARCH)/um/Kconfig"
 
 config MAY_HAVE_RUNTIME_DEPS
@@ -91,6 +109,19 @@ config LD_SCRIPT_DYN
        depends on !LD_SCRIPT_STATIC
        select MODULE_REL_CRCS if MODVERSIONS
 
+config LD_SCRIPT_DYN_RPATH
+       bool "set rpath in the binary" if EXPERT
+       default y
+       depends on LD_SCRIPT_DYN
+       help
+         Add /lib (and /lib64 for 64-bit) to the linux binary's rpath
+         explicitly.
+
+         You may need to turn this off if compiling for Nix systems
+         that have their libraries in random /nix directories and
+         might otherwise unexpectedly use libraries from /lib or /lib64
+         instead of the desired ones.
+
 config HOSTFS
        tristate "Host filesystem"
        help
index 1cea46f..12a7ace 100644 (file)
@@ -118,7 +118,8 @@ archprepare:
        $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h
 
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
+LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie)
+LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib
 
 CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
        -fno-stack-protector $(call cc-option, -fno-stack-protector-all)
index 03ba34b..f145842 100644 (file)
@@ -357,3 +357,23 @@ config UML_RTC
          rtcwake, especially in time-travel mode. This driver enables that
          by providing a fake RTC clock that causes a wakeup at the right
          time.
+
+config UML_PCI_OVER_VIRTIO
+       bool "Enable PCI over VIRTIO device simulation"
+       # in theory, just VIRTIO is enough, but that causes recursion
+       depends on VIRTIO_UML
+       select FORCE_PCI
+       select UML_IOMEM_EMULATION
+       select UML_DMA_EMULATION
+       select PCI_MSI
+       select PCI_MSI_IRQ_DOMAIN
+       select PCI_LOCKLESS_CONFIG
+
+config UML_PCI_OVER_VIRTIO_DEVICE_ID
+       int "set the virtio device ID for PCI emulation"
+       default -1
+       depends on UML_PCI_OVER_VIRTIO
+       help
+         There's no official device ID assigned (yet); set the one you
+         wish to use for experimentation here. The default of -1 is
+         not valid and will cause the driver to fail at probe.
index dcc64a0..803666e 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
 obj-$(CONFIG_UML_RTC) += rtc.o
+obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o
 
 # pcap_user.o must be added explicitly.
 USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
index d8845d4..6040817 100644 (file)
@@ -256,7 +256,8 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
                goto out_close;
        }
 
-       if (os_set_fd_block(*fd_out, 0)) {
+       err = os_set_fd_block(*fd_out, 0);
+       if (err) {
                printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd "
                       "non-blocking.\n");
                goto out_close;
index 482a19c..7334019 100644 (file)
@@ -145,7 +145,8 @@ static int slip_open(void *data)
        }
        sfd = err;
 
-       if (set_up_tty(sfd))
+       err = set_up_tty(sfd);
+       if (err)
                goto out_close2;
 
        pri->slave = sfd;
index 8e0b43c..cbd4f00 100644 (file)
@@ -1242,8 +1242,7 @@ static int __init ubd_driver_init(void){
                 * enough. So use anyway the io thread. */
        }
        stack = alloc_stack(0, 0);
-       io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
-                                &thread_fd);
+       io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
        if(io_pid < 0){
                printk(KERN_ERR
                       "ubd : Failed to start I/O thread (errno = %d) - "
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
new file mode 100644 (file)
index 0000000..0b80283
--- /dev/null
@@ -0,0 +1,895 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/logic_iomem.h>
+#include <linux/irqdomain.h>
+#include <linux/virtio_pcidev.h>
+#include <linux/virtio-uml.h>
+#include <linux/delay.h>
+#include <linux/msi.h>
+#include <asm/unaligned.h>
+#include <irq_kern.h>
+
+#define MAX_DEVICES 8
+#define MAX_MSI_VECTORS 32
+#define CFG_SPACE_SIZE 4096
+
+/* for MSI-X we have a 32-bit payload */
+#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
+#define NUM_IRQ_MSGS   10
+
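+/*
+ * Commands for posted writes are kmalloc'ed copies that get freed on
+ * completion; synchronous commands use the caller's (typically on-stack)
+ * buffer, which must not be freed. kmalloc() returns at least
+ * pointer-aligned memory, so bit 0 of the token is free to tag the
+ * no-free case.
+ */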
+#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1))
+#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1)
+
+struct um_pci_device {
+       struct virtio_device *vdev;
+
+       /* for now just standard BARs */
+       u8 resptr[PCI_STD_NUM_BARS];
+
+       struct virtqueue *cmd_vq, *irq_vq;
+
+#define UM_PCI_STAT_WAITING    0
+       unsigned long status;
+
+       int irq;
+};
+
+struct um_pci_device_reg {
+       struct um_pci_device *dev;
+       void __iomem *iomem;
+};
+
+static struct pci_host_bridge *bridge;
+static DEFINE_MUTEX(um_pci_mtx);
+static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
+static struct fwnode_handle *um_pci_fwnode;
+static struct irq_domain *um_pci_inner_domain;
+static struct irq_domain *um_pci_msi_domain;
+static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
+
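+/* limit for polling a command completion: 1 us per step, i.e. ~40 ms */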
+#define UM_VIRT_PCI_MAXDELAY 40000
+
+static int um_pci_send_cmd(struct um_pci_device *dev,
+                          struct virtio_pcidev_msg *cmd,
+                          unsigned int cmd_size,
+                          const void *extra, unsigned int extra_size,
+                          void *out, unsigned int out_size)
+{
+       struct scatterlist out_sg, extra_sg, in_sg;
+       struct scatterlist *sgs_list[] = {
+               [0] = &out_sg,
+               [1] = extra ? &extra_sg : &in_sg,
+               [2] = extra ? &in_sg : NULL,
+       };
+       int delay_count = 0;
+       int ret, len;
+       bool posted;
+
+       if (WARN_ON(cmd_size < sizeof(*cmd)))
+               return -EINVAL;
+
+       switch (cmd->op) {
+       case VIRTIO_PCIDEV_OP_CFG_WRITE:
+       case VIRTIO_PCIDEV_OP_MMIO_WRITE:
+       case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
+               /* in PCI, writes are posted, so don't wait */
+               posted = !out;
+               WARN_ON(!posted);
+               break;
+       default:
+               posted = false;
+               break;
+       }
+
+       if (posted) {
+               u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
+
+               if (ncmd) {
+                       memcpy(ncmd, cmd, cmd_size);
+                       if (extra)
+                               memcpy(ncmd + cmd_size, extra, extra_size);
+                       cmd = (void *)ncmd;
+                       cmd_size += extra_size;
+                       extra = NULL;
+                       extra_size = 0;
+               } else {
+                       /* try without allocating memory */
+                       posted = false;
+               }
+       }
+
+       sg_init_one(&out_sg, cmd, cmd_size);
+       if (extra)
+               sg_init_one(&extra_sg, extra, extra_size);
+       if (out)
+               sg_init_one(&in_sg, out, out_size);
+
+       /* add to internal virtio queue */
+       ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
+                               extra ? 2 : 1,
+                               out ? 1 : 0,
+                               posted ? cmd : HANDLE_NO_FREE(cmd),
+                               GFP_ATOMIC);
+       if (ret)
+               return ret;
+
+       if (posted) {
+               virtqueue_kick(dev->cmd_vq);
+               return 0;
+       }
+
+       /* kick and poll for getting a response on the queue */
+       set_bit(UM_PCI_STAT_WAITING, &dev->status);
+       virtqueue_kick(dev->cmd_vq);
+
+       while (1) {
+               void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
+
+               if (completed == HANDLE_NO_FREE(cmd))
+                       break;
+
+               if (completed && !HANDLE_IS_NO_FREE(completed))
+                       kfree(completed);
+
+               if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
+                             ++delay_count > UM_VIRT_PCI_MAXDELAY,
+                             "um virt-pci delay: %d", delay_count)) {
+                       ret = -EIO;
+                       break;
+               }
+               udelay(1);
+       }
+       clear_bit(UM_PCI_STAT_WAITING, &dev->status);
+
+       return ret;
+}
+
+static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
+                                         int size)
+{
+       struct um_pci_device_reg *reg = priv;
+       struct um_pci_device *dev = reg->dev;
+       struct virtio_pcidev_msg hdr = {
+               .op = VIRTIO_PCIDEV_OP_CFG_READ,
+               .size = size,
+               .addr = offset,
+       };
+       /* maximum size - we may only use parts of it */
+       u8 data[8];
+
+       if (!dev)
+               return ~0ULL;
+
+       memset(data, 0xff, sizeof(data));
+
+       switch (size) {
+       case 1:
+       case 2:
+       case 4:
+#ifdef CONFIG_64BIT
+       case 8:
+#endif
+               break;
+       default:
+               WARN(1, "invalid config space read size %d\n", size);
+               return ~0ULL;
+       }
+
+       if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0,
+                           data, sizeof(data)))
+               return ~0ULL;
+
+       switch (size) {
+       case 1:
+               return data[0];
+       case 2:
+               return le16_to_cpup((void *)data);
+       case 4:
+               return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+       case 8:
+               return le64_to_cpup((void *)data);
+#endif
+       default:
+               return ~0ULL;
+       }
+}
+
+static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
+                                 unsigned long val)
+{
+       struct um_pci_device_reg *reg = priv;
+       struct um_pci_device *dev = reg->dev;
+       struct {
+               struct virtio_pcidev_msg hdr;
+               /* maximum size - we may only use parts of it */
+               u8 data[8];
+       } msg = {
+               .hdr = {
+                       .op = VIRTIO_PCIDEV_OP_CFG_WRITE,
+                       .size = size,
+                       .addr = offset,
+               },
+       };
+
+       if (!dev)
+               return;
+
+       switch (size) {
+       case 1:
+               msg.data[0] = (u8)val;
+               break;
+       case 2:
+               put_unaligned_le16(val, (void *)msg.data);
+               break;
+       case 4:
+               put_unaligned_le32(val, (void *)msg.data);
+               break;
+#ifdef CONFIG_64BIT
+       case 8:
+               put_unaligned_le64(val, (void *)msg.data);
+               break;
+#endif
+       default:
+               WARN(1, "invalid config space write size %d\n", size);
+               return;
+       }
+
+       WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
+}
+
+static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
+       .read = um_pci_cfgspace_read,
+       .write = um_pci_cfgspace_write,
+};
+
+static void um_pci_bar_copy_from(void *priv, void *buffer,
+                                unsigned int offset, int size)
+{
+       u8 *resptr = priv;
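+       /*
+        * priv points at dev->resptr[bar], which stores the BAR number
+        * itself; stepping back by that value reaches resptr[0], from
+        * which container_of() recovers the owning um_pci_device.
+        */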
+       struct um_pci_device *dev = container_of(resptr - *resptr,
+                                                struct um_pci_device,
+                                                resptr[0]);
+       struct virtio_pcidev_msg hdr = {
+               .op = VIRTIO_PCIDEV_OP_MMIO_READ,
+               .bar = *resptr,
+               .size = size,
+               .addr = offset,
+       };
+
+       memset(buffer, 0xff, size);
+
+       um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
+}
+
+static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
+                                    int size)
+{
+       /* maximum size - we may only use parts of it */
+       u8 data[8];
+
+       switch (size) {
+       case 1:
+       case 2:
+       case 4:
+#ifdef CONFIG_64BIT
+       case 8:
+#endif
+               break;
+       default:
+               WARN(1, "invalid BAR read size %d\n", size);
+               return ~0ULL;
+       }
+
+       um_pci_bar_copy_from(priv, data, offset, size);
+
+       switch (size) {
+       case 1:
+               return data[0];
+       case 2:
+               return le16_to_cpup((void *)data);
+       case 4:
+               return le32_to_cpup((void *)data);
+#ifdef CONFIG_64BIT
+       case 8:
+               return le64_to_cpup((void *)data);
+#endif
+       default:
+               return ~0ULL;
+       }
+}
+
+static void um_pci_bar_copy_to(void *priv, unsigned int offset,
+                              const void *buffer, int size)
+{
+       u8 *resptr = priv;
+       struct um_pci_device *dev = container_of(resptr - *resptr,
+                                                struct um_pci_device,
+                                                resptr[0]);
+       struct virtio_pcidev_msg hdr = {
+               .op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
+               .bar = *resptr,
+               .size = size,
+               .addr = offset,
+       };
+
+       um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
+}
+
+static void um_pci_bar_write(void *priv, unsigned int offset, int size,
+                            unsigned long val)
+{
+       /* maximum size - we may only use parts of it */
+       u8 data[8];
+
+       switch (size) {
+       case 1:
+               data[0] = (u8)val;
+               break;
+       case 2:
+               put_unaligned_le16(val, (void *)data);
+               break;
+       case 4:
+               put_unaligned_le32(val, (void *)data);
+               break;
+#ifdef CONFIG_64BIT
+       case 8:
+               put_unaligned_le64(val, (void *)data);
+               break;
+#endif
+       default:
+               WARN(1, "invalid BAR write size %d\n", size);
+               return;
+       }
+
+       um_pci_bar_copy_to(priv, offset, data, size);
+}
+
+static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
+{
+       u8 *resptr = priv;
+       struct um_pci_device *dev = container_of(resptr - *resptr,
+                                                struct um_pci_device,
+                                                resptr[0]);
+       struct {
+               struct virtio_pcidev_msg hdr;
+               u8 data;
+       } msg = {
+               .hdr = {
+                       .op = VIRTIO_PCIDEV_OP_CFG_WRITE,
+                       .bar = *resptr,
+                       .size = size,
+                       .addr = offset,
+               },
+               .data = value,
+       };
+
+       um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
+}
+
+static const struct logic_iomem_ops um_pci_device_bar_ops = {
+       .read = um_pci_bar_read,
+       .write = um_pci_bar_write,
+       .set = um_pci_bar_set,
+       .copy_from = um_pci_bar_copy_from,
+       .copy_to = um_pci_bar_copy_to,
+};
+
+static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
+                                   int where)
+{
+       struct um_pci_device_reg *dev;
+       unsigned int busn = bus->number;
+
+       if (busn > 0)
+               return NULL;
+
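+       /* devfn encodes slot and function as slot * 8 + function */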
+       /* not allowing functions for now ... */
+       if (devfn % 8)
+               return NULL;
+
+       if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
+               return NULL;
+
+       dev = &um_pci_devices[devfn / 8];
+       if (!dev)
+               return NULL;
+
+       return (void __iomem *)((unsigned long)dev->iomem + where);
+}
+
+static struct pci_ops um_pci_ops = {
+       .map_bus = um_pci_map_bus,
+       .read = pci_generic_config_read,
+       .write = pci_generic_config_write,
+};
+
+static void um_pci_rescan(void)
+{
+       pci_lock_rescan_remove();
+       pci_rescan_bus(bridge->bus);
+       pci_unlock_rescan_remove();
+}
+
+static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
+{
+       struct scatterlist sg[1];
+
+       sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
+       if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
+               kfree(buf);
+       else if (kick)
+               virtqueue_kick(vq);
+}
+
+static void um_pci_handle_irq_message(struct virtqueue *vq,
+                                     struct virtio_pcidev_msg *msg)
+{
+       struct virtio_device *vdev = vq->vdev;
+       struct um_pci_device *dev = vdev->priv;
+
+       /* we should properly chain interrupts, but on ARCH=um we don't care */
+
+       switch (msg->op) {
+       case VIRTIO_PCIDEV_OP_INT:
+               generic_handle_irq(dev->irq);
+               break;
+       case VIRTIO_PCIDEV_OP_MSI:
+               /* our MSI message is just the interrupt number */
+               if (msg->size == sizeof(u32))
+                       generic_handle_irq(le32_to_cpup((void *)msg->data));
+               else
+                       generic_handle_irq(le16_to_cpup((void *)msg->data));
+               break;
+       case VIRTIO_PCIDEV_OP_PME:
+               /* nothing to do - we already woke up due to the message */
+               break;
+       default:
+               dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
+               break;
+       }
+}
+
+static void um_pci_cmd_vq_cb(struct virtqueue *vq)
+{
+       struct virtio_device *vdev = vq->vdev;
+       struct um_pci_device *dev = vdev->priv;
+       void *cmd;
+       int len;
+
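+       /*
+        * While a synchronous command is being polled for, leave buffer
+        * reclaim to the polling loop in um_pci_send_cmd().
+        */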
+       if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
+               return;
+
+       while ((cmd = virtqueue_get_buf(vq, &len))) {
+               if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
+                       continue;
+               kfree(cmd);
+       }
+}
+
+static void um_pci_irq_vq_cb(struct virtqueue *vq)
+{
+       struct virtio_pcidev_msg *msg;
+       int len;
+
+       while ((msg = virtqueue_get_buf(vq, &len))) {
+               if (len >= sizeof(*msg))
+                       um_pci_handle_irq_message(vq, msg);
+
+               /* recycle the message buffer */
+               um_pci_irq_vq_addbuf(vq, msg, true);
+       }
+}
+
+static int um_pci_init_vqs(struct um_pci_device *dev)
+{
+       struct virtqueue *vqs[2];
+       static const char *const names[2] = { "cmd", "irq" };
+       vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb };
+       int err, i;
+
+       err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL);
+       if (err)
+               return err;
+
+       dev->cmd_vq = vqs[0];
+       dev->irq_vq = vqs[1];
+
+       for (i = 0; i < NUM_IRQ_MSGS; i++) {
+               void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
+
+               if (msg)
+                       um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
+       }
+
+       virtqueue_kick(dev->irq_vq);
+
+       return 0;
+}
+
+static int um_pci_virtio_probe(struct virtio_device *vdev)
+{
+       struct um_pci_device *dev;
+       int i, free = -1;
+       int err = -ENOSPC;
+
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+
+       dev->vdev = vdev;
+       vdev->priv = dev;
+
+       mutex_lock(&um_pci_mtx);
+       for (i = 0; i < MAX_DEVICES; i++) {
+               if (um_pci_devices[i].dev)
+                       continue;
+               free = i;
+               break;
+       }
+
+       if (free < 0)
+               goto error;
+
+       err = um_pci_init_vqs(dev);
+       if (err)
+               goto error;
+
+       dev->irq = irq_alloc_desc(numa_node_id());
+       if (dev->irq < 0) {
+               err = dev->irq;
+               goto error;
+       }
+       um_pci_devices[free].dev = dev;
+       vdev->priv = dev;
+
+       mutex_unlock(&um_pci_mtx);
+
+       device_set_wakeup_enable(&vdev->dev, true);
+
+       /*
+        * In order to do suspend-resume properly, don't allow VQs
+        * to be suspended.
+        */
+       virtio_uml_set_no_vq_suspend(vdev, true);
+
+       um_pci_rescan();
+       return 0;
+error:
+       mutex_unlock(&um_pci_mtx);
+       kfree(dev);
+       return err;
+}
+
+static void um_pci_virtio_remove(struct virtio_device *vdev)
+{
+       struct um_pci_device *dev = vdev->priv;
+       int i;
+
+        /* Stop all virtqueues */
+        vdev->config->reset(vdev);
+        vdev->config->del_vqs(vdev);
+
+       device_set_wakeup_enable(&vdev->dev, false);
+
+       mutex_lock(&um_pci_mtx);
+       for (i = 0; i < MAX_DEVICES; i++) {
+               if (um_pci_devices[i].dev != dev)
+                       continue;
+               um_pci_devices[i].dev = NULL;
+               irq_free_desc(dev->irq);
+       }
+       mutex_unlock(&um_pci_mtx);
+
+       um_pci_rescan();
+
+       kfree(dev);
+}
+
+static struct virtio_device_id id_table[] = {
+       { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
+       { 0 },
+};
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+static struct virtio_driver um_pci_virtio_driver = {
+       .driver.name = "virtio-pci",
+       .driver.owner = THIS_MODULE,
+       .id_table = id_table,
+       .probe = um_pci_virtio_probe,
+       .remove = um_pci_virtio_remove,
+};
+
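+/* one 4k config space per device, placed directly below the MMIO window */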
+static struct resource virt_cfgspace_resource = {
+       .name = "PCI config space",
+       .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
+       .end = 0xf0000000 - 1,
+       .flags = IORESOURCE_MEM,
+};
+
+static long um_pci_map_cfgspace(unsigned long offset, size_t size,
+                               const struct logic_iomem_ops **ops,
+                               void **priv)
+{
+       if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
+               return -EINVAL;
+
+       if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
+               *ops = &um_pci_device_cfgspace_ops;
+               *priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
+               return 0;
+       }
+
+       WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
+       return -ENOENT;
+}
+
+static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
+       .map = um_pci_map_cfgspace,
+};
+
+static struct resource virt_iomem_resource = {
+       .name = "PCI iomem",
+       .start = 0xf0000000,
+       .end = 0xffffffff,
+       .flags = IORESOURCE_MEM,
+};
+
+struct um_pci_map_iomem_data {
+       unsigned long offset;
+       size_t size;
+       const struct logic_iomem_ops **ops;
+       void **priv;
+       long ret;
+};
+
+static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
+{
+       struct um_pci_map_iomem_data *data = _data;
+       struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
+       struct um_pci_device *dev;
+       int i;
+
+       if (!reg->dev)
+               return 0;
+
+       for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
+               struct resource *r = &pdev->resource[i];
+
+               if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
+                       continue;
+
+               /*
+                * the mapping must cover the whole resource or lie
+                * entirely within it; merely overlapping is not allowed
+                */
+               if (data->offset < r->start || data->offset > r->end)
+                       continue;
+               if (data->offset + data->size - 1 > r->end)
+                       continue;
+
+               dev = reg->dev;
+               *data->ops = &um_pci_device_bar_ops;
+               dev->resptr[i] = i;
+               *data->priv = &dev->resptr[i];
+               data->ret = data->offset - r->start;
+
+               /* no need to continue */
+               return 1;
+       }
+
+       return 0;
+}
+
+static long um_pci_map_iomem(unsigned long offset, size_t size,
+                            const struct logic_iomem_ops **ops,
+                            void **priv)
+{
+       struct um_pci_map_iomem_data data = {
+               /* we want the full address here */
+               .offset = offset + virt_iomem_resource.start,
+               .size = size,
+               .ops = ops,
+               .priv = priv,
+               .ret = -ENOENT,
+       };
+
+       pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);
+       return data.ret;
+}
+
+static const struct logic_iomem_region_ops um_pci_iomem_ops = {
+       .map = um_pci_map_iomem,
+};
+
+static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+       /*
+        * This is a very low address and not actually valid 'physical' memory
+        * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
+        * legitimately written to by the device in any other way.
+        * We use the (virtual) IRQ number here as the message to simplify the
+        * code that receives the message, where for now we simply trust the
+        * device to send the correct message.
+        */
+       msg->address_hi = 0;
+       msg->address_lo = 0xa0000;
+       msg->data = data->irq;
+}
+
+static struct irq_chip um_pci_msi_bottom_irq_chip = {
+       .name = "UM virtio MSI",
+       .irq_compose_msi_msg = um_pci_compose_msi_msg,
+};
+
+static int um_pci_inner_domain_alloc(struct irq_domain *domain,
+                                    unsigned int virq, unsigned int nr_irqs,
+                                    void *args)
+{
+       unsigned long bit;
+
+       WARN_ON(nr_irqs != 1);
+
+       mutex_lock(&um_pci_mtx);
+       bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
+       if (bit >= MAX_MSI_VECTORS) {
+               mutex_unlock(&um_pci_mtx);
+               return -ENOSPC;
+       }
+
+       set_bit(bit, um_pci_msi_used);
+       mutex_unlock(&um_pci_mtx);
+
+       irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
+                           domain->host_data, handle_simple_irq,
+                           NULL, NULL);
+
+       return 0;
+}
+
+static void um_pci_inner_domain_free(struct irq_domain *domain,
+                                    unsigned int virq, unsigned int nr_irqs)
+{
+       struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+       mutex_lock(&um_pci_mtx);
+
+       if (!test_bit(d->hwirq, um_pci_msi_used))
+               pr_err("trying to free unused MSI#%lu\n", d->hwirq);
+       else
+               __clear_bit(d->hwirq, um_pci_msi_used);
+
+       mutex_unlock(&um_pci_mtx);
+}
+
+static const struct irq_domain_ops um_pci_inner_domain_ops = {
+       .alloc = um_pci_inner_domain_alloc,
+       .free = um_pci_inner_domain_free,
+};
+
+static struct irq_chip um_pci_msi_irq_chip = {
+       .name = "UM virtio PCIe MSI",
+       .irq_mask = pci_msi_mask_irq,
+       .irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info um_pci_msi_domain_info = {
+       .flags  = MSI_FLAG_USE_DEF_DOM_OPS |
+                 MSI_FLAG_USE_DEF_CHIP_OPS |
+                 MSI_FLAG_PCI_MSIX,
+       .chip   = &um_pci_msi_irq_chip,
+};
+
+static struct resource busn_resource = {
+       .name   = "PCI busn",
+       .start  = 0,
+       .end    = 0,
+       .flags  = IORESOURCE_BUS,
+};
+
+static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+{
+       struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
+
+       if (WARN_ON(!reg->dev))
+               return -EINVAL;
+
+       /* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
+       return reg->dev->irq;
+}
+
+void *pci_root_bus_fwnode(struct pci_bus *bus)
+{
+       return um_pci_fwnode;
+}
+
+int um_pci_init(void)
+{
+       int err, i;
+
+       WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
+                                      &um_pci_cfgspace_ops));
+       WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
+                                      &um_pci_iomem_ops));
+
+       if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
+                "No virtio device ID configured for PCI - no PCI support\n"))
+               return 0;
+
+       bridge = pci_alloc_host_bridge(0);
+       if (!bridge)
+               return -ENOMEM;
+
+       um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
+       if (!um_pci_fwnode) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS,
+                                              MAX_MSI_VECTORS, 0,
+                                              &um_pci_inner_domain_ops, NULL);
+       if (!um_pci_inner_domain) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
+                                                     &um_pci_msi_domain_info,
+                                                     um_pci_inner_domain);
+       if (!um_pci_msi_domain) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       pci_add_resource(&bridge->windows, &virt_iomem_resource);
+       pci_add_resource(&bridge->windows, &busn_resource);
+       bridge->ops = &um_pci_ops;
+       bridge->map_irq = um_pci_map_irq;
+
+       for (i = 0; i < MAX_DEVICES; i++) {
+               resource_size_t start;
+
+               start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
+               um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
+               if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+       }
+
+       err = pci_host_probe(bridge);
+       if (err)
+               goto free;
+
+       err = register_virtio_driver(&um_pci_virtio_driver);
+       if (err)
+               goto free;
+       return 0;
+free:
+       if (um_pci_inner_domain)
+               irq_domain_remove(um_pci_inner_domain);
+       if (um_pci_fwnode)
+               irq_domain_free_fwnode(um_pci_fwnode);
+       pci_free_resource_list(&bridge->windows);
+       pci_free_host_bridge(bridge);
+       return err;
+}
+module_init(um_pci_init);
+
+void um_pci_exit(void)
+{
+       unregister_virtio_driver(&um_pci_virtio_driver);
+       irq_domain_remove(um_pci_msi_domain);
+       irq_domain_remove(um_pci_inner_domain);
+       pci_free_resource_list(&bridge->windows);
+       pci_free_host_bridge(bridge);
+}
+module_exit(um_pci_exit);
index 91ddf74..4412d6f 100644 (file)
@@ -56,6 +56,7 @@ struct virtio_uml_device {
        u8 status;
        u8 registered:1;
        u8 suspended:1;
+       u8 no_vq_suspend:1;
 
        u8 config_changed_irq:1;
        uint64_t vq_irq_vq_map;
@@ -1098,6 +1099,19 @@ static void virtio_uml_release_dev(struct device *d)
        kfree(vu_dev);
 }
 
+void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+                                 bool no_vq_suspend)
+{
+       struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+       if (WARN_ON(vdev->config != &virtio_uml_config_ops))
+               return;
+
+       vu_dev->no_vq_suspend = no_vq_suspend;
+       dev_info(&vdev->dev, "%sabled VQ suspend\n",
+                no_vq_suspend ? "dis" : "en");
+}
+
 /* Platform device */
 
 static int virtio_uml_probe(struct platform_device *pdev)
@@ -1302,13 +1316,16 @@ MODULE_DEVICE_TABLE(of, virtio_uml_match);
 static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
 {
        struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
-       struct virtqueue *vq;
 
-       virtio_device_for_each_vq((&vu_dev->vdev), vq) {
-               struct virtio_uml_vq_info *info = vq->priv;
+       if (!vu_dev->no_vq_suspend) {
+               struct virtqueue *vq;
 
-               info->suspended = true;
-               vhost_user_set_vring_enable(vu_dev, vq->index, false);
+               virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+                       struct virtio_uml_vq_info *info = vq->priv;
+
+                       info->suspended = true;
+                       vhost_user_set_vring_enable(vu_dev, vq->index, false);
+               }
        }
 
        if (!device_may_wakeup(&vu_dev->vdev.dev)) {
@@ -1322,13 +1339,16 @@ static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
 static int virtio_uml_resume(struct platform_device *pdev)
 {
        struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
-       struct virtqueue *vq;
 
-       virtio_device_for_each_vq((&vu_dev->vdev), vq) {
-               struct virtio_uml_vq_info *info = vq->priv;
+       if (!vu_dev->no_vq_suspend) {
+               struct virtqueue *vq;
+
+               virtio_device_for_each_vq((&vu_dev->vdev), vq) {
+                       struct virtio_uml_vq_info *info = vq->priv;
 
-               info->suspended = false;
-               vhost_user_set_vring_enable(vu_dev, vq->index, true);
+                       info->suspended = false;
+                       vhost_user_set_vring_enable(vu_dev, vq->index, true);
+               }
        }
 
        vu_dev->suspended = false;
index d7492e5..e5a7b55 100644 (file)
@@ -7,8 +7,8 @@ generic-y += device.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
@@ -17,7 +17,6 @@ generic-y += mcs_spinlock.h
 generic-y += mmiowb.h
 generic-y += module.lds.h
 generic-y += param.h
-generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += softirq_stack.h
@@ -27,3 +26,4 @@ generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += kprobes.h
 generic-y += mm_hooks.h
+generic-y += vga.h
diff --git a/arch/um/include/asm/cacheflush.h b/arch/um/include/asm/cacheflush.h
new file mode 100644 (file)
index 0000000..4c9858c
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __UM_ASM_CACHEFLUSH_H
+#define __UM_ASM_CACHEFLUSH_H
+
+#include <asm/tlbflush.h>
+#define flush_cache_vmap flush_tlb_kernel_range
+#define flush_cache_vunmap flush_tlb_kernel_range
+
+#include <asm-generic/cacheflush.h>
+#endif /* __UM_ASM_CACHEFLUSH_H */
diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
new file mode 100644 (file)
index 0000000..19cd7ed
--- /dev/null
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CPUFEATURE_H
+#define _ASM_UM_CPUFEATURE_H
+
+#include <asm/processor.h>
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+
+#include <asm/asm.h>
+#include <linux/bitops.h>
+
+extern const char * const x86_cap_flags[NCAPINTS*32];
+extern const char * const x86_power_flags[32];
+#define X86_CAP_FMT "%s"
+#define x86_cap_flag(flag) x86_cap_flags[flag]
+
+/*
+ * In order to save room, we index into this array by doing
+ * X86_BUG_<name> - NCAPINTS*32.
+ */
+extern const char * const x86_bug_flags[NBUGINTS*32];
+
+#define test_cpu_cap(c, bit)                                           \
+        test_bit(bit, (unsigned long *)((c)->x86_capability))
+
+/*
+ * There are 32 bits/features in each mask word.  The high bits
+ * (selected with (bit>>5)) give us the word number and the low 5
+ * bits give us the bit/feature number inside the word.
+ * (1UL<<((bit)&31)) gives us a mask for the feature_bit so we can
+ * see if it is set in the mask word.
+ */
+#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit)    \
+       (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
+
+#define cpu_has(c, bit)                                                        \
+        test_cpu_cap(c, bit)
+
+#define this_cpu_has(bit)                                              \
+       (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
+        x86_this_cpu_test_bit(bit,                                     \
+               (unsigned long __percpu *)&cpu_info.x86_capability))
+
+/*
+ * This macro is for detection of features which need kernel
+ * infrastructure to be used.  It may *not* directly test the CPU
+ * itself.  Use the cpu_has() family if you want true runtime
+ * testing of CPU features, like in hypervisor code where you are
+ * supporting a possible guest feature where host support for it
+ * is not relevant.
+ */
+#define cpu_feature_enabled(bit)       \
+       (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
+
+#define boot_cpu_has(bit)      cpu_has(&boot_cpu_data, bit)
+
+#define set_cpu_cap(c, bit)    set_bit(bit, (unsigned long *)((c)->x86_capability))
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+
+#define setup_force_cpu_cap(bit) do { \
+       set_cpu_cap(&boot_cpu_data, bit);       \
+       set_bit(bit, (unsigned long *)cpu_caps_set);    \
+} while (0)
+
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
+#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO)
+
+/*
+ * Workaround for the sake of BPF compilation which utilizes kernel
+ * headers, but clang does not support ASM GOTO and fails the build.
+ */
+#ifndef __BPF_TRACING__
+#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
+#endif
+
+#define static_cpu_has(bit)            boot_cpu_has(bit)
+
+#else
+
+/*
+ * Static testing of CPU features. Used the same as boot_cpu_has(). It
+ * statically patches the target code for additional performance. Use
+ * static_cpu_has() only in fast paths, where every cycle counts. Which
+ * means that the boot_cpu_has() variant is already fast enough for the
+ * majority of cases and you should stick to using it as it is generally
+ * only two instructions: a RIP-relative MOV and a TEST.
+ */
+static __always_inline bool _static_cpu_has(u16 bit)
+{
+       asm_volatile_goto("1: jmp 6f\n"
+                "2:\n"
+                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                        "((5f-4f) - (2b-1b)),0x90\n"
+                "3:\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 4f - .\n"              /* repl offset */
+                " .word %P[always]\n"          /* always replace */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 5f - 4f\n"             /* repl len */
+                " .byte 3b - 2b\n"             /* pad len */
+                ".previous\n"
+                ".section .altinstr_replacement,\"ax\"\n"
+                "4: jmp %l[t_no]\n"
+                "5:\n"
+                ".previous\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 0\n"                   /* no replacement */
+                " .word %P[feature]\n"         /* feature bit */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 0\n"                   /* repl len */
+                " .byte 0\n"                   /* pad len */
+                ".previous\n"
+                ".section .altinstr_aux,\"ax\"\n"
+                "6:\n"
+                " testb %[bitnum],%[cap_byte]\n"
+                " jnz %l[t_yes]\n"
+                " jmp %l[t_no]\n"
+                ".previous\n"
+                : : [feature]  "i" (bit),
+                    [always]   "i" (X86_FEATURE_ALWAYS),
+                    [bitnum]   "i" (1 << (bit & 7)),
+                    [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+                : : t_yes, t_no);
+t_yes:
+       return true;
+t_no:
+       return false;
+}
+
+#define static_cpu_has(bit)                                    \
+(                                                              \
+       __builtin_constant_p(boot_cpu_has(bit)) ?               \
+               boot_cpu_has(bit) :                             \
+               _static_cpu_has(bit)                            \
+)
+#endif
+
+#define cpu_has_bug(c, bit)            cpu_has(c, (bit))
+#define set_cpu_bug(c, bit)            set_cpu_cap(c, (bit))
+
+#define static_cpu_has_bug(bit)                static_cpu_has((bit))
+#define boot_cpu_has_bug(bit)          cpu_has_bug(&boot_cpu_data, (bit))
+#define boot_cpu_set_bug(bit)          set_cpu_cap(&boot_cpu_data, (bit))
+
+#define MAX_CPU_FEATURES               (NCAPINTS * 32)
+#define cpu_have_feature               boot_cpu_has
+
+#define CPU_FEATURE_TYPEFMT            "x86,ven%04Xfam%04Xmod%04X"
+#define CPU_FEATURE_TYPEVAL            boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
+                                       boot_cpu_data.x86_model
+
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#endif /* _ASM_UM_CPUFEATURE_H */
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
new file mode 100644 (file)
index 0000000..71bfd9e
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_UM_FPU_API_H
+#define _ASM_UM_FPU_API_H
+
+/* Copyright (c) 2020 Cambridge Greys Ltd
+ * Copyright (c) 2020 Red Hat Inc.
+ * A set of "dummy" defines to allow the direct inclusion
+ * of x86 optimized copy, xor, etc routines into the
+ * UML code tree. */
+
+#define kernel_fpu_begin() (void)0
+#define kernel_fpu_end() (void)0
+
+static inline bool irq_fpu_usable(void)
+{
+       return true;
+}
+
+
+#endif
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
new file mode 100644 (file)
index 0000000..780aa6b
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_FUTEX_H
+#define _ASM_UM_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval);
+
+#endif
index 6ce18d3..9ea42cc 100644 (file)
@@ -3,16 +3,23 @@
 #define _ASM_UM_IO_H
 #include <linux/types.h>
 
+/* get emulated iomem (if desired) */
+#include <asm-generic/logic_io.h>
+
+#ifndef ioremap
 #define ioremap ioremap
 static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
 {
        return NULL;
 }
+#endif /* ioremap */
 
+#ifndef iounmap
 #define iounmap iounmap
 static inline void iounmap(void __iomem *addr)
 {
 }
+#endif /* iounmap */
 
 #include <asm-generic/io.h>
 
index 3f5d3e8..e187c78 100644 (file)
 
 #endif
 
-#define NR_IRQS                        64
+#define UM_LAST_SIGNAL_IRQ     64
+/* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */
+#ifdef CONFIG_PCI_MSI
+#define NR_IRQS                        (UM_LAST_SIGNAL_IRQ + 64)
+#else
+#define NR_IRQS                        UM_LAST_SIGNAL_IRQ
+#endif /* CONFIG_PCI_MSI */
 
 #include <asm-generic/irq.h>
 #endif
index 0642ad9..dab5744 100644 (file)
@@ -2,15 +2,15 @@
 #ifndef __UM_IRQFLAGS_H
 #define __UM_IRQFLAGS_H
 
-extern int get_signals(void);
-extern int set_signals(int enable);
-extern void block_signals(void);
-extern void unblock_signals(void);
+extern int signals_enabled;
+int set_signals(int enable);
+void block_signals(void);
+void unblock_signals(void);
 
 #define arch_local_save_flags arch_local_save_flags
 static inline unsigned long arch_local_save_flags(void)
 {
-       return get_signals();
+       return signals_enabled;
 }
 
 #define arch_local_irq_restore arch_local_irq_restore
diff --git a/arch/um/include/asm/msi.h b/arch/um/include/asm/msi.h
new file mode 100644 (file)
index 0000000..c8c6c38
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/msi.h>
diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h
new file mode 100644 (file)
index 0000000..da13fd5
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_UM_PCI_H
+#define __ASM_UM_PCI_H
+#include <linux/types.h>
+#include <asm/io.h>
+
+#define PCIBIOS_MIN_IO         0
+#define PCIBIOS_MIN_MEM                0
+
+#define pcibios_assign_all_busses() 1
+
+extern int isa_dma_bridge_buggy;
+
+#ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+       /* no legacy IRQs */
+       return -ENODEV;
+}
+#endif
+
+#ifdef CONFIG_PCI_DOMAINS
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+       /* always show the domain in /proc */
+       return 1;
+}
+#endif  /* CONFIG_PCI_DOMAINS */
+
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+/*
+ * This is a bit of an annoying hack, and it assumes we only have
+ * the virt-pci (if anything). Which is true, but still.
+ */
+void *pci_root_bus_fwnode(struct pci_bus *bus);
+#define pci_root_bus_fwnode    pci_root_bus_fwnode
+#endif
+
+#endif  /* __ASM_UM_PCI_H */
index afd9b26..b5cf0ed 100644 (file)
@@ -16,6 +16,8 @@ struct task_struct;
 
 #include <linux/prefetch.h>
 
+#include <asm/cpufeatures.h>
+
 struct mm_struct;
 
 struct thread_struct {
@@ -90,12 +92,18 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry,
 struct cpuinfo_um {
        unsigned long loops_per_jiffy;
        int ipi_pipe[2];
+       int cache_alignment;
+       union {
+               __u32           x86_capability[NCAPINTS + NBUGINTS];
+               unsigned long   x86_capability_alignment;
+       };
 };
 
 extern struct cpuinfo_um boot_cpu_data;
 
 #define cpu_data (&boot_cpu_data)
 #define current_cpu_data boot_cpu_data
+#define cache_line_size()      (boot_cpu_data.cache_alignment)
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
 extern unsigned long get_wchan(struct task_struct *p);
index ff9c628..0422467 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/mm.h>
 
 #include <asm/tlbflush.h>
-#include <asm-generic/cacheflush.h>
+#include <asm/cacheflush.h>
 #include <asm-generic/tlb.h>
 
 #endif
index 36b33d6..f512704 100644 (file)
@@ -1,7 +1,22 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#include <asm-generic/xor.h>
+#ifndef _ASM_UM_XOR_H
+#define _ASM_UM_XOR_H
+
+#ifdef CONFIG_64BIT
+#undef CONFIG_X86_32
+#else
+#define CONFIG_X86_32 1
+#endif
+
+#include <asm/cpufeature.h>
+#include <../../x86/include/asm/xor.h>
 #include <linux/time-internal.h>
 
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#undef XOR_SELECT_TEMPLATE
 /* pick an arbitrary one - measuring isn't possible with inf-cpu */
 #define XOR_SELECT_TEMPLATE(x) \
        (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
+#endif
+
+#endif
index 759956a..b222266 100644 (file)
@@ -8,17 +8,11 @@
 #define __TIMER_INTERNAL_H__
 #include <linux/list.h>
 #include <asm/bug.h>
+#include <shared/timetravel.h>
 
 #define TIMER_MULTIPLIER 256
 #define TIMER_MIN_DELTA  500
 
-enum time_travel_mode {
-       TT_MODE_OFF,
-       TT_MODE_BASIC,
-       TT_MODE_INFCPU,
-       TT_MODE_EXTERNAL,
-};
-
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 struct time_travel_event {
        unsigned long long time;
@@ -27,8 +21,6 @@ struct time_travel_event {
        bool pending, onstack;
 };
 
-extern enum time_travel_mode time_travel_mode;
-
 void time_travel_sleep(void);
 
 static inline void
@@ -62,8 +54,6 @@ bool time_travel_del_event(struct time_travel_event *e);
 struct time_travel_event {
 };
 
-#define time_travel_mode TT_MODE_OFF
-
 static inline void time_travel_sleep(void)
 {
 }
diff --git a/arch/um/include/linux/virtio-uml.h b/arch/um/include/linux/virtio-uml.h
new file mode 100644 (file)
index 0000000..2f652fa
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#ifndef __VIRTIO_UML_H__
+#define __VIRTIO_UML_H__
+
+void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
+                                 bool no_vq_suspend);
+
+#endif /* __VIRTIO_UML_H__ */
index 07239e8..065829f 100644 (file)
@@ -17,6 +17,7 @@ enum um_irq_type {
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
+void sigio_run_timetravel_handlers(void);
 extern void free_irq_by_fd(int fd);
 extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
index 2888ec8..a2cfd42 100644 (file)
@@ -33,7 +33,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip,
                             int is_write, int is_user, int *code_out);
 
 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
-extern int smp_sigio_handler(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
 extern int is_syscall(unsigned long addr);
 
index 85a1cc2..bdb2869 100644 (file)
@@ -5,6 +5,7 @@
 #include <sysdep/archsetjmp.h>
 #include <os.h>
 
+extern int signals_enabled;
 extern int setjmp(jmp_buf);
 extern void longjmp(jmp_buf, int);
 
@@ -12,13 +13,12 @@ extern void longjmp(jmp_buf, int);
        longjmp(*buf, val);     \
 } while(0)
 
-#define UML_SETJMP(buf) ({ \
-       int n;     \
-       volatile int enable;    \
-       enable = get_signals(); \
-       n = setjmp(*buf); \
-       if(n != 0) \
-               set_signals_trace(enable); \
+#define UML_SETJMP(buf) ({                             \
+       int n, enable;                                  \
+       enable = *(volatile int *)&signals_enabled;     \
+       n = setjmp(*buf);                               \
+       if(n != 0)                                      \
+               set_signals_trace(enable);              \
        n; })
 
 #endif
index 13d86f9..60b84ed 100644 (file)
@@ -187,6 +187,9 @@ int os_poll(unsigned int n, const int *fds);
 extern void os_early_checks(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
+extern void get_host_cpu_features(
+       void (*flags_helper_func)(char *line),
+       void (*cache_helper_func)(char *line));
 
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
@@ -211,7 +214,6 @@ extern int os_protect_memory(void *addr, unsigned long len,
 extern int os_unmap_memory(void *addr, int len);
 extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
-extern void os_flush_stdout(void);
 extern int os_mincore(void *addr, unsigned long len);
 
 /* execvp.c */
@@ -237,12 +239,14 @@ extern void send_sigio_to_self(void);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
-extern int get_signals(void);
 extern int set_signals(int enable);
 extern int set_signals_trace(int enable);
 extern int os_is_signal_stack(void);
 extern void deliver_alarm(void);
 extern void register_pm_wake_signal(void);
+extern void block_signals_hard(void);
+extern void unblock_signals_hard(void);
+extern void mark_sigio_pending(void);
 
 /* util.c */
 extern void stack_protections(unsigned long address);
diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h
new file mode 100644 (file)
index 0000000..e5c3d69
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019-2021 Intel Corporation
+ */
+#ifndef _UM_TIME_TRAVEL_H_
+#define _UM_TIME_TRAVEL_H_
+
+enum time_travel_mode {
+       TT_MODE_OFF,
+       TT_MODE_BASIC,
+       TT_MODE_INFCPU,
+       TT_MODE_EXTERNAL,
+};
+
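+/*
+ * This header is also used by the userspace (os-Linux) side of UML,
+ * which sees kernel config symbols with a UML_CONFIG_ prefix instead
+ * of CONFIG_, hence the check for either spelling.
+ */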
+#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
+    defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
+extern enum time_travel_mode time_travel_mode;
+#else
+#define time_travel_mode TT_MODE_OFF
+#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
+
+#endif /* _UM_TIME_TRAVEL_H_ */
index e698e0c..1d18e4e 100644 (file)
@@ -17,18 +17,19 @@ extra-y := vmlinux.lds
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
        physmem.o process.o ptrace.o reboot.o sigio.o \
        signal.o syscall.o sysrq.o time.o tlb.o trap.o \
-       um_arch.o umid.o maccess.o kmsg_dump.o skas/
+       um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)    += gprof_syms.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
 
 USER_OBJS := config.o
 
 include arch/um/scripts/Makefile.rules
 
-targets := config.c config.tmp
+targets := config.c config.tmp capflags.c
 
 # Be careful with the below Sed code - sed is pitfall-rich!
 # We use sed to lower build requirements, for "embedded" builders for instance.
@@ -43,6 +44,15 @@ quiet_cmd_quote1 = QUOTE   $@
 $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
        $(call if_changed,quote2)
 
+quiet_cmd_mkcapflags = MKCAP   $@
+      cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
+
+cpufeature = $(src)/../../x86/include/asm/cpufeatures.h
+vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h
+
+$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE
+       $(call if_changed,mkcapflags)
+
 quiet_cmd_quote2 = QUOTE   $@
       cmd_quote2 = sed -e '/CONFIG/{'          \
                  -e 's/"CONFIG"//'            \
diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c
new file mode 100644 (file)
index 0000000..7220615
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <asm/iomap.h>
+#include <asm-generic/pci_iomap.h>
+
+void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
+                              unsigned int nr)
+{
+       return NULL;
+}
index 82af519..a8873d9 100644 (file)
@@ -56,7 +56,7 @@ struct irq_entry {
 
 static DEFINE_SPINLOCK(irq_lock);
 static LIST_HEAD(active_fds);
-static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
+static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
 static bool irqs_suspended;
 
 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
@@ -101,10 +101,12 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
        if (!reg->timetravel_handler)
                return false;
 
-       /* prevent nesting - we'll get it again later when we SIGIO ourselves */
-       if (reg->pending_on_resume)
-               return true;
-
+       /*
+        * Handle all messages - we might get multiple even while
+        * interrupts are already suspended, due to suspend order
+        * etc. Note that time_travel_add_irq_event() will not add
+        * an event twice: if it's pending already, "first wins".
+        */
        reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);
 
        if (!reg->event.pending)
@@ -123,7 +125,8 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
 #endif
 
 static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
-                             struct uml_pt_regs *regs)
+                             struct uml_pt_regs *regs,
+                             bool timetravel_handlers_only)
 {
        struct irq_reg *reg = &entry->reg[t];
 
@@ -136,18 +139,29 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
        if (irq_do_timetravel_handler(entry, t))
                return;
 
-       if (irqs_suspended)
+       /*
+        * If we're called to only run time-travel handlers then don't
+        * actually proceed but mark sigio as pending (if applicable).
+        * For suspend/resume, timetravel_handlers_only may be true
+        * despite time-travel not being configured and used.
+        */
+       if (timetravel_handlers_only) {
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+               mark_sigio_pending();
+#endif
                return;
+       }
 
        irq_io_loop(reg, regs);
 }
 
-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+static void _sigio_handler(struct uml_pt_regs *regs,
+                          bool timetravel_handlers_only)
 {
        struct irq_entry *irq_entry;
        int n, i;
 
-       if (irqs_suspended && !um_irq_timetravel_handler_used())
+       if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
                return;
 
        while (1) {
@@ -172,14 +186,20 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
                        irq_entry = os_epoll_get_data_pointer(i);
 
                        for (t = 0; t < NUM_IRQ_TYPES; t++)
-                               sigio_reg_handler(i, irq_entry, t, regs);
+                               sigio_reg_handler(i, irq_entry, t, regs,
+                                                 timetravel_handlers_only);
                }
        }
 
-       if (!irqs_suspended)
+       if (!timetravel_handlers_only)
                free_irqs();
 }
 
+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+{
+       _sigio_handler(regs, irqs_suspended);
+}
+
 static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
        struct irq_entry *walk;
@@ -399,7 +419,8 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
 
 void um_free_irq(int irq, void *dev)
 {
-       if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
+       if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
+                "freeing invalid irq %d", irq))
                return;
 
        free_irq_by_irq_and_dev(irq, dev);
@@ -467,6 +488,11 @@ int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
                               devname, dev_id, timetravel_handler);
 }
 EXPORT_SYMBOL(um_request_irq_tt);
+
+void sigio_run_timetravel_handlers(void)
+{
+       _sigio_handler(NULL, true);
+}
 #endif
 
 #ifdef CONFIG_PM_SLEEP
@@ -623,7 +649,7 @@ void __init init_IRQ(void)
 
        irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
 
-       for (i = 1; i < NR_IRQS; i++)
+       for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
        /* Initialize EPOLL Loop */
        os_setup_epoll();
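
The hunks above split SIGIO processing in two: a registration that has a
timetravel_handler is serviced immediately, while everything else is merely
marked pending when the pass runs in "time-travel handlers only" mode and is
handled by the normal pass later. A stand-alone sketch of that decision,
using invented names rather than the kernel's irq_entry/irq_reg structures:

#include <stdbool.h>
#include <stdio.h>

struct reg {
        int irq;
        void (*timetravel_handler)(int irq);    /* may be NULL */
        void (*handler)(int irq);
};

static bool sigio_pending;

static void dispatch(struct reg *r, bool timetravel_handlers_only)
{
        /* a registration with a timetravel_handler is serviced right away */
        if (r->timetravel_handler) {
                r->timetravel_handler(r->irq);
                return;
        }

        /* otherwise only note that a SIGIO arrived; the full pass runs later */
        if (timetravel_handlers_only) {
                sigio_pending = true;
                return;
        }

        r->handler(r->irq);
}

static void tt_handler(int irq)   { printf("timetravel handler, irq %d\n", irq); }
static void full_handler(int irq) { printf("full handler, irq %d\n", irq); }

int main(void)
{
        struct reg a = { .irq = 5, .timetravel_handler = tt_handler, .handler = full_handler };
        struct reg b = { .irq = 9, .handler = full_handler };

        dispatch(&a, true);     /* runs the time-travel handler */
        dispatch(&b, true);     /* only sets sigio_pending */
        printf("pending: %d\n", sigio_pending);
        return 0;
}
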
index 8ade54a..b1e5634 100644 (file)
@@ -7,7 +7,7 @@
 #include <os.h>
 
 EXPORT_SYMBOL(set_signals);
-EXPORT_SYMBOL(get_signals);
+EXPORT_SYMBOL(signals_enabled);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
index 592cdb1..5afac0f 100644 (file)
@@ -29,7 +29,7 @@ stub_clone_handler(void)
        long err;
 
        err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-                           (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
+                           (unsigned long)data + UM_KERN_PAGE_SIZE / 2);
        if (err) {
                data->parent_err = err;
                goto done;
index 2dec915..6c76df9 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/current.h>
 #include <asm/page.h>
 #include <kern_util.h>
+#include <asm/futex.h>
 #include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
@@ -248,3 +249,138 @@ long __strnlen_user(const void __user *str, long len)
        return 0;
 }
 EXPORT_SYMBOL(__strnlen_user);
+
+/**
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ *                       argument and comparison of the previous
+ *                       futex value with another constant.
+ *
+ * @op:                operation to execute (FUTEX_OP_SET, FUTEX_OP_ADD,
+ *                     FUTEX_OP_OR, FUTEX_OP_ANDN or FUTEX_OP_XOR)
+ * @oparg:     argument to the operation
+ * @oval:      location where the old value of *@uaddr is stored
+ * @uaddr:     pointer to user space address
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
+ */
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+{
+       int oldval, ret;
+       struct page *page;
+       unsigned long addr = (unsigned long) uaddr;
+       pte_t *pte;
+
+       ret = -EFAULT;
+       if (!access_ok(uaddr, sizeof(*uaddr)))
+               return -EFAULT;
+       preempt_disable();
+       pte = maybe_map(addr, 1);
+       if (pte == NULL)
+               goto out_inuser;
+
+       page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+       pagefault_disable();
+       addr = (unsigned long) page_address(page) +
+                       (((unsigned long) addr) & ~PAGE_MASK);
+#else
+       addr = (unsigned long) kmap_atomic(page) +
+               ((unsigned long) addr & ~PAGE_MASK);
+#endif
+       uaddr = (u32 *) addr;
+       oldval = *uaddr;
+
+       ret = 0;
+
+       switch (op) {
+       case FUTEX_OP_SET:
+               *uaddr = oparg;
+               break;
+       case FUTEX_OP_ADD:
+               *uaddr += oparg;
+               break;
+       case FUTEX_OP_OR:
+               *uaddr |= oparg;
+               break;
+       case FUTEX_OP_ANDN:
+               *uaddr &= ~oparg;
+               break;
+       case FUTEX_OP_XOR:
+               *uaddr ^= oparg;
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+#ifdef CONFIG_64BIT
+       pagefault_enable();
+#else
+       kunmap_atomic((void *)addr);
+#endif
+
+out_inuser:
+       preempt_enable();
+
+       if (ret == 0)
+               *oval = oldval;
+
+       return ret;
+}
+EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ *                             uaddr with newval if the current value is
+ *                             oldval.
+ * @uval:      pointer to store content of @uaddr
+ * @uaddr:     pointer to user space address
+ * @oldval:    old value
+ * @newval:    new value to store to @uaddr
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
+ */
+
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       struct page *page;
+       pte_t *pte;
+       int ret = -EFAULT;
+
+       if (!access_ok(uaddr, sizeof(*uaddr)))
+               return -EFAULT;
+
+       preempt_disable();
+       pte = maybe_map((unsigned long) uaddr, 1);
+       if (pte == NULL)
+               goto out_inatomic;
+
+       page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+       pagefault_disable();
+       uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
+#else
+       uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK);
+#endif
+
+       *uval = *uaddr;
+
+       ret = cmpxchg(uaddr, oldval, newval);
+
+#ifdef CONFIG_64BIT
+       pagefault_enable();
+#else
+       kunmap_atomic(uaddr);
+#endif
+       ret = 0;
+
+out_inatomic:
+       preempt_enable();
+       return ret;
+}
+EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
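
Only the page lookup and mapping above are UML-specific; the five FUTEX_OP_*
operations named in the kernel-doc are plain read-modify-write arithmetic on a
32-bit word. A user-space model of just that arithmetic (the FUTEX_OP_*
constants come from the futex UAPI header, everything else is illustrative):

#include <stdio.h>
#include <stdint.h>
#include <linux/futex.h>

static int do_futex_op(uint32_t *val, int op, uint32_t oparg, uint32_t *oldval)
{
        *oldval = *val;

        switch (op) {
        case FUTEX_OP_SET:  *val  =  oparg; break;
        case FUTEX_OP_ADD:  *val +=  oparg; break;
        case FUTEX_OP_OR:   *val |=  oparg; break;
        case FUTEX_OP_ANDN: *val &= ~oparg; break;
        case FUTEX_OP_XOR:  *val ^=  oparg; break;
        default:
                return -1;      /* the kernel returns -ENOSYS here */
        }
        return 0;
}

int main(void)
{
        uint32_t word = 0x0f, old;

        do_futex_op(&word, FUTEX_OP_ANDN, 0x3, &old);
        printf("old=%#x new=%#x\n", old, word);   /* old=0xf new=0xc */
        return 0;
}
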
index e0cdb96..fddd1de 100644 (file)
@@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
        int ret;
 
        /*
-        * Poll outside the locked section (if we're not called to only read
-        * the response) so we can get interrupts for e.g. virtio while we're
-        * here, but then we need to lock to not get interrupted between the
-        * read of the message and write of the ACK.
+        * We can't unlock here, but interrupt signals with a timetravel_handler
+        * (see um_request_irq_tt) get to the timetravel_handler anyway.
         */
        if (mode != TTMH_READ) {
-               bool disabled = irqs_disabled();
+               BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
 
-               BUG_ON(mode == TTMH_IDLE && !disabled);
-
-               if (disabled)
-                       local_irq_enable();
                while (os_poll(1, &time_travel_ext_fd) != 0) {
                        /* nothing */
                }
-               if (disabled)
-                       local_irq_disable();
        }
 
        ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
@@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
                .time = time,
                .seq = mseq,
        };
-       unsigned long flags;
 
        /*
-        * We need to save interrupts here and only restore when we
-        * got the ACK - otherwise we can get interrupted and send
-        * another request while we're still waiting for an ACK, but
-        * the peer doesn't know we got interrupted and will send
-        * the ACKs in the same order as the message, but we'd need
-        * to see them in the opposite order ...
+        * We need to block even the timetravel handlers of SIGIO here and
+        * only restore their use when we got the ACK - otherwise we may
+        * (will) get interrupted by that, try to queue the IRQ for future
+        * processing and thus send another request while we're still waiting
+        * for an ACK, but the peer doesn't know we got interrupted and will
+        * send the ACKs in the same order as the message, but we'd need to
+        * see them in the opposite order ...
         *
         * This wouldn't matter *too* much, but some ACKs carry the
         * current time (for UM_TIMETRAVEL_GET) and getting another
@@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
         * The sequence number assignment that happens here lets us
         * debug such message handling issues more easily.
         */
-       local_irq_save(flags);
+       block_signals_hard();
        os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
 
        while (msg.op != UM_TIMETRAVEL_ACK)
@@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
 
        if (op == UM_TIMETRAVEL_GET)
                time_travel_set_time(msg.time);
-       local_irq_restore(flags);
+       unblock_signals_hard();
 
        return msg.time;
 }
@@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void)
        while ((e = list_first_entry_or_null(&time_travel_irqs,
                                             struct time_travel_event,
                                             list))) {
-               WARN(e->time != time_travel_time,
-                    "time moved from %lld to %lld before IRQ delivery\n",
-                    time_travel_time, e->time);
                list_del(&e->list);
                e->pending = false;
                e->fn(e);
index 9512253..a149a5e 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/panic_notifier.h>
 #include <linux/seq_file.h>
@@ -17,6 +18,7 @@
 #include <linux/suspend.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <as-layout.h>
@@ -51,9 +53,13 @@ static void __init add_arg(char *arg)
  */
 struct cpuinfo_um boot_cpu_data = {
        .loops_per_jiffy        = 0,
-       .ipi_pipe               = { -1, -1 }
+       .ipi_pipe               = { -1, -1 },
+       .cache_alignment        = L1_CACHE_BYTES,
+       .x86_capability         = { 0 }
 };
 
+EXPORT_SYMBOL(boot_cpu_data);
+
 union thread_union cpu0_irqstack
        __section(".data..init_irqstack") =
                { .thread_info = INIT_THREAD_INFO(init_task) };
@@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-       int index = 0;
+       int i = 0;
 
-       seq_printf(m, "processor\t: %d\n", index);
+       seq_printf(m, "processor\t: %d\n", i);
        seq_printf(m, "vendor_id\t: User Mode Linux\n");
        seq_printf(m, "model name\t: UML\n");
        seq_printf(m, "mode\t\t: skas\n");
        seq_printf(m, "host\t\t: %s\n", host_info);
-       seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+       seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
+       seq_printf(m, "flags\t\t:");
+       for (i = 0; i < 32*NCAPINTS; i++)
+               if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
+                       seq_printf(m, " %s", x86_cap_flags[i]);
+       seq_printf(m, "\n");
+       seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment);
+       seq_printf(m, "bogomips\t: %lu.%02lu\n",
                   loops_per_jiffy/(500000/HZ),
                   (loops_per_jiffy/(5000/HZ)) % 100);
 
+
        return 0;
 }
 
@@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem);
 
 #define MIN_VMALLOC (32 * 1024 * 1024)
 
+static void parse_host_cpu_flags(char *line)
+{
+       int i;
+       for (i = 0; i < 32*NCAPINTS; i++) {
+               if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i]))
+                       set_cpu_cap(&boot_cpu_data, i);
+       }
+}
+static void parse_cache_line(char *line)
+{
+       long res;
+       char *to_parse = strstr(line, ":");
+       if (to_parse) {
+               to_parse++;
+               while (*to_parse != 0 && isspace(*to_parse)) {
+                       to_parse++;
+               }
+               if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res))
+                       boot_cpu_data.cache_alignment = res;
+               else
+                       boot_cpu_data.cache_alignment = L1_CACHE_BYTES;
+       }
+}
+
 int __init linux_main(int argc, char **argv)
 {
        unsigned long avail, diff;
@@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv)
        /* OS sanity checks that need to happen before the kernel runs */
        os_early_checks();
 
+       get_host_cpu_features(parse_host_cpu_flags, parse_cache_line);
+
        brk_start = (unsigned long) sbrk(0);
 
        /*
index 9fa6e41..32e88ba 100644 (file)
@@ -64,7 +64,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
                goto out_close;
        }
 
-       sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+       sp = stack + UM_KERN_PAGE_SIZE;
        data.pre_exec = pre_exec;
        data.pre_data = pre_data;
        data.argv = argv;
@@ -120,7 +120,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
        if (stack == 0)
                return -ENOMEM;
 
-       sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+       sp = stack + UM_KERN_PAGE_SIZE;
        pid = clone(proc, (void *) sp, flags, arg);
        if (pid < 0) {
                err = -errno;
index 96f511d..6de99bb 100644 (file)
@@ -18,6 +18,7 @@
 #include <sysdep/mcontext.h>
 #include <um_malloc.h>
 #include <sys/ucontext.h>
+#include <timetravel.h>
 
 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
        [SIGTRAP]       = relay_signal,
@@ -62,17 +63,30 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 #define SIGALRM_BIT 1
 #define SIGALRM_MASK (1 << SIGALRM_BIT)
 
-static int signals_enabled;
+int signals_enabled;
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+static int signals_blocked;
+#else
+#define signals_blocked false
+#endif
 static unsigned int signals_pending;
 static unsigned int signals_active = 0;
 
 void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
-       int enabled;
+       int enabled = signals_enabled;
 
-       enabled = signals_enabled;
-       if (!enabled && (sig == SIGIO)) {
-               signals_pending |= SIGIO_MASK;
+       if ((signals_blocked || !enabled) && (sig == SIGIO)) {
+               /*
+                * In TT_MODE_EXTERNAL, need to still call time-travel
+                * handlers unless signals are also blocked for the
+                * external time message processing. This will mark
+                * signals_pending by itself (only if necessary.)
+                */
+               if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
+                       sigio_run_timetravel_handlers();
+               else
+                       signals_pending |= SIGIO_MASK;
                return;
        }
 
@@ -129,7 +143,7 @@ void set_sigstack(void *sig_stack, int size)
        stack_t stack = {
                .ss_flags = 0,
                .ss_sp = sig_stack,
-               .ss_size = size - sizeof(void *)
+               .ss_size = size
        };
 
        if (sigaltstack(&stack, NULL) != 0)
@@ -334,11 +348,6 @@ void unblock_signals(void)
        }
 }
 
-int get_signals(void)
-{
-       return signals_enabled;
-}
-
 int set_signals(int enable)
 {
        int ret;
@@ -368,6 +377,39 @@ int set_signals_trace(int enable)
        return ret;
 }
 
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+void mark_sigio_pending(void)
+{
+       signals_pending |= SIGIO_MASK;
+}
+
+void block_signals_hard(void)
+{
+       if (signals_blocked)
+               return;
+       signals_blocked = 1;
+       barrier();
+}
+
+void unblock_signals_hard(void)
+{
+       if (!signals_blocked)
+               return;
+       /* Must be set to 0 before we check the pending bits etc. */
+       signals_blocked = 0;
+       barrier();
+
+       if (signals_pending && signals_enabled) {
+               /* this is a bit inefficient, but that's not really important */
+               block_signals();
+               unblock_signals();
+       } else if (signals_pending & SIGIO_MASK) {
+               /* we need to run time-travel handlers even if not enabled */
+               sigio_run_timetravel_handlers();
+       }
+}
+#endif
+
 int os_is_signal_stack(void)
 {
        stack_t ss;
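
block_signals_hard()/unblock_signals_hard() are a deferral scheme: while the
flag is set, an incoming SIGIO is only remembered, and the deferred work is
replayed once the flag is cleared again. The same pattern as a small
user-space program (the handler body and the raise() trigger are invented
for illustration):

#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t blocked;
static volatile sig_atomic_t pending;

static void sigio_handler(int sig)
{
        (void)sig;
        if (blocked) {
                pending = 1;                    /* like mark_sigio_pending() */
                return;
        }
        write(STDOUT_FILENO, "handled\n", 8);   /* the real work */
}

static void block_hard(void)
{
        blocked = 1;
        __sync_synchronize();                   /* stand-in for barrier() */
}

static void unblock_hard(void)
{
        blocked = 0;
        __sync_synchronize();
        if (pending) {                          /* replay what was deferred */
                pending = 0;
                sigio_handler(SIGIO);
        }
}

int main(void)
{
        signal(SIGIO, sigio_handler);
        block_hard();
        raise(SIGIO);           /* arrives while blocked: only marked pending */
        unblock_hard();         /* ... and is handled here */
        return 0;
}
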
index fba674f..87d3129 100644 (file)
@@ -327,7 +327,7 @@ int start_userspace(unsigned long stub_stack)
        }
 
        /* set stack pointer to the end of the stack page, so it can grow downwards */
-       sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
+       sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
 
        flags = CLONE_FILES | SIGCHLD;
 
index f79dc33..8a72c99 100644 (file)
@@ -321,6 +321,38 @@ static void __init check_coredump_limit(void)
                os_info("%llu\n", (unsigned long long)lim.rlim_max);
 }
 
+void  __init get_host_cpu_features(
+               void (*flags_helper_func)(char *line),
+               void (*cache_helper_func)(char *line))
+{
+       FILE *cpuinfo;
+       char *line = NULL;
+       size_t len = 0;
+       int done_parsing = 0;
+
+       cpuinfo = fopen("/proc/cpuinfo", "r");
+       if (cpuinfo == NULL) {
+               os_info("Failed to get host CPU features\n");
+       } else {
+               while ((getline(&line, &len, cpuinfo)) != -1) {
+                       if (strstr(line, "flags")) {
+                               flags_helper_func(line);
+                               done_parsing++;
+                       }
+                       if (strstr(line, "cache_alignment")) {
+                               cache_helper_func(line);
+                               done_parsing++;
+                       }
+                       free(line);
+                       line = NULL;
+                       if (done_parsing > 1)
+                               break;
+               }
+               fclose(cpuinfo);
+       }
+}
+
+
 void __init os_early_checks(void)
 {
        int pid;
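
get_host_cpu_features() runs on the host side, so it is ordinary libc code:
scan /proc/cpuinfo with getline() and hand the "flags" and "cache_alignment"
lines to the two callbacks supplied by linux_main(). The same scan as a
stand-alone program, with made-up printing callbacks standing in for
parse_host_cpu_flags()/parse_cache_line():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

static void handle_flags(char *line)
{
        char *p = strchr(line, ':');

        if (p)
                printf("host flags     :%s", p + 1);
}

static void handle_cache(char *line)
{
        char *p = strchr(line, ':');

        if (!p)
                return;
        for (p++; *p && isspace((unsigned char)*p); p++)
                ;
        printf("cache alignment: %lu\n", strtoul(p, NULL, 10));
}

int main(void)
{
        FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
        char *line = NULL;
        size_t len = 0;
        int done = 0;

        if (!cpuinfo)
                return 1;

        while (done < 2 && getline(&line, &len, cpuinfo) != -1) {
                if (strncmp(line, "flags", 5) == 0) {
                        handle_flags(line);
                        done++;
                } else if (strncmp(line, "cache_alignment", 15) == 0) {
                        handle_cache(line);
                        done++;
                }
        }
        free(line);
        fclose(cpuinfo);
        return 0;
}
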
index 1db7913..b3c1ae0 100644 (file)
@@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64
 
 # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
 
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
+LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64
 LINK-y += -m64
 
 endif
index ec0e861..5baebf6 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menu "Input device support"
-       depends on !UML
 
 config INPUT
        tristate "Generic input layer (needed for keyboard, mouse, ...)" if EXPERT
index 4761795..5a2c2fb 100644 (file)
@@ -4,6 +4,7 @@
 #
 config GAMEPORT
        tristate "Gameport support"
+       depends on !UML
        help
          Gameport support is for the standard 15-pin PC gameport. If you
          have a joystick, gamepad, gameport card, a soundcard with a gameport
index 7dfe8ea..3b23078 100644 (file)
@@ -4,6 +4,7 @@
 #
 menuconfig INPUT_JOYSTICK
        bool "Joysticks/Gamepads"
+       depends on !UML
        help
          If you have a joystick, 6dof controller, gamepad, steering wheel,
          weapon control system or something like that you can say Y here
index 476c112..23cc988 100644 (file)
@@ -12,9 +12,8 @@ if TTY
 
 config VT
        bool "Virtual terminal" if EXPERT
-       depends on !UML
        select INPUT
-       default y
+       default y if !UML
        help
          If you say Y here, you will get support for terminal devices with
          display and keyboard devices. These are called "virtual" because you
@@ -78,7 +77,7 @@ config VT_CONSOLE_SLEEP
 
 config HW_CONSOLE
        bool
-       depends on VT && !UML
+       depends on VT
        default y
 
 config VT_HW_CONSOLE_BINDING
index ee33b8e..840d981 100644 (file)
@@ -9,7 +9,7 @@ config VGA_CONSOLE
        bool "VGA text console" if EXPERT || !X86
        depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC &&  !SUPERH && \
                (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
-               !ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390
+               !ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390 && !UML
        default y
        help
          Saying Y here will allow you to use Linux in text mode through a
diff --git a/include/asm-generic/logic_io.h b/include/asm-generic/logic_io.h
new file mode 100644 (file)
index 0000000..a53116b
--- /dev/null
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: johannes@sipsolutions.net
+ */
+#ifndef _LOGIC_IO_H
+#define _LOGIC_IO_H
+#include <linux/types.h>
+
+/* include this file into asm/io.h */
+
+#ifdef CONFIG_INDIRECT_IOMEM
+
+#ifdef CONFIG_INDIRECT_IOMEM_FALLBACK
+/*
+ * If you want emulated IO memory to fall back to 'normal' IO memory
+ * if a region wasn't registered as emulated, then you need to have
+ * all of the real_* functions implemented.
+ */
+#if !defined(real_ioremap) || !defined(real_iounmap) || \
+    !defined(real_raw_readb) || !defined(real_raw_writeb) || \
+    !defined(real_raw_readw) || !defined(real_raw_writew) || \
+    !defined(real_raw_readl) || !defined(real_raw_writel) || \
+    (defined(CONFIG_64BIT) && \
+     (!defined(real_raw_readq) || !defined(real_raw_writeq))) || \
+    !defined(real_memset_io) || \
+    !defined(real_memcpy_fromio) || \
+    !defined(real_memcpy_toio)
+#error "Must provide fallbacks for real IO memory access"
+#endif /* defined ... */
+#endif /* CONFIG_INDIRECT_IOMEM_FALLBACK */
+
+#define ioremap ioremap
+void __iomem *ioremap(phys_addr_t offset, size_t size);
+
+#define iounmap iounmap
+void iounmap(void __iomem *addr);
+
+#define __raw_readb __raw_readb
+u8 __raw_readb(const volatile void __iomem *addr);
+
+#define __raw_readw __raw_readw
+u16 __raw_readw(const volatile void __iomem *addr);
+
+#define __raw_readl __raw_readl
+u32 __raw_readl(const volatile void __iomem *addr);
+
+#ifdef CONFIG_64BIT
+#define __raw_readq __raw_readq
+u64 __raw_readq(const volatile void __iomem *addr);
+#endif /* CONFIG_64BIT */
+
+#define __raw_writeb __raw_writeb
+void __raw_writeb(u8 value, volatile void __iomem *addr);
+
+#define __raw_writew __raw_writew
+void __raw_writew(u16 value, volatile void __iomem *addr);
+
+#define __raw_writel __raw_writel
+void __raw_writel(u32 value, volatile void __iomem *addr);
+
+#ifdef CONFIG_64BIT
+#define __raw_writeq __raw_writeq
+void __raw_writeq(u64 value, volatile void __iomem *addr);
+#endif /* CONFIG_64BIT */
+
+#define memset_io memset_io
+void memset_io(volatile void __iomem *addr, int value, size_t size);
+
+#define memcpy_fromio memcpy_fromio
+void memcpy_fromio(void *buffer, const volatile void __iomem *addr,
+                  size_t size);
+
+#define memcpy_toio memcpy_toio
+void memcpy_toio(volatile void __iomem *addr, const void *buffer, size_t size);
+
+#endif /* CONFIG_INDIRECT_IOMEM */
+#endif /* _LOGIC_IO_H */
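
The header is designed to be included from an architecture's asm/io.h. When
CONFIG_INDIRECT_IOMEM_FALLBACK is enabled, the #error above forces the arch
to define all real_* accessors first, so that addresses outside any
registered emulated region fall through to real MMIO. A hypothetical fragment
(the arch_* helpers are invented; only the real_* names and their argument
order are taken from this header and lib/logic_iomem.c):

/* arch/foo/include/asm/io.h (hypothetical) */
#define real_ioremap(offset, size)      arch_phys_ioremap(offset, size)
#define real_iounmap(addr)              arch_phys_iounmap(addr)
#define real_raw_readb(addr)            arch_mmio_read8(addr)
#define real_raw_writeb(val, addr)      arch_mmio_write8(val, addr)
/* ... likewise for the w/l(/q) accessors, memset_io and the memcpy_*io pair */

#include <asm-generic/logic_io.h>
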
diff --git a/include/linux/logic_iomem.h b/include/linux/logic_iomem.h
new file mode 100644 (file)
index 0000000..3fa65c9
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: johannes@sipsolutions.net
+ */
+#ifndef __LOGIC_IOMEM_H
+#define __LOGIC_IOMEM_H
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+/**
+ * struct logic_iomem_ops - emulated IO memory ops
+ * @read: read an 8, 16, 32 or 64 bit quantity from the given offset,
+ *     size is given in bytes (1, 2, 4 or 8)
+ *     (64-bit only necessary if CONFIG_64BIT is set)
+ * @write: write an 8, 16, 32 or 64 bit quantity to the given offset,
+ *     size is given in bytes (1, 2, 4 or 8)
+ *     (64-bit only necessary if CONFIG_64BIT is set)
+ * @set: optional, for memset_io()
+ * @copy_from: optional, for memcpy_fromio()
+ * @copy_to: optional, for memcpy_toio()
+ * @unmap: optional, this region is getting unmapped
+ */
+struct logic_iomem_ops {
+       unsigned long (*read)(void *priv, unsigned int offset, int size);
+       void (*write)(void *priv, unsigned int offset, int size,
+                     unsigned long val);
+
+       void (*set)(void *priv, unsigned int offset, u8 value, int size);
+       void (*copy_from)(void *priv, void *buffer, unsigned int offset,
+                         int size);
+       void (*copy_to)(void *priv, unsigned int offset, const void *buffer,
+                       int size);
+
+       void (*unmap)(void *priv);
+};
+
+/**
+ * struct logic_iomem_region_ops - ops for an IO memory handler
+ * @map: map a range in the registered IO memory region, must
+ *     fill *ops with the ops and may fill *priv to be passed
+ *     to the ops. The offset is given as the offset into the
+ *     registered resource region.
+ *     The return value is negative for errors, or >= 0 for
+ *     success. On success, the return value is added to the
+ *     offset for later ops, to allow for partial mappings.
+ */
+struct logic_iomem_region_ops {
+       long (*map)(unsigned long offset, size_t size,
+                   const struct logic_iomem_ops **ops,
+                   void **priv);
+};
+
+/**
+ * logic_iomem_add_region - register an IO memory region
+ * @resource: the resource description for this region
+ * @ops: the IO memory mapping ops for this resource
+ */
+int logic_iomem_add_region(struct resource *resource,
+                          const struct logic_iomem_region_ops *ops);
+
+#endif /* __LOGIC_IOMEM_H */
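
The flow the kernel-doc above describes is two-step: a backend claims a
physical window once with logic_iomem_add_region(), and its map() callback
later supplies per-mapping ops whenever something ioremap()s into that
window. A sketch assuming a trivial RAM-backed region (the resource address,
the size and all demo_* names are invented):

#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/logic_iomem.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/string.h>

static u8 demo_backing[SZ_64K];

static unsigned long demo_read(void *priv, unsigned int offset, int size)
{
        unsigned long val = 0;

        memcpy(&val, demo_backing + offset, size);  /* little-endian host assumed */
        return val;
}

static void demo_write(void *priv, unsigned int offset, int size,
                       unsigned long val)
{
        memcpy(demo_backing + offset, &val, size);
}

static const struct logic_iomem_ops demo_ops = {
        .read = demo_read,
        .write = demo_write,
};

static long demo_map(unsigned long offset, size_t size,
                     const struct logic_iomem_ops **ops, void **priv)
{
        *ops = &demo_ops;
        *priv = NULL;
        return 0;       /* no extra offset added to the mapping cookie */
}

static const struct logic_iomem_region_ops demo_region_ops = {
        .map = demo_map,
};

static struct resource demo_res =
        DEFINE_RES_MEM_NAMED(0x10000000, SZ_64K, "logic-iomem-demo");

static int __init demo_init(void)
{
        return logic_iomem_add_region(&demo_res, &demo_region_ops);
}
module_init(demo_init);
MODULE_LICENSE("GPL");

After this registration, ioremap() of an address inside demo_res hands back an
emulated cookie, and readl()/writel()/memcpy_fromio() on that cookie end up in
demo_read()/demo_write().
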
diff --git a/include/uapi/linux/virtio_pcidev.h b/include/uapi/linux/virtio_pcidev.h
new file mode 100644 (file)
index 0000000..89daa88
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef _UAPI_LINUX_VIRTIO_PCIDEV_H
+#define _UAPI_LINUX_VIRTIO_PCIDEV_H
+#include <linux/types.h>
+
+/**
+ * enum virtio_pcidev_ops - virtual PCI device operations
+ * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
+ *     the @data field should be filled in by the device (in little endian).
+ * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
+ *     the @data field contains the data to write (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
+ *     the @data field should be filled in by the device (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
+ *     the @data field contains the data to write (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
+ *     the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
+ * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for
+ *     the number
+ * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports
+ *     the 16- or 32-bit write that would otherwise be done into memory,
+ *     analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above
+ * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be
+ *     all zeroes) to signal the PME# pin.
+ */
+enum virtio_pcidev_ops {
+       VIRTIO_PCIDEV_OP_RESERVED = 0,
+       VIRTIO_PCIDEV_OP_CFG_READ,
+       VIRTIO_PCIDEV_OP_CFG_WRITE,
+       VIRTIO_PCIDEV_OP_MMIO_READ,
+       VIRTIO_PCIDEV_OP_MMIO_WRITE,
+       VIRTIO_PCIDEV_OP_MMIO_MEMSET,
+       VIRTIO_PCIDEV_OP_INT,
+       VIRTIO_PCIDEV_OP_MSI,
+       VIRTIO_PCIDEV_OP_PME,
+};
+
+/**
+ * struct virtio_pcidev_msg - virtio PCI device operation
+ * @op: the operation to do
+ * @bar: the bar (only with BAR read/write messages)
+ * @reserved: reserved
+ * @size: the size of the read/write (in bytes)
+ * @addr: the address to read/write
+ * @data: the data, normally @size long, but just one byte for
+ *     %VIRTIO_PCIDEV_OP_MMIO_MEMSET
+ *
+ * Note: the fields are all in native (CPU) endian; however, the
+ * @data values will often be in little endian (see the ops above).
+ */
+struct virtio_pcidev_msg {
+       __u8 op;
+       __u8 bar;
+       __u16 reserved;
+       __u32 size;
+       __u64 addr;
+       __u8 data[];
+};
+
+#endif /* _UAPI_LINUX_VIRTIO_PCIDEV_H */
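
struct virtio_pcidev_msg is just a header followed by inline @data, so
building a request is ordinary buffer assembly. A user-space sketch that
fills in a 2-byte config-space write to the PCI command register (offset 4);
how the buffer is then queued on the virtqueue is a transport detail and not
shown, and the example assumes the UAPI header from this series is installed:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/virtio_pcidev.h>

int main(void)
{
        __u16 command = 0x0006;         /* PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER */
        struct virtio_pcidev_msg *msg;

        msg = calloc(1, sizeof(*msg) + sizeof(command));
        if (!msg)
                return 1;

        msg->op   = VIRTIO_PCIDEV_OP_CFG_WRITE;
        msg->size = sizeof(command);
        msg->addr = 4;
        /* @data is little endian per the kernel-doc; true as-is on LE hosts */
        memcpy(msg->data, &command, sizeof(command));

        printf("op=%u size=%u addr=%llu\n", msg->op, msg->size,
               (unsigned long long)msg->addr);
        free(msg);
        return 0;
}
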
index ac3b306..d241fe4 100644 (file)
@@ -102,6 +102,20 @@ config INDIRECT_PIO
 
          When in doubt, say N.
 
+config INDIRECT_IOMEM
+       bool
+       help
+         This is selected by other options/architectures to provide the
+         emulated iomem accessors.
+
+config INDIRECT_IOMEM_FALLBACK
+       bool
+       depends on INDIRECT_IOMEM
+       help
+         If INDIRECT_IOMEM is selected, this enables falling back to plain
+         mmio accesses when the IO memory address is not a registered
+         emulated region.
+
 config CRC_CCITT
        tristate "CRC-CCITT functions"
        help
index 6d765d5..5efd1b4 100644 (file)
@@ -148,6 +148,8 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
 lib-y += logic_pio.o
 
+lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o
+
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c
new file mode 100644 (file)
index 0000000..b76b92d
--- /dev/null
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/logic_iomem.h>
+
+struct logic_iomem_region {
+       const struct resource *res;
+       const struct logic_iomem_region_ops *ops;
+       struct list_head list;
+};
+
+struct logic_iomem_area {
+       const struct logic_iomem_ops *ops;
+       void *priv;
+};
+
+#define AREA_SHIFT     24
+#define MAX_AREA_SIZE  (1 << AREA_SHIFT)
+#define MAX_AREAS      ((1ULL<<32) / MAX_AREA_SIZE)
+#define AREA_BITS      ((MAX_AREAS - 1) << AREA_SHIFT)
+#define AREA_MASK      (MAX_AREA_SIZE - 1)
+#ifdef CONFIG_64BIT
+#define IOREMAP_BIAS   0xDEAD000000000000UL
+#define IOREMAP_MASK   0xFFFFFFFF00000000UL
+#else
+#define IOREMAP_BIAS   0
+#define IOREMAP_MASK   0
+#endif
+
+static DEFINE_MUTEX(regions_mtx);
+static LIST_HEAD(regions_list);
+static struct logic_iomem_area mapped_areas[MAX_AREAS];
+
+int logic_iomem_add_region(struct resource *resource,
+                          const struct logic_iomem_region_ops *ops)
+{
+       struct logic_iomem_region *rreg;
+       int err;
+
+       if (WARN_ON(!resource || !ops))
+               return -EINVAL;
+
+       if (WARN_ON((resource->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM))
+               return -EINVAL;
+
+       rreg = kzalloc(sizeof(*rreg), GFP_KERNEL);
+       if (!rreg)
+               return -ENOMEM;
+
+       err = request_resource(&iomem_resource, resource);
+       if (err) {
+               kfree(rreg);
+               return -ENOMEM;
+       }
+
+       mutex_lock(&regions_mtx);
+       rreg->res = resource;
+       rreg->ops = ops;
+       list_add_tail(&rreg->list, &regions_list);
+       mutex_unlock(&regions_mtx);
+
+       return 0;
+}
+EXPORT_SYMBOL(logic_iomem_add_region);
+
+#ifndef CONFIG_LOGIC_IOMEM_FALLBACK
+static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
+{
+       WARN(1, "invalid ioremap(0x%llx, 0x%zx)\n",
+            (unsigned long long)offset, size);
+       return NULL;
+}
+
+static void real_iounmap(void __iomem *addr)
+{
+       WARN(1, "invalid iounmap for addr 0x%llx\n",
+            (unsigned long long)addr);
+}
+#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
+
+void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+       void __iomem *ret = NULL;
+       struct logic_iomem_region *rreg, *found = NULL;
+       int i;
+
+       mutex_lock(&regions_mtx);
+       list_for_each_entry(rreg, &regions_list, list) {
+               if (rreg->res->start > offset)
+                       continue;
+               if (rreg->res->end < offset + size - 1)
+                       continue;
+               found = rreg;
+               break;
+       }
+
+       if (!found)
+               goto out;
+
+       for (i = 0; i < MAX_AREAS; i++) {
+               long offs;
+
+               if (mapped_areas[i].ops)
+                       continue;
+
+               offs = rreg->ops->map(offset - found->res->start,
+                                     size, &mapped_areas[i].ops,
+                                     &mapped_areas[i].priv);
+               if (offs < 0) {
+                       mapped_areas[i].ops = NULL;
+                       break;
+               }
+
+               if (WARN_ON(!mapped_areas[i].ops)) {
+                       mapped_areas[i].ops = NULL;
+                       break;
+               }
+
+               ret = (void __iomem *)(IOREMAP_BIAS + (i << AREA_SHIFT) + offs);
+               break;
+       }
+out:
+       mutex_unlock(&regions_mtx);
+       if (ret)
+               return ret;
+       return real_ioremap(offset, size);
+}
+EXPORT_SYMBOL(ioremap);
+
+static inline struct logic_iomem_area *
+get_area(const volatile void __iomem *addr)
+{
+       unsigned long a = (unsigned long)addr;
+       unsigned int idx;
+
+       if (WARN_ON((a & IOREMAP_MASK) != IOREMAP_BIAS))
+               return NULL;
+
+       idx = (a & AREA_BITS) >> AREA_SHIFT;
+
+       if (mapped_areas[idx].ops)
+               return &mapped_areas[idx];
+
+       return NULL;
+}
+
+void iounmap(void __iomem *addr)
+{
+       struct logic_iomem_area *area = get_area(addr);
+
+       if (!area) {
+               real_iounmap(addr);
+               return;
+       }
+
+       if (area->ops->unmap)
+               area->ops->unmap(area->priv);
+
+       mutex_lock(&regions_mtx);
+       area->ops = NULL;
+       area->priv = NULL;
+       mutex_unlock(&regions_mtx);
+}
+EXPORT_SYMBOL(iounmap);
+
+#ifndef CONFIG_LOGIC_IOMEM_FALLBACK
+#define MAKE_FALLBACK(op, sz)                                          \
+static u##sz real_raw_read ## op(const volatile void __iomem *addr)    \
+{                                                                      \
+       WARN(1, "Invalid read" #op " at address %llx\n",                \
+            (unsigned long long)addr);                                 \
+       return (u ## sz)~0ULL;                                          \
+}                                                                      \
+                                                                       \
+void real_raw_write ## op(u ## sz val, volatile void __iomem *addr)    \
+{                                                                      \
+       WARN(1, "Invalid write" #op " of 0x%llx at address %llx\n",     \
+            (unsigned long long)val, (unsigned long long)addr);        \
+}                                                                      \
+
+MAKE_FALLBACK(b, 8);
+MAKE_FALLBACK(w, 16);
+MAKE_FALLBACK(l, 32);
+#ifdef CONFIG_64BIT
+MAKE_FALLBACK(q, 64);
+#endif
+
+static void real_memset_io(volatile void __iomem *addr, int value, size_t size)
+{
+       WARN(1, "Invalid memset_io at address 0x%llx\n",
+            (unsigned long long)addr);
+}
+
+static void real_memcpy_fromio(void *buffer, const volatile void __iomem *addr,
+                              size_t size)
+{
+       WARN(1, "Invalid memcpy_fromio at address 0x%llx\n",
+            (unsigned long long)addr);
+
+       memset(buffer, 0xff, size);
+}
+
+static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer,
+                            size_t size)
+{
+       WARN(1, "Invalid memcpy_toio at address 0x%llx\n",
+            (unsigned long long)addr);
+}
+#endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
+
+#define MAKE_OP(op, sz)                                                \
+u##sz __raw_read ## op(const volatile void __iomem *addr)              \
+{                                                                      \
+       struct logic_iomem_area *area = get_area(addr);                 \
+                                                                       \
+       if (!area)                                                      \
+               return real_raw_read ## op(addr);                       \
+                                                                       \
+       return (u ## sz) area->ops->read(area->priv,                    \
+                                        (unsigned long)addr & AREA_MASK,\
+                                        sz / 8);                       \
+}                                                                      \
+EXPORT_SYMBOL(__raw_read ## op);                                       \
+                                                                       \
+void __raw_write ## op(u ## sz val, volatile void __iomem *addr)       \
+{                                                                      \
+       struct logic_iomem_area *area = get_area(addr);                 \
+                                                                       \
+       if (!area) {                                                    \
+               real_raw_write ## op(val, addr);                        \
+               return;                                                 \
+       }                                                               \
+                                                                       \
+       area->ops->write(area->priv,                                    \
+                        (unsigned long)addr & AREA_MASK,               \
+                        sz / 8, val);                                  \
+}                                                                      \
+EXPORT_SYMBOL(__raw_write ## op)
+
+MAKE_OP(b, 8);
+MAKE_OP(w, 16);
+MAKE_OP(l, 32);
+#ifdef CONFIG_64BIT
+MAKE_OP(q, 64);
+#endif
+
+void memset_io(volatile void __iomem *addr, int value, size_t size)
+{
+       struct logic_iomem_area *area = get_area(addr);
+       unsigned long offs, start;
+
+       if (!area) {
+               real_memset_io(addr, value, size);
+               return;
+       }
+
+       start = (unsigned long)addr & AREA_MASK;
+
+       if (area->ops->set) {
+               area->ops->set(area->priv, start, value, size);
+               return;
+       }
+
+       for (offs = 0; offs < size; offs++)
+               area->ops->write(area->priv, start + offs, 1, value);
+}
+EXPORT_SYMBOL(memset_io);
+
+void memcpy_fromio(void *buffer, const volatile void __iomem *addr,
+                   size_t size)
+{
+       struct logic_iomem_area *area = get_area(addr);
+       u8 *buf = buffer;
+       unsigned long offs, start;
+
+       if (!area) {
+               real_memcpy_fromio(buffer, addr, size);
+               return;
+       }
+
+       start = (unsigned long)addr & AREA_MASK;
+
+       if (area->ops->copy_from) {
+               area->ops->copy_from(area->priv, buffer, start, size);
+               return;
+       }
+
+       for (offs = 0; offs < size; offs++)
+               buf[offs] = area->ops->read(area->priv, start + offs, 1);
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+void memcpy_toio(volatile void __iomem *addr, const void *buffer, size_t size)
+{
+       struct logic_iomem_area *area = get_area(addr);
+       const u8 *buf = buffer;
+       unsigned long offs, start;
+
+       if (!area) {
+               real_memcpy_toio(addr, buffer, size);
+               return;
+       }
+
+       start = (unsigned long)addr & AREA_MASK;
+
+       if (area->ops->copy_to) {
+               area->ops->copy_to(area->priv, start, buffer, size);
+               return;
+       }
+
+       for (offs = 0; offs < size; offs++)
+               area->ops->write(area->priv, start + offs, 1, buf[offs]);
+}
+EXPORT_SYMBOL(memcpy_toio);
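
The cookies handed out by the emulated ioremap() above are not real pointers:
they pack an area index and an offset under a recognisable bias, which is how
get_area() later routes accesses back to the right ops. The arithmetic,
reproduced with the same constants as a user-space program (assumes a 64-bit
host where unsigned long is 64 bits, matching the CONFIG_64BIT values):

#include <stdio.h>

#define AREA_SHIFT      24
#define MAX_AREA_SIZE   (1 << AREA_SHIFT)
#define MAX_AREAS       ((1ULL << 32) / MAX_AREA_SIZE)
#define AREA_BITS       ((MAX_AREAS - 1) << AREA_SHIFT)
#define AREA_MASK       (MAX_AREA_SIZE - 1)
#define IOREMAP_BIAS    0xDEAD000000000000UL
#define IOREMAP_MASK    0xFFFFFFFF00000000UL

int main(void)
{
        /* what ioremap() would return for area 3 at offset 0x1234 */
        unsigned long cookie = IOREMAP_BIAS + (3UL << AREA_SHIFT) + 0x1234;

        printf("cookie    %#lx\n", cookie);
        printf("is ours   %d\n", (cookie & IOREMAP_MASK) == IOREMAP_BIAS);
        printf("area idx  %lu\n",
               (unsigned long)((cookie & AREA_BITS) >> AREA_SHIFT));
        printf("offset    %#lx\n", cookie & AREA_MASK);
        return 0;
}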