Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 15 May 2020 20:10:06 +0000 (13:10 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 15 May 2020 20:10:06 +0000 (13:10 -0700)
Pull networking fixes from David Miller:

 1) Fix sk_psock reference count leak on receive, from Xiyu Yang.

 2) CONFIG_HNS should be invisible, from Geert Uytterhoeven.

 3) Don't allow locking route MTUs in ipv6, RFCs actually forbid this,
    from Maciej Żenczykowski.

 4) ipv4 route redirect backoff wasn't actually enforced, from Paolo
    Abeni.

 5) Fix netprio cgroup v2 leak, from Zefan Li.

 6) Fix infinite loop on rmmod in conntrack, from Florian Westphal.

 7) Fix tcp SO_RCVLOWAT hangs, from Eric Dumazet.

 8) Various bpf probe handling fixes, from Daniel Borkmann.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (68 commits)
  selftests: mptcp: pm: rm the right tmp file
  dpaa2-eth: properly handle buffer size restrictions
  bpf: Restrict bpf_trace_printk()'s %s usage and add %pks, %pus specifier
  bpf: Add bpf_probe_read_{user, kernel}_str() to do_refine_retval_range
  bpf: Restrict bpf_probe_read{, str}() only to archs where they work
  MAINTAINERS: Mark networking drivers as Maintained.
  ipmr: Add lockdep expression to ipmr_for_each_table macro
  ipmr: Fix RCU list debugging warning
  drivers: net: hamradio: Fix suspicious RCU usage warning in bpqether.c
  net: phy: broadcom: fix BCM54XX_SHD_SCR3_TRDDAPD value for BCM54810
  tcp: fix error recovery in tcp_zerocopy_receive()
  MAINTAINERS: Add Jakub to networking drivers.
  MAINTAINERS: another add of Karsten Graul for S390 networking
  drivers: ipa: fix typos for ipa_smp2p structure doc
  pppoe: only process PADT targeted at local interfaces
  selftests/bpf: Enforce returning 0 for fentry/fexit programs
  bpf: Enforce returning 0 for fentry/fexit progs
  net: stmmac: fix num_por initialization
  security: Fix the default value of secid_to_secctx hook
  libbpf: Fix register naming in PT_REGS s390 macros
  ...

270 files changed:
Documentation/virt/kvm/index.rst
Documentation/virt/kvm/running-nested-guests.rst [new file with mode: 0644]
MAINTAINERS
Makefile
arch/arm/include/asm/futex.h
arch/arm64/kernel/machine_kexec.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp/entry.S
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/sysreg-sr.c
arch/arm64/mm/hugetlbpage.c
arch/powerpc/kvm/powerpc.c
arch/riscv/Kconfig
arch/riscv/Kconfig.socs
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/mmio.h
arch/riscv/include/asm/mmiowb.h
arch/riscv/include/asm/perf_event.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/set_memory.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/cpu_ops.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/perf_event.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/stacktrace.c
arch/riscv/kernel/vdso/Makefile
arch/riscv/kernel/vdso/note.S [new file with mode: 0644]
arch/riscv/mm/init.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/include/asm/ftrace.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/unwind.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/ftrace.c
arch/x86/kernel/unwind_frame.c
arch/x86/kernel/unwind_orc.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/x86.c
arch/x86/mm/init_64.c
arch/x86/mm/pat/set_memory.c
block/bfq-iosched.c
block/blk-cgroup.c
block/blk-iocost.c
crypto/lrw.c
crypto/xts.c
drivers/acpi/ec.c
drivers/acpi/internal.h
drivers/acpi/sleep.c
drivers/amba/bus.c
drivers/base/component.c
drivers/base/core.c
drivers/base/dd.c
drivers/base/platform.c
drivers/bus/mhi/core/init.c
drivers/bus/mhi/core/internal.h
drivers/bus/mhi/core/main.c
drivers/bus/mhi/core/pm.c
drivers/firmware/efi/tpm.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpio-tegra.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
drivers/gpu/drm/amd/display/dc/os_types.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/drm_hdcp.c
drivers/gpu/drm/i915/display/intel_fbc.c
drivers/gpu/drm/i915/gem/i915_gem_domain.c
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_gt_irq.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_gem_evict.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/i915_scheduler.h
drivers/gpu/drm/i915/i915_scheduler_types.h
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/selftests/i915_vma.c
drivers/gpu/drm/ingenic/ingenic-drm.c
drivers/gpu/drm/meson/meson_drv.c
drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/virtio/virtgpu_drv.h
drivers/gpu/drm/virtio/virtgpu_gem.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/gpu/host1x/dev.c
drivers/hwmon/da9052-hwmon.c
drivers/hwmon/drivetemp.c
drivers/hwmon/nct7904.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/rdma_core.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types_async_fd.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_hw.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/sw/rxe/rxe_mmap.c
drivers/infiniband/sw/rxe/rxe_queue.c
drivers/interconnect/qcom/osm-l3.c
drivers/interconnect/qcom/sdm845.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/virtio-iommu.c
drivers/misc/mei/hw-me.c
drivers/misc/mei/hw-me.h
drivers/misc/mei/pci-me.c
drivers/mmc/core/block.c
drivers/mmc/core/queue.c
drivers/mmc/host/alcor.c
drivers/mmc/host/sdhci-acpi.c
drivers/mmc/host/sdhci-pci-gli.c
drivers/most/core.c
drivers/nvme/host/core.c
drivers/nvme/host/pci.c
drivers/phy/qualcomm/phy-qcom-qusb2.c
drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
drivers/regulator/core.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvscsi.c
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/staging/gasket/gasket_core.c
drivers/staging/ks7010/TODO
drivers/thunderbolt/usb4.c
drivers/tty/serial/bcm63xx_uart.c
drivers/tty/serial/xilinx_uartps.c
drivers/tty/vt/vt.c
drivers/usb/chipidea/ci_hdrc_msm.c
drivers/usb/core/devio.c
drivers/usb/core/message.c
drivers/usb/serial/garmin_gps.c
drivers/usb/serial/qcserial.c
drivers/usb/storage/unusual_uas.h
drivers/usb/typec/mux/intel_pmc_mux.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/mds_client.c
fs/ceph/quota.c
fs/configfs/dir.c
fs/coredump.c
fs/eventpoll.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/inode.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/meta_io.c
fs/gfs2/quota.c
fs/gfs2/quota.h
fs/gfs2/super.c
fs/gfs2/util.c
fs/io_uring.c
fs/splice.c
fs/vboxsf/super.c
include/drm/drm_modes.h
include/linux/amba/bus.h
include/linux/backing-dev-defs.h
include/linux/backing-dev.h
include/linux/ftrace.h
include/linux/host1x.h
include/linux/lsm_hook_defs.h
include/linux/memcontrol.h
include/linux/mhi.h
include/linux/platform_device.h
include/linux/sunrpc/gss_api.h
include/linux/sunrpc/gss_krb5.h
include/linux/sunrpc/xdr.h
include/sound/rawmidi.h
include/trace/events/gpu_mem.h
include/trace/events/wbt.h
init/Kconfig
init/initramfs.c
init/main.c
ipc/mqueue.c
ipc/util.c
kernel/fork.c
kernel/kcov.c
kernel/trace/Kconfig
kernel/trace/ftrace_internal.h
kernel/trace/preemptirq_delay_test.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_boot.c
kernel/trace/trace_kprobe.c
kernel/umh.c
lib/Kconfig.ubsan
mm/backing-dev.c
mm/gup.c
mm/kasan/Makefile
mm/kasan/kasan.h
mm/memcontrol.c
mm/mremap.c
mm/page_alloc.c
mm/percpu.c
mm/slub.c
mm/vmscan.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/gss_mech_switch.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/xdr.c
samples/trace_events/trace-events-sample.h
scripts/decodecode
scripts/gdb/linux/rbtree.py
sound/core/rawmidi.c
sound/firewire/amdtp-stream-trace.h
sound/pci/hda/patch_realtek.c
sound/usb/mixer_maps.c
sound/usb/quirks-table.h
sound/usb/quirks.c
tools/bootconfig/main.c
tools/cgroup/iocost_monitor.py
tools/objtool/check.c
tools/objtool/elf.h
tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
tools/testing/selftests/ftrace/ftracetest
tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/evmcs.h
tools/testing/selftests/kvm/lib/x86_64/vmx.c
tools/testing/selftests/lkdtm/run.sh
tools/testing/selftests/nsfs/pidns.c
tools/testing/selftests/wireguard/qemu/debug.config
virt/kvm/arm/hyp/aarch32.c
virt/kvm/arm/psci.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v2.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-mmio.h

index dcc2526..b6833c7 100644 (file)
@@ -28,3 +28,5 @@ KVM
    arm/index
 
    devices/index
+
+   running-nested-guests
diff --git a/Documentation/virt/kvm/running-nested-guests.rst b/Documentation/virt/kvm/running-nested-guests.rst
new file mode 100644 (file)
index 0000000..d0a1fc7
--- /dev/null
@@ -0,0 +1,276 @@
+==============================
+Running nested guests with KVM
+==============================
+
+A nested guest is the ability to run a guest inside another guest (it
+can be KVM-based or a different hypervisor).  The straightforward
+example is a KVM guest that in turn runs on a KVM guest (the rest of
+this document is built on this example)::
+
+              .----------------.  .----------------.
+              |                |  |                |
+              |      L2        |  |      L2        |
+              | (Nested Guest) |  | (Nested Guest) |
+              |                |  |                |
+              |----------------'--'----------------|
+              |                                    |
+              |       L1 (Guest Hypervisor)        |
+              |          KVM (/dev/kvm)            |
+              |                                    |
+      .------------------------------------------------------.
+      |                 L0 (Host Hypervisor)                 |
+      |                    KVM (/dev/kvm)                    |
+      |------------------------------------------------------|
+      |        Hardware (with virtualization extensions)     |
+      '------------------------------------------------------'
+
+Terminology:
+
+- L0 – level-0; the bare metal host, running KVM
+
+- L1 – level-1 guest; a VM running on L0; also called the "guest
+  hypervisor", as it itself is capable of running KVM.
+
+- L2 – level-2 guest; a VM running on L1, this is the "nested guest"
+
+.. note:: The above diagram is modelled after the x86 architecture;
+          s390x, ppc64 and other architectures are likely to have
+          a different design for nesting.
+
+          For example, s390x always has an LPAR (LogicalPARtition)
+          hypervisor running on bare metal, adding another layer and
+          resulting in at least four levels in a nested setup — L0 (bare
+          metal, running the LPAR hypervisor), L1 (host hypervisor), L2
+          (guest hypervisor), L3 (nested guest).
+
+          This document will stick with the three-level terminology (L0,
+          L1, and L2) for all architectures; and will largely focus on
+          x86.
+
+
+Use Cases
+---------
+
+There are several scenarios where nested KVM can be useful, to name a
+few:
+
+- As a developer, you want to test your software on different operating
+  systems (OSes).  Instead of renting multiple VMs from a Cloud
+  Provider, using nested KVM lets you rent a large enough "guest
+  hypervisor" (level-1 guest).  This in turn allows you to create
+  multiple nested guests (level-2 guests), running different OSes, on
+  which you can develop and test your software.
+
+- Live migration of "guest hypervisors" and their nested guests, for
+  load balancing, disaster recovery, etc.
+
+- VM image creation tools (e.g. ``virt-install``,  etc) often run
+  their own VM, and users expect these to work inside a VM.
+
+- Some OSes use virtualization internally for security (e.g. to let
+  applications run safely in isolation).
+
+
+Enabling "nested" (x86)
+-----------------------
+
+From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled
+by default for Intel and AMD.  (Though your Linux distribution might
+override this default.)
+
+In case you are running a Linux kernel older than v4.19, to enable
+nesting, set the ``nested`` KVM module parameter to ``Y`` or ``1``.  To
+persist this setting across reboots, you can add it in a config file, as
+shown below:
+
+1. On the bare metal host (L0), list the kernel modules and ensure that
+   the KVM modules::
+
+    $ lsmod | grep -i kvm
+    kvm_intel             133627  0
+    kvm                   435079  1 kvm_intel
+
+2. Show information for ``kvm_intel`` module::
+
+    $ modinfo kvm_intel | grep -i nested
+    parm:           nested:bool
+
+3. For the nested KVM configuration to persist across reboots, place the
+   below in ``/etc/modprobed/kvm_intel.conf`` (create the file if it
+   doesn't exist)::
+
+    $ cat /etc/modprobe.d/kvm_intel.conf
+    options kvm-intel nested=y
+
+4. Unload and re-load the KVM Intel module::
+
+    $ sudo rmmod kvm-intel
+    $ sudo modprobe kvm-intel
+
+5. Verify if the ``nested`` parameter for KVM is enabled::
+
+    $ cat /sys/module/kvm_intel/parameters/nested
+    Y
+
+For AMD hosts, the process is the same as above, except that the module
+name is ``kvm-amd``.
+
+
+Additional nested-related kernel parameters (x86)
+-------------------------------------------------
+
+If your hardware is sufficiently advanced (Intel Haswell processor or
+higher, which has newer hardware virt extensions), the following
+additional features will also be enabled by default: "Shadow VMCS
+(Virtual Machine Control Structure)", APIC Virtualization on your bare
+metal host (L0).  Parameters for Intel hosts::
+
+    $ cat /sys/module/kvm_intel/parameters/enable_shadow_vmcs
+    Y
+
+    $ cat /sys/module/kvm_intel/parameters/enable_apicv
+    Y
+
+    $ cat /sys/module/kvm_intel/parameters/ept
+    Y
+
+.. note:: If you suspect your L2 (i.e. nested guest) is running slower,
+          ensure the above are enabled (particularly
+          ``enable_shadow_vmcs`` and ``ept``).
+
+
+Starting a nested guest (x86)
+-----------------------------
+
+Once your bare metal host (L0) is configured for nesting, you should be
+able to start an L1 guest with::
+
+    $ qemu-kvm -cpu host [...]
+
+The above will pass through the host CPU's capabilities as-is to the
+gues); or for better live migration compatibility, use a named CPU
+model supported by QEMU. e.g.::
+
+    $ qemu-kvm -cpu Haswell-noTSX-IBRS,vmx=on
+
+then the guest hypervisor will subsequently be capable of running a
+nested guest with accelerated KVM.
+
+
+Enabling "nested" (s390x)
+-------------------------
+
+1. On the host hypervisor (L0), enable the ``nested`` parameter on
+   s390x::
+
+    $ rmmod kvm
+    $ modprobe kvm nested=1
+
+.. note:: On s390x, the kernel parameter ``hpage`` is mutually exclusive
+          with the ``nested`` paramter — i.e. to be able to enable
+          ``nested``, the ``hpage`` parameter *must* be disabled.
+
+2. The guest hypervisor (L1) must be provided with the ``sie`` CPU
+   feature — with QEMU, this can be done by using "host passthrough"
+   (via the command-line ``-cpu host``).
+
+3. Now the KVM module can be loaded in the L1 (guest hypervisor)::
+
+    $ modprobe kvm
+
+
+Live migration with nested KVM
+------------------------------
+
+Migrating an L1 guest, with a  *live* nested guest in it, to another
+bare metal host, works as of Linux kernel 5.3 and QEMU 4.2.0 for
+Intel x86 systems, and even on older versions for s390x.
+
+On AMD systems, once an L1 guest has started an L2 guest, the L1 guest
+should no longer be migrated or saved (refer to QEMU documentation on
+"savevm"/"loadvm") until the L2 guest shuts down.  Attempting to migrate
+or save-and-load an L1 guest while an L2 guest is running will result in
+undefined behavior.  You might see a ``kernel BUG!`` entry in ``dmesg``, a
+kernel 'oops', or an outright kernel panic.  Such a migrated or loaded L1
+guest can no longer be considered stable or secure, and must be restarted.
+Migrating an L1 guest merely configured to support nesting, while not
+actually running L2 guests, is expected to function normally even on AMD
+systems but may fail once guests are started.
+
+Migrating an L2 guest is always expected to succeed, so all the following
+scenarios should work even on AMD systems:
+
+- Migrating a nested guest (L2) to another L1 guest on the *same* bare
+  metal host.
+
+- Migrating a nested guest (L2) to another L1 guest on a *different*
+  bare metal host.
+
+- Migrating a nested guest (L2) to a bare metal host.
+
+Reporting bugs from nested setups
+-----------------------------------
+
+Debugging "nested" problems can involve sifting through log files across
+L0, L1 and L2; this can result in tedious back-n-forth between the bug
+reporter and the bug fixer.
+
+- Mention that you are in a "nested" setup.  If you are running any kind
+  of "nesting" at all, say so.  Unfortunately, this needs to be called
+  out because when reporting bugs, people tend to forget to even
+  *mention* that they're using nested virtualization.
+
+- Ensure you are actually running KVM on KVM.  Sometimes people do not
+  have KVM enabled for their guest hypervisor (L1), which results in
+  them running with pure emulation or what QEMU calls it as "TCG", but
+  they think they're running nested KVM.  Thus confusing "nested Virt"
+  (which could also mean, QEMU on KVM) with "nested KVM" (KVM on KVM).
+
+Information to collect (generic)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following is not an exhaustive list, but a very good starting point:
+
+  - Kernel, libvirt, and QEMU version from L0
+
+  - Kernel, libvirt and QEMU version from L1
+
+  - QEMU command-line of L1 -- when using libvirt, you'll find it here:
+    ``/var/log/libvirt/qemu/instance.log``
+
+  - QEMU command-line of L2 -- as above, when using libvirt, get the
+    complete libvirt-generated QEMU command-line
+
+  - ``cat /sys/cpuinfo`` from L0
+
+  - ``cat /sys/cpuinfo`` from L1
+
+  - ``lscpu`` from L0
+
+  - ``lscpu`` from L1
+
+  - Full ``dmesg`` output from L0
+
+  - Full ``dmesg`` output from L1
+
+x86-specific info to collect
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Both the below commands, ``x86info`` and ``dmidecode``, should be
+available on most Linux distributions with the same name:
+
+  - Output of: ``x86info -a`` from L0
+
+  - Output of: ``x86info -a`` from L1
+
+  - Output of: ``dmidecode`` from L0
+
+  - Output of: ``dmidecode`` from L1
+
+s390x-specific info to collect
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Along with the earlier mentioned generic details, the below is
+also recommended:
+
+  - ``/proc/sysinfo`` from L1; this will also include the info from L0
index 2c59cc5..ecc0749 100644 (file)
@@ -3936,11 +3936,9 @@ F:       arch/powerpc/platforms/cell/
 CEPH COMMON CODE (LIBCEPH)
 M:     Ilya Dryomov <idryomov@gmail.com>
 M:     Jeff Layton <jlayton@kernel.org>
-M:     Sage Weil <sage@redhat.com>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     include/linux/ceph/
 F:     include/linux/crush/
@@ -3948,12 +3946,10 @@ F:      net/ceph/
 
 CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
 M:     Jeff Layton <jlayton@kernel.org>
-M:     Sage Weil <sage@redhat.com>
 M:     Ilya Dryomov <idryomov@gmail.com>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     Documentation/filesystems/ceph.rst
 F:     fs/ceph/
@@ -7119,9 +7115,10 @@ F:       include/uapi/asm-generic/
 
 GENERIC PHY FRAMEWORK
 M:     Kishon Vijay Abraham I <kishon@ti.com>
+M:     Vinod Koul <vkoul@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/linux-phy.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
 F:     Documentation/devicetree/bindings/phy/
 F:     drivers/phy/
 F:     include/linux/phy/
@@ -7746,11 +7743,6 @@ L:       platform-driver-x86@vger.kernel.org
 S:     Orphan
 F:     drivers/platform/x86/tc1100-wmi.c
 
-HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
-M:     Jaroslav Kysela <perex@perex.cz>
-S:     Obsolete
-F:     drivers/staging/hp/hp100.*
-
 HPET:  High Precision Event Timers driver
 M:     Clemens Ladisch <clemens@ladisch.de>
 S:     Maintained
@@ -14103,12 +14095,10 @@ F:    drivers/media/radio/radio-tea5777.c
 
 RADOS BLOCK DEVICE (RBD)
 M:     Ilya Dryomov <idryomov@gmail.com>
-M:     Sage Weil <sage@redhat.com>
 R:     Dongsheng Yang <dongsheng.yang@easystack.cn>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     Documentation/ABI/testing/sysfs-bus-rbd
 F:     drivers/block/rbd.c
index 3512f7b..11fe9b1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
@@ -729,10 +729,6 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS += -Os
 endif
 
-ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED
-KBUILD_CFLAGS   += -Wno-maybe-uninitialized
-endif
-
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
 KBUILD_CFLAGS  += $(call cc-option,-fno-allow-store-data-races)
@@ -881,6 +877,17 @@ KBUILD_CFLAGS += -Wno-pointer-sign
 # disable stringop warnings in gcc 8+
 KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
+# We'll want to enable this eventually, but it's not going away for 5.7 at least
+KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+
+# Another good warning that we'll want to enable eventually
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+
+# Enabled with W=2, disabled by default as noisy
+KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
+
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
index e133da3..a915188 100644 (file)
@@ -165,8 +165,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
        preempt_enable();
 #endif
 
-       if (!ret)
-               *oval = oldval;
+       /*
+        * Store unconditionally. If ret != 0 the extra store is the least
+        * of the worries but GCC cannot figure out that __futex_atomic_op()
+        * is either setting ret to -EFAULT or storing the old value in
+        * oldval which results in a uninitialized warning at the call site.
+        */
+       *oval = oldval;
 
        return ret;
 }
index 8e9c924..a0b144c 100644 (file)
@@ -177,6 +177,7 @@ void machine_kexec(struct kimage *kimage)
         * the offline CPUs. Therefore, we must use the __* variant here.
         */
        __flush_icache_range((uintptr_t)reboot_code_buffer,
+                            (uintptr_t)reboot_code_buffer +
                             arm64_relocate_new_kernel_size);
 
        /* Flush the kimage list and its buffers. */
index 23ebe51..50a279d 100644 (file)
@@ -200,6 +200,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        }
 
        memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+
+       if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+               int i;
+
+               for (i = 0; i < 16; i++)
+                       *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+       }
 out:
        return err;
 }
index d22d053..90186cf 100644 (file)
@@ -18,6 +18,7 @@
 
 #define CPU_GP_REG_OFFSET(x)   (CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)     CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SP_EL0_OFFSET      (CPU_XREG_OFFSET(30) + 8)
 
        .text
        .pushsection    .hyp.text, "ax"
        ldp     x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
 .endm
 
+.macro save_sp_el0 ctxt, tmp
+       mrs     \tmp,   sp_el0
+       str     \tmp,   [\ctxt, #CPU_SP_EL0_OFFSET]
+.endm
+
+.macro restore_sp_el0 ctxt, tmp
+       ldr     \tmp,     [\ctxt, #CPU_SP_EL0_OFFSET]
+       msr     sp_el0, \tmp
+.endm
+
 /*
  * u64 __guest_enter(struct kvm_vcpu *vcpu,
  *                  struct kvm_cpu_context *host_ctxt);
@@ -60,6 +71,9 @@ SYM_FUNC_START(__guest_enter)
        // Store the host regs
        save_callee_saved_regs x1
 
+       // Save the host's sp_el0
+       save_sp_el0     x1, x2
+
        // Now the host state is stored if we have a pending RAS SError it must
        // affect the host. If any asynchronous exception is pending we defer
        // the guest entry. The DSB isn't necessary before v8.2 as any SError
@@ -83,6 +97,9 @@ alternative_else_nop_endif
        // when this feature is enabled for kernel code.
        ptrauth_switch_to_guest x29, x0, x1, x2
 
+       // Restore the guest's sp_el0
+       restore_sp_el0 x29, x0
+
        // Restore guest regs x0-x17
        ldp     x0, x1,   [x29, #CPU_XREG_OFFSET(0)]
        ldp     x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
@@ -130,6 +147,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // Store the guest regs x18-x29, lr
        save_callee_saved_regs x1
 
+       // Store the guest's sp_el0
+       save_sp_el0     x1, x2
+
        get_host_ctxt   x2, x3
 
        // Macro ptrauth_switch_to_guest format:
@@ -139,6 +159,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // when this feature is enabled for kernel code.
        ptrauth_switch_to_host x1, x2, x3, x4, x5
 
+       // Restore the hosts's sp_el0
+       restore_sp_el0 x2, x3
+
        // Now restore the host regs
        restore_callee_saved_regs x2
 
index c2a13ab..9c5cfb0 100644 (file)
@@ -198,7 +198,6 @@ SYM_CODE_END(__hyp_panic)
 .macro invalid_vector  label, target = __hyp_panic
        .align  2
 SYM_CODE_START(\label)
-\label:
        b \target
 SYM_CODE_END(\label)
 .endm
index 75b1925..6d2df9f 100644 (file)
@@ -15,8 +15,9 @@
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
- * which are handled as part of the el2 return state) on every switch.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
  * tpidr_el0 and tpidrro_el0 only need to be switched when going
  * to host userspace or a different VCPU.  EL1 registers only need to be
  * switched when potentially going to run a different VCPU.  The latter two
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
        ctxt->sys_regs[MDSCR_EL1]       = read_sysreg(mdscr_el1);
-
-       /*
-        * The host arm64 Linux uses sp_el0 to point to 'current' and it must
-        * therefore be saved/restored on every entry/exit to/from the guest.
-        */
-       ctxt->gp_regs.regs.sp           = read_sysreg(sp_el0);
 }
 
 static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -99,12 +94,6 @@ NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
 static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
 {
        write_sysreg(ctxt->sys_regs[MDSCR_EL1],   mdscr_el1);
-
-       /*
-        * The host arm64 Linux uses sp_el0 to point to 'current' and it must
-        * therefore be saved/restored on every entry/exit to/from the guest.
-        */
-       write_sysreg(ctxt->gp_regs.regs.sp,       sp_el0);
 }
 
 static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
index bbeb6a5..0be3355 100644 (file)
@@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                ptep = (pte_t *)pudp;
        } else if (sz == (CONT_PTE_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
+               if (!pmdp)
+                       return NULL;
 
                WARN_ON(addr & (sz - 1));
                /*
index e15166b..ad2f172 100644 (file)
@@ -521,6 +521,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
index 74f82cf..a31e1a4 100644 (file)
@@ -54,7 +54,7 @@ config RISCV
        select GENERIC_ARCH_TOPOLOGY if SMP
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_MMIOWB
-       select ARCH_HAS_DEBUG_VIRTUAL
+       select ARCH_HAS_DEBUG_VIRTUAL if MMU
        select HAVE_EBPF_JIT if MMU
        select EDAC_SUPPORT
        select ARCH_HAS_GIGANTIC_PAGE
@@ -136,6 +136,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
        def_bool y
 
 config SYS_SUPPORTS_HUGETLBFS
+       depends on MMU
        def_bool y
 
 config STACKTRACE_SUPPORT
index 216286d..d646332 100644 (file)
@@ -11,14 +11,15 @@ config SOC_SIFIVE
          This enables support for SiFive SoC platform hardware.
 
 config SOC_VIRT
-       bool "QEMU Virt Machine"
-       select POWER_RESET_SYSCON
-       select POWER_RESET_SYSCON_POWEROFF
-       select GOLDFISH
-       select RTC_DRV_GOLDFISH
-       select SIFIVE_PLIC
-       help
-         This enables support for QEMU Virt Machine.
+       bool "QEMU Virt Machine"
+       select POWER_RESET
+       select POWER_RESET_SYSCON
+       select POWER_RESET_SYSCON_POWEROFF
+       select GOLDFISH
+       select RTC_DRV_GOLDFISH if RTC_CLASS
+       select SIFIVE_PLIC
+       help
+         This enables support for QEMU Virt Machine.
 
 config SOC_KENDRYTE
        bool "Kendryte K210 SoC"
index 8e18d2c..cec462e 100644 (file)
 #define CAUSE_IRQ_FLAG         (_AC(1, UL) << (__riscv_xlen - 1))
 
 /* Interrupt causes (minus the high bit) */
-#define IRQ_U_SOFT             0
 #define IRQ_S_SOFT             1
 #define IRQ_M_SOFT             3
-#define IRQ_U_TIMER            4
 #define IRQ_S_TIMER            5
 #define IRQ_M_TIMER            7
-#define IRQ_U_EXT              8
 #define IRQ_S_EXT              9
 #define IRQ_M_EXT              11
 
index 1bb0cd0..5ce5046 100644 (file)
@@ -8,6 +8,7 @@
 #ifndef _ASM_RISCV_HWCAP_H
 #define _ASM_RISCV_HWCAP_H
 
+#include <linux/bits.h>
 #include <uapi/asm/hwcap.h>
 
 #ifndef __ASSEMBLY__
@@ -22,6 +23,27 @@ enum {
 };
 
 extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a                ('a' - 'a')
+#define RISCV_ISA_EXT_c                ('c' - 'a')
+#define RISCV_ISA_EXT_d                ('d' - 'a')
+#define RISCV_ISA_EXT_f                ('f' - 'a')
+#define RISCV_ISA_EXT_h                ('h' - 'a')
+#define RISCV_ISA_EXT_i                ('i' - 'a')
+#define RISCV_ISA_EXT_m                ('m' - 'a')
+#define RISCV_ISA_EXT_s                ('s' - 'a')
+#define RISCV_ISA_EXT_u                ('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX      64
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
 #endif
 
 #endif /* _ASM_RISCV_HWCAP_H */
index a2c809d..56053c9 100644 (file)
@@ -16,6 +16,8 @@
 
 #ifndef CONFIG_MMU
 #define pgprot_noncached(x)    (x)
+#define pgprot_writecombine(x) (x)
+#define pgprot_device(x)       (x)
 #endif /* CONFIG_MMU */
 
 /* Generic IO read/write.  These perform native-endian accesses. */
index bb4091f..0b2333e 100644 (file)
@@ -9,6 +9,7 @@
  */
 #define mmiowb()       __asm__ __volatile__ ("fence o,w" : : : "memory");
 
+#include <linux/smp.h>
 #include <asm-generic/mmiowb.h>
 
 #endif /* _ASM_RISCV_MMIOWB_H */
index 0234048..062efd3 100644 (file)
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 
+#ifdef CONFIG_RISCV_BASE_PMU
 #define RISCV_BASE_COUNTERS    2
 
 /*
  * The RISCV_MAX_COUNTERS parameter should be specified.
  */
 
-#ifdef CONFIG_RISCV_BASE_PMU
 #define RISCV_MAX_COUNTERS     2
-#endif
-
-#ifndef RISCV_MAX_COUNTERS
-#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU."
-#endif
 
 /*
  * These are the indexes of bits in counteren register *minus* 1,
@@ -82,6 +77,7 @@ struct riscv_pmu {
        int             irq;
 };
 
+#endif
 #ifdef CONFIG_PERF_EVENTS
 #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
 #endif
index 9c188ad..35b6003 100644 (file)
@@ -470,12 +470,15 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 
 #else /* CONFIG_MMU */
 
+#define PAGE_SHARED            __pgprot(0)
 #define PAGE_KERNEL            __pgprot(0)
 #define swapper_pg_dir         NULL
 #define VMALLOC_START          0
 
 #define TASK_SIZE 0xffffffffUL
 
+static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {}
+
 #endif /* !CONFIG_MMU */
 
 #define kern_addr_valid(addr)   (1) /* FIXME */
index c38df47..4c5bae7 100644 (file)
@@ -22,14 +22,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_ro(void);
-void set_kernel_text_rw(void);
-#else
-static inline void set_kernel_text_ro(void) { }
-static inline void set_kernel_text_rw(void) { }
-#endif
-
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
index 86c8308..d8bbd32 100644 (file)
@@ -43,7 +43,7 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
 obj-$(CONFIG_FUNCTION_TRACER)  += mcount.o ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)   += mcount-dyn.o
 
-obj-$(CONFIG_PERF_EVENTS)      += perf_event.o
+obj-$(CONFIG_RISCV_BASE_PMU)   += perf_event.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_callchain.o
 obj-$(CONFIG_HAVE_PERF_REGS)   += perf_regs.o
 obj-$(CONFIG_RISCV_SBI)                += sbi.o
index c4c33bf..0ec2235 100644 (file)
@@ -15,8 +15,8 @@
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-void *__cpu_up_stack_pointer[NR_CPUS];
-void *__cpu_up_task_pointer[NR_CPUS];
+void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
+void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
 
 extern const struct cpu_operations cpu_ops_sbi;
 extern const struct cpu_operations cpu_ops_spinwait;
index a5ad000..ac202f4 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include <linux/bitmap.h>
 #include <linux/of.h>
 #include <asm/processor.h>
 #include <asm/hwcap.h>
 #include <asm/switch_to.h>
 
 unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
 #ifdef CONFIG_FPU
 bool has_fpu __read_mostly;
 #endif
 
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+       if (!isa_bitmap)
+               return riscv_isa[0];
+       return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+       const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+       if (bit >= RISCV_ISA_EXT_MAX)
+               return false;
+
+       return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
 void riscv_fill_hwcap(void)
 {
        struct device_node *node;
        const char *isa;
-       size_t i;
+       char print_str[BITS_PER_LONG + 1];
+       size_t i, j, isa_len;
        static unsigned long isa2hwcap[256] = {0};
 
        isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -33,8 +76,11 @@ void riscv_fill_hwcap(void)
 
        elf_hwcap = 0;
 
+       bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
        for_each_of_cpu_node(node) {
                unsigned long this_hwcap = 0;
+               unsigned long this_isa = 0;
 
                if (riscv_of_processor_hartid(node) < 0)
                        continue;
@@ -44,8 +90,24 @@ void riscv_fill_hwcap(void)
                        continue;
                }
 
-               for (i = 0; i < strlen(isa); ++i)
+               i = 0;
+               isa_len = strlen(isa);
+#if IS_ENABLED(CONFIG_32BIT)
+               if (!strncmp(isa, "rv32", 4))
+                       i += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+               if (!strncmp(isa, "rv64", 4))
+                       i += 4;
+#endif
+               for (; i < isa_len; ++i) {
                        this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+                       /*
+                        * TODO: X, Y and Z extension parsing for Host ISA
+                        * bitmap will be added in-future.
+                        */
+                       if ('a' <= isa[i] && isa[i] < 'x')
+                               this_isa |= (1UL << (isa[i] - 'a'));
+               }
 
                /*
                 * All "okay" hart should have same isa. Set HWCAP based on
@@ -56,6 +118,11 @@ void riscv_fill_hwcap(void)
                        elf_hwcap &= this_hwcap;
                else
                        elf_hwcap = this_hwcap;
+
+               if (riscv_isa[0])
+                       riscv_isa[0] &= this_isa;
+               else
+                       riscv_isa[0] = this_isa;
        }
 
        /* We don't support systems with F but without D, so mask those out
@@ -65,7 +132,17 @@ void riscv_fill_hwcap(void)
                elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
        }
 
-       pr_info("elf_hwcap is 0x%lx\n", elf_hwcap);
+       memset(print_str, 0, sizeof(print_str));
+       for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+               if (riscv_isa[0] & BIT_MASK(i))
+                       print_str[j++] = (char)('a' + i);
+       pr_info("riscv: ISA extensions %s\n", print_str);
+
+       memset(print_str, 0, sizeof(print_str));
+       for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+               if (elf_hwcap & BIT_MASK(i))
+                       print_str[j++] = (char)('a' + i);
+       pr_info("riscv: ELF capabilities %s\n", print_str);
 
 #ifdef CONFIG_FPU
        if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
index 91626d9..c835f03 100644 (file)
@@ -147,7 +147,7 @@ static int riscv_map_hw_event(u64 config)
        return riscv_pmu->hw_events[config];
 }
 
-int riscv_map_cache_decode(u64 config, unsigned int *type,
+static int riscv_map_cache_decode(u64 config, unsigned int *type,
                           unsigned int *op, unsigned int *result)
 {
        return -ENOENT;
@@ -342,7 +342,7 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
 
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
-irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
+static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
 {
        return IRQ_NONE;
 }
@@ -361,7 +361,7 @@ static int reserve_pmc_hardware(void)
        return err;
 }
 
-void release_pmc_hardware(void)
+static void release_pmc_hardware(void)
 {
        mutex_lock(&pmc_reserve_mutex);
        if (riscv_pmu->irq >= 0)
@@ -464,7 +464,7 @@ static const struct of_device_id riscv_pmu_of_ids[] = {
        { /* sentinel value */ }
 };
 
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
 {
        struct device_node *node = of_find_node_by_type(NULL, "pmu");
        const struct of_device_id *of_id;
index e0a6293..a65a8fa 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/profile.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
@@ -63,6 +64,7 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
        for_each_cpu(cpu, in)
                cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
 }
+EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 {
index 6c85487..837b9b3 100644 (file)
@@ -65,7 +65,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 
 #else /* !CONFIG_FRAME_POINTER */
 
-static void notrace walk_stackframe(struct task_struct *task,
+void notrace walk_stackframe(struct task_struct *task,
        struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg)
 {
        unsigned long sp, pc;
index a4ee3a0..4c8b2a4 100644 (file)
@@ -12,7 +12,7 @@ vdso-syms += getcpu
 vdso-syms += flush_icache
 
 # Files to link into the vdso
-obj-vdso = $(patsubst %, %.o, $(vdso-syms))
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S
new file mode 100644 (file)
index 0000000..2a956c9
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
index b55be44..27a3341 100644 (file)
@@ -150,7 +150,8 @@ void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        set_max_mapnr(PFN_DOWN(mem_size));
-       max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+       max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+       max_low_pfn = max_pfn;
 
 #ifdef CONFIG_BLK_DEV_INITRD
        setup_initrd();
@@ -501,22 +502,6 @@ static inline void setup_vm_final(void)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void)
-{
-       unsigned long text_start = (unsigned long)_text;
-       unsigned long text_end = (unsigned long)_etext;
-
-       set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-       unsigned long text_start = (unsigned long)_text;
-       unsigned long text_end = (unsigned long)_etext;
-
-       set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
 void mark_rodata_ro(void)
 {
        unsigned long text_start = (unsigned long)_text;
index 5dcf9ff..d05bb04 100644 (file)
@@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
index 69a824f..8938936 100644 (file)
@@ -626,10 +626,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
         * available for the guest are AQIC and TAPQ with the t bit set
         * since we do not set IC.3 (FIII) we currently will only intercept
         * the AQIC function code.
+        * Note: running nested under z/VM can result in intercepts for other
+        * function codes, e.g. PQAP(QCI). We do not support this and bail out.
         */
        reg0 = vcpu->run->s.regs.gprs[0];
        fc = (reg0 >> 24) & 0xff;
-       if (WARN_ON_ONCE(fc != 0x03))
+       if (fc != 0x03)
                return -EOPNOTSUPP;
 
        /* PQAP instruction is allowed for guest kernel only */
index 0789e13..1c7f13b 100644 (file)
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SIZEOF_PTREGS  21*8
 
 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
-       /*
-        * Push registers and sanitize registers of values that a
-        * speculation attack might otherwise want to exploit. The
-        * lower registers are likely clobbered well before they
-        * could be put to use in a speculative execution gadget.
-        * Interleave XOR with PUSH for better uop scheduling:
-        */
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
-       xorl    %edx, %edx      /* nospec   dx */
        pushq   %rcx            /* pt_regs->cx */
-       xorl    %ecx, %ecx      /* nospec   cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
-       xorl    %r8d, %r8d      /* nospec   r8 */
        pushq   %r9             /* pt_regs->r9 */
-       xorl    %r9d, %r9d      /* nospec   r9 */
        pushq   %r10            /* pt_regs->r10 */
-       xorl    %r10d, %r10d    /* nospec   r10 */
        pushq   %r11            /* pt_regs->r11 */
-       xorl    %r11d, %r11d    /* nospec   r11*/
        pushq   %rbx            /* pt_regs->rbx */
-       xorl    %ebx, %ebx      /* nospec   rbx*/
        pushq   %rbp            /* pt_regs->rbp */
-       xorl    %ebp, %ebp      /* nospec   rbp*/
        pushq   %r12            /* pt_regs->r12 */
-       xorl    %r12d, %r12d    /* nospec   r12*/
        pushq   %r13            /* pt_regs->r13 */
-       xorl    %r13d, %r13d    /* nospec   r13*/
        pushq   %r14            /* pt_regs->r14 */
-       xorl    %r14d, %r14d    /* nospec   r14*/
        pushq   %r15            /* pt_regs->r15 */
-       xorl    %r15d, %r15d    /* nospec   r15*/
        UNWIND_HINT_REGS
+
        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
+
+       /*
+        * Sanitize registers of values that a speculation attack might
+        * otherwise want to exploit. The lower registers are likely clobbered
+        * well before they could be put to use in a speculative execution
+        * gadget.
+        */
+       xorl    %edx,  %edx     /* nospec dx  */
+       xorl    %ecx,  %ecx     /* nospec cx  */
+       xorl    %r8d,  %r8d     /* nospec r8  */
+       xorl    %r9d,  %r9d     /* nospec r9  */
+       xorl    %r10d, %r10d    /* nospec r10 */
+       xorl    %r11d, %r11d    /* nospec r11 */
+       xorl    %ebx,  %ebx     /* nospec rbx */
+       xorl    %ebp,  %ebp     /* nospec rbp */
+       xorl    %r12d, %r12d    /* nospec r12 */
+       xorl    %r13d, %r13d    /* nospec r13 */
+       xorl    %r14d, %r14d    /* nospec r14 */
+       xorl    %r15d, %r15d    /* nospec r15 */
+
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
index 0e9504f..3063aa9 100644 (file)
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
         */
 syscall_return_via_sysret:
        /* rcx and r11 are already restored (see code above) */
-       UNWIND_HINT_EMPTY
        POP_REGS pop_rdi=0 skip_r11rcx=1
 
        /*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
         */
        movq    %rsp, %rdi
        movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       UNWIND_HINT_EMPTY
 
        pushq   RSP-RDI(%rdi)   /* RSP */
        pushq   (%rdi)          /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
  * %rdi: prev task
  * %rsi: next task
  */
-SYM_CODE_START(__switch_to_asm)
-       UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
        /*
         * Save callee-saved registers
         * This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
        popq    %rbp
 
        jmp     __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
 
 /*
  * A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
  * +----------------------------------------------------+
  */
 SYM_CODE_START(interrupt_entry)
-       UNWIND_HINT_FUNC
+       UNWIND_HINT_IRET_REGS offset=16
        ASM_CLAC
        cld
 
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
        pushq   5*8(%rdi)               /* regs->eflags */
        pushq   4*8(%rdi)               /* regs->cs */
        pushq   3*8(%rdi)               /* regs->ip */
+       UNWIND_HINT_IRET_REGS
        pushq   2*8(%rdi)               /* regs->orig_ax */
        pushq   8(%rdi)                 /* return address */
-       UNWIND_HINT_FUNC
 
        movq    (%rdi), %rdi
        jmp     2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
         */
        movq    %rsp, %rdi
        movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       UNWIND_HINT_EMPTY
 
        /* Copy the IRET frame to the trampoline stack. */
        pushq   6*8(%rdi)       /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
 
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rax
        leaq    -PTREGS_SIZE(%rax), %rsp
-       UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+       UNWIND_HINT_REGS
 
        call    do_exit
 SYM_CODE_END(rewind_stack_do_exit)
index 85be2f5..84b9449 100644 (file)
@@ -56,16 +56,23 @@ struct dyn_arch_ftrace {
 
 #ifndef __ASSEMBLY__
 
+#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
+extern void set_ftrace_ops_ro(void);
+#else
+static inline void set_ftrace_ops_ro(void) { }
+#endif
+
 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
 {
        /*
         * Compare the symbol name with the system call name. Skip the
-        * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+        * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
         */
        return !strcmp(sym + 3, name + 3) ||
                (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
-               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+               (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
 }
 
 #ifndef COMPILE_OFFSETS
index 42a2d0d..0dea9f1 100644 (file)
@@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
 {
        /* We can only post Fixed and LowPrio IRQs */
-       return (irq->delivery_mode == dest_Fixed ||
-               irq->delivery_mode == dest_LowestPrio);
+       return (irq->delivery_mode == APIC_DM_FIXED ||
+               irq->delivery_mode == APIC_DM_LOWEST);
 }
 
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
index 499578f..70fc159 100644 (file)
@@ -19,7 +19,7 @@ struct unwind_state {
 #if defined(CONFIG_UNWINDER_ORC)
        bool signal, full_regs;
        unsigned long sp, bp, ip;
-       struct pt_regs *regs;
+       struct pt_regs *regs, *prev_regs;
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
        bool got_irq;
        unsigned long *bp, *orig_sp, ip;
index 81b9c63..e53dda2 100644 (file)
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
                 * According to Intel, MFENCE can do the serialization here.
                 */
                asm volatile("mfence" : : : "memory");
-
-               printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
                return;
        }
 
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
        return ~0U;
 }
 
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
        return ~0U;
 }
 
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
        return ~0U;
 }
 
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X,          &hsx_deadline_rev),
        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,        0x0b000020),
        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D,        &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
        {},
 };
 
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
 {
        const struct x86_cpu_id *m;
        u32 rev;
 
-       if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
-           boot_cpu_has(X86_FEATURE_HYPERVISOR))
-               return;
+       if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+               return false;
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+               return true;
 
        m = x86_match_cpu(deadline_match);
        if (!m)
-               return;
+               return true;
 
        /*
         * Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
                rev = (u32)m->driver_data;
 
        if (boot_cpu_data.microcode >= rev)
-               return;
+               return true;
 
        setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
        pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
               "please update microcode to version: 0x%x (or later)\n", rev);
+       return false;
 }
 
 /*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
 {
        unsigned int new_apicid;
 
-       apic_check_deadline_errata();
+       if (apic_validate_deadline_timer())
+               pr_debug("TSC deadline timer available\n");
 
        if (x2apic_mode) {
                boot_cpu_physical_apicid = read_apic_id();
index 87b9789..460ae7f 100644 (file)
@@ -183,7 +183,8 @@ recursion_check:
         */
        if (visit_mask) {
                if (*visit_mask & (1UL << info->type)) {
-                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+                       if (task == current)
+                               printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
                }
                *visit_mask |= 1UL << info->type;
index 37a0aea..b0e6417 100644 (file)
@@ -407,7 +407,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
        set_vm_flush_reset_perms(trampoline);
 
-       set_memory_ro((unsigned long)trampoline, npages);
+       if (likely(system_state != SYSTEM_BOOTING))
+               set_memory_ro((unsigned long)trampoline, npages);
        set_memory_x((unsigned long)trampoline, npages);
        return (unsigned long)trampoline;
 fail:
@@ -415,6 +416,32 @@ fail:
        return 0;
 }
 
+void set_ftrace_ops_ro(void)
+{
+       struct ftrace_ops *ops;
+       unsigned long start_offset;
+       unsigned long end_offset;
+       unsigned long npages;
+       unsigned long size;
+
+       do_for_each_ftrace_op(ops, ftrace_ops_list) {
+               if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
+                       continue;
+
+               if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
+                       start_offset = (unsigned long)ftrace_regs_caller;
+                       end_offset = (unsigned long)ftrace_regs_caller_end;
+               } else {
+                       start_offset = (unsigned long)ftrace_caller;
+                       end_offset = (unsigned long)ftrace_epilogue;
+               }
+               size = end_offset - start_offset;
+               size = size + RET_SIZE + sizeof(void *);
+               npages = DIV_ROUND_UP(size, PAGE_SIZE);
+               set_memory_ro((unsigned long)ops->trampoline, npages);
+       } while_for_each_ftrace_op(ops);
+}
+
 static unsigned long calc_trampoline_call_offset(bool save_regs)
 {
        unsigned long start_offset;
index a224b5a..5422611 100644 (file)
@@ -344,6 +344,9 @@ bad_address:
        if (IS_ENABLED(CONFIG_X86_32))
                goto the_end;
 
+       if (state->task != current)
+               goto the_end;
+
        if (state->regs) {
                printk_deferred_once(KERN_WARNING
                        "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
index e9cc182..5b0bd85 100644 (file)
@@ -8,19 +8,21 @@
 #include <asm/orc_lookup.h>
 
 #define orc_warn(fmt, ...) \
-       printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+       printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...)                                      \
+({                                                                     \
+       if (state->task == current)                                     \
+               orc_warn(args);                                         \
+})
 
 extern int __start_orc_unwind_ip[];
 extern int __stop_orc_unwind_ip[];
 extern struct orc_entry __start_orc_unwind[];
 extern struct orc_entry __stop_orc_unwind[];
 
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
 
 static inline unsigned long orc_ip(const int *ip)
 {
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
 {
        static struct orc_entry *orc;
 
-       if (!orc_init)
-               return NULL;
-
        if (ip == 0)
                return &null_orc_entry;
 
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
 
 #ifdef CONFIG_MODULES
 
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
 static void orc_sort_swap(void *_a, void *_b, int size)
 {
        struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
        return true;
 }
 
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs.  This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+                   unsigned long *val)
+{
+       unsigned int reg = reg_off/8;
+
+       if (!state->regs)
+               return false;
+
+       if (state->full_regs) {
+               *val = ((unsigned long *)state->regs)[reg];
+               return true;
+       }
+
+       if (state->prev_regs) {
+               *val = ((unsigned long *)state->prev_regs)[reg];
+               return true;
+       }
+
+       return false;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
-       unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+       unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
        bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_REG_R10:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg R10 at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+                       orc_warn_current("missing R10 value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->r10;
                break;
 
        case ORC_REG_R13:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg R13 at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+                       orc_warn_current("missing R13 value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->r13;
                break;
 
        case ORC_REG_DI:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg DI at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+                       orc_warn_current("missing RDI value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->di;
                break;
 
        case ORC_REG_DX:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg DX at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+                       orc_warn_current("missing DX value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->dx;
                break;
 
        default:
-               orc_warn("unknown SP base reg %d for ip %pB\n",
+               orc_warn("unknown SP base reg %d at %pB\n",
                         orc->sp_reg, (void *)state->ip);
                goto err;
        }
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
 
                state->sp = sp;
                state->regs = NULL;
+               state->prev_regs = NULL;
                state->signal = false;
                break;
 
        case ORC_TYPE_REGS:
                if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
-                       orc_warn("can't dereference registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
+                       orc_warn_current("can't access registers at %pB\n",
+                                        (void *)orig_ip);
                        goto err;
                }
 
                state->regs = (struct pt_regs *)sp;
+               state->prev_regs = NULL;
                state->full_regs = true;
                state->signal = true;
                break;
 
        case ORC_TYPE_REGS_IRET:
                if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
-                       orc_warn("can't dereference iret registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
+                       orc_warn_current("can't access iret registers at %pB\n",
+                                        (void *)orig_ip);
                        goto err;
                }
 
+               if (state->full_regs)
+                       state->prev_regs = state->regs;
                state->regs = (void *)sp - IRET_FRAME_OFFSET;
                state->full_regs = false;
                state->signal = true;
                break;
 
        default:
-               orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+               orc_warn("unknown .orc_unwind entry type %d at %pB\n",
                         orc->type, (void *)orig_ip);
-               break;
+               goto err;
        }
 
        /* Find BP: */
        switch (orc->bp_reg) {
        case ORC_REG_UNDEFINED:
-               if (state->regs && state->full_regs)
-                       state->bp = state->regs->bp;
+               if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+                       state->bp = tmp;
                break;
 
        case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
        if (state->stack_info.type == prev_type &&
            on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
            state->sp <= prev_sp) {
-               orc_warn("stack going in the wrong direction? ip=%pB\n",
-                        (void *)orig_ip);
+               orc_warn_current("stack going in the wrong direction? at %pB\n",
+                                (void *)orig_ip);
                goto err;
        }
 
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
                    struct pt_regs *regs, unsigned long *first_frame)
 {
+       if (!orc_init)
+               goto done;
+
        memset(state, 0, sizeof(*state));
        state->task = task;
 
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        /* Otherwise, skip ahead to the user-specified starting frame: */
        while (!unwind_done(state) &&
               (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-                       state->sp <= (unsigned long)first_frame))
+                       state->sp < (unsigned long)first_frame))
                unwind_next_frame(state);
 
        return;
index 750ff0b..d057376 100644 (file)
@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
        }
 
        /*
-        * AMD SVM AVIC accelerate EOI write and do not trap,
-        * in-kernel IOAPIC will not be able to receive the EOI.
-        * In this case, we do lazy update of the pending EOI when
-        * trying to set IOAPIC irq.
+        * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+        * triggered, in which case the in-kernel IOAPIC will not be able
+        * to receive the EOI.  In this case, we do a lazy update of the
+        * pending EOI when trying to set IOAPIC irq.
         */
-       if (kvm_apicv_activated(ioapic->kvm))
+       if (edge && kvm_apicv_activated(ioapic->kvm))
                ioapic_lazy_update_eoi(ioapic, irq);
 
        /*
index cf912b4..89f7f3a 100644 (file)
@@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
                return NULL;
 
        /* Pin the user virtual address. */
-       npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+       npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
        if (npinned != npages) {
                pr_err("SEV: Failure locking %lu pages.\n", npages);
                goto err;
index 2f379ba..38f6aee 100644 (file)
@@ -1752,6 +1752,8 @@ static int db_interception(struct vcpu_svm *svm)
        if (svm->vcpu.guest_debug &
            (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+               kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
                kvm_run->debug.arch.pc =
                        svm->vmcb->save.cs.base + svm->vmcb->save.rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
index fd78ffb..e44f33c 100644 (file)
@@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
         */
                break;
        default:
-               BUG_ON(1);
+               BUG();
                break;
        }
 
index 87f3f24..51d1a82 100644 (file)
@@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit)
        /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
        FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
 
+       /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+       or $1, %_ASM_AX
+
        pop %_ASM_AX
 .Lvmexit_skip_rsb:
 #endif
index c5835f9..d786c7d 100644 (file)
@@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
        __reserved_bits;                                \
 })
 
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
-       u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
-       if (kvm_cpu_cap_has(X86_FEATURE_LA57))
-               reserved_bits &= ~X86_CR4_LA57;
-
-       if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
-               reserved_bits &= ~X86_CR4_UMIP;
-
-       return reserved_bits;
-}
-
 static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & cr4_reserved_bits)
@@ -3385,6 +3372,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_GET_MSR_FEATURES:
        case KVM_CAP_MSR_PLATFORM_INFO:
        case KVM_CAP_EXCEPTION_PAYLOAD:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_SYNC_REGS:
@@ -9675,7 +9663,9 @@ int kvm_arch_hardware_setup(void *opaque)
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
 
-       cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+       cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
 
        if (kvm_has_tsc_control) {
                /*
@@ -9707,7 +9697,8 @@ int kvm_arch_check_processor_compat(void *opaque)
 
        WARN_ON(!irqs_disabled());
 
-       if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+       if (__cr4_reserved_bits(cpu_has, c) !=
+           __cr4_reserved_bits(cpu_has, &boot_cpu_data))
                return -EIO;
 
        return ops->check_processor_compatibility();
index 3b289c2..8b5f73f 100644 (file)
@@ -54,6 +54,7 @@
 #include <asm/init.h>
 #include <asm/uv/uv.h>
 #include <asm/setup.h>
+#include <asm/ftrace.h>
 
 #include "mm_internal.h"
 
@@ -1291,6 +1292,8 @@ void mark_rodata_ro(void)
        all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
        set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
+       set_ftrace_ops_ro();
+
 #ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
        set_memory_rw(start, (end-start) >> PAGE_SHIFT);
index 59eca6a..b8c55a2 100644 (file)
@@ -43,7 +43,8 @@ struct cpa_data {
        unsigned long   pfn;
        unsigned int    flags;
        unsigned int    force_split             : 1,
-                       force_static_prot       : 1;
+                       force_static_prot       : 1,
+                       force_flush_all         : 1;
        struct page     **pages;
 };
 
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
                return;
        }
 
-       if (cpa->numpages <= tlb_single_page_flush_ceiling)
-               on_each_cpu(__cpa_flush_tlb, cpa, 1);
-       else
+       if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
                flush_tlb_all();
+       else
+               on_each_cpu(__cpa_flush_tlb, cpa, 1);
 
        if (!cache)
                return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
                alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                alias_cpa.curpage = 0;
 
+               cpa->force_flush_all = 1;
+
                ret = __change_page_attr_set_clr(&alias_cpa, 0);
                if (ret)
                        return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
                alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                alias_cpa.curpage = 0;
 
+               cpa->force_flush_all = 1;
                /*
                 * The high mapping range is imprecise, so ignore the
                 * return value.
index 78ba57e..3d41171 100644 (file)
 #include <linux/ioprio.h>
 #include <linux/sbitmap.h>
 #include <linux/delay.h>
+#include <linux/backing-dev.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
        ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
        switch (ioprio_class) {
        default:
-               dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
-                       "bfq: bad prio class %d\n", ioprio_class);
+               pr_err("bdi %s: bfq: bad prio class %d\n",
+                               bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
+                               ioprio_class);
                /* fall through */
        case IOPRIO_CLASS_NONE:
                /*
index c5dc833..930212c 100644 (file)
@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
 {
        /* some drivers (floppy) instantiate a queue w/o disk registered */
        if (blkg->q->backing_dev_info->dev)
-               return dev_name(blkg->q->backing_dev_info->dev);
+               return bdi_dev_name(blkg->q->backing_dev_info);
        return NULL;
 }
 
index 3ab0c1c..7c1fe60 100644 (file)
@@ -466,7 +466,7 @@ struct ioc_gq {
         */
        atomic64_t                      vtime;
        atomic64_t                      done_vtime;
-       atomic64_t                      abs_vdebt;
+       u64                             abs_vdebt;
        u64                             last_vtime;
 
        /*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
        struct iocg_wake_ctx ctx = { .iocg = iocg };
        u64 margin_ns = (u64)(ioc->period_us *
                              WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
-       u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
+       u64 vdebt, vshortage, expires, oexpires;
        s64 vbudget;
        u32 hw_inuse;
 
@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
        vbudget = now->vnow - atomic64_read(&iocg->vtime);
 
        /* pay off debt */
-       abs_vdebt = atomic64_read(&iocg->abs_vdebt);
-       vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
+       vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
        if (vdebt && vbudget > 0) {
                u64 delta = min_t(u64, vbudget, vdebt);
                u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
-                                   abs_vdebt);
+                                   iocg->abs_vdebt);
 
                atomic64_add(delta, &iocg->vtime);
                atomic64_add(delta, &iocg->done_vtime);
-               atomic64_sub(abs_delta, &iocg->abs_vdebt);
-               if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
-                       atomic64_set(&iocg->abs_vdebt, 0);
+               iocg->abs_vdebt -= abs_delta;
        }
 
        /*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
        u64 expires, oexpires;
        u32 hw_inuse;
 
+       lockdep_assert_held(&iocg->waitq.lock);
+
        /* debt-adjust vtime */
        current_hweight(iocg, NULL, &hw_inuse);
-       vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
+       vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
 
-       /* clear or maintain depending on the overage */
-       if (time_before_eq64(vtime, now->vnow)) {
+       /*
+        * Clear or maintain depending on the overage. Non-zero vdebt is what
+        * guarantees that @iocg is online and future iocg_kick_delay() will
+        * clear use_delay. Don't leave it on when there's no vdebt.
+        */
+       if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
                blkcg_clear_delay(blkg);
                return false;
        }
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
 {
        struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
        struct ioc_now now;
+       unsigned long flags;
 
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
        ioc_now(iocg->ioc, &now);
        iocg_kick_delay(iocg, &now, 0);
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 
        return HRTIMER_NORESTART;
 }
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
         * should have woken up in the last period and expire idle iocgs.
         */
        list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
-               if (!waitqueue_active(&iocg->waitq) &&
-                   !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
+               if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
+                   !iocg_is_idle(iocg))
                        continue;
 
                spin_lock(&iocg->waitq.lock);
 
-               if (waitqueue_active(&iocg->waitq) ||
-                   atomic64_read(&iocg->abs_vdebt)) {
+               if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
                        /* might be oversleeping vtime / hweight changes, kick */
                        iocg_kick_waitq(iocg, &now);
                        iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         * tests are racy but the races aren't systemic - we only miss once
         * in a while which is fine.
         */
-       if (!waitqueue_active(&iocg->waitq) &&
-           !atomic64_read(&iocg->abs_vdebt) &&
+       if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
            time_before_eq64(vtime + cost, now.vnow)) {
                iocg_commit_bio(iocg, bio, cost);
                return;
        }
 
        /*
-        * We're over budget.  If @bio has to be issued regardless,
-        * remember the abs_cost instead of advancing vtime.
-        * iocg_kick_waitq() will pay off the debt before waking more IOs.
+        * We activated above but w/o any synchronization. Deactivation is
+        * synchronized with waitq.lock and we won't get deactivated as long
+        * as we're waiting or has debt, so we're good if we're activated
+        * here. In the unlikely case that we aren't, just issue the IO.
+        */
+       spin_lock_irq(&iocg->waitq.lock);
+
+       if (unlikely(list_empty(&iocg->active_list))) {
+               spin_unlock_irq(&iocg->waitq.lock);
+               iocg_commit_bio(iocg, bio, cost);
+               return;
+       }
+
+       /*
+        * We're over budget. If @bio has to be issued regardless, remember
+        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+        * off the debt before waking more IOs.
+        *
         * This way, the debt is continuously paid off each period with the
-        * actual budget available to the cgroup.  If we just wound vtime,
-        * we would incorrectly use the current hw_inuse for the entire
-        * amount which, for example, can lead to the cgroup staying
-        * blocked for a long time even with substantially raised hw_inuse.
+        * actual budget available to the cgroup. If we just wound vtime, we
+        * would incorrectly use the current hw_inuse for the entire amount
+        * which, for example, can lead to the cgroup staying blocked for a
+        * long time even with substantially raised hw_inuse.
+        *
+        * An iocg with vdebt should stay online so that the timer can keep
+        * deducting its vdebt and [de]activate use_delay mechanism
+        * accordingly. We don't want to race against the timer trying to
+        * clear them and leave @iocg inactive w/ dangling use_delay heavily
+        * penalizing the cgroup and its descendants.
         */
        if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               iocg->abs_vdebt += abs_cost;
                if (iocg_kick_delay(iocg, &now, cost))
                        blkcg_schedule_throttle(rqos->q,
                                        (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+               spin_unlock_irq(&iocg->waitq.lock);
                return;
        }
 
@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         * All waiters are on iocg->waitq and the wait states are
         * synchronized using waitq.lock.
         */
-       spin_lock_irq(&iocg->waitq.lock);
-
-       /*
-        * We activated above but w/o any synchronization.  Deactivation is
-        * synchronized with waitq.lock and we won't get deactivated as
-        * long as we're waiting, so we're good if we're activated here.
-        * In the unlikely case that we are deactivated, just issue the IO.
-        */
-       if (unlikely(list_empty(&iocg->active_list))) {
-               spin_unlock_irq(&iocg->waitq.lock);
-               iocg_commit_bio(iocg, bio, cost);
-               return;
-       }
-
        init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
        wait.wait.private = current;
        wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
        struct ioc_now now;
        u32 hw_inuse;
        u64 abs_cost, cost;
+       unsigned long flags;
 
        /* bypass if disabled or for root cgroup */
        if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
                iocg->cursor = bio_end;
 
        /*
-        * Charge if there's enough vtime budget and the existing request
-        * has cost assigned.  Otherwise, account it as debt.  See debt
-        * handling in ioc_rqos_throttle() for details.
+        * Charge if there's enough vtime budget and the existing request has
+        * cost assigned.
         */
        if (rq->bio && rq->bio->bi_iocost_cost &&
-           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
+           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
                iocg_commit_bio(iocg, bio, cost);
-       else
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               return;
+       }
+
+       /*
+        * Otherwise, account it as debt if @iocg is online, which it should
+        * be for the vast majority of cases. See debt handling in
+        * ioc_rqos_throttle() for details.
+        */
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
+       if (likely(!list_empty(&iocg->active_list))) {
+               iocg->abs_vdebt += abs_cost;
+               iocg_kick_delay(iocg, &now, cost);
+       } else {
+               iocg_commit_bio(iocg, bio, cost);
+       }
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 }
 
 static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
        iocg->ioc = ioc;
        atomic64_set(&iocg->vtime, now.vnow);
        atomic64_set(&iocg->done_vtime, now.vnow);
-       atomic64_set(&iocg->abs_vdebt, 0);
        atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
        INIT_LIST_HEAD(&iocg->active_list);
        iocg->hweight_active = HWEIGHT_WHOLE;
index 376d7ed..3c734b8 100644 (file)
@@ -287,7 +287,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
        crypto_free_skcipher(ctx->child);
 }
 
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
 {
        crypto_drop_skcipher(skcipher_instance_ctx(inst));
        kfree(inst);
@@ -400,12 +400,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = encrypt;
        inst->alg.decrypt = decrypt;
 
-       inst->free = free;
+       inst->free = free_inst;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               free(inst);
+               free_inst(inst);
        }
        return err;
 }
index dbdd8af..6d8cea9 100644 (file)
@@ -322,7 +322,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
        crypto_free_cipher(ctx->tweak);
 }
 
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
 {
        crypto_drop_skcipher(skcipher_instance_ctx(inst));
        kfree(inst);
@@ -434,12 +434,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = encrypt;
        inst->alg.decrypt = decrypt;
 
-       inst->free = free;
+       inst->free = free_inst;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               free(inst);
+               free_inst(inst);
        }
        return err;
 }
index b4c0152..145ec0b 100644 (file)
@@ -1994,23 +1994,31 @@ void acpi_ec_set_gpe_wake_mask(u8 action)
                acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
 }
 
-bool acpi_ec_other_gpes_active(void)
-{
-       return acpi_any_gpe_status_set(first_ec ? first_ec->gpe : U32_MAX);
-}
-
 bool acpi_ec_dispatch_gpe(void)
 {
        u32 ret;
 
        if (!first_ec)
+               return acpi_any_gpe_status_set(U32_MAX);
+
+       /*
+        * Report wakeup if the status bit is set for any enabled GPE other
+        * than the EC one.
+        */
+       if (acpi_any_gpe_status_set(first_ec->gpe))
+               return true;
+
+       if (ec_no_wakeup)
                return false;
 
+       /*
+        * Dispatch the EC GPE in-band, but do not report wakeup in any case
+        * to allow the caller to process events properly after that.
+        */
        ret = acpi_dispatch_gpe(NULL, first_ec->gpe);
-       if (ret == ACPI_INTERRUPT_HANDLED) {
+       if (ret == ACPI_INTERRUPT_HANDLED)
                pm_pr_dbg("EC GPE dispatched\n");
-               return true;
-       }
+
        return false;
 }
 #endif /* CONFIG_PM_SLEEP */
index e387517..43411a7 100644 (file)
@@ -202,7 +202,6 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
 
 #ifdef CONFIG_PM_SLEEP
 void acpi_ec_flush_work(void);
-bool acpi_ec_other_gpes_active(void);
 bool acpi_ec_dispatch_gpe(void);
 #endif
 
index 4edc8a3..3850704 100644 (file)
@@ -1013,21 +1013,11 @@ static bool acpi_s2idle_wake(void)
                if (acpi_check_wakeup_handlers())
                        return true;
 
-               /*
-                * If the status bit is set for any enabled GPE other than the
-                * EC one, the wakeup is regarded as a genuine one.
-                */
-               if (acpi_ec_other_gpes_active())
+               /* Check non-EC GPE wakeups and dispatch the EC GPE. */
+               if (acpi_ec_dispatch_gpe())
                        return true;
 
                /*
-                * If the EC GPE status bit has not been set, the wakeup is
-                * regarded as a spurious one.
-                */
-               if (!acpi_ec_dispatch_gpe())
-                       return false;
-
-               /*
                 * Cancel the wakeup and process all pending events in case
                 * there are any wakeup ones in there.
                 *
index fe15236..8558b62 100644 (file)
@@ -645,6 +645,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name)
        dev->dev.release = amba_device_release;
        dev->dev.bus = &amba_bustype;
        dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
+       dev->dev.dma_parms = &dev->dma_parms;
        dev->res.name = dev_name(&dev->dev);
 }
 
index e977041..dcfbe72 100644 (file)
@@ -256,7 +256,8 @@ static int try_to_bring_up_master(struct master *master,
        ret = master->ops->bind(master->dev);
        if (ret < 0) {
                devres_release_group(master->dev, NULL);
-               dev_info(master->dev, "master bind failed: %d\n", ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_info(master->dev, "master bind failed: %d\n", ret);
                return ret;
        }
 
@@ -611,8 +612,9 @@ static int component_bind(struct component *component, struct master *master,
                devres_release_group(component->dev, NULL);
                devres_release_group(master->dev, NULL);
 
-               dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
-                       dev_name(component->dev), component->ops, ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
+                               dev_name(component->dev), component->ops, ret);
        }
 
        return ret;
index 139cdf7..073045c 100644 (file)
@@ -2370,6 +2370,11 @@ u32 fw_devlink_get_flags(void)
        return fw_devlink_flags;
 }
 
+static bool fw_devlink_is_permissive(void)
+{
+       return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
+}
+
 /**
  * device_add - add device to device hierarchy.
  * @dev: device.
@@ -2524,7 +2529,7 @@ int device_add(struct device *dev)
        if (fw_devlink_flags && is_fwnode_dev &&
            fwnode_has_op(dev->fwnode, add_links)) {
                fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
-               if (fw_ret == -ENODEV)
+               if (fw_ret == -ENODEV && !fw_devlink_is_permissive())
                        device_link_wait_for_mandatory_supplier(dev);
                else if (fw_ret)
                        device_link_wait_for_optional_supplier(dev);
index 06ec0e8..94037be 100644 (file)
@@ -224,17 +224,9 @@ static int deferred_devs_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(deferred_devs);
 
-#ifdef CONFIG_MODULES
-/*
- * In the case of modules, set the default probe timeout to
- * 30 seconds to give userland some time to load needed modules
- */
-int driver_deferred_probe_timeout = 30;
-#else
-/* In the case of !modules, no probe timeout needed */
-int driver_deferred_probe_timeout = -1;
-#endif
+int driver_deferred_probe_timeout;
 EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout);
+static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue);
 
 static int __init deferred_probe_timeout_setup(char *str)
 {
@@ -266,8 +258,8 @@ int driver_deferred_probe_check_state(struct device *dev)
                return -ENODEV;
        }
 
-       if (!driver_deferred_probe_timeout) {
-               dev_WARN(dev, "deferred probe timeout, ignoring dependency");
+       if (!driver_deferred_probe_timeout && initcalls_done) {
+               dev_warn(dev, "deferred probe timeout, ignoring dependency");
                return -ETIMEDOUT;
        }
 
@@ -284,6 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
 
        list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
                dev_info(private->device, "deferred probe pending");
+       wake_up(&probe_timeout_waitqueue);
 }
 static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
 
@@ -658,6 +651,9 @@ int driver_probe_done(void)
  */
 void wait_for_device_probe(void)
 {
+       /* wait for probe timeout */
+       wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout);
+
        /* wait for the deferred probe workqueue to finish */
        flush_work(&deferred_probe_work);
 
index 5255550..b27d0f6 100644 (file)
@@ -380,6 +380,8 @@ struct platform_object {
  */
 static void setup_pdev_dma_masks(struct platform_device *pdev)
 {
+       pdev->dev.dma_parms = &pdev->dma_parms;
+
        if (!pdev->dev.coherent_dma_mask)
                pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
        if (!pdev->dev.dma_mask) {
index b38359c..eb2ab05 100644 (file)
@@ -812,10 +812,9 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl,
        if (!mhi_cntrl)
                return -EINVAL;
 
-       if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put)
-               return -EINVAL;
-
-       if (!mhi_cntrl->status_cb || !mhi_cntrl->link_status)
+       if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put ||
+           !mhi_cntrl->status_cb || !mhi_cntrl->read_reg ||
+           !mhi_cntrl->write_reg)
                return -EINVAL;
 
        ret = parse_config(mhi_cntrl, config);
index 5deadfa..095d95b 100644 (file)
@@ -11,9 +11,6 @@
 
 extern struct bus_type mhi_bus_type;
 
-/* MHI MMIO register mapping */
-#define PCI_INVALID_READ(val) (val == U32_MAX)
-
 #define MHIREGLEN (0x0)
 #define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF)
 #define MHIREGLEN_MHIREGLEN_SHIFT (0)
index eb4256b..97e06cc 100644 (file)
 int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
                              void __iomem *base, u32 offset, u32 *out)
 {
-       u32 tmp = readl(base + offset);
-
-       /* If there is any unexpected value, query the link status */
-       if (PCI_INVALID_READ(tmp) &&
-           mhi_cntrl->link_status(mhi_cntrl))
-               return -EIO;
-
-       *out = tmp;
-
-       return 0;
+       return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out);
 }
 
 int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
@@ -49,7 +40,7 @@ int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
 void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
                   u32 offset, u32 val)
 {
-       writel(val, base + offset);
+       mhi_cntrl->write_reg(mhi_cntrl, base + offset, val);
 }
 
 void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
@@ -294,7 +285,7 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl)
                    !(mhi_chan->ee_mask & BIT(mhi_cntrl->ee)))
                        continue;
                mhi_dev = mhi_alloc_device(mhi_cntrl);
-               if (!mhi_dev)
+               if (IS_ERR(mhi_dev))
                        return;
 
                mhi_dev->dev_type = MHI_DEVICE_XFER;
@@ -336,7 +327,8 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl)
 
                /* Channel name is same for both UL and DL */
                mhi_dev->chan_name = mhi_chan->name;
-               dev_set_name(&mhi_dev->dev, "%04x_%s", mhi_chan->chan,
+               dev_set_name(&mhi_dev->dev, "%s_%s",
+                            dev_name(mhi_cntrl->cntrl_dev),
                             mhi_dev->chan_name);
 
                /* Init wakeup source if available */
index 52690cb..dc83d65 100644 (file)
@@ -902,7 +902,11 @@ int mhi_sync_power_up(struct mhi_controller *mhi_cntrl)
                           MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
                           msecs_to_jiffies(mhi_cntrl->timeout_ms));
 
-       return (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -EIO;
+       ret = (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -ETIMEDOUT;
+       if (ret)
+               mhi_power_down(mhi_cntrl, false);
+
+       return ret;
 }
 EXPORT_SYMBOL(mhi_sync_power_up);
 
index 31f9f0e..55b031d 100644 (file)
@@ -16,7 +16,7 @@
 int efi_tpm_final_log_size;
 EXPORT_SYMBOL(efi_tpm_final_log_size);
 
-static int tpm2_calc_event_log_size(void *data, int count, void *size_info)
+static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info)
 {
        struct tcg_pcr_event2_head *header;
        int event_size, size = 0;
index 5638b4e..4269ea9 100644 (file)
@@ -531,7 +531,7 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 {
        struct pca953x_chip *chip = gpiochip_get_data(gc);
 
-       switch (config) {
+       switch (pinconf_to_config_param(config)) {
        case PIN_CONFIG_BIAS_PULL_UP:
        case PIN_CONFIG_BIAS_PULL_DOWN:
                return pca953x_gpio_set_pull_up_down(chip, offset, config);
index acb99ef..8656815 100644 (file)
@@ -368,6 +368,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d)
        struct tegra_gpio_info *tgi = bank->tgi;
        unsigned int gpio = d->hwirq;
 
+       tegra_gpio_irq_mask(d);
        gpiochip_unlock_as_irq(&tgi->gc, gpio);
 }
 
index 40f2d7f..182136d 100644 (file)
@@ -1158,8 +1158,19 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
                                  struct gpioline_info *info)
 {
        struct gpio_chip *gc = desc->gdev->chip;
+       bool ok_for_pinctrl;
        unsigned long flags;
 
+       /*
+        * This function takes a mutex so we must check this before taking
+        * the spinlock.
+        *
+        * FIXME: find a non-racy way to retrieve this information. Maybe a
+        * lock common to both frameworks?
+        */
+       ok_for_pinctrl =
+               pinctrl_gpio_can_use_line(gc->base + info->line_offset);
+
        spin_lock_irqsave(&gpio_lock, flags);
 
        if (desc->name) {
@@ -1186,7 +1197,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
            test_bit(FLAG_USED_AS_IRQ, &desc->flags) ||
            test_bit(FLAG_EXPORT, &desc->flags) ||
            test_bit(FLAG_SYSFS, &desc->flags) ||
-           !pinctrl_gpio_can_use_line(gc->base + info->line_offset))
+           !ok_for_pinctrl)
                info->flags |= GPIOLINE_FLAG_KERNEL;
        if (test_bit(FLAG_IS_OUT, &desc->flags))
                info->flags |= GPIOLINE_FLAG_IS_OUT;
@@ -1227,6 +1238,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        void __user *ip = (void __user *)arg;
        struct gpio_desc *desc;
        __u32 offset;
+       int hwgpio;
 
        /* We fail any subsequent ioctl():s when the chip is gone */
        if (!gc)
@@ -1259,13 +1271,19 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (IS_ERR(desc))
                        return PTR_ERR(desc);
 
+               hwgpio = gpio_chip_hwgpio(desc);
+
+               if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL &&
+                   test_bit(hwgpio, priv->watched_lines))
+                       return -EBUSY;
+
                gpio_desc_to_lineinfo(desc, &lineinfo);
 
                if (copy_to_user(ip, &lineinfo, sizeof(lineinfo)))
                        return -EFAULT;
 
                if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL)
-                       set_bit(gpio_chip_hwgpio(desc), priv->watched_lines);
+                       set_bit(hwgpio, priv->watched_lines);
 
                return 0;
        } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) {
@@ -1280,7 +1298,12 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (IS_ERR(desc))
                        return PTR_ERR(desc);
 
-               clear_bit(gpio_chip_hwgpio(desc), priv->watched_lines);
+               hwgpio = gpio_chip_hwgpio(desc);
+
+               if (!test_bit(hwgpio, priv->watched_lines))
+                       return -EBUSY;
+
+               clear_bit(hwgpio, priv->watched_lines);
                return 0;
        }
        return -EINVAL;
@@ -5289,8 +5312,9 @@ static int __init gpiolib_dev_init(void)
        gpiolib_initialized = true;
        gpiochip_setup_devs();
 
-       if (IS_ENABLED(CONFIG_OF_DYNAMIC))
-               WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier));
+#if IS_ENABLED(CONFIG_OF_DYNAMIC) && IS_ENABLED(CONFIG_OF_GPIO)
+       WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier));
+#endif /* CONFIG_OF_DYNAMIC && CONFIG_OF_GPIO */
 
        return ret;
 }
index 2992a49..8ac1581 100644 (file)
@@ -945,6 +945,7 @@ struct amdgpu_device {
 
        /* s3/s4 mask */
        bool                            in_suspend;
+       bool                            in_hibernate;
 
        /* record last mm index being written through WREG32*/
        unsigned long last_mm_index;
index 9dff792..6a5b91d 100644 (file)
@@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Free the BO*/
-       amdgpu_bo_unref(&mem->bo);
+       drm_gem_object_put_unlocked(&mem->bo->tbo.base);
        mutex_destroy(&mem->lock);
        kfree(mem);
 
@@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
                | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
                | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
 
-       (*mem)->bo = amdgpu_bo_ref(bo);
+       drm_gem_object_get(&bo->tbo.base);
+       (*mem)->bo = bo;
        (*mem)->va = va;
        (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
                AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
index f84f9e3..affde2d 100644 (file)
@@ -3372,15 +3372,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
                }
        }
 
-       amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
-       amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-
-       amdgpu_amdkfd_suspend(adev, !fbcon);
-
        amdgpu_ras_suspend(adev);
 
        r = amdgpu_device_ip_suspend_phase1(adev);
 
+       amdgpu_amdkfd_suspend(adev, !fbcon);
+
        /* evict vram memory */
        amdgpu_bo_evict_vram(adev);
 
index 466bfe5..a735d79 100644 (file)
@@ -1181,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
        struct amdgpu_device *adev = drm_dev->dev_private;
        int r;
 
+       adev->in_hibernate = true;
        r = amdgpu_device_suspend(drm_dev, true);
+       adev->in_hibernate = false;
        if (r)
                return r;
        return amdgpu_asic_reset(adev);
index 9ae7b61..25ddb48 100644 (file)
@@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
        u32 cpp;
        u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
                               AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS     |
-                              AMDGPU_GEM_CREATE_VRAM_CLEARED        |
-                              AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+                              AMDGPU_GEM_CREATE_VRAM_CLEARED;
 
        info = drm_get_format_info(adev->ddev, mode_cmd);
        cpp = info->cpp[0];
index f92c158..0e0daf0 100644 (file)
@@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
                /* ===  CGCG /CGLS for GFX 3D Only === */
                gfx_v10_0_update_3d_clock_gating(adev, enable);
                /* ===  MGCG + MGLS === */
-               /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
+               gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
        }
 
        if (adev->cg_flags &
@@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
        switch (adev->asic_type) {
        case CHIP_NAVI10:
        case CHIP_NAVI14:
-               if (!enable) {
-                       amdgpu_gfx_off_ctrl(adev, false);
-                       cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-               } else
-                       amdgpu_gfx_off_ctrl(adev, true);
+               amdgpu_gfx_off_ctrl(adev, enable);
                break;
        default:
                break;
@@ -4918,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
                                                           ref, mask);
 }
 
+static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
+                                        unsigned vmid)
+{
+       struct amdgpu_device *adev = ring->adev;
+       uint32_t value = 0;
+
+       value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+       value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+       value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+       value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+       WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                      uint32_t me, uint32_t pipe,
@@ -5309,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
        .emit_wreg = gfx_v10_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+       .soft_recovery = gfx_v10_0_ring_soft_recovery,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
index 0c39048..d2d9dce 100644 (file)
@@ -1236,6 +1236,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
        { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
        /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
        { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+       /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+       { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
        { 0, 0, 0, 0, 0 },
 };
 
@@ -5025,10 +5027,9 @@ static int gfx_v9_0_set_powergating_state(void *handle,
        switch (adev->asic_type) {
        case CHIP_RAVEN:
        case CHIP_RENOIR:
-               if (!enable) {
+               if (!enable)
                        amdgpu_gfx_off_ctrl(adev, false);
-                       cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-               }
+
                if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
                        gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
                        gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@@ -5052,12 +5053,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
                        amdgpu_gfx_off_ctrl(adev, true);
                break;
        case CHIP_VEGA12:
-               if (!enable) {
-                       amdgpu_gfx_off_ctrl(adev, false);
-                       cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-               } else {
-                       amdgpu_gfx_off_ctrl(adev, true);
-               }
+               amdgpu_gfx_off_ctrl(adev, enable);
                break;
        default:
                break;
index 94c29b7..28e651b 100644 (file)
@@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 
 /**
  * dm_crtc_high_irq() - Handles CRTC interrupt
- * @interrupt_params: ignored
+ * @interrupt_params: used for determining the CRTC instance
  *
  * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
  * event handler.
@@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
        unsigned long flags;
 
        acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
-       if (acrtc) {
-               acrtc_state = to_dm_crtc_state(acrtc->base.state);
-
-               DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
-                             acrtc->crtc_id,
-                             amdgpu_dm_vrr_active(acrtc_state));
-
-               /* Core vblank handling at start of front-porch is only possible
-                * in non-vrr mode, as only there vblank timestamping will give
-                * valid results while done in front-porch. Otherwise defer it
-                * to dm_vupdate_high_irq after end of front-porch.
-                */
-               if (!amdgpu_dm_vrr_active(acrtc_state))
-                       drm_crtc_handle_vblank(&acrtc->base);
-
-               /* Following stuff must happen at start of vblank, for crc
-                * computation and below-the-range btr support in vrr mode.
-                */
-               amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-
-               if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
-                   acrtc_state->vrr_params.supported &&
-                   acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
-                       spin_lock_irqsave(&adev->ddev->event_lock, flags);
-                       mod_freesync_handle_v_update(
-                               adev->dm.freesync_module,
-                               acrtc_state->stream,
-                               &acrtc_state->vrr_params);
-
-                       dc_stream_adjust_vmin_vmax(
-                               adev->dm.dc,
-                               acrtc_state->stream,
-                               &acrtc_state->vrr_params.adjust);
-                       spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
-               }
-       }
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-/**
- * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
- * @interrupt params - interrupt parameters
- *
- * Notify DRM's vblank event handler at VSTARTUP
- *
- * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
- * * We are close enough to VUPDATE - the point of no return for hw
- * * We are in the fixed portion of variable front porch when vrr is enabled
- * * We are before VUPDATE, where double-buffered vrr registers are swapped
- *
- * It is therefore the correct place to signal vblank, send user flip events,
- * and update VRR.
- */
-static void dm_dcn_crtc_high_irq(void *interrupt_params)
-{
-       struct common_irq_params *irq_params = interrupt_params;
-       struct amdgpu_device *adev = irq_params->adev;
-       struct amdgpu_crtc *acrtc;
-       struct dm_crtc_state *acrtc_state;
-       unsigned long flags;
-
-       acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
        if (!acrtc)
                return;
 
@@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
                         amdgpu_dm_vrr_active(acrtc_state),
                         acrtc_state->active_planes);
 
+       /**
+        * Core vblank handling at start of front-porch is only possible
+        * in non-vrr mode, as only there vblank timestamping will give
+        * valid results while done in front-porch. Otherwise defer it
+        * to dm_vupdate_high_irq after end of front-porch.
+        */
+       if (!amdgpu_dm_vrr_active(acrtc_state))
+               drm_crtc_handle_vblank(&acrtc->base);
+
+       /**
+        * Following stuff must happen at start of vblank, for crc
+        * computation and below-the-range btr support in vrr mode.
+        */
        amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-       drm_crtc_handle_vblank(&acrtc->base);
+
+       /* BTR updates need to happen before VUPDATE on Vega and above. */
+       if (adev->family < AMDGPU_FAMILY_AI)
+               return;
 
        spin_lock_irqsave(&adev->ddev->event_lock, flags);
 
-       if (acrtc_state->vrr_params.supported &&
+       if (acrtc_state->stream && acrtc_state->vrr_params.supported &&
            acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
-               mod_freesync_handle_v_update(
-               adev->dm.freesync_module,
-               acrtc_state->stream,
-               &acrtc_state->vrr_params);
+               mod_freesync_handle_v_update(adev->dm.freesync_module,
+                                            acrtc_state->stream,
+                                            &acrtc_state->vrr_params);
 
-               dc_stream_adjust_vmin_vmax(
-                       adev->dm.dc,
-                       acrtc_state->stream,
-                       &acrtc_state->vrr_params.adjust);
+               dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream,
+                                          &acrtc_state->vrr_params.adjust);
        }
 
        /*
@@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
         * avoid race conditions between flip programming and completion,
         * which could cause too early flip completion events.
         */
-       if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+       if (adev->family >= AMDGPU_FAMILY_RV &&
+           acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
            acrtc_state->active_planes == 0) {
                if (acrtc->event) {
                        drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
@@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
 
        spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 }
-#endif
 
 static int dm_set_clockgating_state(void *handle,
                  enum amd_clockgating_state state)
@@ -2008,17 +1957,22 @@ void amdgpu_dm_update_connector_after_detect(
                dc_sink_retain(aconnector->dc_sink);
                if (sink->dc_edid.length == 0) {
                        aconnector->edid = NULL;
-                       drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
+                       if (aconnector->dc_link->aux_mode) {
+                               drm_dp_cec_unset_edid(
+                                       &aconnector->dm_dp_aux.aux);
+                       }
                } else {
                        aconnector->edid =
-                               (struct edid *) sink->dc_edid.raw_edid;
-
+                               (struct edid *)sink->dc_edid.raw_edid;
 
                        drm_connector_update_edid_property(connector,
-                                       aconnector->edid);
-                       drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
-                                           aconnector->edid);
+                                                          aconnector->edid);
+
+                       if (aconnector->dc_link->aux_mode)
+                               drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
+                                                   aconnector->edid);
                }
+
                amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
                update_connector_ext_caps(aconnector);
        } else {
@@ -2440,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
                c_irq_params->adev = adev;
                c_irq_params->irq_src = int_params.irq_source;
 
+               amdgpu_dm_irq_register_interrupt(
+                       adev, &int_params, dm_crtc_high_irq, c_irq_params);
+       }
+
+       /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
+        * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
+        * to trigger at end of each vblank, regardless of state of the lock,
+        * matching DCE behaviour.
+        */
+       for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
+            i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
+            i++) {
+               r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
+
+               if (r) {
+                       DRM_ERROR("Failed to add vupdate irq id!\n");
+                       return r;
+               }
+
+               int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
+               int_params.irq_source =
+                       dc_interrupt_to_irq_source(dc, i, 0);
+
+               c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
+
+               c_irq_params->adev = adev;
+               c_irq_params->irq_src = int_params.irq_source;
+
                amdgpu_dm_irq_register_interrupt(adev, &int_params,
-                               dm_dcn_crtc_high_irq, c_irq_params);
+                               dm_vupdate_high_irq, c_irq_params);
        }
 
        /* Use GRPH_PFLIP interrupt */
@@ -4448,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
        struct amdgpu_device *adev = crtc->dev->dev_private;
        int rc;
 
-       /* Do not set vupdate for DCN hardware */
-       if (adev->family > AMDGPU_FAMILY_AI)
-               return 0;
-
        irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
 
        rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@@ -7877,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc,
        struct drm_crtc_state *old_crtc_state, *new_crtc_state;
        struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
        struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
+       struct amdgpu_crtc *new_acrtc;
        bool needs_reset;
        int ret = 0;
 
@@ -7886,9 +7865,30 @@ static int dm_update_plane_state(struct dc *dc,
        dm_new_plane_state = to_dm_plane_state(new_plane_state);
        dm_old_plane_state = to_dm_plane_state(old_plane_state);
 
-       /*TODO Implement atomic check for cursor plane */
-       if (plane->type == DRM_PLANE_TYPE_CURSOR)
+       /*TODO Implement better atomic check for cursor plane */
+       if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+               if (!enable || !new_plane_crtc ||
+                       drm_atomic_plane_disabling(plane->state, new_plane_state))
+                       return 0;
+
+               new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+
+               if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) ||
+                       (new_plane_state->crtc_h > new_acrtc->max_cursor_height)) {
+                       DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n",
+                                                        new_plane_state->crtc_w, new_plane_state->crtc_h);
+                       return -EINVAL;
+               }
+
+               if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width ||
+                       new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) {
+                       DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n",
+                                                        new_plane_state->crtc_x, new_plane_state->crtc_y);
+                       return -EINVAL;
+               }
+
                return 0;
+       }
 
        needs_reset = should_reset_plane(state, plane, old_plane_state,
                                         new_plane_state);
index 78e1c11..dcf84a6 100644 (file)
@@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
        struct mod_hdcp_display *display = &hdcp_work[link_index].display;
        struct mod_hdcp_link *link = &hdcp_work[link_index].link;
 
-       memset(display, 0, sizeof(*display));
-       memset(link, 0, sizeof(*link));
-
-       display->index = aconnector->base.index;
-
        if (config->dpms_off) {
                hdcp_remove_display(hdcp_work, link_index, aconnector);
                return;
        }
+
+       memset(display, 0, sizeof(*display));
+       memset(link, 0, sizeof(*link));
+
+       display->index = aconnector->base.index;
        display->state = MOD_HDCP_DISPLAY_ACTIVE;
 
        if (aconnector->dc_sink != NULL)
index 8489f1e..47431ca 100644 (file)
@@ -834,11 +834,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 {
        int i;
-       int count = 0;
-       struct pipe_ctx *pipe;
        PERF_TRACE();
        for (i = 0; i < MAX_PIPES; i++) {
-               pipe = &context->res_ctx.pipe_ctx[i];
+               int count = 0;
+               struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
                if (!pipe->plane_state)
                        continue;
index c3535bd..e4348e3 100644 (file)
@@ -3068,25 +3068,32 @@ validate_out:
        return out;
 }
 
-
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
-               bool fast_validate)
+/*
+ * This must be noinline to ensure anything that deals with FP registers
+ * is contained within this call; previously our compiling with hard-float
+ * would result in fp instructions being emitted outside of the boundaries
+ * of the DC_FP_START/END macros, which makes sense as the compiler has no
+ * idea about what is wrapped and what is not
+ *
+ * This is largely just a workaround to avoid breakage introduced with 5.6,
+ * ideally all fp-using code should be moved into its own file, only that
+ * should be compiled with hard-float, and all code exported from there
+ * should be strictly wrapped with DC_FP_START/END
+ */
+static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
+               struct dc_state *context, bool fast_validate)
 {
        bool voltage_supported = false;
        bool full_pstate_supported = false;
        bool dummy_pstate_supported = false;
        double p_state_latency_us;
 
-       DC_FP_START();
        p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
        context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
                dc->debug.disable_dram_clock_change_vactive_support;
 
        if (fast_validate) {
-               voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true);
-
-               DC_FP_END();
-               return voltage_supported;
+               return dcn20_validate_bandwidth_internal(dc, context, true);
        }
 
        // Best case, we support full UCLK switch latency
@@ -3115,7 +3122,15 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 
 restore_dml_state:
        context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
+       return voltage_supported;
+}
 
+bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
+               bool fast_validate)
+{
+       bool voltage_supported = false;
+       DC_FP_START();
+       voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate);
        DC_FP_END();
        return voltage_supported;
 }
index a38baa7..b8ec08e 100644 (file)
@@ -1200,7 +1200,7 @@ static void dml_rq_dlg_get_dlg_params(
        min_hratio_fact_l = 1.0;
        min_hratio_fact_c = 1.0;
 
-       if (htaps_l <= 1)
+       if (hratio_l <= 1)
                min_hratio_fact_l = 2.0;
        else if (htaps_l <= 6) {
                if ((hratio_l * 2.0) > 4.0)
@@ -1216,7 +1216,7 @@ static void dml_rq_dlg_get_dlg_params(
 
        hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
 
-       if (htaps_c <= 1)
+       if (hratio_c <= 1)
                min_hratio_fact_c = 2.0;
        else if (htaps_c <= 6) {
                if ((hratio_c * 2.0) > 4.0)
@@ -1522,8 +1522,8 @@ static void dml_rq_dlg_get_dlg_params(
 
        disp_dlg_regs->refcyc_per_vm_group_vblank   = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
        disp_dlg_regs->refcyc_per_vm_group_flip     = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
-       disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
-       disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
+       disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
+       disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
 
        // Clamp to max for now
        if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
index c34eba1..6d7bca5 100644 (file)
 #define ASSERT(expr) ASSERT_CRITICAL(expr)
 
 #else
-#define ASSERT(expr) WARN_ON(!(expr))
+#define ASSERT(expr) WARN_ON_ONCE(!(expr))
 #endif
 
 #define BREAK_TO_DEBUGGER() ASSERT(0)
index e4e5a53..8e2acb4 100644 (file)
@@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr  *hwmgr,
                if (*level & profile_mode_mask) {
                        hwmgr->saved_dpm_level = hwmgr->dpm_level;
                        hwmgr->en_umd_pstate = true;
-                       amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
-                                               AMD_IP_BLOCK_TYPE_GFX,
-                                               AMD_CG_STATE_UNGATE);
                        amdgpu_device_ip_set_powergating_state(hwmgr->adev,
                                        AMD_IP_BLOCK_TYPE_GFX,
                                        AMD_PG_STATE_UNGATE);
+                       amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
+                                               AMD_IP_BLOCK_TYPE_GFX,
+                                               AMD_CG_STATE_UNGATE);
                }
        } else {
                /* exit umd pstate, restore level, enable gfx cg*/
index e8b27fa..e770469 100644 (file)
@@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu)
        bool use_baco = !smu->is_apu &&
                ((adev->in_gpu_reset &&
                  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
-                (adev->in_runpm && amdgpu_asic_supports_baco(adev)));
+                ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
 
        ret = smu_get_smc_version(smu, NULL, &smu_version);
        if (ret) {
@@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle,
                if (*level & profile_mode_mask) {
                        smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level;
                        smu_dpm_ctx->enable_umd_pstate = true;
-                       amdgpu_device_ip_set_clockgating_state(smu->adev,
-                                                              AMD_IP_BLOCK_TYPE_GFX,
-                                                              AMD_CG_STATE_UNGATE);
                        amdgpu_device_ip_set_powergating_state(smu->adev,
                                                               AMD_IP_BLOCK_TYPE_GFX,
                                                               AMD_PG_STATE_UNGATE);
+                       amdgpu_device_ip_set_clockgating_state(smu->adev,
+                                                              AMD_IP_BLOCK_TYPE_GFX,
+                                                              AMD_CG_STATE_UNGATE);
                }
        } else {
                /* exit umd pstate, restore level, enable gfx cg*/
index 7f386ad..910108c 100644 (file)
@@ -241,8 +241,12 @@ static int drm_hdcp_request_srm(struct drm_device *drm_dev,
 
        ret = request_firmware_direct(&fw, (const char *)fw_name,
                                      drm_dev->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               *revoked_ksv_cnt = 0;
+               *revoked_ksv_list = NULL;
+               ret = 0;
                goto exit;
+       }
 
        if (fw->size && fw->data)
                ret = drm_hdcp_srm_update(fw->data, fw->size, revoked_ksv_list,
@@ -287,6 +291,8 @@ int drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs,
 
        ret = drm_hdcp_request_srm(drm_dev, &revoked_ksv_list,
                                   &revoked_ksv_cnt);
+       if (ret)
+               return ret;
 
        /* revoked_ksv_cnt will be zero when above function failed */
        for (i = 0; i < revoked_ksv_cnt; i++)
index 2e5d835..c125ca9 100644 (file)
@@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
        if (!ret)
                goto err_llb;
        else if (ret > 1) {
-               DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
-
+               DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
        }
 
        fbc->threshold = ret;
index 0cc40e7..4f96c87 100644 (file)
@@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;
 
-       GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
        if (!atomic_read(&obj->bind_count))
                return;
 
@@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-       struct drm_i915_gem_object *obj = vma->obj;
-
-       assert_object_held(obj);
-
        /* Bump the LRU to try and avoid premature eviction whilst flipping  */
-       i915_gem_object_bump_inactive_ggtt(obj);
+       i915_gem_object_bump_inactive_ggtt(vma->obj);
 
        i915_vma_unpin(vma);
 }
index 07cb83a..ca0d4f4 100644 (file)
@@ -69,7 +69,13 @@ struct intel_context {
 #define CONTEXT_NOPREEMPT              7
 
        u32 *lrc_reg_state;
-       u64 lrc_desc;
+       union {
+               struct {
+                       u32 lrca;
+                       u32 ccid;
+               };
+               u64 desc;
+       } lrc;
        u32 tag; /* cookie passed to HW to track this context on submission */
 
        /* Time on GPU as tracked by the hw. */
index b469de0..a1aa0d3 100644 (file)
@@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
        return intel_engine_has_preemption(engine);
 }
 
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
-       if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-               return false;
-
-       return intel_engine_has_semaphores(engine);
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */
index 3aa8a65..883a9b7 100644 (file)
@@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
        if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
                drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+       if (HAS_EXECLISTS(dev_priv)) {
+               drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+               drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+       }
        drm_printf(m, "\tRING_START: 0x%08x\n",
                   ENGINE_READ(engine, RING_START));
        drm_printf(m, "\tRING_HEAD:  0x%08x\n",
index 80cdde7..0be674a 100644 (file)
@@ -157,6 +157,20 @@ struct intel_engine_execlists {
        struct i915_priolist default_priolist;
 
        /**
+        * @ccid: identifier for contexts submitted to this engine
+        */
+       u32 ccid;
+
+       /**
+        * @yield: CCID at the time of the last semaphore-wait interrupt.
+        *
+        * Instead of leaving a semaphore busy-spinning on an engine, we would
+        * like to switch to another ready context, i.e. yielding the semaphore
+        * timeslice.
+        */
+       u32 yield;
+
+       /**
         * @error_interrupt: CS Master EIR
         *
         * The CS generates an interrupt when it detects an error. We capture
@@ -295,8 +309,7 @@ struct intel_engine_cs {
        u32 context_size;
        u32 mmio_base;
 
-       unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+       unsigned long context_tag;
 
        struct rb_node uabi_node;
 
@@ -483,10 +496,11 @@ struct intel_engine_cs {
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL       BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
        unsigned int flags;
 
        /*
@@ -585,6 +599,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 }
 
 static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+       if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+               return false;
+
+       return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+static inline bool
 intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 {
        return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
index f0e7fd9..0cc7dd5 100644 (file)
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
                }
        }
 
+       if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+               WRITE_ONCE(engine->execlists.yield,
+                          ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+               ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+                            engine->execlists.yield);
+               if (del_timer(&engine->execlists.timer))
+                       tasklet = true;
+       }
+
        if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
                tasklet = true;
 
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
        const u32 irqs =
                GT_CS_MASTER_ERROR_INTERRUPT |
                GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
        struct intel_uncore *uncore = gt->uncore;
        const u32 dmask = irqs << 16 | irqs;
        const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
        const u32 irqs =
                GT_CS_MASTER_ERROR_INTERRUPT |
                GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
        const u32 gt_interrupts[] = {
                irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
                irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
index 683014e..2dfaddb 100644 (file)
@@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
  * engine info, SW context ID and SW counter need to form a unique number
  * (Context ID) per lrc.
  */
-static u64
+static u32
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-       u64 desc;
+       u32 desc;
 
        desc = INTEL_LEGACY_32B_CONTEXT;
        if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
        if (IS_GEN(engine->i915, 8))
                desc |= GEN8_CTX_L3LLC_COHERENT;
 
-       desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
-       /*
-        * The following 32bits are copied into the OA reports (dword 2).
-        * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
-        * anything below.
-        */
-       if (INTEL_GEN(engine->i915) >= 11) {
-               desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
-                                                               /* bits 48-53 */
-
-               desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
-                                                               /* bits 61-63 */
-       }
-
-       return desc;
+       return i915_ggtt_offset(ce->state) | desc;
 }
 
 static inline unsigned int dword_in_page(void *addr)
@@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
        __execlists_update_reg_state(ce, engine, head);
 
        /* We've switched away, so this should be a no-op, but intent matters */
-       ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static u32 intel_context_get_runtime(const struct intel_context *ce)
@@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                execlists_check_context(ce, engine);
 
-       ce->lrc_desc &= ~GENMASK_ULL(47, 37);
        if (ce->tag) {
                /* Use a fixed tag for OA and friends */
-               ce->lrc_desc |= (u64)ce->tag << 32;
+               GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+               ce->lrc.ccid = ce->tag;
        } else {
                /* We don't need a strict matching tag, just different values */
-               ce->lrc_desc |=
-                       (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
-                       GEN11_SW_CTX_ID_SHIFT;
-               BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+               unsigned int tag = ffs(engine->context_tag);
+
+               GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+               clear_bit(tag - 1, &engine->context_tag);
+               ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+               BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
        }
 
+       ce->lrc.ccid |= engine->execlists.ccid;
+
        __intel_gt_pm_get(engine->gt);
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
        intel_engine_context_in(engine);
@@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
-                        struct intel_engine_cs * const engine)
+                        struct intel_engine_cs * const engine,
+                        unsigned int ccid)
 {
        struct intel_context * const ce = rq->context;
 
@@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
            i915_request_completed(rq))
                intel_engine_add_retire(engine, ce->timeline);
 
+       ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+       ccid &= GEN12_MAX_CONTEXT_HW_ID;
+       if (ccid < BITS_PER_LONG) {
+               GEM_BUG_ON(ccid == 0);
+               GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+               set_bit(ccid - 1, &engine->context_tag);
+       }
+
        intel_context_update_runtime(ce);
        intel_engine_context_out(engine);
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
 {
        struct intel_context * const ce = rq->context;
        struct intel_engine_cs *cur, *old;
+       u32 ccid;
 
        trace_i915_request_out(rq);
 
+       ccid = rq->context->lrc.ccid;
        old = READ_ONCE(ce->inflight);
        do
                cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
        while (!try_cmpxchg(&ce->inflight, &old, cur));
        if (!cur)
-               __execlists_schedule_out(rq, old);
+               __execlists_schedule_out(rq, old, ccid);
 
        i915_request_put(rq);
 }
@@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
 static u64 execlists_update_context(struct i915_request *rq)
 {
        struct intel_context *ce = rq->context;
-       u64 desc = ce->lrc_desc;
+       u64 desc = ce->lrc.desc;
        u32 tail, prev;
 
        /*
@@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
         */
        wmb();
 
-       ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+       ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
        return desc;
 }
 
@@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
                        struct i915_request *w =
                                container_of(p->waiter, typeof(*w), sched);
 
+                       if (p->flags & I915_DEPENDENCY_WEAK)
+                               continue;
+
                        /* Leave semaphores spinning on the other engines */
                        if (w->engine != rq->engine)
                                continue;
@@ -1754,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+              const struct i915_request *rq)
 {
        int hint;
 
@@ -1768,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
        return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+               const struct i915_request *rq)
+{
+       /*
+        * Once bitten, forever smitten!
+        *
+        * If the active context ever busy-waited on a semaphore,
+        * it will be treated as a hog until the end of its timeslice (i.e.
+        * until it is scheduled out and replaced by a new submission,
+        * possibly even its own lite-restore). The HW only sends an interrupt
+        * on the first miss, and we do know if that semaphore has been
+        * signaled, or even if it is now stuck on another semaphore. Play
+        * safe, yield if it might be stuck -- it will be given a fresh
+        * timeslice in the near future.
+        */
+       return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+                 const struct i915_request *rq)
+{
+       return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1783,8 +1815,7 @@ timeslice(const struct intel_engine_cs *engine)
        return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
        const struct intel_engine_execlists *execlists = &engine->execlists;
        const struct i915_request *rq = *execlists->active;
@@ -1946,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
                        last = NULL;
                } else if (need_timeslice(engine, last) &&
-                          timer_expired(&engine->execlists.timer)) {
+                          timeslice_expired(execlists, last)) {
                        ENGINE_TRACE(engine,
-                                    "expired last=%llx:%lld, prio=%d, hint=%d\n",
+                                    "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
                                     last->fence.context,
                                     last->fence.seqno,
                                     last->sched.attr.priority,
-                                    execlists->queue_priority_hint);
+                                    execlists->queue_priority_hint,
+                                    yesno(timeslice_yield(execlists, last)));
 
                        ring_set_paused(engine, 1);
                        defer_active(engine);
@@ -2213,6 +2245,7 @@ done:
                }
                clear_ports(port + 1, last_port - port);
 
+               WRITE_ONCE(execlists->yield, -1);
                execlists_submit_ports(engine);
                set_preempt_timeout(engine, *active);
        } else {
@@ -3043,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce,
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);
 
-       ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+       ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
        ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
        __execlists_update_reg_state(ce, engine, ce->ring->tail);
 
@@ -3072,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce)
                                 ce, ce->engine, ce->ring, true);
        __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
 
-       ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static const struct intel_context_ops execlists_context_ops = {
@@ -3541,7 +3574,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
 
        enable_error_interrupt(engine);
 
-       engine->context_tag = 0;
+       engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3753,7 +3786,7 @@ out_replay:
                     head, ce->ring->tail);
        __execlists_reset_reg_state(ce, engine);
        __execlists_update_reg_state(ce, engine, head);
-       ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+       ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
 
 unwind:
        /* Push back any incomplete requests for replay after the reset. */
@@ -4369,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
        if (!intel_vgpu_active(engine->i915)) {
                engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-               if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+               if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
                        engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+                       if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+                               engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+               }
        }
 
        if (INTEL_GEN(engine->i915) >= 12)
@@ -4449,6 +4485,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
        engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+       engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4516,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
        else
                execlists->csb_size = GEN11_CSB_ENTRIES;
 
+       if (INTEL_GEN(engine->i915) >= 11) {
+               execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+               execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+       }
+
        reset_csb_pointers(engine);
 
        /* Finally, take ownership and responsibility for cleanup! */
index 6f06ba7..f95ae15 100644 (file)
@@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce,
                        goto err;
        }
 
-       cs = intel_ring_begin(rq, 10);
+       cs = intel_ring_begin(rq, 14);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err;
@@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce,
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
+               MI_SEMAPHORE_SAD_GTE_SDD;
+       *cs++ = idx;
        *cs++ = offset;
        *cs++ = 0;
 
@@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce,
        *cs++ = offset + idx * sizeof(u32);
        *cs++ = 0;
 
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = offset;
+       *cs++ = 0;
+       *cs++ = idx + 1;
+
        intel_ring_advance(rq, cs);
 
        rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg)
 
        for_each_engine(engine, gt, id) {
                enum { A1, A2, B1 };
-               enum { X = 1, Y, Z };
+               enum { X = 1, Z, Y };
                struct i915_request *rq[3] = {};
                struct intel_context *ce;
                unsigned long heartbeat;
@@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg)
                        goto err;
                }
 
-               rq[0] = create_rewinder(ce, NULL, slot, 1);
+               rq[0] = create_rewinder(ce, NULL, slot, X);
                if (IS_ERR(rq[0])) {
                        intel_context_put(ce);
                        goto err;
                }
 
-               rq[1] = create_rewinder(ce, NULL, slot, 2);
+               rq[1] = create_rewinder(ce, NULL, slot, Y);
                intel_context_put(ce);
                if (IS_ERR(rq[1]))
                        goto err;
@@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg)
                        goto err;
                }
 
-               rq[2] = create_rewinder(ce, rq[0], slot, 3);
+               rq[2] = create_rewinder(ce, rq[0], slot, Z);
                intel_context_put(ce);
                if (IS_ERR(rq[2]))
                        goto err;
@@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg)
                GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
 
                /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
-               GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-               GEM_BUG_ON(!i915_request_is_active(rq[A2]));
-               GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
-               /* Wait for the timeslice to kick in */
-               del_timer(&engine->execlists.timer);
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-               intel_engine_flush_submission(engine);
-
+               if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+                       /* Wait for the timeslice to kick in */
+                       del_timer(&engine->execlists.timer);
+                       tasklet_hi_schedule(&engine->execlists.tasklet);
+                       intel_engine_flush_submission(engine);
+               }
                /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
                GEM_BUG_ON(!i915_request_is_active(rq[A1]));
                GEM_BUG_ON(!i915_request_is_active(rq[B1]));
index fe7778c..aa6d56e 100644 (file)
@@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
 static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
        struct intel_engine_cs *engine = rq->engine;
-       u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+       u32 ctx_desc = rq->context->lrc.ccid;
        u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
        guc_wq_item_append(guc, engine->guc_id, ctx_desc,
index a83df2f..a1696e9 100644 (file)
@@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
                                SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
                                SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
                                SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
-               vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
-                               LCPLL_PLL_ENABLE |
-                               LCPLL_PLL_LOCK;
-               vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
-
+               /*
+                * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+                *  tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+                *   TRANSCODER_A can be enabled. PORT_x depends on the input of
+                *   setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
+                *   so we fixed to DPLL0 here.
+                * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
+                */
+               vgpu_vreg_t(vgpu, DPLL_CTRL1) =
+                       DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
+               vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
+                       DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
+               vgpu_vreg_t(vgpu, LCPLL1_CTL) =
+                       LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
+               vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
+               /*
+                * Golden M/N are calculated based on:
+                *   24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+                *   DP link clk 1620 MHz and non-constant_n.
+                * TODO: calculate DP link symbol clk and stream clk m/n.
+                */
+               vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
+               vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+               vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+               vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+               vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
        }
 
        if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+                       ~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
                vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
                vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
                        ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        }
 
        if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+                       ~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
                vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
                vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
                        ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        }
 
        if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+                       ~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
+               vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+                       DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
                vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
                vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
                        ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
index cb11c31..e92ed96 100644 (file)
@@ -290,7 +290,7 @@ static void
 shadow_context_descriptor_update(struct intel_context *ce,
                                 struct intel_vgpu_workload *workload)
 {
-       u64 desc = ce->lrc_desc;
+       u64 desc = ce->lrc.desc;
 
        /*
         * Update bits 0-11 of the context descriptor which includes flags
@@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce,
        desc |= (u64)workload->ctx_desc.addressing_mode <<
                GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
-       ce->lrc_desc = desc;
+       ce->lrc.desc = desc;
 }
 
 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
@@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
                for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
                        struct i915_page_directory * const pd =
                                i915_pd_entry(ppgtt->pd, i);
-
+                       /* skip now as current i915 ppgtt alloc won't allocate
+                          top level pdp for non 4-level table, won't impact
+                          shadow ppgtt. */
+                       if (!pd)
+                               break;
                        px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
                }
        }
index 4518b9b..02ad1ac 100644 (file)
@@ -128,6 +128,13 @@ search_again:
        active = NULL;
        INIT_LIST_HEAD(&eviction_list);
        list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+               if (vma == active) { /* now seen this vma twice */
+                       if (flags & PIN_NONBLOCK)
+                               break;
+
+                       active = ERR_PTR(-EAGAIN);
+               }
+
                /*
                 * We keep this list in a rough least-recently scanned order
                 * of active elements (inactive elements are cheap to reap).
@@ -143,21 +150,12 @@ search_again:
                 * To notice when we complete one full cycle, we record the
                 * first active element seen, before moving it to the tail.
                 */
-               if (i915_vma_is_active(vma)) {
-                       if (vma == active) {
-                               if (flags & PIN_NONBLOCK)
-                                       break;
-
-                               active = ERR_PTR(-EAGAIN);
-                       }
-
-                       if (active != ERR_PTR(-EAGAIN)) {
-                               if (!active)
-                                       active = vma;
+               if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+                       if (!active)
+                               active = vma;
 
-                               list_move_tail(&vma->vm_link, &vm->bound_list);
-                               continue;
-                       }
+                       list_move_tail(&vma->vm_link, &vm->bound_list);
+                       continue;
                }
 
                if (mark_free(&scan, vma, flags, &eviction_list))
index 2a4cd0b..5c8e51d 100644 (file)
@@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
 static void record_request(const struct i915_request *request,
                           struct i915_request_coredump *erq)
 {
-       const struct i915_gem_context *ctx;
-
        erq->flags = request->fence.flags;
        erq->context = request->fence.context;
        erq->seqno = request->fence.seqno;
@@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request,
 
        erq->pid = 0;
        rcu_read_lock();
-       ctx = rcu_dereference(request->context->gem_context);
-       if (ctx)
-               erq->pid = pid_nr(ctx->pid);
+       if (!intel_context_is_closed(request->context)) {
+               const struct i915_gem_context *ctx;
+
+               ctx = rcu_dereference(request->context->gem_context);
+               if (ctx)
+                       erq->pid = pid_nr(ctx->pid);
+       }
        rcu_read_unlock();
 }
 
index d91557d..8a2b838 100644 (file)
@@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
        u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) |
                GEN8_PIPE_CDCLK_CRC_DONE;
        u32 de_pipe_enables;
-       u32 de_port_masked = GEN8_AUX_CHANNEL_A;
+       u32 de_port_masked = gen8_de_port_aux_mask(dev_priv);
        u32 de_port_enables;
        u32 de_misc_masked = GEN8_DE_EDP_PSR;
        enum pipe pipe;
@@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
        if (INTEL_GEN(dev_priv) <= 10)
                de_misc_masked |= GEN8_DE_MISC_GSE;
 
-       if (INTEL_GEN(dev_priv) >= 9) {
-               de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
-                                 GEN9_AUX_CHANNEL_D;
-               if (IS_GEN9_LP(dev_priv))
-                       de_port_masked |= BXT_DE_PORT_GMBUS;
-       }
-
-       if (INTEL_GEN(dev_priv) >= 11)
-               de_port_masked |= ICL_AUX_CHANNEL_E;
-
-       if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11)
-               de_port_masked |= CNL_AUX_CHANNEL_F;
+       if (IS_GEN9_LP(dev_priv))
+               de_port_masked |= BXT_DE_PORT_GMBUS;
 
        de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
                                           GEN8_PIPE_FIFO_UNDERRUN;
index 66a46e4..cf2c01f 100644 (file)
@@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
                         * dropped by GuC. They won't be part of the context
                         * ID in the OA reports, so squash those lower bits.
                         */
-                       stream->specific_ctx_id =
-                               lower_32_bits(ce->lrc_desc) >> 12;
+                       stream->specific_ctx_id = ce->lrc.lrca >> 12;
 
                        /*
                         * GuC uses the top bit to signal proxy submission, so
@@ -1328,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
                        ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
                /*
                 * Pick an unused context id
-                * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+                * 0 - BITS_PER_LONG are used by other contexts
                 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
                 */
                stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-               BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
                break;
        }
 
index e0c6021..6e12000 100644 (file)
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT              (1 << 15)
 #define GT_BSD_USER_INTERRUPT                  (1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT            REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT            (1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT    (1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT     (1 <<  4)
index c0df71d..e2b78db 100644 (file)
@@ -1017,11 +1017,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
        GEM_BUG_ON(to == from);
        GEM_BUG_ON(to->timeline == from->timeline);
 
-       if (i915_request_completed(from))
+       if (i915_request_completed(from)) {
+               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
                return 0;
+       }
 
        if (to->engine->schedule) {
-               ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+               ret = i915_sched_node_add_dependency(&to->sched,
+                                                    &from->sched,
+                                                    I915_DEPENDENCY_EXTERNAL);
                if (ret < 0)
                        return ret;
        }
@@ -1183,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to,
 
        /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+               err = i915_sched_node_add_dependency(&to->sched,
+                                                    &from->sched,
+                                                    I915_DEPENDENCY_WEAK);
                if (err < 0)
                        return err;
        }
index 68b06a7..f0a9e89 100644 (file)
@@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 }
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-                                  struct i915_sched_node *signal)
+                                  struct i915_sched_node *signal,
+                                  unsigned long flags)
 {
        struct i915_dependency *dep;
 
@@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
                return -ENOMEM;
 
        if (!__i915_sched_node_add_dependency(node, signal, dep,
-                                             I915_DEPENDENCY_EXTERNAL |
-                                             I915_DEPENDENCY_ALLOC))
+                                             flags | I915_DEPENDENCY_ALLOC))
                i915_dependency_free(dep);
 
        return 0;
index d1dc4ef..6f0bf00 100644 (file)
@@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                                      unsigned long flags);
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-                                  struct i915_sched_node *signal);
+                                  struct i915_sched_node *signal,
+                                  unsigned long flags);
 
 void i915_sched_node_fini(struct i915_sched_node *node);
 
index d18e705..7186875 100644 (file)
@@ -78,6 +78,7 @@ struct i915_dependency {
        unsigned long flags;
 #define I915_DEPENDENCY_ALLOC          BIT(0)
 #define I915_DEPENDENCY_EXTERNAL       BIT(1)
+#define I915_DEPENDENCY_WEAK           BIT(2)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
index 82e3bc2..2cd7a7e 100644 (file)
@@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma)
 
        lockdep_assert_held(&vma->vm->mutex);
 
-       /*
-        * First wait upon any activity as retiring the request may
-        * have side-effects such as unpinning or even unbinding this vma.
-        *
-        * XXX Actually waiting under the vm->mutex is a hinderance and
-        * should be pipelined wherever possible. In cases where that is
-        * unavoidable, we should lift the wait to before the mutex.
-        */
-       ret = i915_vma_sync(vma);
-       if (ret)
-               return ret;
-
        if (i915_vma_is_pinned(vma)) {
                vma_print_allocator(vma, "is pinned");
                return -EAGAIN;
@@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma)
        if (!drm_mm_node_allocated(&vma->node))
                return 0;
 
-       if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-               /* XXX not always required: nop_clear_range */
-               wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
        /* Optimistic wait before taking the mutex */
        err = i915_vma_sync(vma);
        if (err)
                goto out_rpm;
 
+       if (i915_vma_is_pinned(vma)) {
+               vma_print_allocator(vma, "is pinned");
+               return -EAGAIN;
+       }
+
+       if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+               /* XXX not always required: nop_clear_range */
+               wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
        err = mutex_lock_interruptible(&vm->mutex);
        if (err)
                goto out_rpm;
index 8375054..a52986a 100644 (file)
@@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
         * WaIncreaseLatencyIPCEnabled: kbl,cfl
         * Display WA #1141: kbl,cfl
         */
-       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
+       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
            dev_priv->ipc_enabled)
                latency += 4;
 
index 58b5f40..af89c7f 100644 (file)
@@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
                }
 
                nc = 0;
-               for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
+               for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
                        for (; nc < num_ctx; nc++) {
                                ctx = mock_context(i915, "mock");
                                if (!ctx)
index 9dfe7cb..1754c05 100644 (file)
@@ -843,6 +843,7 @@ static const struct of_device_id ingenic_drm_of_match[] = {
        { .compatible = "ingenic,jz4770-lcd", .data = &jz4770_soc_info },
        { /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, ingenic_drm_of_match);
 
 static struct platform_driver ingenic_drm_driver = {
        .driver = {
index b5f5eb7..8c2e1b4 100644 (file)
@@ -412,9 +412,7 @@ static int __maybe_unused meson_drv_pm_resume(struct device *dev)
        if (priv->afbcd.ops)
                priv->afbcd.ops->init(priv);
 
-       drm_mode_config_helper_resume(priv->drm);
-
-       return 0;
+       return drm_mode_config_helper_resume(priv->drm);
 }
 
 static int compare_of(struct device *dev, void *data)
index 0599397..3eb89f1 100644 (file)
@@ -717,7 +717,7 @@ static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder)
        struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
        struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder);
        struct mipi_dsi_device *device = dsi->device;
-       union phy_configure_opts opts = { };
+       union phy_configure_opts opts = { };
        struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy;
        u16 delay;
        int err;
index bd26802..583cd6e 100644 (file)
@@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 
 static bool host1x_drm_wants_iommu(struct host1x_device *dev)
 {
+       struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
        struct iommu_domain *domain;
 
        /*
@@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
         * sufficient and whether or not the host1x is attached to an IOMMU
         * doesn't matter.
         */
-       if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
+       if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
                return true;
 
        return domain != NULL;
index c1824bd..7879ff5 100644 (file)
@@ -221,6 +221,7 @@ struct virtio_gpu_fpriv {
 /* virtio_ioctl.c */
 #define DRM_VIRTIO_NUM_IOCTLS 10
 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
+void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file);
 
 /* virtio_kms.c */
 int virtio_gpu_init(struct drm_device *dev);
index 0d6152c..f0d5a89 100644 (file)
@@ -39,6 +39,9 @@ int virtio_gpu_gem_create(struct drm_file *file,
        int ret;
        u32 handle;
 
+       if (vgdev->has_virgl_3d)
+               virtio_gpu_create_context(dev, file);
+
        ret = virtio_gpu_object_create(vgdev, params, &obj, NULL);
        if (ret < 0)
                return ret;
index 3f60bf2..512daff 100644 (file)
@@ -34,8 +34,7 @@
 
 #include "virtgpu_drv.h"
 
-static void virtio_gpu_create_context(struct drm_device *dev,
-                                     struct drm_file *file)
+void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file)
 {
        struct virtio_gpu_device *vgdev = dev->dev_private;
        struct virtio_gpu_fpriv *vfpriv = file->driver_priv;
index 388bcc2..d24344e 100644 (file)
@@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
        }
 }
 
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+       /*
+        * If we support addressing a maximum of 32 bits of physical memory
+        * and if the host1x firewall is enabled, there's no need to enable
+        * IOMMU support. This can happen for example on Tegra20, Tegra30
+        * and Tegra114.
+        *
+        * Tegra124 and later can address up to 34 bits of physical memory and
+        * many platforms come equipped with more than 2 GiB of system memory,
+        * which requires crossing the 4 GiB boundary. But there's a catch: on
+        * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+        * only address up to 32 bits of memory in GATHER opcodes, which means
+        * that command buffers need to either be in the first 2 GiB of system
+        * memory (which could quickly lead to memory exhaustion), or command
+        * buffers need to be treated differently from other buffers (which is
+        * not possible with the current ABI).
+        *
+        * A third option is to use the IOMMU in these cases to make sure all
+        * buffers will be mapped into a 32-bit IOVA space that host1x can
+        * address. This allows all of the system memory to be used and works
+        * within the limitations of the host1x on these SoCs.
+        *
+        * In summary, default to enable IOMMU on Tegra124 and later. For any
+        * of the earlier SoCs, only use the IOMMU for additional safety when
+        * the host1x firewall is disabled.
+        */
+       if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+               if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+                       return false;
+       }
+
+       return true;
+}
+
 static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
 {
        struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
        int err;
 
        /*
-        * If the host1x firewall is enabled, there's no need to enable IOMMU
-        * support. Similarly, if host1x is already attached to an IOMMU (via
-        * the DMA API), don't try to attach again.
+        * We may not always want to enable IOMMU support (for example if the
+        * host1x firewall is already enabled and we don't support addressing
+        * more than 32 bits of physical memory), so check for that first.
+        *
+        * Similarly, if host1x is already attached to an IOMMU (via the DMA
+        * API), don't try to attach again.
         */
-       if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+       if (!host1x_wants_iommu(host) || domain)
                return domain;
 
        host->group = iommu_group_get(host->dev);
@@ -502,6 +540,19 @@ static void __exit tegra_host1x_exit(void)
 }
 module_exit(tegra_host1x_exit);
 
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+       return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
 MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
 MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
 MODULE_DESCRIPTION("Host1x driver for Tegra products");
index 53b517d..4af2fc3 100644 (file)
@@ -244,9 +244,9 @@ static ssize_t da9052_tsi_show(struct device *dev,
        int channel = to_sensor_dev_attr(devattr)->index;
        int ret;
 
-       mutex_lock(&hwmon->hwmon_lock);
+       mutex_lock(&hwmon->da9052->auxadc_lock);
        ret = __da9052_read_tsi(dev, channel);
-       mutex_unlock(&hwmon->hwmon_lock);
+       mutex_unlock(&hwmon->da9052->auxadc_lock);
 
        if (ret < 0)
                return ret;
index 9179460..0d4f3d9 100644 (file)
@@ -346,7 +346,7 @@ static int drivetemp_identify_sata(struct drivetemp_data *st)
        st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]);
 
        if (!have_sct_data_table)
-               goto skip_sct;
+               goto skip_sct_data;
 
        /* Request and read temperature history table */
        memset(buf, '\0', sizeof(st->smartdata));
index 1f5743d..a7eb10d 100644 (file)
@@ -41,6 +41,7 @@
 #define FANCTL_MAX             4       /* Counted from 1 */
 #define TCPU_MAX               8       /* Counted from 1 */
 #define TEMP_MAX               4       /* Counted from 1 */
+#define SMI_STS_MAX            10      /* Counted from 1 */
 
 #define VT_ADC_CTRL0_REG       0x20    /* Bank 0 */
 #define VT_ADC_CTRL1_REG       0x21    /* Bank 0 */
@@ -361,6 +362,7 @@ static int nct7904_read_temp(struct device *dev, u32 attr, int channel,
        struct nct7904_data *data = dev_get_drvdata(dev);
        int ret, temp;
        unsigned int reg1, reg2, reg3;
+       s8 temps;
 
        switch (attr) {
        case hwmon_temp_input:
@@ -466,7 +468,8 @@ static int nct7904_read_temp(struct device *dev, u32 attr, int channel,
 
        if (ret < 0)
                return ret;
-       *val = ret * 1000;
+       temps = ret;
+       *val = temps * 1000;
        return 0;
 }
 
@@ -1009,6 +1012,13 @@ static int nct7904_probe(struct i2c_client *client,
                data->fan_mode[i] = ret;
        }
 
+       /* Read all of SMI status register to clear alarms */
+       for (i = 0; i < SMI_STS_MAX; i++) {
+               ret = nct7904_read_reg(data, BANK_0, SMI_STS1_REG + i);
+               if (ret < 0)
+                       return ret;
+       }
+
        hwmon_dev =
                devm_hwmon_device_register_with_info(dev, client->name, data,
                                                     &nct7904_chip_info, NULL);
index 717b798..a670209 100644 (file)
@@ -1553,8 +1553,11 @@ int ib_cache_setup_one(struct ib_device *device)
        if (err)
                return err;
 
-       rdma_for_each_port (device, p)
-               ib_cache_update(device, p, true);
+       rdma_for_each_port (device, p) {
+               err = ib_cache_update(device, p, true);
+               if (err)
+                       return err;
+       }
 
        return 0;
 }
index 9eec26d..e16105b 100644 (file)
@@ -1292,11 +1292,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
        has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
 
        ret = fill_func(msg, has_cap_net_admin, res, port);
-
-       rdma_restrack_put(res);
        if (ret)
                goto err_free;
 
+       rdma_restrack_put(res);
        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
index 177333d..bf8e149 100644 (file)
@@ -459,7 +459,8 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
        struct ib_uobject *uobj;
        struct file *filp;
 
-       if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release))
+       if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
+                   fd_type->fops->release != &uverbs_async_event_release))
                return ERR_PTR(-EINVAL);
 
        new_fd = get_unused_fd_flags(O_CLOEXEC);
index 7df7198..3d189c7 100644 (file)
@@ -219,6 +219,7 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
 void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
 void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
 void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+int uverbs_async_event_release(struct inode *inode, struct file *filp);
 
 int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
 int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
@@ -227,6 +228,9 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
                           struct ib_ucq_object *uobj);
 void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
 void ib_uverbs_release_file(struct kref *ref);
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+                            __u64 element, __u64 event,
+                            struct list_head *obj_list, u32 *counter);
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
index 17fc25d..1bab8de 100644 (file)
@@ -346,7 +346,7 @@ const struct file_operations uverbs_async_event_fops = {
        .owner   = THIS_MODULE,
        .read    = ib_uverbs_async_event_read,
        .poll    = ib_uverbs_async_event_poll,
-       .release = uverbs_uobject_fd_release,
+       .release = uverbs_async_event_release,
        .fasync  = ib_uverbs_async_event_fasync,
        .llseek  = no_llseek,
 };
@@ -386,10 +386,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
        kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
 }
 
-static void
-ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
-                       __u64 element, __u64 event, struct list_head *obj_list,
-                       u32 *counter)
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+                            __u64 element, __u64 event,
+                            struct list_head *obj_list, u32 *counter)
 {
        struct ib_uverbs_event *entry;
        unsigned long flags;
@@ -1187,9 +1186,6 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
                 */
                mutex_unlock(&uverbs_dev->lists_mutex);
 
-               ib_uverbs_async_handler(READ_ONCE(file->async_file), 0,
-                                       IB_EVENT_DEVICE_FATAL, NULL, NULL);
-
                uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
                kref_put(&file->ref, ib_uverbs_release_file);
 
index 82ec080..61899ea 100644 (file)
@@ -26,10 +26,38 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
                container_of(uobj, struct ib_uverbs_async_event_file, uobj);
 
        ib_unregister_event_handler(&event_file->event_handler);
-       ib_uverbs_free_event_queue(&event_file->ev_queue);
+
+       if (why == RDMA_REMOVE_DRIVER_REMOVE)
+               ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL,
+                                       NULL, NULL);
        return 0;
 }
 
+int uverbs_async_event_release(struct inode *inode, struct file *filp)
+{
+       struct ib_uverbs_async_event_file *event_file;
+       struct ib_uobject *uobj = filp->private_data;
+       int ret;
+
+       if (!uobj)
+               return uverbs_uobject_fd_release(inode, filp);
+
+       event_file =
+               container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+       /*
+        * The async event FD has to deliver IB_EVENT_DEVICE_FATAL even after
+        * disassociation, so cleaning the event list must only happen after
+        * release. The user knows it has reached the end of the event stream
+        * when it sees IB_EVENT_DEVICE_FATAL.
+        */
+       uverbs_uobject_get(uobj);
+       ret = uverbs_uobject_fd_release(inode, filp);
+       ib_uverbs_free_event_queue(&event_file->ev_queue);
+       uverbs_uobject_put(uobj);
+       return ret;
+}
+
 DECLARE_UVERBS_NAMED_METHOD(
        UVERBS_METHOD_ASYNC_EVENT_ALLOC,
        UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
index d69dece..30e08bc 100644 (file)
@@ -2891,8 +2891,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
                        srqidx = ABORT_RSS_SRQIDX_G(
                                        be32_to_cpu(req->srqidx_status));
                        if (srqidx) {
-                               complete_cached_srq_buffers(ep,
-                                                           req->srqidx_status);
+                               complete_cached_srq_buffers(ep, srqidx);
                        } else {
                                /* Hold ep ref until finish_peer_abort() */
                                c4iw_get_ep(&ep->com);
@@ -3878,8 +3877,8 @@ static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
                return 0;
        }
 
-       ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
-                       TCB_RQ_START_S);
+       ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
+                                         TCB_RQ_START_S);
 cleanup:
        pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
 
index 13e4203..a92346e 100644 (file)
@@ -589,10 +589,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 
        set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
        pq->state = SDMA_PKT_Q_ACTIVE;
-       /* Send the first N packets in the request to buy us some time */
-       ret = user_sdma_send_pkts(req, pcount);
-       if (unlikely(ret < 0 && ret != -EBUSY))
-               goto free_req;
 
        /*
         * This is a somewhat blocking send implementation.
index bb78d32..fa7a5ff 100644 (file)
@@ -1987,7 +1987,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
        struct rtable *rt;
        struct neighbour *neigh;
        int rc = arpindex;
-       struct net_device *netdev = iwdev->netdev;
        __be32 dst_ipaddr = htonl(dst_ip);
        __be32 src_ipaddr = htonl(src_ip);
 
@@ -1997,9 +1996,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
                return rc;
        }
 
-       if (netif_is_bond_slave(netdev))
-               netdev = netdev_master_upper_dev_get(netdev);
-
        neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
 
        rcu_read_lock();
@@ -2065,7 +2061,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
 {
        struct neighbour *neigh;
        int rc = arpindex;
-       struct net_device *netdev = iwdev->netdev;
        struct dst_entry *dst;
        struct sockaddr_in6 dst_addr;
        struct sockaddr_in6 src_addr;
@@ -2086,9 +2081,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
                return rc;
        }
 
-       if (netif_is_bond_slave(netdev))
-               netdev = netdev_master_upper_dev_get(netdev);
-
        neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
 
        rcu_read_lock();
index 55a1fbf..ae8b97c 100644 (file)
@@ -534,7 +534,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
        int arp_index;
 
        arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
-       if (arp_index == -1)
+       if (arp_index < 0)
                return;
        cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
        if (!cqp_request)
index 2f9f789..cf51e3c 100644 (file)
@@ -2891,6 +2891,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        int send_size;
        int header_size;
        int spc;
+       int err;
        int i;
 
        if (wr->wr.opcode != IB_WR_SEND)
@@ -2925,7 +2926,9 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 
        sqp->ud_header.lrh.virtual_lane    = 0;
        sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
-       ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+       err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+       if (err)
+               return err;
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
                sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
@@ -3212,9 +3215,14 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
        }
        sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+               err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
+                                        &pkey);
        else
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
+               err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index,
+                                        &pkey);
+       if (err)
+               return err;
+
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
index 48f4812..6a413d7 100644 (file)
@@ -151,7 +151,7 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, u32 size,
 
        ip = kmalloc(sizeof(*ip), GFP_KERNEL);
        if (!ip)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        size = PAGE_ALIGN(size);
 
index ff92704..245040c 100644 (file)
@@ -45,12 +45,15 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
 
        if (outbuf) {
                ip = rxe_create_mmap_info(rxe, buf_size, udata, buf);
-               if (!ip)
+               if (IS_ERR(ip)) {
+                       err = PTR_ERR(ip);
                        goto err1;
+               }
 
-               err = copy_to_user(outbuf, &ip->info, sizeof(ip->info));
-               if (err)
+               if (copy_to_user(outbuf, &ip->info, sizeof(ip->info))) {
+                       err = -EFAULT;
                        goto err2;
+               }
 
                spin_lock_bh(&rxe->pending_lock);
                list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
@@ -64,7 +67,7 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
 err2:
        kfree(ip);
 err1:
-       return -EINVAL;
+       return err;
 }
 
 inline void rxe_queue_reset(struct rxe_queue *q)
index a03c6d6..96fb9ff 100644 (file)
@@ -78,7 +78,7 @@ static struct qcom_icc_node *sdm845_osm_l3_nodes[] = {
        [SLAVE_OSM_L3] = &sdm845_osm_l3,
 };
 
-const static struct qcom_icc_desc sdm845_icc_osm_l3 = {
+static const struct qcom_icc_desc sdm845_icc_osm_l3 = {
        .nodes = sdm845_osm_l3_nodes,
        .num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes),
 };
@@ -91,7 +91,7 @@ static struct qcom_icc_node *sc7180_osm_l3_nodes[] = {
        [SLAVE_OSM_L3] = &sc7180_osm_l3,
 };
 
-const static struct qcom_icc_desc sc7180_icc_osm_l3 = {
+static const struct qcom_icc_desc sc7180_icc_osm_l3 = {
        .nodes = sc7180_osm_l3_nodes,
        .num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes),
 };
index b013b80..f6c7b96 100644 (file)
@@ -192,7 +192,7 @@ static struct qcom_icc_node *aggre1_noc_nodes[] = {
        [SLAVE_ANOC_PCIE_A1NOC_SNOC] = &qns_pcie_a1noc_snoc,
 };
 
-const static struct qcom_icc_desc sdm845_aggre1_noc = {
+static const struct qcom_icc_desc sdm845_aggre1_noc = {
        .nodes = aggre1_noc_nodes,
        .num_nodes = ARRAY_SIZE(aggre1_noc_nodes),
        .bcms = aggre1_noc_bcms,
@@ -220,7 +220,7 @@ static struct qcom_icc_node *aggre2_noc_nodes[] = {
        [SLAVE_SERVICE_A2NOC] = &srvc_aggre2_noc,
 };
 
-const static struct qcom_icc_desc sdm845_aggre2_noc = {
+static const struct qcom_icc_desc sdm845_aggre2_noc = {
        .nodes = aggre2_noc_nodes,
        .num_nodes = ARRAY_SIZE(aggre2_noc_nodes),
        .bcms = aggre2_noc_bcms,
@@ -281,7 +281,7 @@ static struct qcom_icc_node *config_noc_nodes[] = {
        [SLAVE_SERVICE_CNOC] = &srvc_cnoc,
 };
 
-const static struct qcom_icc_desc sdm845_config_noc = {
+static const struct qcom_icc_desc sdm845_config_noc = {
        .nodes = config_noc_nodes,
        .num_nodes = ARRAY_SIZE(config_noc_nodes),
        .bcms = config_noc_bcms,
@@ -297,7 +297,7 @@ static struct qcom_icc_node *dc_noc_nodes[] = {
        [SLAVE_MEM_NOC_CFG] = &qhs_memnoc,
 };
 
-const static struct qcom_icc_desc sdm845_dc_noc = {
+static const struct qcom_icc_desc sdm845_dc_noc = {
        .nodes = dc_noc_nodes,
        .num_nodes = ARRAY_SIZE(dc_noc_nodes),
        .bcms = dc_noc_bcms,
@@ -315,7 +315,7 @@ static struct qcom_icc_node *gladiator_noc_nodes[] = {
        [SLAVE_SERVICE_GNOC] = &srvc_gnoc,
 };
 
-const static struct qcom_icc_desc sdm845_gladiator_noc = {
+static const struct qcom_icc_desc sdm845_gladiator_noc = {
        .nodes = gladiator_noc_nodes,
        .num_nodes = ARRAY_SIZE(gladiator_noc_nodes),
        .bcms = gladiator_noc_bcms,
@@ -350,7 +350,7 @@ static struct qcom_icc_node *mem_noc_nodes[] = {
        [SLAVE_EBI1] = &ebi,
 };
 
-const static struct qcom_icc_desc sdm845_mem_noc = {
+static const struct qcom_icc_desc sdm845_mem_noc = {
        .nodes = mem_noc_nodes,
        .num_nodes = ARRAY_SIZE(mem_noc_nodes),
        .bcms = mem_noc_bcms,
@@ -384,7 +384,7 @@ static struct qcom_icc_node *mmss_noc_nodes[] = {
        [SLAVE_CAMNOC_UNCOMP] = &qns_camnoc_uncomp,
 };
 
-const static struct qcom_icc_desc sdm845_mmss_noc = {
+static const struct qcom_icc_desc sdm845_mmss_noc = {
        .nodes = mmss_noc_nodes,
        .num_nodes = ARRAY_SIZE(mmss_noc_nodes),
        .bcms = mmss_noc_bcms,
@@ -430,7 +430,7 @@ static struct qcom_icc_node *system_noc_nodes[] = {
        [SLAVE_TCU] = &xs_sys_tcu_cfg,
 };
 
-const static struct qcom_icc_desc sdm845_system_noc = {
+static const struct qcom_icc_desc sdm845_system_noc = {
        .nodes = system_noc_nodes,
        .num_nodes = ARRAY_SIZE(system_noc_nodes),
        .bcms = system_noc_bcms,
index 20cce36..1dc3718 100644 (file)
@@ -101,6 +101,8 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
+static void update_and_flush_device_table(struct protection_domain *domain,
+                                         struct domain_pgtable *pgtable);
 
 /****************************************************************************
  *
@@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
        return container_of(dom, struct protection_domain, domain);
 }
 
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+                                        struct domain_pgtable *pgtable)
+{
+       u64 pt_root = atomic64_read(&domain->pt_root);
+
+       pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+       pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode)
+{
+       u64 pt_root;
+
+       /* lowest 3 bits encode pgtable mode */
+       pt_root = mode & 7;
+       pt_root |= (u64)root;
+
+       return pt_root;
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
        struct iommu_dev_data *dev_data;
@@ -1397,13 +1419,18 @@ static struct page *free_sub_pt(unsigned long root, int mode,
 
 static void free_pagetable(struct protection_domain *domain)
 {
-       unsigned long root = (unsigned long)domain->pt_root;
+       struct domain_pgtable pgtable;
        struct page *freelist = NULL;
+       unsigned long root;
 
-       BUG_ON(domain->mode < PAGE_MODE_NONE ||
-              domain->mode > PAGE_MODE_6_LEVEL);
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       atomic64_set(&domain->pt_root, 0);
 
-       freelist = free_sub_pt(root, domain->mode, freelist);
+       BUG_ON(pgtable.mode < PAGE_MODE_NONE ||
+              pgtable.mode > PAGE_MODE_6_LEVEL);
+
+       root = (unsigned long)pgtable.root;
+       freelist = free_sub_pt(root, pgtable.mode, freelist);
 
        free_page_list(freelist);
 }
@@ -1417,24 +1444,39 @@ static bool increase_address_space(struct protection_domain *domain,
                                   unsigned long address,
                                   gfp_t gfp)
 {
+       struct domain_pgtable pgtable;
        unsigned long flags;
-       bool ret = false;
-       u64 *pte;
+       bool ret = true;
+       u64 *pte, root;
 
        spin_lock_irqsave(&domain->lock, flags);
 
-       if (address <= PM_LEVEL_SIZE(domain->mode) ||
-           WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       if (address <= PM_LEVEL_SIZE(pgtable.mode))
+               goto out;
+
+       ret = false;
+       if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
                goto out;
 
        pte = (void *)get_zeroed_page(gfp);
        if (!pte)
                goto out;
 
-       *pte             = PM_LEVEL_PDE(domain->mode,
-                                       iommu_virt_to_phys(domain->pt_root));
-       domain->pt_root  = pte;
-       domain->mode    += 1;
+       *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+       pgtable.root  = pte;
+       pgtable.mode += 1;
+       update_and_flush_device_table(domain, &pgtable);
+       domain_flush_complete(domain);
+
+       /*
+        * Device Table needs to be updated and flushed before the new root can
+        * be published.
+        */
+       root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode);
+       atomic64_set(&domain->pt_root, root);
 
        ret = true;
 
@@ -1451,16 +1493,29 @@ static u64 *alloc_pte(struct protection_domain *domain,
                      gfp_t gfp,
                      bool *updated)
 {
+       struct domain_pgtable pgtable;
        int level, end_lvl;
        u64 *pte, *page;
 
        BUG_ON(!is_power_of_2(page_size));
 
-       while (address > PM_LEVEL_SIZE(domain->mode))
-               *updated = increase_address_space(domain, address, gfp) || *updated;
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+               /*
+                * Return an error if there is no memory to update the
+                * page-table.
+                */
+               if (!increase_address_space(domain, address, gfp))
+                       return NULL;
+
+               /* Read new values to check if update was successful */
+               amd_iommu_domain_get_pgtable(domain, &pgtable);
+       }
+
 
-       level   = domain->mode - 1;
-       pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+       level   = pgtable.mode - 1;
+       pte     = &pgtable.root[PM_LEVEL_INDEX(level, address)];
        address = PAGE_SIZE_ALIGN(address, page_size);
        end_lvl = PAGE_SIZE_LEVEL(page_size);
 
@@ -1536,16 +1591,19 @@ static u64 *fetch_pte(struct protection_domain *domain,
                      unsigned long address,
                      unsigned long *page_size)
 {
+       struct domain_pgtable pgtable;
        int level;
        u64 *pte;
 
        *page_size = 0;
 
-       if (address > PM_LEVEL_SIZE(domain->mode))
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       if (address > PM_LEVEL_SIZE(pgtable.mode))
                return NULL;
 
-       level      =  domain->mode - 1;
-       pte        = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+       level      =  pgtable.mode - 1;
+       pte        = &pgtable.root[PM_LEVEL_INDEX(level, address)];
        *page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
        while (level > 0) {
@@ -1660,7 +1718,13 @@ out:
                unsigned long flags;
 
                spin_lock_irqsave(&dom->lock, flags);
-               update_domain(dom);
+               /*
+                * Flush domain TLB(s) and wait for completion. Any Device-Table
+                * Updates and flushing already happened in
+                * increase_address_space().
+                */
+               domain_flush_tlb_pde(dom);
+               domain_flush_complete(dom);
                spin_unlock_irqrestore(&dom->lock, flags);
        }
 
@@ -1806,6 +1870,7 @@ static void dma_ops_domain_free(struct protection_domain *domain)
 static struct protection_domain *dma_ops_domain_alloc(void)
 {
        struct protection_domain *domain;
+       u64 *pt_root, root;
 
        domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
        if (!domain)
@@ -1814,12 +1879,14 @@ static struct protection_domain *dma_ops_domain_alloc(void)
        if (protection_domain_init(domain))
                goto free_domain;
 
-       domain->mode = PAGE_MODE_3_LEVEL;
-       domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
-       domain->flags = PD_DMA_OPS_MASK;
-       if (!domain->pt_root)
+       pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!pt_root)
                goto free_domain;
 
+       root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+       atomic64_set(&domain->pt_root, root);
+       domain->flags = PD_DMA_OPS_MASK;
+
        if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
                goto free_domain;
 
@@ -1841,16 +1908,17 @@ static bool dma_ops_domain(struct protection_domain *domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
+                         struct domain_pgtable *pgtable,
                          bool ats, bool ppr)
 {
        u64 pte_root = 0;
        u64 flags = 0;
        u32 old_domid;
 
-       if (domain->mode != PAGE_MODE_NONE)
-               pte_root = iommu_virt_to_phys(domain->pt_root);
+       if (pgtable->mode != PAGE_MODE_NONE)
+               pte_root = iommu_virt_to_phys(pgtable->root);
 
-       pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+       pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
                    << DEV_ENTRY_MODE_SHIFT;
        pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1923,6 +1991,7 @@ static void clear_dte_entry(u16 devid)
 static void do_attach(struct iommu_dev_data *dev_data,
                      struct protection_domain *domain)
 {
+       struct domain_pgtable pgtable;
        struct amd_iommu *iommu;
        bool ats;
 
@@ -1938,7 +2007,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
        domain->dev_cnt                 += 1;
 
        /* Update device table */
-       set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       set_dte_entry(dev_data->devid, domain, &pgtable,
+                     ats, dev_data->iommu_v2);
        clone_aliases(dev_data->pdev);
 
        device_flush_dte(dev_data);
@@ -2249,23 +2320,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
  *
  *****************************************************************************/
 
-static void update_device_table(struct protection_domain *domain)
+static void update_device_table(struct protection_domain *domain,
+                               struct domain_pgtable *pgtable)
 {
        struct iommu_dev_data *dev_data;
 
        list_for_each_entry(dev_data, &domain->dev_list, list) {
-               set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
-                             dev_data->iommu_v2);
+               set_dte_entry(dev_data->devid, domain, pgtable,
+                             dev_data->ats.enabled, dev_data->iommu_v2);
                clone_aliases(dev_data->pdev);
        }
 }
 
+static void update_and_flush_device_table(struct protection_domain *domain,
+                                         struct domain_pgtable *pgtable)
+{
+       update_device_table(domain, pgtable);
+       domain_flush_devices(domain);
+}
+
 static void update_domain(struct protection_domain *domain)
 {
-       update_device_table(domain);
+       struct domain_pgtable pgtable;
 
-       domain_flush_devices(domain);
+       /* Update device table */
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       update_and_flush_device_table(domain, &pgtable);
+
+       /* Flush domain TLB(s) and wait for completion */
        domain_flush_tlb_pde(domain);
+       domain_flush_complete(domain);
 }
 
 int __init amd_iommu_init_api(void)
@@ -2375,6 +2459,7 @@ out_err:
 static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 {
        struct protection_domain *pdomain;
+       u64 *pt_root, root;
 
        switch (type) {
        case IOMMU_DOMAIN_UNMANAGED:
@@ -2382,13 +2467,15 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
                if (!pdomain)
                        return NULL;
 
-               pdomain->mode    = PAGE_MODE_3_LEVEL;
-               pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
-               if (!pdomain->pt_root) {
+               pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+               if (!pt_root) {
                        protection_domain_free(pdomain);
                        return NULL;
                }
 
+               root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+               atomic64_set(&pdomain->pt_root, root);
+
                pdomain->domain.geometry.aperture_start = 0;
                pdomain->domain.geometry.aperture_end   = ~0ULL;
                pdomain->domain.geometry.force_aperture = true;
@@ -2406,7 +2493,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
                if (!pdomain)
                        return NULL;
 
-               pdomain->mode = PAGE_MODE_NONE;
+               atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE);
                break;
        default:
                return NULL;
@@ -2418,6 +2505,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 static void amd_iommu_domain_free(struct iommu_domain *dom)
 {
        struct protection_domain *domain;
+       struct domain_pgtable pgtable;
 
        domain = to_pdomain(dom);
 
@@ -2435,7 +2523,9 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
                dma_ops_domain_free(domain);
                break;
        default:
-               if (domain->mode != PAGE_MODE_NONE)
+               amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+               if (pgtable.mode != PAGE_MODE_NONE)
                        free_pagetable(domain);
 
                if (domain->flags & PD_IOMMUV2_MASK)
@@ -2518,10 +2608,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
                         gfp_t gfp)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
        int prot = 0;
        int ret;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return -EINVAL;
 
        if (iommu_prot & IOMMU_READ)
@@ -2541,8 +2633,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
                              struct iommu_iotlb_gather *gather)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return 0;
 
        return iommu_unmap_page(domain, iova, page_size);
@@ -2553,9 +2647,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 {
        struct protection_domain *domain = to_pdomain(dom);
        unsigned long offset_mask, pte_pgsize;
+       struct domain_pgtable pgtable;
        u64 *pte, __pte;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return iova;
 
        pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -2708,16 +2804,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
        unsigned long flags;
+       u64 pt_root;
 
        spin_lock_irqsave(&domain->lock, flags);
 
+       /* First save pgtable configuration*/
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
        /* Update data structure */
-       domain->mode    = PAGE_MODE_NONE;
+       pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE);
+       atomic64_set(&domain->pt_root, pt_root);
 
        /* Make changes visible to IOMMUs */
        update_domain(domain);
 
+       /* Restore old pgtable in domain->ptroot to free page-table */
+       pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode);
+       atomic64_set(&domain->pt_root, pt_root);
+
        /* Page-table is not visible to IOMMU anymore, so free it */
        free_pagetable(domain);
 
@@ -2908,9 +3014,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
 static int __set_gcr3(struct protection_domain *domain, int pasid,
                      unsigned long cr3)
 {
+       struct domain_pgtable pgtable;
        u64 *pte;
 
-       if (domain->mode != PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode != PAGE_MODE_NONE)
                return -EINVAL;
 
        pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -2924,9 +3032,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
 
 static int __clear_gcr3(struct protection_domain *domain, int pasid)
 {
+       struct domain_pgtable pgtable;
        u64 *pte;
 
-       if (domain->mode != PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode != PAGE_MODE_NONE)
                return -EINVAL;
 
        pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
index ca8c452..7a8fdec 100644 (file)
@@ -468,8 +468,7 @@ struct protection_domain {
                                       iommu core code */
        spinlock_t lock;        /* mostly used to lock the page table*/
        u16 id;                 /* the domain id written to the device table */
-       int mode;               /* paging mode (0-6 levels) */
-       u64 *pt_root;           /* page table root pointer */
+       atomic64_t pt_root;     /* pgtable root and pgtable mode */
        int glx;                /* Number of levels for GCR3 table */
        u64 *gcr3_tbl;          /* Guest CR3 table */
        unsigned long flags;    /* flags to find out type of domain */
@@ -477,6 +476,12 @@ struct protection_domain {
        unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
+/* For decocded pt_root */
+struct domain_pgtable {
+       int mode;
+       u64 *root;
+};
+
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
index d5cac4f..4e1d11a 100644 (file)
@@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
        if (!region)
                return -ENOMEM;
 
-       list_add(&vdev->resv_regions, &region->list);
+       list_add(&region->list, &vdev->resv_regions);
        return 0;
 }
 
index 668418d..f620442 100644 (file)
@@ -1465,6 +1465,13 @@ static const struct mei_cfg mei_me_pch12_cfg = {
        MEI_CFG_DMA_128,
 };
 
+/* LBG with quirk for SPS Firmware exclusion */
+static const struct mei_cfg mei_me_pch12_sps_cfg = {
+       MEI_CFG_PCH8_HFS,
+       MEI_CFG_FW_VER_SUPP,
+       MEI_CFG_FW_SPS,
+};
+
 /* Tiger Lake and newer devices */
 static const struct mei_cfg mei_me_pch15_cfg = {
        MEI_CFG_PCH8_HFS,
@@ -1487,6 +1494,7 @@ static const struct mei_cfg *const mei_cfg_list[] = {
        [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
        [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
        [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg,
+       [MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg,
        [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg,
 };
 
index 4a8d4dc..b6b94e2 100644 (file)
@@ -80,6 +80,9 @@ struct mei_me_hw {
  *                         servers platforms with quirk for
  *                         SPS firmware exclusion.
  * @MEI_ME_PCH12_CFG:      Platform Controller Hub Gen12 and newer
+ * @MEI_ME_PCH12_SPS_CFG:  Platform Controller Hub Gen12 and newer
+ *                         servers platforms with quirk for
+ *                         SPS firmware exclusion.
  * @MEI_ME_PCH15_CFG:      Platform Controller Hub Gen15 and newer
  * @MEI_ME_NUM_CFG:        Upper Sentinel.
  */
@@ -93,6 +96,7 @@ enum mei_cfg_idx {
        MEI_ME_PCH8_CFG,
        MEI_ME_PCH8_SPS_CFG,
        MEI_ME_PCH12_CFG,
+       MEI_ME_PCH12_SPS_CFG,
        MEI_ME_PCH15_CFG,
        MEI_ME_NUM_CFG,
 };
index 0c390fe..a1ed375 100644 (file)
@@ -70,7 +70,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_CFG)},
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)},
index 8499b56..c5367e2 100644 (file)
@@ -1370,6 +1370,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
        struct mmc_request *mrq = &mqrq->brq.mrq;
        struct request_queue *q = req->q;
        struct mmc_host *host = mq->card->host;
+       enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
        unsigned long flags;
        bool put_card;
        int err;
@@ -1399,7 +1400,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
 
        spin_lock_irqsave(&mq->lock, flags);
 
-       mq->in_flight[mmc_issue_type(mq, req)] -= 1;
+       mq->in_flight[issue_type] -= 1;
 
        put_card = (mmc_tot_in_flight(mq) == 0);
 
index 25bee3d..4b1eb89 100644 (file)
@@ -107,11 +107,10 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
        case MMC_ISSUE_DCMD:
                if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
                        if (recovery_needed)
-                               __mmc_cqe_recovery_notifier(mq);
+                               mmc_cqe_recovery_notifier(mrq);
                        return BLK_EH_RESET_TIMER;
                }
-               /* No timeout (XXX: huh? comment doesn't make much sense) */
-               blk_mq_complete_request(req);
+               /* The request has gone already */
                return BLK_EH_DONE;
        default:
                /* Timeout is handled by mmc core */
@@ -127,18 +126,13 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
        struct mmc_card *card = mq->card;
        struct mmc_host *host = card->host;
        unsigned long flags;
-       int ret;
+       bool ignore_tout;
 
        spin_lock_irqsave(&mq->lock, flags);
-
-       if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
-               ret = BLK_EH_RESET_TIMER;
-       else
-               ret = mmc_cqe_timed_out(req);
-
+       ignore_tout = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled;
        spin_unlock_irqrestore(&mq->lock, flags);
 
-       return ret;
+       return ignore_tout ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req);
 }
 
 static void mmc_mq_recovery_handler(struct work_struct *work)
index 1aee485..026ca91 100644 (file)
@@ -1104,7 +1104,7 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev)
 
        if (ret) {
                dev_err(&pdev->dev, "Failed to get irq for data line\n");
-               return ret;
+               goto free_host;
        }
 
        mutex_init(&host->cmd_mutex);
@@ -1116,6 +1116,10 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev)
        dev_set_drvdata(&pdev->dev, host);
        mmc_add_host(mmc);
        return 0;
+
+free_host:
+       mmc_free_host(mmc);
+       return ret;
 }
 
 static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev)
index faba53c..d8b76cb 100644 (file)
@@ -605,10 +605,12 @@ static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev,
 }
 
 static const struct sdhci_acpi_slot sdhci_acpi_slot_amd_emmc = {
-       .chip   = &sdhci_acpi_chip_amd,
-       .caps   = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE,
-       .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE |
-                       SDHCI_QUIRK_32BIT_ADMA_SIZE,
+       .chip           = &sdhci_acpi_chip_amd,
+       .caps           = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE,
+       .quirks         = SDHCI_QUIRK_32BIT_DMA_ADDR |
+                         SDHCI_QUIRK_32BIT_DMA_SIZE |
+                         SDHCI_QUIRK_32BIT_ADMA_SIZE,
+       .quirks2        = SDHCI_QUIRK2_BROKEN_64_BIT_DMA,
        .probe_slot     = sdhci_acpi_emmc_amd_probe_slot,
 };
 
index ce15a05..fd76aa6 100644 (file)
@@ -26,6 +26,9 @@
 #define   SDHCI_GLI_9750_DRIVING_2    GENMASK(27, 26)
 #define   GLI_9750_DRIVING_1_VALUE    0xFFF
 #define   GLI_9750_DRIVING_2_VALUE    0x3
+#define   SDHCI_GLI_9750_SEL_1        BIT(29)
+#define   SDHCI_GLI_9750_SEL_2        BIT(31)
+#define   SDHCI_GLI_9750_ALL_RST      (BIT(24)|BIT(25)|BIT(28)|BIT(30))
 
 #define SDHCI_GLI_9750_PLL           0x864
 #define   SDHCI_GLI_9750_PLL_TX2_INV    BIT(23)
@@ -122,6 +125,8 @@ static void gli_set_9750(struct sdhci_host *host)
                                    GLI_9750_DRIVING_1_VALUE);
        driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_2,
                                    GLI_9750_DRIVING_2_VALUE);
+       driving_value &= ~(SDHCI_GLI_9750_SEL_1|SDHCI_GLI_9750_SEL_2|SDHCI_GLI_9750_ALL_RST);
+       driving_value |= SDHCI_GLI_9750_SEL_2;
        sdhci_writel(host, driving_value, SDHCI_GLI_9750_DRIVING);
 
        sw_ctrl_value &= ~SDHCI_GLI_9750_SW_CTRL_4;
@@ -334,6 +339,18 @@ static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg)
        return value;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip)
+{
+       struct sdhci_pci_slot *slot = chip->slots[0];
+
+       pci_free_irq_vectors(slot->chip->pdev);
+       gli_pcie_enable_msi(slot);
+
+       return sdhci_pci_resume_host(chip);
+}
+#endif
+
 static const struct sdhci_ops sdhci_gl9755_ops = {
        .set_clock              = sdhci_set_clock,
        .enable_dma             = sdhci_pci_enable_dma,
@@ -348,6 +365,9 @@ const struct sdhci_pci_fixes sdhci_gl9755 = {
        .quirks2        = SDHCI_QUIRK2_BROKEN_DDR50,
        .probe_slot     = gli_probe_slot_gl9755,
        .ops            = &sdhci_gl9755_ops,
+#ifdef CONFIG_PM_SLEEP
+       .resume         = sdhci_pci_gli_resume,
+#endif
 };
 
 static const struct sdhci_ops sdhci_gl9750_ops = {
@@ -366,4 +386,7 @@ const struct sdhci_pci_fixes sdhci_gl9750 = {
        .quirks2        = SDHCI_QUIRK2_BROKEN_DDR50,
        .probe_slot     = gli_probe_slot_gl9750,
        .ops            = &sdhci_gl9750_ops,
+#ifdef CONFIG_PM_SLEEP
+       .resume         = sdhci_pci_gli_resume,
+#endif
 };
index 06426fc..f781c46 100644 (file)
@@ -1483,7 +1483,7 @@ static void __exit most_exit(void)
        ida_destroy(&mdev_id);
 }
 
-module_init(most_init);
+subsys_initcall(most_init);
 module_exit(most_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Gromm <christian.gromm@microchip.com>");
index f2adea9..f3c037f 100644 (file)
@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
                  * Don't treat an error as fatal, as we potentially already
                  * have a NGUID or EUI-64.
                  */
-               if (status > 0)
+               if (status > 0 && !(status & NVME_SC_DNR))
                        status = 0;
                goto free_data;
        }
index 4e79e41..e13c370 100644 (file)
@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-       if (++nvmeq->cq_head == nvmeq->q_depth) {
+       u16 tmp = nvmeq->cq_head + 1;
+
+       if (tmp == nvmeq->q_depth) {
                nvmeq->cq_head = 0;
                nvmeq->cq_phase ^= 1;
+       } else {
+               nvmeq->cq_head = tmp;
        }
 }
 
index 3708d43..393011a 100644 (file)
@@ -816,6 +816,13 @@ static const struct of_device_id qusb2_phy_of_match_table[] = {
                .compatible     = "qcom,msm8998-qusb2-phy",
                .data           = &msm8998_phy_cfg,
        }, {
+               /*
+                * Deprecated. Only here to support legacy device
+                * trees that didn't include "qcom,qusb2-v2-phy"
+                */
+               .compatible     = "qcom,sdm845-qusb2-phy",
+               .data           = &qusb2_v2_phy_cfg,
+       }, {
                .compatible     = "qcom,qusb2-v2-phy",
                .data           = &qusb2_v2_phy_cfg,
        },
index d998e65..a52a9bf 100644 (file)
@@ -160,18 +160,11 @@ static int qcom_snps_hsphy_power_on(struct phy *phy)
        ret = regulator_bulk_enable(VREG_NUM, priv->vregs);
        if (ret)
                return ret;
-       ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks);
-       if (ret)
-               goto err_disable_regulator;
+
        qcom_snps_hsphy_disable_hv_interrupts(priv);
        qcom_snps_hsphy_exit_retention(priv);
 
        return 0;
-
-err_disable_regulator:
-       regulator_bulk_disable(VREG_NUM, priv->vregs);
-
-       return ret;
 }
 
 static int qcom_snps_hsphy_power_off(struct phy *phy)
@@ -180,7 +173,6 @@ static int qcom_snps_hsphy_power_off(struct phy *phy)
 
        qcom_snps_hsphy_enter_retention(priv);
        qcom_snps_hsphy_enable_hv_interrupts(priv);
-       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
        regulator_bulk_disable(VREG_NUM, priv->vregs);
 
        return 0;
@@ -266,21 +258,39 @@ static int qcom_snps_hsphy_init(struct phy *phy)
        struct hsphy_priv *priv = phy_get_drvdata(phy);
        int ret;
 
-       ret = qcom_snps_hsphy_reset(priv);
+       ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks);
        if (ret)
                return ret;
 
+       ret = qcom_snps_hsphy_reset(priv);
+       if (ret)
+               goto disable_clocks;
+
        qcom_snps_hsphy_init_sequence(priv);
 
        ret = qcom_snps_hsphy_por_reset(priv);
        if (ret)
-               return ret;
+               goto disable_clocks;
+
+       return 0;
+
+disable_clocks:
+       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
+       return ret;
+}
+
+static int qcom_snps_hsphy_exit(struct phy *phy)
+{
+       struct hsphy_priv *priv = phy_get_drvdata(phy);
+
+       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
 
        return 0;
 }
 
 static const struct phy_ops qcom_snps_hsphy_ops = {
        .init = qcom_snps_hsphy_init,
+       .exit = qcom_snps_hsphy_exit,
        .power_on = qcom_snps_hsphy_power_on,
        .power_off = qcom_snps_hsphy_power_off,
        .set_mode = qcom_snps_hsphy_set_mode,
index c340505..7486f6e 100644 (file)
@@ -5754,10 +5754,6 @@ static DECLARE_DELAYED_WORK(regulator_init_complete_work,
 
 static int __init regulator_init_complete(void)
 {
-       int delay = driver_deferred_probe_timeout;
-
-       if (delay < 0)
-               delay = 0;
        /*
         * Since DT doesn't provide an idiomatic mechanism for
         * enabling full constraints and since it's much more natural
@@ -5768,17 +5764,18 @@ static int __init regulator_init_complete(void)
                has_full_constraints = true;
 
        /*
-        * If driver_deferred_probe_timeout is set, we punt
-        * completion for that many seconds since systems like
-        * distros will load many drivers from userspace so consumers
-        * might not always be ready yet, this is particularly an
-        * issue with laptops where this might bounce the display off
-        * then on.  Ideally we'd get a notification from userspace
-        * when this happens but we don't so just wait a bit and hope
-        * we waited long enough.  It'd be better if we'd only do
-        * this on systems that need it.
+        * We punt completion for an arbitrary amount of time since
+        * systems like distros will load many drivers from userspace
+        * so consumers might not always be ready yet, this is
+        * particularly an issue with laptops where this might bounce
+        * the display off then on.  Ideally we'd get a notification
+        * from userspace when this happens but we don't so just wait
+        * a bit and hope we waited long enough.  It'd be better if
+        * we'd only do this on systems that need it, and a kernel
+        * command line option might be useful.
         */
-       schedule_delayed_work(&regulator_init_complete_work, delay * HZ);
+       schedule_delayed_work(&regulator_init_complete_work,
+                             msecs_to_jiffies(30000));
 
        return 0;
 }
index 7da9e06..635f6f9 100644 (file)
@@ -3640,6 +3640,11 @@ static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *tgt)
        struct ibmvfc_host *vhost = tgt->vhost;
        struct ibmvfc_event *evt;
 
+       if (!vhost->logged_in) {
+               ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+               return;
+       }
+
        if (vhost->discovery_threads >= disc_threads)
                return;
 
index 7f66a77..59f0f10 100644 (file)
@@ -2320,16 +2320,12 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 static int ibmvscsi_remove(struct vio_dev *vdev)
 {
        struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
-       unsigned long flags;
 
        srp_remove_host(hostdata->host);
        scsi_remove_host(hostdata->host);
 
        purge_requests(hostdata, DID_ERROR);
-
-       spin_lock_irqsave(hostdata->host->host_lock, flags);
        release_event_pool(&hostdata->pool, hostdata);
-       spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
        ibmvscsi_release_crq_queue(&hostdata->queue, hostdata,
                                        max_events);
index 97cabd7..3325596 100644 (file)
@@ -3031,11 +3031,11 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
            test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags))
                msleep(1000);
 
-       qla_nvme_delete(vha);
 
        qla24xx_disable_vp(vha);
        qla2x00_wait_for_sess_deletion(vha);
 
+       qla_nvme_delete(vha);
        vha->flags.delete_progress = 1;
 
        qlt_remove_target(ha, vha);
index 4ed9043..d6c991b 100644 (file)
@@ -3153,7 +3153,7 @@ qla24xx_abort_command(srb_t *sp)
        ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x108c,
            "Entered %s.\n", __func__);
 
-       if (vha->flags.qpairs_available && sp->qpair)
+       if (sp->qpair)
                req = sp->qpair->req;
        else
                return QLA_FUNCTION_FAILED;
index 8e0575f..67325fb 100644 (file)
@@ -925,6 +925,10 @@ do_map_region(const struct gasket_dev *gasket_dev, struct vm_area_struct *vma,
                gasket_get_bar_index(gasket_dev,
                                     (vma->vm_pgoff << PAGE_SHIFT) +
                                     driver_desc->legacy_mmap_address_offset);
+
+       if (bar_index < 0)
+               return DO_MAP_REGION_INVALID;
+
        phys_base = gasket_dev->bar_data[bar_index].phys_base + phys_offset;
        while (mapped_bytes < map_length) {
                /*
index 87a6dac..ab6f391 100644 (file)
@@ -30,5 +30,4 @@ Now the TODOs:
 
 Please send any patches to:
 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Wolfram Sang <wsa@the-dreams.de>
 Linux Driver Project Developer List <driverdev-devel@linuxdriverproject.org>
index 3d084ce..50c7534 100644 (file)
@@ -182,6 +182,9 @@ static int usb4_switch_op(struct tb_switch *sw, u16 opcode, u8 *status)
                return ret;
 
        ret = tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_26, 1);
+       if (ret)
+               return ret;
+
        if (val & ROUTER_CS_26_ONS)
                return -EOPNOTSUPP;
 
index ed0aa5c..5674da2 100644 (file)
@@ -843,10 +843,8 @@ static int bcm_uart_probe(struct platform_device *pdev)
        if (IS_ERR(clk) && pdev->dev.of_node)
                clk = of_clk_get(pdev->dev.of_node, 0);
 
-       if (IS_ERR(clk)) {
-               clk_put(clk);
+       if (IS_ERR(clk))
                return -ENODEV;
-       }
 
        port->iotype = UPIO_MEM;
        port->irq = res_irq->start;
index ac137b6..35e9e8f 100644 (file)
@@ -1459,6 +1459,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
                cdns_uart_uart_driver.nr = CDNS_UART_NR_PORTS;
 #ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
                cdns_uart_uart_driver.cons = &cdns_uart_console;
+               cdns_uart_console.index = id;
 #endif
 
                rc = uart_register_driver(&cdns_uart_uart_driver);
index e5ffed7..48a8199 100644 (file)
@@ -365,9 +365,14 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
        return uniscr;
 }
 
+static void vc_uniscr_free(struct uni_screen *uniscr)
+{
+       vfree(uniscr);
+}
+
 static void vc_uniscr_set(struct vc_data *vc, struct uni_screen *new_uniscr)
 {
-       vfree(vc->vc_uni_screen);
+       vc_uniscr_free(vc->vc_uni_screen);
        vc->vc_uni_screen = new_uniscr;
 }
 
@@ -1230,7 +1235,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
        err = resize_screen(vc, new_cols, new_rows, user);
        if (err) {
                kfree(newscreen);
-               kfree(new_uniscr);
+               vc_uniscr_free(new_uniscr);
                return err;
        }
 
index af648ba..4610545 100644 (file)
@@ -114,7 +114,7 @@ static int ci_hdrc_msm_notify_event(struct ci_hdrc *ci, unsigned event)
                        hw_write_id_reg(ci, HS_PHY_GENCONFIG_2,
                                        HS_PHY_ULPI_TX_PKT_EN_CLR_FIX, 0);
 
-               if (!IS_ERR(ci->platdata->vbus_extcon.edev)) {
+               if (!IS_ERR(ci->platdata->vbus_extcon.edev) || ci->role_switch) {
                        hw_write_id_reg(ci, HS_PHY_GENCONFIG_2,
                                        HS_PHY_SESS_VLD_CTRL_EN,
                                        HS_PHY_SESS_VLD_CTRL_EN);
index 6833c91..b9db981 100644 (file)
@@ -217,6 +217,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct usb_memory *usbm = NULL;
        struct usb_dev_state *ps = file->private_data;
+       struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus);
        size_t size = vma->vm_end - vma->vm_start;
        void *mem;
        unsigned long flags;
@@ -250,9 +251,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
        usbm->vma_use_count = 1;
        INIT_LIST_HEAD(&usbm->memlist);
 
-       if (remap_pfn_range(vma, vma->vm_start,
-                       virt_to_phys(usbm->mem) >> PAGE_SHIFT,
-                       size, vma->vm_page_prot) < 0) {
+       if (dma_mmap_coherent(hcd->self.sysdev, vma, mem, dma_handle, size)) {
                dec_usb_memory_use_count(usbm, &usbm->vma_use_count);
                return -EAGAIN;
        }
index a48678a..6197938 100644 (file)
@@ -1144,11 +1144,11 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr,
 
        if (usb_endpoint_out(epaddr)) {
                ep = dev->ep_out[epnum];
-               if (reset_hardware)
+               if (reset_hardware && epnum != 0)
                        dev->ep_out[epnum] = NULL;
        } else {
                ep = dev->ep_in[epnum];
-               if (reset_hardware)
+               if (reset_hardware && epnum != 0)
                        dev->ep_in[epnum] = NULL;
        }
        if (ep) {
index ffd9841..d63072f 100644 (file)
@@ -1138,8 +1138,8 @@ static void garmin_read_process(struct garmin_data *garmin_data_p,
                   send it directly to the tty port */
                if (garmin_data_p->flags & FLAGS_QUEUING) {
                        pkt_add(garmin_data_p, data, data_length);
-               } else if (bulk_data ||
-                          getLayerId(data) == GARMIN_LAYERID_APPL) {
+               } else if (bulk_data || (data_length >= sizeof(u32) &&
+                               getLayerId(data) == GARMIN_LAYERID_APPL)) {
 
                        spin_lock_irqsave(&garmin_data_p->lock, flags);
                        garmin_data_p->flags |= APP_RESP_SEEN;
index 613f91a..ce0401d 100644 (file)
@@ -173,6 +173,7 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x413c, 0x81b3)},   /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
        {DEVICE_SWI(0x413c, 0x81b5)},   /* Dell Wireless 5811e QDL */
        {DEVICE_SWI(0x413c, 0x81b6)},   /* Dell Wireless 5811e QDL */
+       {DEVICE_SWI(0x413c, 0x81cc)},   /* Dell Wireless 5816e */
        {DEVICE_SWI(0x413c, 0x81cf)},   /* Dell Wireless 5819 */
        {DEVICE_SWI(0x413c, 0x81d0)},   /* Dell Wireless 5819 */
        {DEVICE_SWI(0x413c, 0x81d1)},   /* Dell Wireless 5818 */
index 1b23741..37157ed 100644 (file)
  * and don't forget to CC: the USB development list <linux-usb@vger.kernel.org>
  */
 
+/* Reported-by: Julian Groß <julian.g@posteo.de> */
+UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999,
+               "LaCie",
+               "2Big Quadra USB3",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_REPORT_OPCODES),
+
 /*
  * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
  * commands in UAS mode.  Observed with the 1.28 firmware; are there others?
index f5c5e0a..67c5139 100644 (file)
@@ -157,6 +157,10 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state)
        req.mode_data |= (state->mode - TYPEC_STATE_MODAL) <<
                         PMC_USB_ALTMODE_DP_MODE_SHIFT;
 
+       if (data->status & DP_STATUS_HPD_STATE)
+               req.mode_data |= PMC_USB_DP_HPD_LVL <<
+                                PMC_USB_ALTMODE_DP_MODE_SHIFT;
+
        return pmc_usb_command(port, (void *)&req, sizeof(req));
 }
 
@@ -298,11 +302,11 @@ static int pmc_usb_register_port(struct pmc_usb *pmc, int index,
        struct typec_mux_desc mux_desc = { };
        int ret;
 
-       ret = fwnode_property_read_u8(fwnode, "usb2-port", &port->usb2_port);
+       ret = fwnode_property_read_u8(fwnode, "usb2-port-number", &port->usb2_port);
        if (ret)
                return ret;
 
-       ret = fwnode_property_read_u8(fwnode, "usb3-port", &port->usb3_port);
+       ret = fwnode_property_read_u8(fwnode, "usb3-port-number", &port->usb3_port);
        if (ret)
                return ret;
 
index 185db76..5f3aa4d 100644 (file)
@@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
 
        ret = try_get_cap_refs(inode, need, want, 0, flags, got);
        /* three special error codes */
-       if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN)
+       if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
                ret = 0;
        return ret;
 }
@@ -3746,6 +3746,7 @@ retry:
                WARN_ON(1);
                tsession = NULL;
                target = -1;
+               mutex_lock(&session->s_mutex);
        }
        goto retry;
 
index 481ac97..dcaed75 100644 (file)
@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                    &congestion_kb_fops);
 
        snprintf(name, sizeof(name), "../../bdi/%s",
-                dev_name(fsc->sb->s_bdi->dev));
+                bdi_dev_name(fsc->sb->s_bdi));
        fsc->debugfs_bdi =
                debugfs_create_symlink("bdi",
                                       fsc->client->debugfs_dir,
index 486f91f..7c63abf 100644 (file)
@@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session,
        void *end = p + msg->front.iov_len;
        struct ceph_mds_session_head *h;
        u32 op;
-       u64 seq;
-       unsigned long features = 0;
+       u64 seq, features = 0;
        int wake = 0;
        bool blacklisted = false;
 
@@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session,
                        goto bad;
                /* version >= 3, feature bits */
                ceph_decode_32_safe(&p, end, len, bad);
-               ceph_decode_need(&p, end, len, bad);
-               memcpy(&features, p, min_t(size_t, len, sizeof(features)));
-               p += len;
+               ceph_decode_64_safe(&p, end, features, bad);
+               p += len - sizeof(features);
        }
 
        mutex_lock(&mdsc->mutex);
index de56dee..19507e2 100644 (file)
@@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
        }
 
        if (IS_ERR(in)) {
-               pr_warn("Can't lookup inode %llx (err: %ld)\n",
-                       realm->ino, PTR_ERR(in));
+               dout("Can't lookup inode %llx (err: %ld)\n",
+                    realm->ino, PTR_ERR(in));
                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
        } else {
                qri->timeout = 0;
index cf7b7e1..cb73365 100644 (file)
@@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
                spin_lock(&configfs_dirent_lock);
                configfs_detach_rollback(dentry);
                spin_unlock(&configfs_dirent_lock);
+               config_item_put(parent_item);
                return -EINTR;
        }
        frag->frag_dead = true;
index 408418e..478a0d8 100644 (file)
@@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
        if (displaced)
                put_files_struct(displaced);
        if (!dump_interrupted()) {
+               /*
+                * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
+                * have this set to NULL.
+                */
+               if (!cprm.file) {
+                       pr_info("Core dump to |%s disabled\n", cn.corename);
+                       goto close_fail;
+               }
                file_start_write(cprm.file);
                core_dumped = binfmt->core_dump(&cprm);
                file_end_write(cprm.file);
index 8c59664..12eebcd 100644 (file)
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
 {
        struct eventpoll *ep = epi->ep;
 
+       /* Fast preliminary check */
+       if (epi->next != EP_UNACTIVE_PTR)
+               return false;
+
        /* Check that the same epi has not been just chained from another CPU */
        if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
                return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
         * chained in ep->ovflist and requeued later on.
         */
        if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
-               if (epi->next == EP_UNACTIVE_PTR &&
-                   chain_epi_lockless(epi))
+               if (chain_epi_lockless(epi))
+                       ep_pm_stay_awake_rcu(epi);
+       } else if (!ep_is_linked(epi)) {
+               /* In the usual case, add event to ready list. */
+               if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
                        ep_pm_stay_awake_rcu(epi);
-               goto out_unlock;
-       }
-
-       /* If this file is already in the ready list we exit soon */
-       if (!ep_is_linked(epi) &&
-           list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
-               ep_pm_stay_awake_rcu(epi);
        }
 
        /*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
-       bool waiter = false;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
 
@@ -1867,55 +1866,75 @@ fetch_events:
         */
        ep_reset_busy_poll_napi_id(ep);
 
-       /*
-        * We don't have any available event to return to the caller.  We need
-        * to sleep here, and we will be woken by ep_poll_callback() when events
-        * become available.
-        */
-       if (!waiter) {
-               waiter = true;
-               init_waitqueue_entry(&wait, current);
+       do {
+               /*
+                * Internally init_wait() uses autoremove_wake_function(),
+                * thus wait entry is removed from the wait queue on each
+                * wakeup. Why it is important? In case of several waiters
+                * each new wakeup will hit the next waiter, giving it the
+                * chance to harvest new event. Otherwise wakeup can be
+                * lost. This is also good performance-wise, because on
+                * normal wakeup path no need to call __remove_wait_queue()
+                * explicitly, thus ep->lock is not taken, which halts the
+                * event delivery.
+                */
+               init_wait(&wait);
 
                write_lock_irq(&ep->lock);
-               __add_wait_queue_exclusive(&ep->wq, &wait);
-               write_unlock_irq(&ep->lock);
-       }
-
-       for (;;) {
                /*
-                * We don't want to sleep if the ep_poll_callback() sends us
-                * a wakeup in between. That's why we set the task state
-                * to TASK_INTERRUPTIBLE before doing the checks.
+                * Barrierless variant, waitqueue_active() is called under
+                * the same lock on wakeup ep_poll_callback() side, so it
+                * is safe to avoid an explicit barrier.
                 */
-               set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_INTERRUPTIBLE);
+
                /*
-                * Always short-circuit for fatal signals to allow
-                * threads to make a timely exit without the chance of
-                * finding more events available and fetching
-                * repeatedly.
+                * Do the final check under the lock. ep_scan_ready_list()
+                * plays with two lists (->rdllist and ->ovflist) and there
+                * is always a race when both lists are empty for short
+                * period of time although events are pending, so lock is
+                * important.
                 */
-               if (fatal_signal_pending(current)) {
-                       res = -EINTR;
-                       break;
+               eavail = ep_events_available(ep);
+               if (!eavail) {
+                       if (signal_pending(current))
+                               res = -EINTR;
+                       else
+                               __add_wait_queue_exclusive(&ep->wq, &wait);
                }
+               write_unlock_irq(&ep->lock);
 
-               eavail = ep_events_available(ep);
-               if (eavail)
+               if (eavail || res)
                        break;
-               if (signal_pending(current)) {
-                       res = -EINTR;
-                       break;
-               }
 
                if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
                        timed_out = 1;
                        break;
                }
-       }
+
+               /* We were woken up, thus go and try to harvest some events */
+               eavail = 1;
+
+       } while (0);
 
        __set_current_state(TASK_RUNNING);
 
+       if (!list_empty_careful(&wait.entry)) {
+               write_lock_irq(&ep->lock);
+               __remove_wait_queue(&ep->wq, &wait);
+               write_unlock_irq(&ep->lock);
+       }
+
 send_events:
+       if (fatal_signal_pending(current)) {
+               /*
+                * Always short-circuit for fatal signals to allow
+                * threads to make a timely exit without the chance of
+                * finding more events available and fetching
+                * repeatedly.
+                */
+               res = -EINTR;
+       }
        /*
         * Try to transfer events to user space. In case we get 0 events and
         * there's still timeout left over, we go trying again in search of
@@ -1925,12 +1944,6 @@ send_events:
            !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
                goto fetch_events;
 
-       if (waiter) {
-               write_lock_irq(&ep->lock);
-               __remove_wait_queue(&ep->wq, &wait);
-               write_unlock_irq(&ep->lock);
-       }
-
        return res;
 }
 
index 936a8ec..6306eaa 100644 (file)
@@ -528,10 +528,12 @@ lower_metapath:
 
                /* Advance in metadata tree. */
                (mp->mp_list[hgt])++;
-               if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
-                       if (!hgt)
+               if (hgt) {
+                       if (mp->mp_list[hgt] >= sdp->sd_inptrs)
+                               goto lower_metapath;
+               } else {
+                       if (mp->mp_list[hgt] >= sdp->sd_diptrs)
                                break;
-                       goto lower_metapath;
                }
 
 fill_up_metapath:
@@ -876,10 +878,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
                                        ret = -ENOENT;
                                        goto unlock;
                                } else {
-                                       /* report a hole */
                                        iomap->offset = pos;
                                        iomap->length = length;
-                                       goto do_alloc;
+                                       goto hole_found;
                                }
                        }
                        iomap->length = size;
@@ -933,8 +934,6 @@ unlock:
        return ret;
 
 do_alloc:
-       iomap->addr = IOMAP_NULL_ADDR;
-       iomap->type = IOMAP_HOLE;
        if (flags & IOMAP_REPORT) {
                if (pos >= size)
                        ret = -ENOENT;
@@ -956,6 +955,9 @@ do_alloc:
                if (pos < size && height == ip->i_height)
                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
        }
+hole_found:
+       iomap->addr = IOMAP_NULL_ADDR;
+       iomap->type = IOMAP_HOLE;
        goto out;
 }
 
index 29f9b66..bf70e3b 100644 (file)
@@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock)
                                fs_err(sdp, "Error %d syncing glock \n", ret);
                                gfs2_dump_glock(NULL, gl, true);
                        }
-                       return;
+                       goto skip_inval;
                }
        }
        if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
@@ -633,6 +633,7 @@ __acquires(&gl->gl_lockref.lock)
                clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
        }
 
+skip_inval:
        gfs2_glock_hold(gl);
        /*
         * Check for an error encountered since we called go_sync and go_inval.
@@ -722,9 +723,6 @@ __acquires(&gl->gl_lockref.lock)
                        goto out_unlock;
                if (nonblock)
                        goto out_sched;
-               smp_mb();
-               if (atomic_read(&gl->gl_revokes) != 0)
-                       goto out_sched;
                set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
                GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
                gl->gl_target = gl->gl_demote_state;
index 70b2d3a..5acd3ce 100644 (file)
@@ -622,7 +622,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                                error = finish_no_open(file, NULL);
                }
                gfs2_glock_dq_uninit(ghs);
-               return error;
+               goto fail;
        } else if (error != -ENOENT) {
                goto fail_gunlock;
        }
@@ -764,9 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                error = finish_open(file, dentry, gfs2_open_common);
        }
        gfs2_glock_dq_uninit(ghs);
+       gfs2_qa_put(ip);
        gfs2_glock_dq_uninit(ghs + 1);
        clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        gfs2_glock_put(io_gl);
+       gfs2_qa_put(dip);
        return error;
 
 fail_gunlock3:
@@ -776,7 +778,6 @@ fail_gunlock2:
        clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        gfs2_glock_put(io_gl);
 fail_free_inode:
-       gfs2_qa_put(ip);
        if (ip->i_gl) {
                glock_clear_object(ip->i_gl, ip);
                gfs2_glock_put(ip->i_gl);
@@ -1005,7 +1006,7 @@ out_gunlock:
 out_child:
        gfs2_glock_dq(ghs);
 out_parent:
-       gfs2_qa_put(ip);
+       gfs2_qa_put(dip);
        gfs2_holder_uninit(ghs);
        gfs2_holder_uninit(ghs + 1);
        return error;
index 3a75843..0644e58 100644 (file)
@@ -669,13 +669,13 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct buffer_head *bh = bd->bd_bh;
        struct gfs2_glock *gl = bd->bd_gl;
 
+       sdp->sd_log_num_revoke++;
+       if (atomic_inc_return(&gl->gl_revokes) == 1)
+               gfs2_glock_hold(gl);
        bh->b_private = NULL;
        bd->bd_blkno = bh->b_blocknr;
        gfs2_remove_from_ail(bd); /* drops ref on bh */
        bd->bd_bh = NULL;
-       sdp->sd_log_num_revoke++;
-       if (atomic_inc_return(&gl->gl_revokes) == 1)
-               gfs2_glock_hold(gl);
        set_bit(GLF_LFLUSH, &gl->gl_flags);
        list_add(&bd->bd_list, &sdp->sd_log_revokes);
 }
@@ -1131,6 +1131,10 @@ int gfs2_logd(void *data)
 
        while (!kthread_should_stop()) {
 
+               if (gfs2_withdrawn(sdp)) {
+                       msleep_interruptible(HZ);
+                       continue;
+               }
                /* Check for errors writing to the journal */
                if (sdp->sd_log_error) {
                        gfs2_lm(sdp,
@@ -1139,6 +1143,7 @@ int gfs2_logd(void *data)
                                "prevent further damage.\n",
                                sdp->sd_fsname, sdp->sd_log_error);
                        gfs2_withdraw(sdp);
+                       continue;
                }
 
                did_flush = false;
index 5ea9675..48b54ec 100644 (file)
@@ -263,7 +263,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
        struct super_block *sb = sdp->sd_vfs;
        struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 
-       bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9);
+       bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
        bio_set_dev(bio, sb->s_bdev);
        bio->bi_end_io = end_io;
        bio->bi_private = sdp;
@@ -509,7 +509,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
        unsigned int bsize = sdp->sd_sb.sb_bsize, off;
        unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
        unsigned int shift = PAGE_SHIFT - bsize_shift;
-       unsigned int readahead_blocks = BIO_MAX_PAGES << shift;
+       unsigned int max_bio_size = 2 * 1024 * 1024;
        struct gfs2_journal_extent *je;
        int sz, ret = 0;
        struct bio *bio = NULL;
@@ -537,12 +537,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
                                off = 0;
                        }
 
-                       if (!bio || (bio_chained && !off)) {
+                       if (!bio || (bio_chained && !off) ||
+                           bio->bi_iter.bi_size >= max_bio_size) {
                                /* start new bio */
                        } else {
-                               sz = bio_add_page(bio, page, bsize, off);
-                               if (sz == bsize)
-                                       goto block_added;
+                               sector_t sector = dblock << sdp->sd_fsb2bb_shift;
+
+                               if (bio_end_sector(bio) == sector) {
+                                       sz = bio_add_page(bio, page, bsize, off);
+                                       if (sz == bsize)
+                                               goto block_added;
+                               }
                                if (off) {
                                        unsigned int blocks =
                                                (PAGE_SIZE - off) >> bsize_shift;
@@ -568,7 +573,7 @@ block_added:
                        off += bsize;
                        if (off == PAGE_SIZE)
                                page = NULL;
-                       if (blocks_submitted < blocks_read + readahead_blocks) {
+                       if (blocks_submitted < 2 * max_bio_size >> bsize_shift) {
                                /* Keep at least one bio in flight */
                                continue;
                        }
index 4b72abc..9856cc2 100644 (file)
@@ -252,7 +252,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        int num = 0;
 
        if (unlikely(gfs2_withdrawn(sdp)) &&
-           (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) {
+           (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) {
                *bhp = NULL;
                return -EIO;
        }
index cc0c4b5..8259fef 100644 (file)
@@ -1051,8 +1051,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        u32 x;
        int error = 0;
 
-       if (capable(CAP_SYS_RESOURCE) ||
-           sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
 
        error = gfs2_quota_hold(ip, uid, gid);
@@ -1125,7 +1124,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
        int found;
 
        if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
-               goto out;
+               return;
 
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
@@ -1162,7 +1161,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
                        qd_unlock(qda[x]);
        }
 
-out:
        gfs2_quota_unhold(ip);
 }
 
@@ -1210,9 +1208,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
        if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
                return 0;
 
-        if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
-                return 0;
-
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                qd = ip->i_qadata->qa_qd[x];
 
@@ -1270,7 +1265,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
                return;
 
-       BUG_ON(ip->i_qadata->qa_ref <= 0);
+       if (gfs2_assert_withdraw(sdp, ip->i_qadata &&
+                                ip->i_qadata->qa_ref > 0))
+               return;
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                qd = ip->i_qadata->qa_qd[x];
 
index 7f9ca8e..21ada33 100644 (file)
@@ -44,7 +44,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
        int ret;
 
        ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
-       if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+       if (capable(CAP_SYS_RESOURCE) ||
+           sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
                return 0;
        ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
        if (ret)
index 37fc416..956fced 100644 (file)
@@ -1404,7 +1404,6 @@ out:
        if (ip->i_qadata)
                gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
        gfs2_rs_delete(ip, NULL);
-       gfs2_qa_put(ip);
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
index 9b64d40..aa087a5 100644 (file)
@@ -119,6 +119,12 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
        if (!sb_rdonly(sdp->sd_vfs))
                ret = gfs2_make_fs_ro(sdp);
 
+       if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
+               if (!ret)
+                       ret = -EIO;
+               clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+               goto skip_recovery;
+       }
        /*
         * Drop the glock for our journal so another node can recover it.
         */
@@ -159,10 +165,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
                wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
        }
 
-       if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
-               clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
-               goto skip_recovery;
-       }
        /*
         * Dequeue the "live" glock, but keep a reference so it's never freed.
         */
index 0b91b06..979d9f9 100644 (file)
@@ -680,8 +680,6 @@ struct io_op_def {
        unsigned                needs_mm : 1;
        /* needs req->file assigned */
        unsigned                needs_file : 1;
-       /* needs req->file assigned IFF fd is >= 0 */
-       unsigned                fd_non_neg : 1;
        /* hash wq insertion if file is a regular file */
        unsigned                hash_reg_file : 1;
        /* unbound wq insertion if file is a non-regular file */
@@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
        },
        [IORING_OP_OPENAT] = {
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .file_table             = 1,
                .needs_fs               = 1,
        },
@@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_STATX] = {
                .needs_mm               = 1,
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .needs_fs               = 1,
                .file_table             = 1,
        },
@@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
        },
        [IORING_OP_OPENAT2] = {
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .file_table             = 1,
                .needs_fs               = 1,
        },
@@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
        io_steal_work(req, workptr);
 }
 
-static int io_req_needs_file(struct io_kiocb *req, int fd)
-{
-       if (!io_op_defs[req->opcode].needs_file)
-               return 0;
-       if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
-               return 0;
-       return 1;
-}
-
 static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
                                              int index)
 {
@@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 }
 
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
-                          int fd, unsigned int flags)
+                          int fd)
 {
        bool fixed;
 
-       if (!io_req_needs_file(req, fd))
-               return 0;
-
-       fixed = (flags & IOSQE_FIXED_FILE);
+       fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
        if (unlikely(!fixed && req->needs_fixed_file))
                return -EBADF;
 
@@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                       struct io_submit_state *state, bool async)
 {
        unsigned int sqe_flags;
-       int id, fd;
+       int id;
 
        /*
         * All io need record the previous position, if LINK vs DARIN,
@@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                                        IOSQE_ASYNC | IOSQE_FIXED_FILE |
                                        IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);
 
-       fd = READ_ONCE(sqe->fd);
-       return io_req_set_file(state, req, fd, sqe_flags);
+       if (!io_op_defs[req->opcode].needs_file)
+               return 0;
+
+       return io_req_set_file(state, req, READ_ONCE(sqe->fd));
 }
 
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
@@ -7360,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file)
 static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                  struct files_struct *files)
 {
-       struct io_kiocb *req;
-       DEFINE_WAIT(wait);
-
        while (!list_empty_careful(&ctx->inflight_list)) {
-               struct io_kiocb *cancel_req = NULL;
+               struct io_kiocb *cancel_req = NULL, *req;
+               DEFINE_WAIT(wait);
 
                spin_lock_irq(&ctx->inflight_lock);
                list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -7404,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                         */
                        if (refcount_sub_and_test(2, &cancel_req->refs)) {
                                io_put_req(cancel_req);
+                               finish_wait(&ctx->inflight_wait, &wait);
                                continue;
                        }
                }
@@ -7411,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
                io_put_req(cancel_req);
                schedule();
+               finish_wait(&ctx->inflight_wait, &wait);
        }
-       finish_wait(&ctx->inflight_wait, &wait);
 }
 
 static int io_uring_flush(struct file *file, void *data)
@@ -7761,7 +7742,8 @@ err:
        return ret;
 }
 
-static int io_uring_create(unsigned entries, struct io_uring_params *p)
+static int io_uring_create(unsigned entries, struct io_uring_params *p,
+                          struct io_uring_params __user *params)
 {
        struct user_struct *user = NULL;
        struct io_ring_ctx *ctx;
@@ -7853,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
        p->cq_off.cqes = offsetof(struct io_rings, cqes);
 
+       p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
+                       IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
+                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+
+       if (copy_to_user(params, p, sizeof(*p))) {
+               ret = -EFAULT;
+               goto err;
+       }
        /*
         * Install ring fd as the very last thing, so we don't risk someone
         * having closed it before we finish setup
@@ -7861,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        if (ret < 0)
                goto err;
 
-       p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
-                       IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
-                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
        trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
        return ret;
 err:
@@ -7879,7 +7866,6 @@ err:
 static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 {
        struct io_uring_params p;
-       long ret;
        int i;
 
        if (copy_from_user(&p, params, sizeof(p)))
@@ -7894,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
                        IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
                return -EINVAL;
 
-       ret = io_uring_create(entries, &p);
-       if (ret < 0)
-               return ret;
-
-       if (copy_to_user(params, &p, sizeof(p)))
-               return -EFAULT;
-
-       return ret;
+       return  io_uring_create(entries, &p, params);
 }
 
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
index 4735def..fd0a1e7 100644 (file)
@@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in,
        loff_t offset;
        long ret;
 
+       if (unlikely(!(in->f_mode & FMODE_READ) ||
+                    !(out->f_mode & FMODE_WRITE)))
+               return -EBADF;
+
        ipipe = get_pipe_info(in);
        opipe = get_pipe_info(out);
 
@@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
                if (off_in || off_out)
                        return -ESPIPE;
 
-               if (!(in->f_mode & FMODE_READ))
-                       return -EBADF;
-
-               if (!(out->f_mode & FMODE_WRITE))
-                       return -EBADF;
-
                /* Splicing to self would be fun, but... */
                if (ipipe == opipe)
                        return -EINVAL;
@@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
                        offset = out->f_pos;
                }
 
-               if (unlikely(!(out->f_mode & FMODE_WRITE)))
-                       return -EBADF;
-
                if (unlikely(out->f_flags & O_APPEND))
                        return -EINVAL;
 
@@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
        error = -EBADF;
        in = fdget(fd_in);
        if (in.file) {
-               if (in.file->f_mode & FMODE_READ) {
-                       out = fdget(fd_out);
-                       if (out.file) {
-                               if (out.file->f_mode & FMODE_WRITE)
-                                       error = do_splice(in.file, off_in,
-                                                         out.file, off_out,
-                                                         len, flags);
-                               fdput(out);
-                       }
+               out = fdget(fd_out);
+               if (out.file) {
+                       error = do_splice(in.file, off_in, out.file, off_out,
+                                         len, flags);
+                       fdput(out);
                }
                fdput(in);
        }
@@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len,
        struct pipe_inode_info *opipe = get_pipe_info(out);
        int ret = -EINVAL;
 
+       if (unlikely(!(in->f_mode & FMODE_READ) ||
+                    !(out->f_mode & FMODE_WRITE)))
+               return -EBADF;
+
        /*
         * Duplicate the contents of ipipe to opipe without actually
         * copying the data.
@@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 
 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
 {
-       struct fd in;
+       struct fd in, out;
        int error;
 
        if (unlikely(flags & ~SPLICE_F_ALL))
@@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
        error = -EBADF;
        in = fdget(fdin);
        if (in.file) {
-               if (in.file->f_mode & FMODE_READ) {
-                       struct fd out = fdget(fdout);
-                       if (out.file) {
-                               if (out.file->f_mode & FMODE_WRITE)
-                                       error = do_tee(in.file, out.file,
-                                                       len, flags);
-                               fdput(out);
-                       }
+               out = fdget(fdout);
+               if (out.file) {
+                       error = do_tee(in.file, out.file, len, flags);
+                       fdput(out);
                }
                fdput(in);
        }
index 675e269..8fe03b4 100644 (file)
@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
                goto fail_free;
        }
 
-       err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
+       err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
        if (err)
                goto fail_free;
 
index 99134d4..320f811 100644 (file)
@@ -48,7 +48,7 @@ struct videomode;
  * @MODE_HSYNC: hsync out of range
  * @MODE_VSYNC: vsync out of range
  * @MODE_H_ILLEGAL: mode has illegal horizontal timings
- * @MODE_V_ILLEGAL: mode has illegal horizontal timings
+ * @MODE_V_ILLEGAL: mode has illegal vertical timings
  * @MODE_BAD_WIDTH: requires an unsupported linepitch
  * @MODE_NOMODE: no mode with a matching name
  * @MODE_NO_INTERLACE: interlaced mode not supported
index 26f0ecf..0bbfd64 100644 (file)
@@ -65,6 +65,7 @@ struct amba_device {
        struct device           dev;
        struct resource         res;
        struct clk              *pclk;
+       struct device_dma_parameters dma_parms;
        unsigned int            periphid;
        unsigned int            cid;
        struct amba_cs_uci_id   uci;
index ee577a8..7367150 100644 (file)
@@ -219,6 +219,7 @@ struct backing_dev_info {
        wait_queue_head_t wb_waitq;
 
        struct device *dev;
+       char dev_name[64];
        struct device *owner;
 
        struct timer_list laptop_mode_wb_timer;
index f88197c..c9ad5c3 100644 (file)
@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
                                  (1 << WB_async_congested));
 }
 
-extern const char *bdi_unknown_name;
-
-static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
-{
-       if (!bdi || !bdi->dev)
-               return bdi_unknown_name;
-       return dev_name(bdi->dev);
-}
+const char *bdi_dev_name(struct backing_dev_info *bdi);
 
 #endif /* _LINUX_BACKING_DEV_H */
index db95244..ab4bd15 100644 (file)
@@ -210,6 +210,29 @@ struct ftrace_ops {
 #endif
 };
 
+extern struct ftrace_ops __rcu *ftrace_ops_list;
+extern struct ftrace_ops ftrace_list_end;
+
+/*
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw_check() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw_check() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)                        \
+       op = rcu_dereference_raw_check(list);                   \
+       do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)                           \
+       while (likely(op = rcu_dereference_raw_check((op)->next)) &&    \
+              unlikely((op) != &ftrace_list_end))
+
 /*
  * Type of the current tracing.
  */
index 62d216f..c230b4e 100644 (file)
@@ -17,9 +17,12 @@ enum host1x_class {
        HOST1X_CLASS_GR3D = 0x60,
 };
 
+struct host1x;
 struct host1x_client;
 struct iommu_group;
 
+u64 host1x_get_dma_mask(struct host1x *host1x);
+
 /**
  * struct host1x_client_ops - host1x client operations
  * @init: host1x client initialization code
index 21f4fff..5616b25 100644 (file)
@@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
 LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
         struct fs_context *src_sc)
-LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc,
+LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
         struct fs_parameter *param)
 LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb)
index d275c72..977edd3 100644 (file)
@@ -783,6 +783,8 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
                atomic_long_inc(&memcg->memory_events[event]);
                cgroup_file_notify(&memcg->events_file);
 
+               if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+                       break;
                if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
                        break;
        } while ((memcg = parent_mem_cgroup(memcg)) &&
index ad19960..3d7c3c2 100644 (file)
@@ -53,9 +53,9 @@ enum mhi_callback {
  * @MHI_CHAIN: Linked transfer
  */
 enum mhi_flags {
-       MHI_EOB,
-       MHI_EOT,
-       MHI_CHAIN,
+       MHI_EOB = BIT(0),
+       MHI_EOT = BIT(1),
+       MHI_CHAIN = BIT(2),
 };
 
 /**
@@ -335,14 +335,15 @@ struct mhi_controller_config {
  * @syserr_worker: System error worker
  * @state_event: State change event
  * @status_cb: CB function to notify power states of the device (required)
- * @link_status: CB function to query link status of the device (required)
  * @wake_get: CB function to assert device wake (optional)
  * @wake_put: CB function to de-assert device wake (optional)
  * @wake_toggle: CB function to assert and de-assert device wake (optional)
  * @runtime_get: CB function to controller runtime resume (required)
- * @runtimet_put: CB function to decrement pm usage (required)
+ * @runtime_put: CB function to decrement pm usage (required)
  * @map_single: CB function to create TRE buffer
  * @unmap_single: CB function to destroy TRE buffer
+ * @read_reg: Read a MHI register via the physical link (required)
+ * @write_reg: Write a MHI register via the physical link (required)
  * @buffer_len: Bounce buffer length
  * @bounce_buf: Use of bounce buffer
  * @fbc_download: MHI host needs to do complete image transfer (optional)
@@ -417,7 +418,6 @@ struct mhi_controller {
 
        void (*status_cb)(struct mhi_controller *mhi_cntrl,
                          enum mhi_callback cb);
-       int (*link_status)(struct mhi_controller *mhi_cntrl);
        void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override);
        void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override);
        void (*wake_toggle)(struct mhi_controller *mhi_cntrl);
@@ -427,6 +427,10 @@ struct mhi_controller {
                          struct mhi_buf_info *buf);
        void (*unmap_single)(struct mhi_controller *mhi_cntrl,
                             struct mhi_buf_info *buf);
+       int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr,
+                       u32 *out);
+       void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr,
+                         u32 val);
 
        size_t buffer_len;
        bool bounce_buf;
index bdc3575..77a2aad 100644 (file)
@@ -25,6 +25,7 @@ struct platform_device {
        bool            id_auto;
        struct device   dev;
        u64             platform_dma_mask;
+       struct device_dma_parameters dma_parms;
        u32             num_resources;
        struct resource *resource;
 
index 48c1b16..bc07e51 100644 (file)
@@ -21,6 +21,7 @@
 struct gss_ctx {
        struct gss_api_mech     *mech_type;
        void                    *internal_ctx_id;
+       unsigned int            slack, align;
 };
 
 #define GSS_C_NO_BUFFER                ((struct xdr_netobj) 0)
@@ -66,6 +67,7 @@ u32 gss_wrap(
 u32 gss_unwrap(
                struct gss_ctx          *ctx_id,
                int                     offset,
+               int                     len,
                struct xdr_buf          *inbuf);
 u32 gss_delete_sec_context(
                struct gss_ctx          **ctx_id);
@@ -126,6 +128,7 @@ struct gss_api_ops {
        u32 (*gss_unwrap)(
                        struct gss_ctx          *ctx_id,
                        int                     offset,
+                       int                     len,
                        struct xdr_buf          *buf);
        void (*gss_delete_sec_context)(
                        void                    *internal_ctx_id);
index c1d77dd..e8f8ffe 100644 (file)
@@ -83,7 +83,7 @@ struct gss_krb5_enctype {
        u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
                           struct xdr_buf *buf,
                           struct page **pages); /* v2 encryption function */
-       u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
+       u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len,
                           struct xdr_buf *buf, u32 *headskip,
                           u32 *tailskip);      /* v2 decryption function */
 };
@@ -255,7 +255,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset,
                struct xdr_buf *outbuf, struct page **pages);
 
 u32
-gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
+gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len,
                struct xdr_buf *buf);
 
 
@@ -312,7 +312,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
                     struct page **pages);
 
 u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
                     struct xdr_buf *buf, u32 *plainoffset,
                     u32 *plainlen);
 
index 01bb419..22c207b 100644 (file)
@@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
index a36b722..334842d 100644 (file)
@@ -61,6 +61,7 @@ struct snd_rawmidi_runtime {
        size_t avail_min;       /* min avail for wakeup */
        size_t avail;           /* max used buffer for wakeup */
        size_t xruns;           /* over/underruns counter */
+       int buffer_ref;         /* buffer reference count */
        /* misc */
        spinlock_t lock;
        wait_queue_head_t sleep;
index 1897822..26d871f 100644 (file)
@@ -24,7 +24,7 @@
  *
  * @pid: Put 0 for global total, while positive pid for process total.
  *
- * @size: Virtual size of the allocation in bytes.
+ * @size: Size of the allocation in bytes.
  *
  */
 TRACE_EVENT(gpu_mem_total,
index 7848141..9c66e59 100644 (file)
@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->rmean          = stat[0].mean;
                __entry->rmin           = stat[0].min;
@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->lat = div_u64(lat, 1000);
        ),
@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->msg    = msg;
                __entry->step   = step;
@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->status         = status;
                __entry->step           = step;
index 6fd13a0..74a5ac6 100644 (file)
@@ -39,22 +39,6 @@ config TOOLS_SUPPORT_RELR
 config CC_HAS_ASM_INLINE
        def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
 
-config CC_HAS_WARN_MAYBE_UNINITIALIZED
-       def_bool $(cc-option,-Wmaybe-uninitialized)
-       help
-         GCC >= 4.7 supports this option.
-
-config CC_DISABLE_WARN_MAYBE_UNINITIALIZED
-       bool
-       depends on CC_HAS_WARN_MAYBE_UNINITIALIZED
-       default CC_IS_GCC && GCC_VERSION < 40900  # unreliable for GCC < 4.9
-       help
-         GCC's -Wmaybe-uninitialized is not reliable by definition.
-         Lots of false positive warnings are produced in some cases.
-
-         If this option is enabled, -Wno-maybe-uninitialzed is passed
-         to the compiler to suppress maybe-uninitialized warnings.
-
 config CONSTRUCTORS
        bool
        depends on !UML
@@ -1257,14 +1241,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
 config CC_OPTIMIZE_FOR_PERFORMANCE_O3
        bool "Optimize more for performance (-O3)"
        depends on ARC
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          Choosing this option will pass "-O3" to your compiler to optimize
          the kernel yet more for performance.
 
 config CC_OPTIMIZE_FOR_SIZE
        bool "Optimize for size (-Os)"
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          Choosing this option will pass "-Os" to your compiler resulting
          in a smaller kernel.
index 8ec1be4..7a38012 100644 (file)
@@ -542,7 +542,7 @@ void __weak free_initrd_mem(unsigned long start, unsigned long end)
 }
 
 #ifdef CONFIG_KEXEC_CORE
-static bool kexec_free_initrd(void)
+static bool __init kexec_free_initrd(void)
 {
        unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
        unsigned long crashk_end   = (unsigned long)__va(crashk_res.end);
index a48617f..5803ecb 100644 (file)
@@ -257,6 +257,47 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+       u32 size, csum;
+       char *data;
+       u32 *hdr;
+
+       if (!initrd_end)
+               return NULL;
+
+       data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+       if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+               return NULL;
+
+       hdr = (u32 *)(data - 8);
+       size = hdr[0];
+       csum = hdr[1];
+
+       data = ((void *)hdr) - size;
+       if ((unsigned long)data < initrd_start) {
+               pr_err("bootconfig size %d is greater than initrd size %ld\n",
+                       size, initrd_end - initrd_start);
+               return NULL;
+       }
+
+       /* Remove bootconfig from initramfs/initrd */
+       initrd_end = (unsigned long)data;
+       if (_size)
+               *_size = size;
+       if (_csum)
+               *_csum = csum;
+
+       return data;
+}
+#else
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+       return NULL;
+}
+#endif
+
 #ifdef CONFIG_BOOT_CONFIG
 
 char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
@@ -357,9 +398,11 @@ static void __init setup_boot_config(const char *cmdline)
        int pos;
        u32 size, csum;
        char *data, *copy;
-       u32 *hdr;
        int ret;
 
+       /* Cut out the bootconfig data even if we have no bootconfig option */
+       data = get_boot_config_from_initrd(&size, &csum);
+
        strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
        parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
                   bootconfig_params);
@@ -367,16 +410,10 @@ static void __init setup_boot_config(const char *cmdline)
        if (!bootconfig_found)
                return;
 
-       if (!initrd_end)
-               goto not_found;
-
-       data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
-       if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
-               goto not_found;
-
-       hdr = (u32 *)(data - 8);
-       size = hdr[0];
-       csum = hdr[1];
+       if (!data) {
+               pr_err("'bootconfig' found on command line, but no bootconfig found\n");
+               return;
+       }
 
        if (size >= XBC_DATA_MAX) {
                pr_err("bootconfig size %d greater than max size %d\n",
@@ -384,10 +421,6 @@ static void __init setup_boot_config(const char *cmdline)
                return;
        }
 
-       data = ((void *)hdr) - size;
-       if ((unsigned long)data < initrd_start)
-               goto not_found;
-
        if (boot_config_checksum((unsigned char *)data, size) != csum) {
                pr_err("bootconfig checksum failed\n");
                return;
@@ -417,11 +450,15 @@ static void __init setup_boot_config(const char *cmdline)
                extra_init_args = xbc_make_cmdline("init");
        }
        return;
-not_found:
-       pr_err("'bootconfig' found on command line, but no bootconfig found\n");
 }
+
 #else
-#define setup_boot_config(cmdline)     do { } while (0)
+
+static void __init setup_boot_config(const char *cmdline)
+{
+       /* Remove bootconfig data from initrd */
+       get_boot_config_from_initrd(NULL, NULL);
+}
 
 static int __init warn_bootconfig(char *str)
 {
index dc8307b..beff0cf 100644 (file)
@@ -142,6 +142,7 @@ struct mqueue_inode_info {
 
        struct sigevent notify;
        struct pid *notify_owner;
+       u32 notify_self_exec_id;
        struct user_namespace *notify_user_ns;
        struct user_struct *user;       /* user who created, for accounting */
        struct sock *notify_sock;
@@ -773,28 +774,44 @@ static void __do_notify(struct mqueue_inode_info *info)
         * synchronously. */
        if (info->notify_owner &&
            info->attr.mq_curmsgs == 1) {
-               struct kernel_siginfo sig_i;
                switch (info->notify.sigev_notify) {
                case SIGEV_NONE:
                        break;
-               case SIGEV_SIGNAL:
-                       /* sends signal */
+               case SIGEV_SIGNAL: {
+                       struct kernel_siginfo sig_i;
+                       struct task_struct *task;
+
+                       /* do_mq_notify() accepts sigev_signo == 0, why?? */
+                       if (!info->notify.sigev_signo)
+                               break;
 
                        clear_siginfo(&sig_i);
                        sig_i.si_signo = info->notify.sigev_signo;
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
-                       /* map current pid/uid into info->owner's namespaces */
                        rcu_read_lock();
+                       /* map current pid/uid into info->owner's namespaces */
                        sig_i.si_pid = task_tgid_nr_ns(current,
                                                ns_of_pid(info->notify_owner));
-                       sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
+                       sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
+                                               current_uid());
+                       /*
+                        * We can't use kill_pid_info(), this signal should
+                        * bypass check_kill_permission(). It is from kernel
+                        * but si_fromuser() can't know this.
+                        * We do check the self_exec_id, to avoid sending
+                        * signals to programs that don't expect them.
+                        */
+                       task = pid_task(info->notify_owner, PIDTYPE_TGID);
+                       if (task && task->self_exec_id ==
+                                               info->notify_self_exec_id) {
+                               do_send_sig_info(info->notify.sigev_signo,
+                                               &sig_i, task, PIDTYPE_TGID);
+                       }
                        rcu_read_unlock();
-
-                       kill_pid_info(info->notify.sigev_signo,
-                                     &sig_i, info->notify_owner);
                        break;
+               }
                case SIGEV_THREAD:
                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
                        netlink_sendskb(info->notify_sock, info->notify_cookie);
@@ -1383,6 +1400,7 @@ retry:
                        info->notify.sigev_signo = notification->sigev_signo;
                        info->notify.sigev_value = notification->sigev_value;
                        info->notify.sigev_notify = SIGEV_SIGNAL;
+                       info->notify_self_exec_id = current->self_exec_id;
                        break;
                }
 
index 7acccfd..cfa0045 100644 (file)
@@ -764,21 +764,21 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
                        total++;
        }
 
-       *new_pos = pos + 1;
+       ipc = NULL;
        if (total >= ids->in_use)
-               return NULL;
+               goto out;
 
        for (; pos < ipc_mni; pos++) {
                ipc = idr_find(&ids->ipcs_idr, pos);
                if (ipc != NULL) {
                        rcu_read_lock();
                        ipc_lock_object(ipc);
-                       return ipc;
+                       break;
                }
        }
-
-       /* Out of range - return NULL to terminate iteration */
-       return NULL;
+out:
+       *new_pos = pos + 1;
+       return ipc;
 }
 
 static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
index 8c700f8..48ed227 100644 (file)
@@ -2486,11 +2486,11 @@ long do_fork(unsigned long clone_flags,
              int __user *child_tidptr)
 {
        struct kernel_clone_args args = {
-               .flags          = (clone_flags & ~CSIGNAL),
+               .flags          = (lower_32_bits(clone_flags) & ~CSIGNAL),
                .pidfd          = parent_tidptr,
                .child_tid      = child_tidptr,
                .parent_tid     = parent_tidptr,
-               .exit_signal    = (clone_flags & CSIGNAL),
+               .exit_signal    = (lower_32_bits(clone_flags) & CSIGNAL),
                .stack          = stack_start,
                .stack_size     = stack_size,
        };
@@ -2508,8 +2508,9 @@ long do_fork(unsigned long clone_flags,
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
        struct kernel_clone_args args = {
-               .flags          = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
-               .exit_signal    = (flags & CSIGNAL),
+               .flags          = ((lower_32_bits(flags) | CLONE_VM |
+                                   CLONE_UNTRACED) & ~CSIGNAL),
+               .exit_signal    = (lower_32_bits(flags) & CSIGNAL),
                .stack          = (unsigned long)fn,
                .stack_size     = (unsigned long)arg,
        };
@@ -2570,11 +2571,11 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 #endif
 {
        struct kernel_clone_args args = {
-               .flags          = (clone_flags & ~CSIGNAL),
+               .flags          = (lower_32_bits(clone_flags) & ~CSIGNAL),
                .pidfd          = parent_tidptr,
                .child_tid      = child_tidptr,
                .parent_tid     = parent_tidptr,
-               .exit_signal    = (clone_flags & CSIGNAL),
+               .exit_signal    = (lower_32_bits(clone_flags) & CSIGNAL),
                .stack          = newsp,
                .tls            = tls,
        };
index f503542..8accc97 100644 (file)
@@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
  * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
  * arbitrary 4-byte non-zero number as the instance id). This common handle
  * then gets saved into the task_struct of the process that issued the
- * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
- * kernel threads, the common handle must be retrived via kcov_common_handle()
+ * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
+ * kernel threads, the common handle must be retrieved via kcov_common_handle()
  * and passed to the spawned threads via custom annotations. Those kernel
  * threads must in turn be annotated with kcov_remote_start(common_handle) and
  * kcov_remote_stop(). All of the threads that are spawned by the same process
index 402eef8..7436470 100644 (file)
@@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES
 config PROFILE_ALL_BRANCHES
        bool "Profile all if conditionals" if !FORTIFY_SOURCE
        select TRACE_BRANCH_PROFILING
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          This tracer profiles all branch conditions. Every if ()
          taken in the kernel is recorded whether it hit or miss.
index 0456e0a..382775e 100644 (file)
@@ -4,28 +4,6 @@
 
 #ifdef CONFIG_FUNCTION_TRACER
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw_check() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw_check() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list)                        \
-       op = rcu_dereference_raw_check(list);                   \
-       do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op)                           \
-       while (likely(op = rcu_dereference_raw_check((op)->next)) &&    \
-              unlikely((op) != &ftrace_list_end))
-
-extern struct ftrace_ops __rcu *ftrace_ops_list;
-extern struct ftrace_ops ftrace_list_end;
 extern struct mutex ftrace_lock;
 extern struct ftrace_ops global_ops;
 
index 31c0fad..312d1a0 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/printk.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
+#include <linux/completion.h>
 
 static ulong delay = 100;
 static char test_mode[12] = "irq";
@@ -28,6 +29,8 @@ MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)");
 MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)");
 MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)");
 
+static struct completion done;
+
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
 
 static void busy_wait(ulong time)
@@ -113,22 +116,47 @@ static int preemptirq_delay_run(void *data)
 
        for (i = 0; i < s; i++)
                (testfuncs[i])(i);
+
+       complete(&done);
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop()) {
+               schedule();
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+
+       __set_current_state(TASK_RUNNING);
+
        return 0;
 }
 
-static struct task_struct *preemptirq_start_test(void)
+static int preemptirq_run_test(void)
 {
+       struct task_struct *task;
        char task_name[50];
 
+       init_completion(&done);
+
        snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
-       return kthread_run(preemptirq_delay_run, NULL, task_name);
+       task =  kthread_run(preemptirq_delay_run, NULL, task_name);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       if (task) {
+               wait_for_completion(&done);
+               kthread_stop(task);
+       }
+       return 0;
 }
 
 
 static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
                         const char *buf, size_t count)
 {
-       preemptirq_start_test();
+       ssize_t ret;
+
+       ret = preemptirq_run_test();
+       if (ret)
+               return ret;
        return count;
 }
 
@@ -148,11 +176,9 @@ static struct kobject *preemptirq_delay_kobj;
 
 static int __init preemptirq_delay_init(void)
 {
-       struct task_struct *test_task;
        int retval;
 
-       test_task = preemptirq_start_test();
-       retval = PTR_ERR_OR_ZERO(test_task);
+       retval = preemptirq_run_test();
        if (retval != 0)
                return retval;
 
index 6f0b42c..b8e1ca4 100644 (file)
@@ -193,7 +193,7 @@ rb_event_length(struct ring_buffer_event *event)
        case RINGBUF_TYPE_DATA:
                return rb_event_data_length(event);
        default:
-               BUG();
+               WARN_ON_ONCE(1);
        }
        /* not hit */
        return 0;
@@ -249,7 +249,7 @@ rb_event_data(struct ring_buffer_event *event)
 {
        if (extended_time(event))
                event = skip_time_extend(event);
-       BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+       WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
        /* If length is in len field, then array[0] has the data */
        if (event->type_len)
                return (void *)&event->array[0];
@@ -3727,7 +3727,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                return;
 
        default:
-               BUG();
+               RB_WARN_ON(cpu_buffer, 1);
        }
        return;
 }
@@ -3757,7 +3757,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
                return;
 
        default:
-               BUG();
+               RB_WARN_ON(iter->cpu_buffer, 1);
        }
        return;
 }
@@ -4020,7 +4020,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
                return event;
 
        default:
-               BUG();
+               RB_WARN_ON(cpu_buffer, 1);
        }
 
        return NULL;
@@ -4034,7 +4034,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
        int nr_loops = 0;
-       bool failed = false;
 
        if (ts)
                *ts = 0;
@@ -4056,19 +4055,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                return NULL;
 
        /*
-        * We repeat when a time extend is encountered or we hit
-        * the end of the page. Since the time extend is always attached
-        * to a data event, we should never loop more than three times.
-        * Once for going to next page, once on time extend, and
-        * finally once to get the event.
-        * We should never hit the following condition more than thrice,
-        * unless the buffer is very small, and there's a writer
-        * that is causing the reader to fail getting an event.
+        * As the writer can mess with what the iterator is trying
+        * to read, just give up if we fail to get an event after
+        * three tries. The iterator is not as reliable when reading
+        * the ring buffer with an active write as the consumer is.
+        * Do not warn if the three failures is reached.
         */
-       if (++nr_loops > 3) {
-               RB_WARN_ON(cpu_buffer, !failed);
+       if (++nr_loops > 3)
                return NULL;
-       }
 
        if (rb_per_cpu_empty(cpu_buffer))
                return NULL;
@@ -4079,10 +4073,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        }
 
        event = rb_iter_head_event(iter);
-       if (!event) {
-               failed = true;
+       if (!event)
                goto again;
-       }
 
        switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
@@ -4117,7 +4109,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                return event;
 
        default:
-               BUG();
+               RB_WARN_ON(cpu_buffer, 1);
        }
 
        return NULL;
index 8d2b988..29615f1 100644 (file)
@@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str)
 EXPORT_SYMBOL_GPL(__trace_bputs);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
+static void tracing_snapshot_instance_cond(struct trace_array *tr,
+                                          void *cond_data)
 {
        struct tracer *tracer = tr->current_trace;
        unsigned long flags;
@@ -8525,6 +8526,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
         */
        allocate_snapshot = false;
 #endif
+
+       /*
+        * Because of some magic with the way alloc_percpu() works on
+        * x86_64, we need to synchronize the pgd of all the tables,
+        * otherwise the trace events that happen in x86_64 page fault
+        * handlers can't cope with accessing the chance that a
+        * alloc_percpu()'d memory might be touched in the page fault trace
+        * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
+        * calls in tracing, because something might get triggered within a
+        * page fault trace event!
+        */
+       vmalloc_sync_mappings();
+
        return 0;
 }
 
index 06d7feb..9de29bb 100644 (file)
@@ -95,24 +95,20 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
        struct xbc_node *anode;
        char buf[MAX_BUF_LEN];
        const char *val;
-       int ret;
+       int ret = 0;
 
-       kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
+       xbc_node_for_each_array_value(node, "probes", anode, val) {
+               kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
 
-       ret = kprobe_event_gen_cmd_start(&cmd, event, NULL);
-       if (ret)
-               return ret;
+               ret = kprobe_event_gen_cmd_start(&cmd, event, val);
+               if (ret)
+                       break;
 
-       xbc_node_for_each_array_value(node, "probes", anode, val) {
-               ret = kprobe_event_add_field(&cmd, val);
+               ret = kprobe_event_gen_cmd_end(&cmd);
                if (ret)
-                       return ret;
+                       pr_err("Failed to add probe: %s\n", buf);
        }
 
-       ret = kprobe_event_gen_cmd_end(&cmd);
-       if (ret)
-               pr_err("Failed to add probe: %s\n", buf);
-
        return ret;
 }
 #else
index d0568af..3598938 100644 (file)
@@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr)
 
 static bool within_notrace_func(struct trace_kprobe *tk)
 {
-       unsigned long addr = addr = trace_kprobe_address(tk);
+       unsigned long addr = trace_kprobe_address(tk);
        char symname[KSYM_NAME_LEN], *p;
 
        if (!__within_notrace_func(addr))
@@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init);
  * complete command or only the first part of it; in the latter case,
  * kprobe_event_add_fields() can be used to add more fields following this.
  *
+ * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This
+ * returns -EINVAL if @loc == NULL.
+ *
  * Return: 0 if successful, error otherwise.
  */
 int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
@@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
        if (cmd->type != DYNEVENT_TYPE_KPROBE)
                return -EINVAL;
 
+       if (!loc)
+               return -EINVAL;
+
        if (kretprobe)
                snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name);
        else
index 20d51e0..3474d6a 100644 (file)
@@ -550,6 +550,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob);
  * Runs a user-space application.  The application is started
  * asynchronously if wait is not set, and runs as a child of system workqueues.
  * (ie. it runs with full root capabilities and optimized affinity).
+ *
+ * Note: successful return value does not guarantee the helper was called at
+ * all. You can't rely on sub_info->{init,cleanup} being called even for
+ * UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers
+ * into a successful no-op.
  */
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
index 48469c9..9292110 100644 (file)
@@ -60,18 +60,15 @@ config UBSAN_SANITIZE_ALL
          Enabling this option will get kernel image size increased
          significantly.
 
-config UBSAN_NO_ALIGNMENT
-       bool "Disable checking of pointers alignment"
-       default y if HAVE_EFFICIENT_UNALIGNED_ACCESS
+config UBSAN_ALIGNMENT
+       bool "Enable checks for pointers alignment"
+       default !HAVE_EFFICIENT_UNALIGNED_ACCESS
+       depends on !X86 || !COMPILE_TEST
        help
-         This option disables the check of unaligned memory accesses.
-         This option should be used when building allmodconfig.
-         Disabling this option on architectures that support unaligned
+         This option enables the check of unaligned memory accesses.
+         Enabling this option on architectures that support unaligned
          accesses may produce a lot of false positives.
 
-config UBSAN_ALIGNMENT
-       def_bool !UBSAN_NO_ALIGNMENT
-
 config TEST_UBSAN
        tristate "Module for testing for undefined behavior detection"
        depends on m
index c81b4f3..efc5b83 100644 (file)
@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
 EXPORT_SYMBOL_GPL(noop_backing_dev_info);
 
 static struct class *bdi_class;
-const char *bdi_unknown_name = "(unknown)";
+static const char *bdi_unknown_name = "(unknown)";
 
 /*
  * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (bdi->dev)   /* The driver needs to use separate queues per device */
                return 0;
 
-       dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
+       vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
+       dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_put);
 
+const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+       if (!bdi || !bdi->dev)
+               return bdi_unknown_name;
+       return bdi->dev_name;
+}
+EXPORT_SYMBOL_GPL(bdi_dev_name);
+
 static wait_queue_head_t congestion_wqh[2] = {
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
index 50681f0..87a6a59 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1218,6 +1218,10 @@ retry:
        if (!vma_permits_fault(vma, fault_flags))
                return -EFAULT;
 
+       if ((fault_flags & FAULT_FLAG_KILLABLE) &&
+           fatal_signal_pending(current))
+               return -EINTR;
+
        ret = handle_mm_fault(vma, address, fault_flags);
        major |= ret & VM_FAULT_MAJOR;
        if (ret & VM_FAULT_ERROR) {
@@ -1230,11 +1234,9 @@ retry:
 
        if (ret & VM_FAULT_RETRY) {
                down_read(&mm->mmap_sem);
-               if (!(fault_flags & FAULT_FLAG_TRIED)) {
-                       *unlocked = true;
-                       fault_flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
+               *unlocked = true;
+               fault_flags |= FAULT_FLAG_TRIED;
+               goto retry;
        }
 
        if (tsk) {
index 08b43de..434d503 100644 (file)
@@ -1,23 +1,28 @@
 # SPDX-License-Identifier: GPL-2.0
 KASAN_SANITIZE := n
-UBSAN_SANITIZE_common.o := n
-UBSAN_SANITIZE_generic.o := n
-UBSAN_SANITIZE_generic_report.o := n
-UBSAN_SANITIZE_tags.o := n
+UBSAN_SANITIZE := n
 KCOV_INSTRUMENT := n
 
+# Disable ftrace to avoid recursion.
 CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE)
 
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1
 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
-
 CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_init.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_quarantine.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_tags_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
 
 obj-$(CONFIG_KASAN) := common.o init.o report.o
 obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o
index e8f3719..cfade64 100644 (file)
@@ -212,8 +212,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
 void __asan_register_globals(struct kasan_global *globals, size_t size);
 void __asan_unregister_globals(struct kasan_global *globals, size_t size);
-void __asan_loadN(unsigned long addr, size_t size);
-void __asan_storeN(unsigned long addr, size_t size);
 void __asan_handle_no_return(void);
 void __asan_alloca_poison(unsigned long addr, size_t size);
 void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom);
@@ -228,6 +226,8 @@ void __asan_load8(unsigned long addr);
 void __asan_store8(unsigned long addr);
 void __asan_load16(unsigned long addr);
 void __asan_store16(unsigned long addr);
+void __asan_loadN(unsigned long addr, size_t size);
+void __asan_storeN(unsigned long addr, size_t size);
 
 void __asan_load1_noabort(unsigned long addr);
 void __asan_store1_noabort(unsigned long addr);
@@ -239,6 +239,21 @@ void __asan_load8_noabort(unsigned long addr);
 void __asan_store8_noabort(unsigned long addr);
 void __asan_load16_noabort(unsigned long addr);
 void __asan_store16_noabort(unsigned long addr);
+void __asan_loadN_noabort(unsigned long addr, size_t size);
+void __asan_storeN_noabort(unsigned long addr, size_t size);
+
+void __asan_report_load1_noabort(unsigned long addr);
+void __asan_report_store1_noabort(unsigned long addr);
+void __asan_report_load2_noabort(unsigned long addr);
+void __asan_report_store2_noabort(unsigned long addr);
+void __asan_report_load4_noabort(unsigned long addr);
+void __asan_report_store4_noabort(unsigned long addr);
+void __asan_report_load8_noabort(unsigned long addr);
+void __asan_report_store8_noabort(unsigned long addr);
+void __asan_report_load16_noabort(unsigned long addr);
+void __asan_report_store16_noabort(unsigned long addr);
+void __asan_report_load_n_noabort(unsigned long addr, size_t size);
+void __asan_report_store_n_noabort(unsigned long addr, size_t size);
 
 void __asan_set_shadow_00(const void *addr, size_t size);
 void __asan_set_shadow_f1(const void *addr, size_t size);
@@ -247,4 +262,19 @@ void __asan_set_shadow_f3(const void *addr, size_t size);
 void __asan_set_shadow_f5(const void *addr, size_t size);
 void __asan_set_shadow_f8(const void *addr, size_t size);
 
+void __hwasan_load1_noabort(unsigned long addr);
+void __hwasan_store1_noabort(unsigned long addr);
+void __hwasan_load2_noabort(unsigned long addr);
+void __hwasan_store2_noabort(unsigned long addr);
+void __hwasan_load4_noabort(unsigned long addr);
+void __hwasan_store4_noabort(unsigned long addr);
+void __hwasan_load8_noabort(unsigned long addr);
+void __hwasan_store8_noabort(unsigned long addr);
+void __hwasan_load16_noabort(unsigned long addr);
+void __hwasan_store16_noabort(unsigned long addr);
+void __hwasan_loadN_noabort(unsigned long addr, size_t size);
+void __hwasan_storeN_noabort(unsigned long addr, size_t size);
+
+void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size);
+
 #endif
index 5beea03..a3b97f1 100644 (file)
@@ -4990,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        unsigned int size;
        int node;
        int __maybe_unused i;
+       long error = -ENOMEM;
 
        size = sizeof(struct mem_cgroup);
        size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 
        memcg = kzalloc(size, GFP_KERNEL);
        if (!memcg)
-               return NULL;
+               return ERR_PTR(error);
 
        memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
                                 1, MEM_CGROUP_ID_MAX,
                                 GFP_KERNEL);
-       if (memcg->id.id < 0)
+       if (memcg->id.id < 0) {
+               error = memcg->id.id;
                goto fail;
+       }
 
        memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
        if (!memcg->vmstats_local)
@@ -5046,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 fail:
        mem_cgroup_id_remove(memcg);
        __mem_cgroup_free(memcg);
-       return NULL;
+       return ERR_PTR(error);
 }
 
 static struct cgroup_subsys_state * __ref
@@ -5057,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        long error = -ENOMEM;
 
        memcg = mem_cgroup_alloc();
-       if (!memcg)
-               return ERR_PTR(error);
+       if (IS_ERR(memcg))
+               return ERR_CAST(memcg);
 
        WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
        memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5108,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 fail:
        mem_cgroup_id_remove(memcg);
        mem_cgroup_free(memcg);
-       return ERR_PTR(-ENOMEM);
+       return ERR_PTR(error);
 }
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
index c881abe..6aa6ea6 100644 (file)
@@ -794,7 +794,7 @@ out:
        if (locked && new_len > old_len)
                mm_populate(new_addr + old_len, new_len - old_len);
        userfaultfd_unmap_complete(mm, &uf_unmap_early);
-       mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+       mremap_userfaultfd_complete(&uf, addr, ret, old_len);
        userfaultfd_unmap_complete(mm, &uf_unmap);
        return ret;
 }
index 69827d4..13cc653 100644 (file)
@@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone)
                if (!__pageblock_pfn_to_page(block_start_pfn,
                                             block_end_pfn, zone))
                        return;
+               cond_resched();
        }
 
        /* We confirm that there is no hole */
@@ -2400,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone)
 
        if (!watermark_boost_factor)
                return;
+       /*
+        * Don't bother in zones that are unlikely to produce results.
+        * On small machines, including kdump capture kernels running
+        * in a small area, boosting the watermark can cause an out of
+        * memory situation immediately.
+        */
+       if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
+               return;
 
        max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
                        watermark_boost_factor, 10000);
index d7e3bc6..7da7d77 100644 (file)
@@ -80,6 +80,7 @@
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                 gfp_t gfp)
 {
-       /* whitelisted flags that can be passed to the backing allocators */
-       gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-       bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
-       bool do_warn = !(gfp & __GFP_NOWARN);
+       gfp_t pcpu_gfp;
+       bool is_atomic;
+       bool do_warn;
        static int warn_limit = 10;
        struct pcpu_chunk *chunk, *next;
        const char *err;
@@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
        void __percpu *ptr;
        size_t bits, bit_align;
 
+       gfp = current_gfp_context(gfp);
+       /* whitelisted flags that can be passed to the backing allocators */
+       pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+       is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
+       do_warn = !(gfp & __GFP_NOWARN);
+
        /*
         * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
         * therefore alignment must be a minimum of that many bytes.
index 9bf4495..b762450 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr,
        metadata_access_disable();
 }
 
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+       return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+       if (freeptr_outside_object(s))
+               return s->inuse + sizeof(void *);
+       else
+               return s->inuse;
+}
+
 static struct track *get_track(struct kmem_cache *s, void *object,
        enum track_item alloc)
 {
        struct track *p;
 
-       if (s->offset)
-               p = object + s->offset + sizeof(void *);
-       else
-               p = object + s->inuse;
+       p = object + get_info_end(s);
 
        return p + alloc;
 }
@@ -686,10 +703,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
                print_section(KERN_ERR, "Redzone ", p + s->object_size,
                        s->inuse - s->object_size);
 
-       if (s->offset)
-               off = s->offset + sizeof(void *);
-       else
-               off = s->inuse;
+       off = get_info_end(s);
 
        if (s->flags & SLAB_STORE_USER)
                off += 2 * sizeof(struct track);
@@ -782,7 +796,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
  * object address
  *     Bytes of the object to be managed.
  *     If the freepointer may overlay the object then the free
- *     pointer is the first word of the object.
+ *     pointer is at the middle of the object.
  *
  *     Poisoning uses 0x6b (POISON_FREE) and the last byte is
  *     0xa5 (POISON_END)
@@ -816,11 +830,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 
 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
 {
-       unsigned long off = s->inuse;   /* The end of info */
-
-       if (s->offset)
-               /* Freepointer is placed after the object. */
-               off += sizeof(void *);
+       unsigned long off = get_info_end(s);    /* The end of info */
 
        if (s->flags & SLAB_STORE_USER)
                /* We also have user information there */
@@ -907,7 +917,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
                check_pad_bytes(s, page, p);
        }
 
-       if (!s->offset && val == SLUB_RED_ACTIVE)
+       if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
                /*
                 * Object and freepointer overlap. Cannot check
                 * freepointer while object is allocated.
@@ -3587,6 +3597,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 *
                 * This is the case if we do RCU, have a constructor or
                 * destructor or are poisoning the objects.
+                *
+                * The assumption that s->offset >= s->inuse means free
+                * pointer is outside of the object is used in the
+                * freeptr_outside_object() function. If that is no
+                * longer true, the function needs to be modified.
                 */
                s->offset = size;
                size += sizeof(void *);
index b06868f..a37c87b 100644 (file)
@@ -1625,7 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
  * @dst:       The temp list to put pages on to.
  * @nr_scanned:        The number of pages that were scanned.
  * @sc:                The scan_control struct for this reclaim session
- * @mode:      One of the LRU isolation modes
  * @lru:       LRU list id for isolating
  *
  * returns how many pages were moved onto *@dst.
index 25fbd8d..ac5cac0 100644 (file)
@@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
        struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
        struct kvec *head = rqstp->rq_rcv_buf.head;
        struct rpc_auth *auth = cred->cr_auth;
-       unsigned int savedlen = rcv_buf->len;
        u32 offset, opaque_len, maj_stat;
        __be32 *p;
 
@@ -2043,9 +2042,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
        offset = (u8 *)(p) - (u8 *)head->iov_base;
        if (offset + opaque_len > rcv_buf->len)
                goto unwrap_failed;
-       rcv_buf->len = offset + opaque_len;
 
-       maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+       maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset,
+                             offset + opaque_len, rcv_buf);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        if (maj_stat != GSS_S_COMPLETE)
@@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
         */
        xdr_init_decode(xdr, rcv_buf, p, rqstp);
 
-       auth->au_rslack = auth->au_verfsize + 2 +
-                         XDR_QUADLEN(savedlen - rcv_buf->len);
-       auth->au_ralign = auth->au_verfsize + 2 +
-                         XDR_QUADLEN(savedlen - rcv_buf->len);
+       auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack;
+       auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align;
+
        return 0;
 unwrap_failed:
        trace_rpcgss_unwrap_failed(task);
index 6f2d30d..e7180da 100644 (file)
@@ -851,8 +851,8 @@ out_err:
 }
 
 u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
-                    u32 *headskip, u32 *tailskip)
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+                    struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
 {
        struct xdr_buf subbuf;
        u32 ret = 0;
@@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 
        /* create a segment skipping the header and leaving out the checksum */
        xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
-                                   (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
+                                   (len - offset - GSS_KRB5_TOK_HDR_LEN -
                                     kctx->gk5e->cksumlength));
 
        nblocks = (subbuf.len + blocksize - 1) / blocksize;
@@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
                goto out_err;
 
        /* Get the packet's hmac value */
-       ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
+       ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
                                      pkt_hmac, kctx->gk5e->cksumlength);
        if (ret)
                goto out_err;
index 6c1920e..cf0fd17 100644 (file)
@@ -261,7 +261,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 }
 
 static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
+                      struct xdr_buf *buf, unsigned int *slack,
+                      unsigned int *align)
 {
        int                     signalg;
        int                     sealalg;
@@ -279,12 +281,13 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        u32                     conflen = kctx->gk5e->conflen;
        int                     crypt_offset;
        u8                      *cksumkey;
+       unsigned int            saved_len = buf->len;
 
        dprintk("RPC:       gss_unwrap_kerberos\n");
 
        ptr = (u8 *)buf->head[0].iov_base + offset;
        if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
-                                       buf->len - offset))
+                                       len - offset))
                return GSS_S_DEFECTIVE_TOKEN;
 
        if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
@@ -324,6 +327,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
            (!kctx->initiate && direction != 0))
                return GSS_S_BAD_SIG;
 
+       buf->len = len;
        if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
                struct crypto_sync_skcipher *cipher;
                int err;
@@ -376,11 +380,15 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
        memmove(orig_start, data_start, data_len);
        buf->head[0].iov_len -= (data_start - orig_start);
-       buf->len -= (data_start - orig_start);
+       buf->len = len - (data_start - orig_start);
 
        if (gss_krb5_remove_padding(buf, blocksize))
                return GSS_S_DEFECTIVE_TOKEN;
 
+       /* slack must include room for krb5 padding */
+       *slack = XDR_QUADLEN(saved_len - buf->len);
+       /* The GSS blob always precedes the RPC message payload */
+       *align = *slack;
        return GSS_S_COMPLETE;
 }
 
@@ -486,7 +494,9 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 }
 
 static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
+                      struct xdr_buf *buf, unsigned int *slack,
+                      unsigned int *align)
 {
        time64_t        now;
        u8              *ptr;
@@ -532,7 +542,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        if (rrc != 0)
                rotate_left(offset + 16, buf, rrc);
 
-       err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+       err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
                                        &headskip, &tailskip);
        if (err)
                return GSS_S_FAILURE;
@@ -542,7 +552,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
         * it against the original
         */
        err = read_bytes_from_xdr_buf(buf,
-                               buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+                               len - GSS_KRB5_TOK_HDR_LEN - tailskip,
                                decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
        if (err) {
                dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
@@ -568,18 +578,19 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
         * Note that buf->head[0].iov_len may indicate the available
         * head buffer space rather than that actually occupied.
         */
-       movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+       movelen = min_t(unsigned int, buf->head[0].iov_len, len);
        movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
-       if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
-           buf->head[0].iov_len)
-               return GSS_S_FAILURE;
+       BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+                                                       buf->head[0].iov_len);
        memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
        buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
-       buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+       buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip;
 
        /* Trim off the trailing "extra count" and checksum blob */
-       buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+       xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
 
+       *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip);
+       *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
        return GSS_S_COMPLETE;
 }
 
@@ -603,7 +614,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 }
 
 u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
+                   int len, struct xdr_buf *buf)
 {
        struct krb5_ctx *kctx = gctx->internal_ctx_id;
 
@@ -613,9 +625,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
        case ENCTYPE_DES_CBC_RAW:
        case ENCTYPE_DES3_CBC_RAW:
        case ENCTYPE_ARCFOUR_HMAC:
-               return gss_unwrap_kerberos_v1(kctx, offset, buf);
+               return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
+                                             &gctx->slack, &gctx->align);
        case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
        case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
-               return gss_unwrap_kerberos_v2(kctx, offset, buf);
+               return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
+                                             &gctx->slack, &gctx->align);
        }
 }
index db550bf..69316ab 100644 (file)
@@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx   *ctx_id,
 u32
 gss_unwrap(struct gss_ctx      *ctx_id,
           int                  offset,
+          int                  len,
           struct xdr_buf       *buf)
 {
        return ctx_id->mech_type->gm_ops
-               ->gss_unwrap(ctx_id, offset, buf);
+               ->gss_unwrap(ctx_id, offset, len, buf);
 }
 
 
index 54ae5be..50d93c4 100644 (file)
@@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
        if (svc_getnl(&buf->head[0]) != seq)
                goto out;
        /* trim off the mic and padding at the end before returning */
-       buf->len -= 4 + round_up_to_quad(mic.len);
+       xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
        stat = 0;
 out:
        kfree(mic.data);
@@ -934,7 +934,7 @@ static int
 unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 {
        u32 priv_len, maj_stat;
-       int pad, saved_len, remaining_len, offset;
+       int pad, remaining_len, offset;
 
        clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 
@@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
        buf->len -= pad;
        fix_priv_head(buf, pad);
 
-       /* Maybe it would be better to give gss_unwrap a length parameter: */
-       saved_len = buf->len;
-       buf->len = priv_len;
-       maj_stat = gss_unwrap(ctx, 0, buf);
+       maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
        pad = priv_len - buf->len;
-       buf->len = saved_len;
        buf->len -= pad;
        /* The upper layers assume the buffer is aligned on 4-byte boundaries.
         * In the krb5p case, at least, the data ends up offset, so we need to
index 15b58c5..6f7d82f 100644 (file)
@@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+       size_t cur;
+       unsigned int trim = len;
+
+       if (buf->tail[0].iov_len) {
+               cur = min_t(size_t, buf->tail[0].iov_len, trim);
+               buf->tail[0].iov_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->page_len) {
+               cur = min_t(unsigned int, buf->page_len, trim);
+               buf->page_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->head[0].iov_len) {
+               cur = min_t(size_t, buf->head[0].iov_len, trim);
+               buf->head[0].iov_len -= cur;
+               trim -= cur;
+       }
+fix_len:
+       buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
        unsigned int this_len;
index 80b4a70..13a35f7 100644 (file)
@@ -416,7 +416,7 @@ TRACE_EVENT_FN(foo_bar_with_fn,
  * Note, TRACE_EVENT() itself is simply defined as:
  *
  * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk)  \
- *  DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
+ *  DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
  *  DEFINE_EVENT(name, name, proto, args)
  *
  * The DEFINE_EVENT() also can be declared with conditions and reg functions:
index ba8b8d5..fbdb325 100755 (executable)
@@ -126,7 +126,7 @@ faultlinenum=$(( $(wc -l $T.oo  | cut -d" " -f1) - \
 faultline=`cat $T.dis | head -1 | cut -d":" -f2-`
 faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'`
 
-cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
+cat $T.oo | sed -e "${faultlinenum}s/^\([^:]*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
 echo
 cat $T.aa
 cleanup
index 39db889..c4b9916 100644 (file)
@@ -12,7 +12,7 @@ rb_node_type = utils.CachedType("struct rb_node")
 
 def rb_first(root):
     if root.type == rb_root_type.get_type():
-        node = node.address.cast(rb_root_type.get_type().pointer())
+        node = root.address.cast(rb_root_type.get_type().pointer())
     elif root.type != rb_root_type.get_type().pointer():
         raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
 
@@ -28,7 +28,7 @@ def rb_first(root):
 
 def rb_last(root):
     if root.type == rb_root_type.get_type():
-        node = node.address.cast(rb_root_type.get_type().pointer())
+        node = root.address.cast(rb_root_type.get_type().pointer())
     elif root.type != rb_root_type.get_type().pointer():
         raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
 
index 20dd08e..2a688b7 100644 (file)
@@ -120,6 +120,17 @@ static void snd_rawmidi_input_event_work(struct work_struct *work)
                runtime->event(runtime->substream);
 }
 
+/* buffer refcount management: call with runtime->lock held */
+static inline void snd_rawmidi_buffer_ref(struct snd_rawmidi_runtime *runtime)
+{
+       runtime->buffer_ref++;
+}
+
+static inline void snd_rawmidi_buffer_unref(struct snd_rawmidi_runtime *runtime)
+{
+       runtime->buffer_ref--;
+}
+
 static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream)
 {
        struct snd_rawmidi_runtime *runtime;
@@ -669,6 +680,11 @@ static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime,
                if (!newbuf)
                        return -ENOMEM;
                spin_lock_irq(&runtime->lock);
+               if (runtime->buffer_ref) {
+                       spin_unlock_irq(&runtime->lock);
+                       kvfree(newbuf);
+                       return -EBUSY;
+               }
                oldbuf = runtime->buffer;
                runtime->buffer = newbuf;
                runtime->buffer_size = params->buffer_size;
@@ -1019,8 +1035,10 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
        long result = 0, count1;
        struct snd_rawmidi_runtime *runtime = substream->runtime;
        unsigned long appl_ptr;
+       int err = 0;
 
        spin_lock_irqsave(&runtime->lock, flags);
+       snd_rawmidi_buffer_ref(runtime);
        while (count > 0 && runtime->avail) {
                count1 = runtime->buffer_size - runtime->appl_ptr;
                if (count1 > count)
@@ -1039,16 +1057,19 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream,
                if (userbuf) {
                        spin_unlock_irqrestore(&runtime->lock, flags);
                        if (copy_to_user(userbuf + result,
-                                        runtime->buffer + appl_ptr, count1)) {
-                               return result > 0 ? result : -EFAULT;
-                       }
+                                        runtime->buffer + appl_ptr, count1))
+                               err = -EFAULT;
                        spin_lock_irqsave(&runtime->lock, flags);
+                       if (err)
+                               goto out;
                }
                result += count1;
                count -= count1;
        }
+ out:
+       snd_rawmidi_buffer_unref(runtime);
        spin_unlock_irqrestore(&runtime->lock, flags);
-       return result;
+       return result > 0 ? result : err;
 }
 
 long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream,
@@ -1342,6 +1363,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
                        return -EAGAIN;
                }
        }
+       snd_rawmidi_buffer_ref(runtime);
        while (count > 0 && runtime->avail > 0) {
                count1 = runtime->buffer_size - runtime->appl_ptr;
                if (count1 > count)
@@ -1373,6 +1395,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream,
        }
       __end:
        count1 = runtime->avail < runtime->buffer_size;
+       snd_rawmidi_buffer_unref(runtime);
        spin_unlock_irqrestore(&runtime->lock, flags);
        if (count1)
                snd_rawmidi_output_trigger(substream, 1);
index 16c7f66..26e7cb5 100644 (file)
@@ -66,8 +66,7 @@ TRACE_EVENT(amdtp_packet,
                __entry->irq,
                __entry->index,
                __print_array(__get_dynamic_array(cip_header),
-                             __get_dynamic_array_len(cip_header),
-                             sizeof(u8)))
+                             __get_dynamic_array_len(cip_header), 1))
 );
 
 #endif
index c16f639..dc23021 100644 (file)
@@ -5856,6 +5856,15 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
        }
 }
 
+static void alc225_fixup_s3_pop_noise(struct hda_codec *codec,
+                                     const struct hda_fixup *fix, int action)
+{
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       codec->power_save_node = 1;
+}
+
 /* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
 static void alc274_fixup_bind_dacs(struct hda_codec *codec,
                                    const struct hda_fixup *fix, int action)
@@ -5960,6 +5969,7 @@ enum {
        ALC269_FIXUP_HP_LINE1_MIC1_LED,
        ALC269_FIXUP_INV_DMIC,
        ALC269_FIXUP_LENOVO_DOCK,
+       ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST,
        ALC269_FIXUP_NO_SHUTUP,
        ALC286_FIXUP_SONY_MIC_NO_PRESENCE,
        ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT,
@@ -6045,6 +6055,7 @@ enum {
        ALC233_FIXUP_ACER_HEADSET_MIC,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
        ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE,
+       ALC225_FIXUP_S3_POP_NOISE,
        ALC700_FIXUP_INTEL_REFERENCE,
        ALC274_FIXUP_DELL_BIND_DACS,
        ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
@@ -6080,9 +6091,12 @@ enum {
        ALC294_FIXUP_ASUS_DUAL_SPK,
        ALC285_FIXUP_THINKPAD_HEADSET_JACK,
        ALC294_FIXUP_ASUS_HPE,
+       ALC294_FIXUP_ASUS_COEF_1B,
        ALC285_FIXUP_HP_GPIO_LED,
        ALC285_FIXUP_HP_MUTE_LED,
        ALC236_FIXUP_HP_MUTE_LED,
+       ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET,
+       ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6280,6 +6294,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT
        },
+       [ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269_fixup_limit_int_mic_boost,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_LENOVO_DOCK,
+       },
        [ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc269_fixup_pincfg_no_hp_to_lineout,
@@ -6932,6 +6952,12 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
                .chained = true,
+               .chain_id = ALC225_FIXUP_S3_POP_NOISE
+       },
+       [ALC225_FIXUP_S3_POP_NOISE] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc225_fixup_s3_pop_noise,
+               .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
        },
        [ALC700_FIXUP_INTEL_REFERENCE] = {
@@ -7204,6 +7230,17 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
        },
+       [ALC294_FIXUP_ASUS_COEF_1B] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       /* Set bit 10 to correct noisy output after reboot from
+                        * Windows 10 (due to pop noise reduction?)
+                        */
+                       { 0x20, AC_VERB_SET_COEF_INDEX, 0x1b },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x4e4b },
+                       { }
+               },
+       },
        [ALC285_FIXUP_HP_GPIO_LED] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc285_fixup_hp_gpio_led,
@@ -7216,6 +7253,22 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc236_fixup_hp_mute_led,
        },
+       [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc5 },
+                       { }
+               },
+       },
+       [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7383,8 +7436,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE),
+       SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
        SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -7410,6 +7465,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
        SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
+       SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
+       SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
        SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
@@ -7426,7 +7483,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x21ca, "Thinkpad L412", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x21e9, "Thinkpad Edge 15", ALC269_FIXUP_SKU_IGNORE),
-       SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK),
+       SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x21fa, "Thinkpad X230", ALC269_FIXUP_LENOVO_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x21f3, "Thinkpad T430", ALC269_FIXUP_LENOVO_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK),
@@ -7565,6 +7622,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC269_FIXUP_HEADSET_MODE, .name = "headset-mode"},
        {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
        {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
+       {.id = ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST, .name = "lenovo-dock-limit-boost"},
        {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
        {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
        {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
@@ -7993,6 +8051,18 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
                {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
+               {0x12, 0x90a60120},
+               {0x17, 0x90170110},
+               {0x21, 0x04211030}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x17, 0x90170110},
+               {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x17, 0x90170110},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                {0x14, 0x90170110},
                {0x21, 0x04211020}),
@@ -8209,8 +8279,6 @@ static int patch_alc269(struct hda_codec *codec)
                spec->gen.mixer_nid = 0;
                break;
        case 0x10ec0225:
-               codec->power_save_node = 1;
-               /* fall through */
        case 0x10ec0295:
        case 0x10ec0299:
                spec->codec_variant = ALC269_TYPE_ALC225;
index 0260c75..bfdc6ad 100644 (file)
@@ -549,6 +549,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .map = trx40_mobo_map,
                .connector_map = trx40_mobo_connector_map,
        },
+       {       /* Asrock TRX40 Creator */
+               .id = USB_ID(0x26ce, 0x0a01),
+               .map = trx40_mobo_map,
+               .connector_map = trx40_mobo_connector_map,
+       },
        { 0 } /* terminator */
 };
 
index a1df4c5..6313c30 100644 (file)
@@ -3563,6 +3563,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
 ALC1220_VB_DESKTOP(0x0414, 0xa002), /* Gigabyte TRX40 Aorus Pro WiFi */
 ALC1220_VB_DESKTOP(0x0db0, 0x0d64), /* MSI TRX40 Creator */
 ALC1220_VB_DESKTOP(0x0db0, 0x543d), /* MSI TRX40 */
+ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */
 #undef ALC1220_VB_DESKTOP
 
 #undef USB_DEVICE_VENDOR_SPEC
index 848a4cc..d8a765b 100644 (file)
@@ -1636,13 +1636,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                msleep(20);
 
-       /* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here,
-        * otherwise requests like get/set frequency return as failed despite
-        * actually succeeding.
+       /* Zoom R16/24, Logitech H650e, Jabra 550a, Kingston HyperX needs a tiny
+        * delay here, otherwise requests like get/set frequency return as
+        * failed despite actually succeeding.
         */
        if ((chip->usb_id == USB_ID(0x1686, 0x00dd) ||
             chip->usb_id == USB_ID(0x046d, 0x0a46) ||
-            chip->usb_id == USB_ID(0x0b0e, 0x0349)) &&
+            chip->usb_id == USB_ID(0x0b0e, 0x0349) ||
+            chip->usb_id == USB_ID(0x0951, 0x16ad)) &&
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                usleep_range(1000, 2000);
 }
index 16b9a42..0efaf45 100644 (file)
@@ -314,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path)
        ret = delete_xbc(path);
        if (ret < 0) {
                pr_err("Failed to delete previous boot config: %d\n", ret);
+               free(data);
                return ret;
        }
 
@@ -321,24 +322,27 @@ int apply_xbc(const char *path, const char *xbc_path)
        fd = open(path, O_RDWR | O_APPEND);
        if (fd < 0) {
                pr_err("Failed to open %s: %d\n", path, fd);
+               free(data);
                return fd;
        }
        /* TODO: Ensure the @path is initramfs/initrd image */
        ret = write(fd, data, size + 8);
        if (ret < 0) {
                pr_err("Failed to apply a boot config: %d\n", ret);
-               return ret;
+               goto out;
        }
        /* Write a magic word of the bootconfig */
        ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
        if (ret < 0) {
                pr_err("Failed to apply a boot config magic: %d\n", ret);
-               return ret;
+               goto out;
        }
+       ret = 0;
+out:
        close(fd);
        free(data);
 
-       return 0;
+       return ret;
 }
 
 int usage(void)
index 7427a5e..9d8e961 100644 (file)
@@ -159,7 +159,12 @@ class IocgStat:
         else:
             self.inflight_pct = 0
 
-        self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+        # vdebt used to be an atomic64_t and is now u64, support both
+        try:
+            self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+        except:
+            self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+
         self.use_delay = blkg.use_delay.counter.value_()
         self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
 
index 4b170fd..3c6da70 100644 (file)
@@ -72,6 +72,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
        return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sym(struct objtool_file *file,
+                                              struct instruction *insn)
+{
+       struct instruction *prev = list_prev_entry(insn, list);
+
+       if (&prev->list != &file->insn_list && prev->func == insn->func)
+               return prev;
+
+       return NULL;
+}
+
 #define func_for_each_insn(file, func, insn)                           \
        for (insn = find_insn(file, func->sec, func->offset);           \
             insn;                                                      \
@@ -1050,8 +1061,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
         * it.
         */
        for (;
-            &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func;
-            insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+            insn && insn->func && insn->func->pfunc == func;
+            insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
 
                if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
                        break;
@@ -1449,7 +1460,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
        struct cfi_reg *cfa = &state->cfa;
        struct stack_op *op = &insn->stack_op;
 
-       if (cfa->base != CFI_SP)
+       if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
                return 0;
 
        /* push */
index 0b79c23..12e01ac 100644 (file)
@@ -87,9 +87,10 @@ struct elf {
 #define OFFSET_STRIDE          (1UL << OFFSET_STRIDE_BITS)
 #define OFFSET_STRIDE_MASK     (~(OFFSET_STRIDE - 1))
 
-#define for_offset_range(_offset, _start, _end)                \
-       for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
-            _offset <= ((_end) & OFFSET_STRIDE_MASK);  \
+#define for_offset_range(_offset, _start, _end)                        \
+       for (_offset = ((_start) & OFFSET_STRIDE_MASK);         \
+            _offset >= ((_start) & OFFSET_STRIDE_MASK) &&      \
+            _offset <= ((_end) & OFFSET_STRIDE_MASK);          \
             _offset += OFFSET_STRIDE)
 
 static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
index cd5e1f6..909da9c 100644 (file)
@@ -351,6 +351,7 @@ static int test_alloc_errors(char *heap_name)
        }
 
        printf("Expected error checking passed\n");
+       ret = 0;
 out:
        if (dmabuf_fd >= 0)
                close(dmabuf_fd);
index 11eee0b..d979ff1 100644 (file)
@@ -3,6 +3,7 @@
 #define _GNU_SOURCE
 #include <poll.h>
 #include <unistd.h>
+#include <assert.h>
 #include <signal.h>
 #include <pthread.h>
 #include <sys/epoll.h>
@@ -3136,4 +3137,149 @@ TEST(epoll59)
        close(ctx.sfd[0]);
 }
 
+enum {
+       EPOLL60_EVENTS_NR = 10,
+};
+
+struct epoll60_ctx {
+       volatile int stopped;
+       int ready;
+       int waiters;
+       int epfd;
+       int evfd[EPOLL60_EVENTS_NR];
+};
+
+static void *epoll60_wait_thread(void *ctx_)
+{
+       struct epoll60_ctx *ctx = ctx_;
+       struct epoll_event e;
+       sigset_t sigmask;
+       uint64_t v;
+       int ret;
+
+       /* Block SIGUSR1 */
+       sigemptyset(&sigmask);
+       sigaddset(&sigmask, SIGUSR1);
+       sigprocmask(SIG_SETMASK, &sigmask, NULL);
+
+       /* Prepare empty mask for epoll_pwait() */
+       sigemptyset(&sigmask);
+
+       while (!ctx->stopped) {
+               /* Mark we are ready */
+               __atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE);
+
+               /* Start when all are ready */
+               while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) &&
+                      !ctx->stopped);
+
+               /* Account this waiter */
+               __atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE);
+
+               ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask);
+               if (ret != 1) {
+                       /* We expect only signal delivery on stop */
+                       assert(ret < 0 && errno == EINTR && "Lost wakeup!\n");
+                       assert(ctx->stopped);
+                       break;
+               }
+
+               ret = read(e.data.fd, &v, sizeof(v));
+               /* Since we are on ET mode, thus each thread gets its own fd. */
+               assert(ret == sizeof(v));
+
+               __atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE);
+       }
+
+       return NULL;
+}
+
+static inline unsigned long long msecs(void)
+{
+       struct timespec ts;
+       unsigned long long msecs;
+
+       clock_gettime(CLOCK_REALTIME, &ts);
+       msecs = ts.tv_sec * 1000ull;
+       msecs += ts.tv_nsec / 1000000ull;
+
+       return msecs;
+}
+
+static inline int count_waiters(struct epoll60_ctx *ctx)
+{
+       return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE);
+}
+
+TEST(epoll60)
+{
+       struct epoll60_ctx ctx = { 0 };
+       pthread_t waiters[ARRAY_SIZE(ctx.evfd)];
+       struct epoll_event e;
+       int i, n, ret;
+
+       signal(SIGUSR1, signal_handler);
+
+       ctx.epfd = epoll_create1(0);
+       ASSERT_GE(ctx.epfd, 0);
+
+       /* Create event fds */
+       for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) {
+               ctx.evfd[i] = eventfd(0, EFD_NONBLOCK);
+               ASSERT_GE(ctx.evfd[i], 0);
+
+               e.events = EPOLLIN | EPOLLET;
+               e.data.fd = ctx.evfd[i];
+               ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0);
+       }
+
+       /* Create waiter threads */
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               ASSERT_EQ(pthread_create(&waiters[i], NULL,
+                                        epoll60_wait_thread, &ctx), 0);
+
+       for (i = 0; i < 300; i++) {
+               uint64_t v = 1, ms;
+
+               /* Wait for all to be ready */
+               while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) !=
+                      ARRAY_SIZE(ctx.evfd))
+                       ;
+
+               /* Steady, go */
+               __atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd),
+                                  __ATOMIC_ACQUIRE);
+
+               /* Wait all have gone to kernel */
+               while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd))
+                       ;
+
+               /* 1ms should be enough to schedule away */
+               usleep(1000);
+
+               /* Quickly signal all handles at once */
+               for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) {
+                       ret = write(ctx.evfd[n], &v, sizeof(v));
+                       ASSERT_EQ(ret, sizeof(v));
+               }
+
+               /* Busy loop for 1s and wait for all waiters to wake up */
+               ms = msecs();
+               while (count_waiters(&ctx) && msecs() < ms + 1000)
+                       ;
+
+               ASSERT_EQ(count_waiters(&ctx), 0);
+       }
+       ctx.stopped = 1;
+       /* Stop waiters */
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               ret = pthread_kill(waiters[i], SIGUSR1);
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               pthread_join(waiters[i], NULL);
+
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               close(ctx.evfd[i]);
+       close(ctx.epfd);
+}
+
 TEST_HARNESS_MAIN
index 063ecb2..a4605b5 100755 (executable)
@@ -17,6 +17,7 @@ echo "                -v|--verbose Increase verbosity of test messages"
 echo "         -vv        Alias of -v -v (Show all results in stdout)"
 echo "         -vvv       Alias of -v -v -v (Show all commands immediately)"
 echo "         --fail-unsupported Treat UNSUPPORTED as a failure"
+echo "         --fail-unresolved Treat UNRESOLVED as a failure"
 echo "         -d|--debug Debug mode (trace all shell commands)"
 echo "         -l|--logdir <dir> Save logs on the <dir>"
 echo "                     If <dir> is -, all logs output in console only"
@@ -29,8 +30,25 @@ err_ret=1
 # kselftest skip code is 4
 err_skip=4
 
+# cgroup RT scheduling prevents chrt commands from succeeding, which
+# induces failures in test wakeup tests.  Disable for the duration of
+# the tests.
+
+readonly sched_rt_runtime=/proc/sys/kernel/sched_rt_runtime_us
+
+sched_rt_runtime_orig=$(cat $sched_rt_runtime)
+
+setup() {
+  echo -1 > $sched_rt_runtime
+}
+
+cleanup() {
+  echo $sched_rt_runtime_orig > $sched_rt_runtime
+}
+
 errexit() { # message
   echo "Error: $1" 1>&2
+  cleanup
   exit $err_ret
 }
 
@@ -39,6 +57,8 @@ if [ `id -u` -ne 0 ]; then
   errexit "this must be run by root user"
 fi
 
+setup
+
 # Utilities
 absdir() { # file_path
   (cd `dirname $1`; pwd)
@@ -93,6 +113,10 @@ parse_opts() { # opts
       UNSUPPORTED_RESULT=1
       shift 1
     ;;
+    --fail-unresolved)
+      UNRESOLVED_RESULT=1
+      shift 1
+    ;;
     --logdir|-l)
       LOG_DIR=$2
       shift 2
@@ -157,6 +181,7 @@ KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
 UNSUPPORTED_RESULT=0
+UNRESOLVED_RESULT=0
 STOP_FAILURE=0
 # Parse command-line options
 parse_opts $*
@@ -235,6 +260,7 @@ TOTAL_RESULT=0
 
 INSTANCE=
 CASENO=0
+
 testcase() { # testfile
   CASENO=$((CASENO+1))
   desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
@@ -260,7 +286,7 @@ eval_result() { # sigval
     $UNRESOLVED)
       prlog "  [${color_blue}UNRESOLVED${color_reset}]"
       UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
-      return 1 # this is a kind of bug.. something happened.
+      return $UNRESOLVED_RESULT # depends on use case
     ;;
     $UNTESTED)
       prlog "  [${color_blue}UNTESTED${color_reset}]"
@@ -273,7 +299,7 @@ eval_result() { # sigval
       return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
-      prlog "  [${color_red}XFAIL${color_reset}]"
+      prlog "  [${color_green}XFAIL${color_reset}]"
       XFAILED_CASES="$XFAILED_CASES $CASENO"
       return 0
     ;;
@@ -406,5 +432,7 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
 prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
 prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
 
+cleanup
+
 # if no error, return 0
 exit $TOTAL_RESULT
index cbd1743..2b82c80 100644 (file)
@@ -17,7 +17,14 @@ unsup() { #msg
     exit_unsupported
 }
 
-modprobe $MOD || unsup "$MOD module not available"
+unres() { #msg
+    reset_tracer
+    rmmod $MOD || true
+    echo $1
+    exit_unresolved
+}
+
+modprobe $MOD || unres "$MOD module not available"
 rmmod $MOD
 
 grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
index 712a2dd..b728c0a 100644 (file)
@@ -5,8 +5,34 @@ all:
 
 top_srcdir = ../../../..
 KSFT_KHDR_INSTALL := 1
+
+# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
+# directories and targets in this Makefile. "uname -m" doesn't map to
+# arch specific sub-directory names.
+#
+# UNAME_M variable to used to run the compiles pointing to the right arch
+# directories and build the right targets for these supported architectures.
+#
+# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
+# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
+#
+# x86_64 targets are named to include x86_64 as a suffix and directories
+# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
+# same convention. "uname -m" doesn't result in the correct mapping for
+# s390x and aarch64.
+#
+# No change necessary for x86_64
 UNAME_M := $(shell uname -m)
 
+# Set UNAME_M for arm64 compile/install to work
+ifeq ($(ARCH),arm64)
+       UNAME_M := aarch64
+endif
+# Set UNAME_M s390x compile/install to work
+ifeq ($(ARCH),s390)
+       UNAME_M := s390x
+endif
+
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
@@ -53,7 +79,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
@@ -84,6 +110,7 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
 $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
        $(AR) crs $@ $^
 
+x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
 all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
 
index d8f4d6b..a034438 100644 (file)
@@ -219,8 +219,8 @@ struct hv_enlightened_vmcs {
 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
                (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
+extern struct hv_enlightened_vmcs *current_evmcs;
+extern struct hv_vp_assist_page *current_vp_assist;
 
 int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
 
index 6f17f69..4ae104f 100644 (file)
@@ -17,6 +17,9 @@
 
 bool enable_evmcs;
 
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
 struct eptPageTableEntry {
        uint64_t readable:1;
        uint64_t writable:1;
index dadf819..ee64ff8 100755 (executable)
@@ -25,13 +25,13 @@ fi
 # Figure out which test to run from our script name.
 test=$(basename $0 .sh)
 # Look up details about the test from master list of LKDTM tests.
-line=$(egrep '^#?'"$test"'\b' tests.txt)
+line=$(grep -E '^#?'"$test"'\b' tests.txt)
 if [ -z "$line" ]; then
        echo "Skipped: missing test '$test' in tests.txt"
        exit $KSELFTEST_SKIP_TEST
 fi
 # Check that the test is known to LKDTM.
-if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then
+if ! grep -E -q '^'"$test"'$' "$TRIGGER" ; then
        echo "Skipped: test '$test' missing in $TRIGGER!"
        exit $KSELFTEST_SKIP_TEST
 fi
@@ -59,30 +59,32 @@ if [ -z "$expect" ]; then
        expect="call trace:"
 fi
 
-# Clear out dmesg for output reporting
-dmesg -c >/dev/null
-
 # Prepare log for report checking
-LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX)
+LOG=$(mktemp --tmpdir -t lkdtm-log-XXXXXX)
+DMESG=$(mktemp --tmpdir -t lkdtm-dmesg-XXXXXX)
 cleanup() {
-       rm -f "$LOG"
+       rm -f "$LOG" "$DMESG"
 }
 trap cleanup EXIT
 
+# Save existing dmesg so we can detect new content below
+dmesg > "$DMESG"
+
 # Most shells yell about signals and we're expecting the "cat" process
 # to usually be killed by the kernel. So we have to run it in a sub-shell
 # and silence errors.
 ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
 
 # Record and dump the results
-dmesg -c >"$LOG"
+dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+
 cat "$LOG"
 # Check for expected output
-if egrep -qi "$expect" "$LOG" ; then
+if grep -E -qi "$expect" "$LOG" ; then
        echo "$test: saw '$expect': ok"
        exit 0
 else
-       if egrep -qi XFAIL: "$LOG" ; then
+       if grep -E -qi XFAIL: "$LOG" ; then
                echo "$test: saw 'XFAIL': [SKIP]"
                exit $KSELFTEST_SKIP_TEST
        else
index e0d86e1..e3c772c 100644 (file)
@@ -27,7 +27,7 @@
 #define __stack_aligned__      __attribute__((aligned(16)))
 struct cr_clone_arg {
        char stack[128] __stack_aligned__;
-       char stack_ptr[0];
+       char stack_ptr[];
 };
 
 static int child(void *args)
index 5909e7e..9803dbb 100644 (file)
@@ -25,7 +25,6 @@ CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_UBSAN=y
 CONFIG_UBSAN_SANITIZE_ALL=y
-CONFIG_UBSAN_NO_ALIGNMENT=y
 CONFIG_UBSAN_NULL=y
 CONFIG_DEBUG_KMEMLEAK=y
 CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
index d31f267..25c0e47 100644 (file)
@@ -125,12 +125,16 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
  */
 void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
 {
+       u32 pc = *vcpu_pc(vcpu);
        bool is_thumb;
 
        is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT);
        if (is_thumb && !is_wide_instr)
-               *vcpu_pc(vcpu) += 2;
+               pc += 2;
        else
-               *vcpu_pc(vcpu) += 4;
+               pc += 4;
+
+       *vcpu_pc(vcpu) = pc;
+
        kvm_adjust_itstate(vcpu);
 }
index 14a162e..ae36471 100644 (file)
@@ -186,6 +186,33 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
        kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
 }
 
+static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       /*
+        * Zero the input registers' upper 32 bits. They will be fully
+        * zeroed on exit, so we're fine changing them in place.
+        */
+       for (i = 1; i < 4; i++)
+               vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i)));
+}
+
+static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
+{
+       switch(fn) {
+       case PSCI_0_2_FN64_CPU_SUSPEND:
+       case PSCI_0_2_FN64_CPU_ON:
+       case PSCI_0_2_FN64_AFFINITY_INFO:
+               /* Disallow these functions for 32bit guests */
+               if (vcpu_mode_is_32bit(vcpu))
+                       return PSCI_RET_NOT_SUPPORTED;
+               break;
+       }
+
+       return 0;
+}
+
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -193,6 +220,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
        unsigned long val;
        int ret = 1;
 
+       val = kvm_psci_check_allowed_function(vcpu, psci_fn);
+       if (val)
+               goto out;
+
        switch (psci_fn) {
        case PSCI_0_2_FN_PSCI_VERSION:
                /*
@@ -210,12 +241,16 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
                val = PSCI_RET_SUCCESS;
                break;
        case PSCI_0_2_FN_CPU_ON:
+               kvm_psci_narrow_to_32bit(vcpu);
+               fallthrough;
        case PSCI_0_2_FN64_CPU_ON:
                mutex_lock(&kvm->lock);
                val = kvm_psci_vcpu_on(vcpu);
                mutex_unlock(&kvm->lock);
                break;
        case PSCI_0_2_FN_AFFINITY_INFO:
+               kvm_psci_narrow_to_32bit(vcpu);
+               fallthrough;
        case PSCI_0_2_FN64_AFFINITY_INFO:
                val = kvm_psci_vcpu_affinity_info(vcpu);
                break;
@@ -256,6 +291,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
                break;
        }
 
+out:
        smccc_set_retval(vcpu, val, 0, 0, 0);
        return ret;
 }
@@ -273,6 +309,10 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
                break;
        case PSCI_1_0_FN_PSCI_FEATURES:
                feature = smccc_get_arg1(vcpu);
+               val = kvm_psci_check_allowed_function(vcpu, feature);
+               if (val)
+                       break;
+
                switch(feature) {
                case PSCI_0_2_FN_PSCI_VERSION:
                case PSCI_0_2_FN_CPU_SUSPEND:
index a963b9d..32e32d6 100644 (file)
@@ -294,8 +294,15 @@ int vgic_init(struct kvm *kvm)
                }
        }
 
-       if (vgic_has_its(kvm)) {
+       if (vgic_has_its(kvm))
                vgic_lpi_translation_cache_init(kvm);
+
+       /*
+        * If we have GICv4.1 enabled, unconditionnaly request enable the
+        * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
+        * enable it if we present a virtual ITS to the guest.
+        */
+       if (vgic_supports_direct_msis(kvm)) {
                ret = vgic_v4_init(kvm);
                if (ret)
                        goto out;
@@ -348,6 +355,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
+       /*
+        * Retire all pending LPIs on this vcpu anyway as we're
+        * going to destroy it.
+        */
+       vgic_flush_pending_lpis(vcpu);
+
        INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 }
 
@@ -359,10 +372,10 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
 
        vgic_debug_destroy(kvm);
 
-       kvm_vgic_dist_destroy(kvm);
-
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_vgic_vcpu_destroy(vcpu);
+
+       kvm_vgic_dist_destroy(kvm);
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
index d53d34a..c012a52 100644 (file)
@@ -96,14 +96,21 @@ out_unlock:
         * We "cache" the configuration table entries in our struct vgic_irq's.
         * However we only have those structs for mapped IRQs, so we read in
         * the respective config data from memory here upon mapping the LPI.
+        *
+        * Should any of these fail, behave as if we couldn't create the LPI
+        * by dropping the refcount and returning the error.
         */
        ret = update_lpi_config(kvm, irq, NULL, false);
-       if (ret)
+       if (ret) {
+               vgic_put_irq(kvm, irq);
                return ERR_PTR(ret);
+       }
 
        ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
-       if (ret)
+       if (ret) {
+               vgic_put_irq(kvm, irq);
                return ERR_PTR(ret);
+       }
 
        return irq;
 }
index 5945f06..a016f07 100644 (file)
@@ -409,24 +409,28 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
                NULL, vgic_mmio_uaccess_write_v2_group, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+               NULL, vgic_uaccess_write_cenable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
-               vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1,
+               vgic_mmio_read_pending, vgic_mmio_write_spending,
+               NULL, vgic_uaccess_write_spending, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
-               vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1,
+               vgic_mmio_read_pending, vgic_mmio_write_cpending,
+               NULL, vgic_uaccess_write_cpending, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
                vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
index e72dcc4..89a14ec 100644 (file)
@@ -50,7 +50,8 @@ bool vgic_has_its(struct kvm *kvm)
 
 bool vgic_supports_direct_msis(struct kvm *kvm)
 {
-       return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+       return (kvm_vgic_global_state.has_gicv4_1 ||
+               (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
 }
 
 /*
@@ -538,10 +539,12 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
                vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+              NULL, vgic_uaccess_write_cenable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
                vgic_mmio_read_pending, vgic_mmio_write_spending,
@@ -553,11 +556,11 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive,
                1, VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
                vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
@@ -609,11 +612,13 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
        REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
                vgic_mmio_read_group, vgic_mmio_write_group, 4,
                VGIC_ACCESS_32bit),
-       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 4,
                VGIC_ACCESS_32bit),
-       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+               NULL, vgic_uaccess_write_cenable, 4,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
                vgic_mmio_read_pending, vgic_mmio_write_spending,
@@ -625,12 +630,12 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive,
-               4, VGIC_ACCESS_32bit),
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4,
+               VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive,
-               4, VGIC_ACCESS_32bit),
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4,
+               VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
                vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
                VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
index 2199302..b2d73fc 100644 (file)
@@ -184,6 +184,48 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
        }
 }
 
+int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->enabled = true;
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->enabled = false;
+               raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len)
 {
@@ -219,17 +261,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
        return value;
 }
 
-/* Must be called with irq->irq_lock held */
-static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                                bool is_uaccess)
-{
-       if (is_uaccess)
-               return;
-
-       irq->pending_latch = true;
-       vgic_irq_set_phys_active(irq, true);
-}
-
 static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
 {
        return (vgic_irq_is_sgi(irq->intid) &&
@@ -240,7 +271,6 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
-       bool is_uaccess = !kvm_get_running_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -270,22 +300,48 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                        continue;
                }
 
+               irq->pending_latch = true;
                if (irq->hw)
-                       vgic_hw_irq_spending(vcpu, irq, is_uaccess);
-               else
-                       irq->pending_latch = true;
+                       vgic_irq_set_phys_active(irq, true);
+
                vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
 }
 
-/* Must be called with irq->irq_lock held */
-static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                                bool is_uaccess)
+int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val)
 {
-       if (is_uaccess)
-               return;
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->pending_latch = true;
 
+               /*
+                * GICv2 SGIs are terribly broken. We can't restore
+                * the source of the interrupt, so just pick the vcpu
+                * itself as the source...
+                */
+               if (is_vgic_v2_sgi(vcpu, irq))
+                       irq->source |= BIT(vcpu->vcpu_id);
+
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
        irq->pending_latch = false;
 
        /*
@@ -308,7 +364,6 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
-       bool is_uaccess = !kvm_get_running_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -339,7 +394,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                }
 
                if (irq->hw)
-                       vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
+                       vgic_hw_irq_cpending(vcpu, irq);
                else
                        irq->pending_latch = false;
 
@@ -348,8 +403,68 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
        }
 }
 
-unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
-                                   gpa_t addr, unsigned int len)
+int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               /*
+                * More fun with GICv2 SGIs! If we're clearing one of them
+                * from userspace, which source vcpu to clear? Let's not
+                * even think of it, and blow the whole set.
+                */
+               if (is_vgic_v2_sgi(vcpu, irq))
+                       irq->source = 0;
+
+               irq->pending_latch = false;
+
+               raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+/*
+ * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
+ * is not queued on some running VCPU's LRs, because then the change to the
+ * active state can be overwritten when the VCPU's state is synced coming back
+ * from the guest.
+ *
+ * For shared interrupts as well as GICv3 private interrupts, we have to
+ * stop all the VCPUs because interrupts can be migrated while we don't hold
+ * the IRQ locks and we don't want to be chasing moving targets.
+ *
+ * For GICv2 private interrupts we don't have to do anything because
+ * userspace accesses to the VGIC state already require all VCPUs to be
+ * stopped, and only the VCPU itself can modify its private interrupts
+ * active state, which guarantees that the VCPU is not running.
+ */
+static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
+{
+       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+           intid >= VGIC_NR_PRIVATE_IRQS)
+               kvm_arm_halt_guest(vcpu->kvm);
+}
+
+/* See vgic_access_active_prepare */
+static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid)
+{
+       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+           intid >= VGIC_NR_PRIVATE_IRQS)
+               kvm_arm_resume_guest(vcpu->kvm);
+}
+
+static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
 {
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        u32 value = 0;
@@ -359,6 +474,10 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
        for (i = 0; i < len * 8; i++) {
                struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
+               /*
+                * Even for HW interrupts, don't evaluate the HW state as
+                * all the guest is interested in is the virtual state.
+                */
                if (irq->active)
                        value |= (1U << i);
 
@@ -368,6 +487,29 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
        return value;
 }
 
+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       u32 val;
+
+       mutex_lock(&vcpu->kvm->lock);
+       vgic_access_active_prepare(vcpu, intid);
+
+       val = __vgic_mmio_read_active(vcpu, addr, len);
+
+       vgic_access_active_finish(vcpu, intid);
+       mutex_unlock(&vcpu->kvm->lock);
+
+       return val;
+}
+
+unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len)
+{
+       return __vgic_mmio_read_active(vcpu, addr, len);
+}
+
 /* Must be called with irq->irq_lock held */
 static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                                      bool active, bool is_uaccess)
@@ -426,36 +568,6 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 }
 
-/*
- * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
- * is not queued on some running VCPU's LRs, because then the change to the
- * active state can be overwritten when the VCPU's state is synced coming back
- * from the guest.
- *
- * For shared interrupts, we have to stop all the VCPUs because interrupts can
- * be migrated while we don't hold the IRQ locks and we don't want to be
- * chasing moving targets.
- *
- * For private interrupts we don't have to do anything because userspace
- * accesses to the VGIC state already require all VCPUs to be stopped, and
- * only the VCPU itself can modify its private interrupts active state, which
- * guarantees that the VCPU is not running.
- */
-static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
-{
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
-           intid > VGIC_NR_PRIVATE_IRQS)
-               kvm_arm_halt_guest(vcpu->kvm);
-}
-
-/* See vgic_change_active_prepare */
-static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
-{
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
-           intid > VGIC_NR_PRIVATE_IRQS)
-               kvm_arm_resume_guest(vcpu->kvm);
-}
-
 static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
                                      gpa_t addr, unsigned int len,
                                      unsigned long val)
@@ -477,11 +589,11 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
 
        mutex_lock(&vcpu->kvm->lock);
-       vgic_change_active_prepare(vcpu, intid);
+       vgic_access_active_prepare(vcpu, intid);
 
        __vgic_mmio_write_cactive(vcpu, addr, len, val);
 
-       vgic_change_active_finish(vcpu, intid);
+       vgic_access_active_finish(vcpu, intid);
        mutex_unlock(&vcpu->kvm->lock);
 }
 
@@ -514,11 +626,11 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
 
        mutex_lock(&vcpu->kvm->lock);
-       vgic_change_active_prepare(vcpu, intid);
+       vgic_access_active_prepare(vcpu, intid);
 
        __vgic_mmio_write_sactive(vcpu, addr, len, val);
 
-       vgic_change_active_finish(vcpu, intid);
+       vgic_access_active_finish(vcpu, intid);
        mutex_unlock(&vcpu->kvm->lock);
 }
 
index 5af2aef..fefcca2 100644 (file)
@@ -138,6 +138,14 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
                             gpa_t addr, unsigned int len,
                             unsigned long val);
 
+int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val);
+
+int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val);
+
 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len);
 
@@ -149,9 +157,20 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val);
 
+int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val);
+
+int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val);
+
 unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
                                    gpa_t addr, unsigned int len);
 
+unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len);
+
 void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
                             gpa_t addr, unsigned int len,
                             unsigned long val);