Merge tag 'drm-fixes-2021-10-01' of git://anongit.freedesktop.org/drm/drm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Oct 2021 17:27:44 +0000 (10:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Oct 2021 17:27:44 +0000 (10:27 -0700)
Pull drm fixes from Daniel Vetter:
 "Dave is out on a long w/e, should be back next week.

  Nothing nefarious, just a bunch of driver fixes: amdgpu, i915, tegra,
  and one exynos driver fix"

* tag 'drm-fixes-2021-10-01' of git://anongit.freedesktop.org/drm/drm:
  drm/amdgpu: force exit gfxoff on sdma resume for rmb s0ix
  drm/amdgpu: check tiling flags when creating FB on GFX8-
  drm/amd/display: Pass PCI deviceid into DC
  drm/amd/display: initialize backlight_ramping_override to false
  drm/amdgpu: correct initial cp_hqd_quantum for gfx9
  drm/amd/display: Fix Display Flicker on embedded panels
  drm/amdgpu: fix gart.bo pin_count leak
  drm/i915: Remove warning from the rps worker
  drm/i915/request: fix early tracepoints
  drm/i915/guc, docs: Fix pdfdocs build error by removing nested grid
  gpu: host1x: Plug potential memory leak
  gpu/host1x: fence: Make spinlock static
  drm/tegra: uapi: Fix wrong mapping end address in case of disabled IOMMU
  drm/tegra: dc: Remove unused variables
  drm/exynos: Make use of the helper function devm_platform_ioremap_resource()
  drm/i915/gvt: fix the usage of ww lock in gvt scheduler.

254 files changed:
MAINTAINERS
arch/arm64/kvm/hyp/nvhe/Makefile
arch/arm64/kvm/perf.c
arch/arm64/kvm/pmu-emul.c
arch/m68k/68000/entry.S
arch/m68k/Kconfig
arch/m68k/coldfire/entry.S
arch/m68k/include/asm/processor.h
arch/m68k/include/asm/segment.h [deleted file]
arch/m68k/include/asm/thread_info.h
arch/m68k/include/asm/tlbflush.h
arch/m68k/include/asm/traps.h
arch/m68k/include/asm/uaccess.h
arch/m68k/kernel/asm-offsets.c
arch/m68k/kernel/entry.S
arch/m68k/kernel/process.c
arch/m68k/kernel/signal.c
arch/m68k/kernel/traps.c
arch/m68k/mac/misc.c
arch/m68k/mm/cache.c
arch/m68k/mm/init.c
arch/m68k/mm/kmap.c
arch/m68k/mm/memory.c
arch/m68k/mm/motorola.c
arch/m68k/sun3/config.c
arch/m68k/sun3/mmu_emu.c
arch/m68k/sun3/sun3ints.c
arch/m68k/sun3x/prom.c
arch/mips/net/bpf_jit.c
arch/nios2/Kconfig.debug
arch/nios2/kernel/setup.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/x86/crypto/sm4-aesni-avx-asm_64.S
arch/x86/include/asm/kvm_page_track.h
arch/x86/kvm/emulate.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/ioapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/evmcs.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/net/bpf_jit_comp.c
drivers/crypto/ccp/ccp-ops.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpio-rockchip.c
drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
drivers/hid/hid-apple.c
drivers/hid/hid-betopff.c
drivers/hid/hid-u2fzero.c
drivers/hid/wacom_wac.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_priv.h
drivers/infiniband/hw/hfi1/ipoib_tx.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/irdma/cm.c
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/i40iw_if.c
drivers/infiniband/hw/irdma/main.h
drivers/infiniband/hw/irdma/user.h
drivers/infiniband/hw/irdma/utils.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/qib/qib_sysfs.c
drivers/infiniband/hw/usnic/usnic_ib.h
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/iommu/apple-dart.c
drivers/iommu/intel/dmar.c
drivers/media/platform/s5p-jpeg/jpeg-core.c
drivers/media/platform/s5p-jpeg/jpeg-core.h
drivers/media/rc/ir_toy.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global1.c
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/ethernet/broadcom/bgmac-platform.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns_mdio.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/micrel/Makefile
drivers/net/ethernet/micrel/ks8851_common.c
drivers/net/ethernet/pensando/ionic/ionic_stats.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/sun/Kconfig
drivers/net/hamradio/Kconfig
drivers/net/mdio/mdio-ipq4019.c
drivers/net/mdio/mdio-mscc-miim.c
drivers/net/mhi_net.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/mxl-gpy.c
drivers/net/usb/smsc95xx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/perf/arm_pmu.c
drivers/pinctrl/core.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-amd.h
drivers/pinctrl/pinctrl-rockchip.c
drivers/pinctrl/pinctrl-rockchip.h
drivers/pinctrl/qcom/pinctrl-sc7280.c
drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/staging/media/hantro/hantro_drv.c
drivers/staging/media/sunxi/cedrus/cedrus_video.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vfio/pci/vfio_pci_core.c
drivers/vhost/vdpa.c
drivers/virtio/virtio.c
drivers/watchdog/Kconfig
fs/vboxsf/super.c
fs/verity/enable.c
fs/verity/open.c
include/kvm/arm_pmu.h
include/linux/bpf.h
include/linux/kvm_host.h
include/linux/perf/arm_pmu.h
include/net/ip_fib.h
include/net/mac80211.h
include/net/nexthop.h
include/net/pkt_sched.h
include/net/sock.h
include/sound/rawmidi.h
include/uapi/sound/asound.h
kernel/bpf/bpf_struct_ops.c
kernel/bpf/core.c
kernel/cgroup/cgroup.c
net/bpf/test_run.c
net/bridge/br_multicast.c
net/bridge/br_private.h
net/core/dev_addr_lists.c
net/core/sock.c
net/ipv4/fib_semantics.c
net/ipv4/netfilter/iptable_raw.c
net/ipv4/udp.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/route.c
net/ipv6/udp.c
net/mac80211/mesh_pathtbl.c
net/mac80211/mesh_ps.c
net/mac80211/rate.c
net/mac80211/rx.c
net/mac80211/tx.c
net/mac80211/wpa.c
net/mptcp/mptcp_diag.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/mptcp/syncookies.c
net/mptcp/token.c
net/mptcp/token_test.c
net/netfilter/ipset/ip_set_hash_gen.h
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_nat_core.c
net/netfilter/nf_nat_masquerade.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_compat.c
net/netfilter/xt_LOG.c
net/netfilter/xt_NFLOG.c
net/sched/cls_flower.c
net/sched/sch_api.c
net/sctp/input.c
net/unix/af_unix.c
sound/core/rawmidi.c
sound/drivers/pcsp/pcsp_lib.c
sound/firewire/motu/amdtp-motu.c
sound/firewire/oxfw/oxfw.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_cs8409.c
sound/pci/hda/patch_realtek.c
sound/pci/pcxhr/pcxhr_core.c
sound/soc/fsl/fsl_esai.c
sound/soc/fsl/fsl_micfil.c
sound/soc/fsl/fsl_sai.c
sound/soc/fsl/fsl_spdif.c
sound/soc/fsl/fsl_xcvr.c
sound/soc/intel/boards/sof_sdw.c
sound/soc/mediatek/Kconfig
sound/soc/mediatek/common/mtk-afe-fe-dai.c
sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
sound/soc/sof/core.c
sound/soc/sof/imx/imx8.c
sound/soc/sof/imx/imx8m.c
sound/soc/sof/loader.c
sound/soc/sof/trace.c
sound/soc/sof/xtensa/core.c
sound/usb/card.c
sound/usb/mixer.c
sound/usb/mixer.h
sound/usb/mixer_quirks.c
tools/lib/bpf/linker.c
tools/perf/Documentation/jitdump-specification.txt
tools/perf/Documentation/perf-c2c.txt
tools/perf/Documentation/perf-intel-pt.txt
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-script-perl.txt
tools/perf/Documentation/perf-script-python.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/topdown.txt
tools/perf/arch/arm/util/auxtrace.c
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm/util/perf_regs.c
tools/perf/arch/arm/util/pmu.c
tools/perf/arch/arm/util/unwind-libdw.c
tools/perf/arch/arm/util/unwind-libunwind.c
tools/perf/arch/x86/util/iostat.c
tools/perf/builtin-stat.c
tools/perf/pmu-events/arch/powerpc/power8/other.json
tools/perf/tests/code-reading.c
tools/perf/tests/dwarf-unwind.c
tools/perf/util/config.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_lwt_ip_encap.sh
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/access_tracking_perf_test.c
tools/testing/selftests/kvm/demand_paging_test.c
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/kvm/rseq_test.c
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c [new file with mode: 0644]
tools/testing/selftests/netfilter/nft_nat_zones.sh [new file with mode: 0755]
tools/testing/selftests/netfilter/nft_zones_many.sh [new file with mode: 0755]
virt/kvm/kvm_main.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 5b33791..76e0fdc 100644 (file)
@@ -810,7 +810,7 @@ F:  Documentation/devicetree/bindings/dma/altr,msgdma.yaml
 F:     drivers/dma/altera-msgdma.c
 
 ALTERA PIO DRIVER
-M:     Joyce Ooi <joyce.ooi@intel.com>
+M:     Mun Yew Tham <mun.yew.tham@intel.com>
 L:     linux-gpio@vger.kernel.org
 S:     Maintained
 F:     drivers/gpio/gpio-altera.c
@@ -2961,7 +2961,7 @@ F:        crypto/async_tx/
 F:     include/linux/async_tx.h
 
 AT24 EEPROM DRIVER
-M:     Bartosz Golaszewski <bgolaszewski@baylibre.com>
+M:     Bartosz Golaszewski <brgl@bgdev.pl>
 L:     linux-i2c@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -3384,9 +3384,11 @@ F:       Documentation/networking/filter.rst
 F:     Documentation/userspace-api/ebpf/
 F:     arch/*/net/*
 F:     include/linux/bpf*
+F:     include/linux/btf*
 F:     include/linux/filter.h
 F:     include/trace/events/xdp.h
 F:     include/uapi/linux/bpf*
+F:     include/uapi/linux/btf*
 F:     include/uapi/linux/filter.h
 F:     kernel/bpf/
 F:     kernel/trace/bpf_trace.c
@@ -3820,7 +3822,6 @@ F:        drivers/scsi/mpi3mr/
 
 BROADCOM NETXTREME-E ROCE DRIVER
 M:     Selvin Xavier <selvin.xavier@broadcom.com>
-M:     Naresh Kumar PBS <nareshkumar.pbs@broadcom.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 W:     http://www.broadcom.com
@@ -7985,7 +7986,7 @@ F:        include/linux/gpio/regmap.h
 
 GPIO SUBSYSTEM
 M:     Linus Walleij <linus.walleij@linaro.org>
-M:     Bartosz Golaszewski <bgolaszewski@baylibre.com>
+M:     Bartosz Golaszewski <brgl@bgdev.pl>
 L:     linux-gpio@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
@@ -11366,7 +11367,7 @@ F:      Documentation/devicetree/bindings/iio/proximity/maxbotix,mb1232.yaml
 F:     drivers/iio/proximity/mb1232.c
 
 MAXIM MAX77650 PMIC MFD DRIVER
-M:     Bartosz Golaszewski <bgolaszewski@baylibre.com>
+M:     Bartosz Golaszewski <brgl@bgdev.pl>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/*/*max77650.yaml
@@ -17883,7 +17884,8 @@ M:      Olivier Moysan <olivier.moysan@foss.st.com>
 M:     Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
-F:     Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml
+F:     Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
+F:     Documentation/devicetree/bindings/sound/st,stm32-*.yaml
 F:     sound/soc/stm/
 
 STM32 TIMER/LPTIMER DRIVERS
@@ -18682,7 +18684,7 @@ F:      include/linux/clk/ti.h
 
 TI DAVINCI MACHINE SUPPORT
 M:     Sekhar Nori <nsekhar@ti.com>
-R:     Bartosz Golaszewski <bgolaszewski@baylibre.com>
+R:     Bartosz Golaszewski <brgl@bgdev.pl>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 5df6193..8d741f7 100644 (file)
@@ -54,7 +54,7 @@ $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
 #    runtime. Because the hypervisor is part of the kernel binary, relocations
 #    produce a kernel VA. We enumerate relocations targeting hyp at build time
 #    and convert the kernel VAs at those positions to hyp VAs.
-$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
+$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE
        $(call if_changed,hyprel)
 
 # 5) Compile hyp-reloc.S and link it into the existing partially linked object.
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index f9bb3b1..c84fe24 100644 (file)
@@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
-       if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
-               static_branch_enable(&kvm_arm_pmu_available);
-
        return perf_register_guest_info_callbacks(&kvm_guest_cbs);
 }
 
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f5065f2..2af3c37 100644 (file)
@@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
        kvm_pmu_create_perf_event(vcpu, select_idx);
 }
 
-int kvm_pmu_probe_pmuver(void)
+void kvm_host_pmu_init(struct arm_pmu *pmu)
+{
+       if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
+           !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
+               static_branch_enable(&kvm_arm_pmu_available);
+}
+
+static int kvm_pmu_probe_pmuver(void)
 {
        struct perf_event_attr attr = { };
        struct perf_event *event;
diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S
index 259b366..997b549 100644 (file)
@@ -15,7 +15,6 @@
 #include <asm/unistd.h>
 #include <asm/errno.h>
 #include <asm/setup.h>
-#include <asm/segment.h>
 #include <asm/traps.h>
 #include <asm/asm-offsets.h>
 #include <asm/entry.h>
@@ -25,7 +24,6 @@
 .globl system_call
 .globl resume
 .globl ret_from_exception
-.globl ret_from_signal
 .globl sys_call_table
 .globl bad_interrupt
 .globl inthandler1
@@ -59,8 +57,6 @@ do_trace:
        subql   #4,%sp                  /* dummy return address */
        SAVE_SWITCH_STACK
        jbsr    syscall_trace_leave
-
-ret_from_signal:
        RESTORE_SWITCH_STACK
        addql   #4,%sp
        jra     ret_from_exception
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 774c35f..0b50da0 100644 (file)
@@ -29,7 +29,6 @@ config M68K
        select NO_DMA if !MMU && !COLDFIRE
        select OLD_SIGACTION
        select OLD_SIGSUSPEND3
-       select SET_FS
        select UACCESS_MEMCPY if !MMU
        select VIRT_TO_BUS
        select ZONE_DMA
diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S
index d43a027..9f337c7 100644 (file)
@@ -31,7 +31,6 @@
 #include <asm/thread_info.h>
 #include <asm/errno.h>
 #include <asm/setup.h>
-#include <asm/segment.h>
 #include <asm/asm-offsets.h>
 #include <asm/entry.h>
 
@@ -51,7 +50,6 @@ sw_usp:
 .globl system_call
 .globl resume
 .globl ret_from_exception
-.globl ret_from_signal
 .globl sys_call_table
 .globl inthandler
 
@@ -98,8 +96,6 @@ ENTRY(system_call)
        subql   #4,%sp                  /* dummy return address */
        SAVE_SWITCH_STACK
        jbsr    syscall_trace_leave
-
-ret_from_signal:
        RESTORE_SWITCH_STACK
        addql   #4,%sp
 
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 3750819..f4d82c6 100644 (file)
@@ -9,7 +9,6 @@
 #define __ASM_M68K_PROCESSOR_H
 
 #include <linux/thread_info.h>
-#include <asm/segment.h>
 #include <asm/fpu.h>
 #include <asm/ptrace.h>
 
@@ -75,11 +74,37 @@ static inline void wrusp(unsigned long usp)
 #define TASK_UNMAPPED_BASE     0
 #endif
 
+/* Address spaces (or Function Codes in Motorola lingo) */
+#define USER_DATA     1
+#define USER_PROGRAM  2
+#define SUPER_DATA    5
+#define SUPER_PROGRAM 6
+#define CPU_SPACE     7
+
+#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
+/*
+ * Set the SFC/DFC registers for special MM operations.  For most normal
+ * operation these remain set to USER_DATA for the uaccess routines.
+ */
+static inline void set_fc(unsigned long val)
+{
+       WARN_ON_ONCE(in_interrupt());
+
+       __asm__ __volatile__ ("movec %0,%/sfc\n\t"
+                             "movec %0,%/dfc\n\t"
+                             : /* no outputs */ : "r" (val) : "memory");
+}
+#else
+static inline void set_fc(unsigned long val)
+{
+}
+#endif /* CONFIG_CPU_HAS_ADDRESS_SPACES */
+
 struct thread_struct {
        unsigned long  ksp;             /* kernel stack pointer */
        unsigned long  usp;             /* user stack pointer */
        unsigned short sr;              /* saved status register */
-       unsigned short fs;              /* saved fs (sfc, dfc) */
+       unsigned short fc;              /* saved fc (sfc, dfc) */
        unsigned long  crp[2];          /* cpu root pointer */
        unsigned long  esp0;            /* points to SR of stack frame */
        unsigned long  faddr;           /* info about last fault */
@@ -92,7 +117,7 @@ struct thread_struct {
 #define INIT_THREAD  {                                                 \
        .ksp    = sizeof(init_stack) + (unsigned long) init_stack,      \
        .sr     = PS_S,                                                 \
-       .fs     = __KERNEL_DS,                                          \
+       .fc     = USER_DATA,                                            \
 }
 
 /*
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
deleted file mode 100644 (file)
index 2b5e68a..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _M68K_SEGMENT_H
-#define _M68K_SEGMENT_H
-
-/* define constants */
-/* Address spaces (FC0-FC2) */
-#define USER_DATA     (1)
-#ifndef __USER_DS
-#define __USER_DS     (USER_DATA)
-#endif
-#define USER_PROGRAM  (2)
-#define SUPER_DATA    (5)
-#ifndef __KERNEL_DS
-#define __KERNEL_DS   (SUPER_DATA)
-#endif
-#define SUPER_PROGRAM (6)
-#define CPU_SPACE     (7)
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
-       unsigned long seg;
-} mm_segment_t;
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
-/*
- * Get/set the SFC/DFC registers for MOVES instructions
- */
-#define USER_DS                MAKE_MM_SEG(__USER_DS)
-#define KERNEL_DS      MAKE_MM_SEG(__KERNEL_DS)
-
-static inline mm_segment_t get_fs(void)
-{
-       mm_segment_t _v;
-       __asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
-       return _v;
-}
-
-static inline void set_fs(mm_segment_t val)
-{
-       __asm__ __volatile__ ("movec %0,%/sfc\n\t"
-                             "movec %0,%/dfc\n\t"
-                             : /* no outputs */ : "r" (val.seg) : "memory");
-}
-
-#else
-#define USER_DS                MAKE_MM_SEG(TASK_SIZE)
-#define KERNEL_DS      MAKE_MM_SEG(0xFFFFFFFF)
-#define get_fs()       (current_thread_info()->addr_limit)
-#define set_fs(x)      (current_thread_info()->addr_limit = (x))
-#endif
-
-#define uaccess_kernel()       (get_fs().seg == KERNEL_DS.seg)
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 15a7570..c952658 100644 (file)
@@ -4,7 +4,6 @@
 
 #include <asm/types.h>
 #include <asm/page.h>
-#include <asm/segment.h>
 
 /*
  * On machines with 4k pages we default to an 8k thread size, though we
@@ -27,7 +26,6 @@
 struct thread_info {
        struct task_struct      *task;          /* main task structure */
        unsigned long           flags;
-       mm_segment_t            addr_limit;     /* thread address space */
        int                     preempt_count;  /* 0 => preemptable, <0 => BUG */
        __u32                   cpu;            /* should always be 0 on m68k */
        unsigned long           tp_value;       /* thread pointer */
@@ -37,7 +35,6 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)                  \
 {                                              \
        .task           = &tsk,                 \
-       .addr_limit     = KERNEL_DS,            \
        .preempt_count  = INIT_PREEMPT_COUNT,   \
 }
 
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index a6318cc..b882e2f 100644 (file)
@@ -13,13 +13,12 @@ static inline void flush_tlb_kernel_page(void *addr)
        if (CPU_IS_COLDFIRE) {
                mmu_write(MMUOR, MMUOR_CNL);
        } else if (CPU_IS_040_OR_060) {
-               mm_segment_t old_fs = get_fs();
-               set_fs(KERNEL_DS);
+               set_fc(SUPER_DATA);
                __asm__ __volatile__(".chip 68040\n\t"
                                     "pflush (%0)\n\t"
                                     ".chip 68k"
                                     : : "a" (addr));
-               set_fs(old_fs);
+               set_fc(USER_DATA);
        } else if (CPU_IS_020_OR_030)
                __asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
 }
@@ -84,12 +83,8 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 
 static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
-       if (vma->vm_mm == current->active_mm) {
-               mm_segment_t old_fs = force_uaccess_begin();
-
+       if (vma->vm_mm == current->active_mm)
                __flush_tlb_one(addr);
-               force_uaccess_end(old_fs);
-       }
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
diff --git a/arch/m68k/include/asm/traps.h b/arch/m68k/include/asm/traps.h
index 4aff335..a9d5c1c 100644 (file)
@@ -267,6 +267,10 @@ struct frame {
     } un;
 };
 
+#ifdef CONFIG_M68040
+asmlinkage void berr_040cleanup(struct frame *fp);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _M68K_TRAPS_H */
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index f98208c..ba67052 100644 (file)
@@ -9,13 +9,16 @@
  */
 #include <linux/compiler.h>
 #include <linux/types.h>
-#include <asm/segment.h>
 #include <asm/extable.h>
 
 /* We let the MMU do all checking */
 static inline int access_ok(const void __user *addr,
                            unsigned long size)
 {
+       /*
+        * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check
+        * for TASK_SIZE!
+        */
        return 1;
 }
 
@@ -35,12 +38,9 @@ static inline int access_ok(const void __user *addr,
 #define        MOVES   "move"
 #endif
 
-extern int __put_user_bad(void);
-extern int __get_user_bad(void);
-
-#define __put_user_asm(res, x, ptr, bwl, reg, err)     \
+#define __put_user_asm(inst, res, x, ptr, bwl, reg, err) \
 asm volatile ("\n"                                     \
-       "1:     "MOVES"."#bwl"  %2,%1\n"                \
+       "1:     "inst"."#bwl"   %2,%1\n"                \
        "2:\n"                                          \
        "       .section .fixup,\"ax\"\n"               \
        "       .even\n"                                \
@@ -56,6 +56,31 @@ asm volatile ("\n"                                   \
        : "+d" (res), "=m" (*(ptr))                     \
        : #reg (x), "i" (err))
 
+#define __put_user_asm8(inst, res, x, ptr)                     \
+do {                                                           \
+       const void *__pu_ptr = (const void __force *)(ptr);     \
+                                                               \
+       asm volatile ("\n"                                      \
+               "1:     "inst".l %2,(%1)+\n"                    \
+               "2:     "inst".l %R2,(%1)\n"                    \
+               "3:\n"                                          \
+               "       .section .fixup,\"ax\"\n"               \
+               "       .even\n"                                \
+               "10:    movel %3,%0\n"                          \
+               "       jra 3b\n"                               \
+               "       .previous\n"                            \
+               "\n"                                            \
+               "       .section __ex_table,\"a\"\n"            \
+               "       .align 4\n"                             \
+               "       .long 1b,10b\n"                         \
+               "       .long 2b,10b\n"                         \
+               "       .long 3b,10b\n"                         \
+               "       .previous"                              \
+               : "+d" (res), "+a" (__pu_ptr)                   \
+               : "r" (x), "i" (-EFAULT)                        \
+               : "memory");                                    \
+} while (0)
+
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
@@ -68,51 +93,29 @@ asm volatile ("\n"                                  \
        __chk_user_ptr(ptr);                                            \
        switch (sizeof (*(ptr))) {                                      \
        case 1:                                                         \
-               __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \
+               __put_user_asm(MOVES, __pu_err, __pu_val, ptr, b, d, -EFAULT); \
                break;                                                  \
        case 2:                                                         \
-               __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \
+               __put_user_asm(MOVES, __pu_err, __pu_val, ptr, w, r, -EFAULT); \
                break;                                                  \
        case 4:                                                         \
-               __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \
+               __put_user_asm(MOVES, __pu_err, __pu_val, ptr, l, r, -EFAULT); \
                break;                                                  \
        case 8:                                                         \
-           {                                                           \
-               const void __user *__pu_ptr = (ptr);                    \
-               asm volatile ("\n"                                      \
-                       "1:     "MOVES".l       %2,(%1)+\n"             \
-                       "2:     "MOVES".l       %R2,(%1)\n"             \
-                       "3:\n"                                          \
-                       "       .section .fixup,\"ax\"\n"               \
-                       "       .even\n"                                \
-                       "10:    movel %3,%0\n"                          \
-                       "       jra 3b\n"                               \
-                       "       .previous\n"                            \
-                       "\n"                                            \
-                       "       .section __ex_table,\"a\"\n"            \
-                       "       .align 4\n"                             \
-                       "       .long 1b,10b\n"                         \
-                       "       .long 2b,10b\n"                         \
-                       "       .long 3b,10b\n"                         \
-                       "       .previous"                              \
-                       : "+d" (__pu_err), "+a" (__pu_ptr)              \
-                       : "r" (__pu_val), "i" (-EFAULT)                 \
-                       : "memory");                                    \
+               __put_user_asm8(MOVES, __pu_err, __pu_val, ptr);        \
                break;                                                  \
-           }                                                           \
        default:                                                        \
-               __pu_err = __put_user_bad();                            \
-               break;                                                  \
+               BUILD_BUG();                                            \
        }                                                               \
        __pu_err;                                                       \
 })
 #define put_user(x, ptr)       __put_user(x, ptr)
 
 
-#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({            \
+#define __get_user_asm(inst, res, x, ptr, type, bwl, reg, err) ({      \
        type __gu_val;                                                  \
        asm volatile ("\n"                                              \
-               "1:     "MOVES"."#bwl"  %2,%1\n"                        \
+               "1:     "inst"."#bwl"   %2,%1\n"                        \
                "2:\n"                                                  \
                "       .section .fixup,\"ax\"\n"                       \
                "       .even\n"                                        \
@@ -130,53 +133,57 @@ asm volatile ("\n"                                        \
        (x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val;  \
 })
 
+#define __get_user_asm8(inst, res, x, ptr)                             \
+do {                                                                   \
+       const void *__gu_ptr = (const void __force *)(ptr);             \
+       union {                                                         \
+               u64 l;                                                  \
+               __typeof__(*(ptr)) t;                                   \
+       } __gu_val;                                                     \
+                                                                       \
+       asm volatile ("\n"                                              \
+               "1:     "inst".l (%2)+,%1\n"                            \
+               "2:     "inst".l (%2),%R1\n"                            \
+               "3:\n"                                                  \
+               "       .section .fixup,\"ax\"\n"                       \
+               "       .even\n"                                        \
+               "10:    move.l  %3,%0\n"                                \
+               "       sub.l   %1,%1\n"                                \
+               "       sub.l   %R1,%R1\n"                              \
+               "       jra     3b\n"                                   \
+               "       .previous\n"                                    \
+               "\n"                                                    \
+               "       .section __ex_table,\"a\"\n"                    \
+               "       .align  4\n"                                    \
+               "       .long   1b,10b\n"                               \
+               "       .long   2b,10b\n"                               \
+               "       .previous"                                      \
+               : "+d" (res), "=&r" (__gu_val.l),                       \
+                 "+a" (__gu_ptr)                                       \
+               : "i" (-EFAULT)                                         \
+               : "memory");                                            \
+       (x) = __gu_val.t;                                               \
+} while (0)
+
 #define __get_user(x, ptr)                                             \
 ({                                                                     \
        int __gu_err = 0;                                               \
        __chk_user_ptr(ptr);                                            \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT);    \
+               __get_user_asm(MOVES, __gu_err, x, ptr, u8, b, d, -EFAULT); \
                break;                                                  \
        case 2:                                                         \
-               __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT);   \
+               __get_user_asm(MOVES, __gu_err, x, ptr, u16, w, r, -EFAULT); \
                break;                                                  \
        case 4:                                                         \
-               __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT);   \
+               __get_user_asm(MOVES, __gu_err, x, ptr, u32, l, r, -EFAULT); \
                break;                                                  \
-       case 8: {                                                       \
-               const void __user *__gu_ptr = (ptr);                    \
-               union {                                                 \
-                       u64 l;                                          \
-                       __typeof__(*(ptr)) t;                           \
-               } __gu_val;                                             \
-               asm volatile ("\n"                                      \
-                       "1:     "MOVES".l       (%2)+,%1\n"             \
-                       "2:     "MOVES".l       (%2),%R1\n"             \
-                       "3:\n"                                          \
-                       "       .section .fixup,\"ax\"\n"               \
-                       "       .even\n"                                \
-                       "10:    move.l  %3,%0\n"                        \
-                       "       sub.l   %1,%1\n"                        \
-                       "       sub.l   %R1,%R1\n"                      \
-                       "       jra     3b\n"                           \
-                       "       .previous\n"                            \
-                       "\n"                                            \
-                       "       .section __ex_table,\"a\"\n"            \
-                       "       .align  4\n"                            \
-                       "       .long   1b,10b\n"                       \
-                       "       .long   2b,10b\n"                       \
-                       "       .previous"                              \
-                       : "+d" (__gu_err), "=&r" (__gu_val.l),          \
-                         "+a" (__gu_ptr)                               \
-                       : "i" (-EFAULT)                                 \
-                       : "memory");                                    \
-               (x) = __gu_val.t;                                       \
+       case 8:                                                         \
+               __get_user_asm8(MOVES, __gu_err, x, ptr);               \
                break;                                                  \
-       }                                                               \
        default:                                                        \
-               __gu_err = __get_user_bad();                            \
-               break;                                                  \
+               BUILD_BUG();                                            \
        }                                                               \
        __gu_err;                                                       \
 })
@@ -322,16 +329,19 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
 
        switch (n) {
        case 1:
-               __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
+               __put_user_asm(MOVES, res, *(u8 *)from, (u8 __user *)to,
+                               b, d, 1);
                break;
        case 2:
-               __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
+               __put_user_asm(MOVES, res, *(u16 *)from, (u16 __user *)to,
+                               w, r, 2);
                break;
        case 3:
                __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
                break;
        case 4:
-               __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
+               __put_user_asm(MOVES, res, *(u32 *)from, (u32 __user *)to,
+                               l, r, 4);
                break;
        case 5:
                __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
@@ -380,8 +390,65 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 #define INLINE_COPY_FROM_USER
 #define INLINE_COPY_TO_USER
 
-#define user_addr_max() \
-       (uaccess_kernel() ? ~0UL : TASK_SIZE)
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       type *__gk_dst = (type *)(dst);                                 \
+       type *__gk_src = (type *)(src);                                 \
+       int __gk_err = 0;                                               \
+                                                                       \
+       switch (sizeof(type)) {                                         \
+       case 1:                                                         \
+               __get_user_asm("move", __gk_err, *__gk_dst, __gk_src,   \
+                               u8, b, d, -EFAULT);                     \
+               break;                                                  \
+       case 2:                                                         \
+               __get_user_asm("move", __gk_err, *__gk_dst, __gk_src,   \
+                               u16, w, r, -EFAULT);                    \
+               break;                                                  \
+       case 4:                                                         \
+               __get_user_asm("move", __gk_err, *__gk_dst, __gk_src,   \
+                               u32, l, r, -EFAULT);                    \
+               break;                                                  \
+       case 8:                                                         \
+               __get_user_asm8("move", __gk_err, *__gk_dst, __gk_src); \
+               break;                                                  \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+       if (unlikely(__gk_err))                                         \
+               goto err_label;                                         \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       type __pk_src = *(type *)(src);                                 \
+       type *__pk_dst = (type *)(dst);                                 \
+       int __pk_err = 0;                                               \
+                                                                       \
+       switch (sizeof(type)) {                                         \
+       case 1:                                                         \
+               __put_user_asm("move", __pk_err, __pk_src, __pk_dst,    \
+                               b, d, -EFAULT);                         \
+               break;                                                  \
+       case 2:                                                         \
+               __put_user_asm("move", __pk_err, __pk_src, __pk_dst,    \
+                               w, r, -EFAULT);                         \
+               break;                                                  \
+       case 4:                                                         \
+               __put_user_asm("move", __pk_err, __pk_src, __pk_dst,    \
+                               l, r, -EFAULT);                         \
+               break;                                                  \
+       case 8:                                                         \
+               __put_user_asm8("move", __pk_err, __pk_src, __pk_dst);  \
+               break;                                                  \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+       if (unlikely(__pk_err))                                         \
+               goto err_label;                                         \
+} while (0)
 
 extern long strncpy_from_user(char *dst, const char __user *src, long count);
 extern __must_check long strnlen_user(const char __user *str, long n);
index ccea355..906d732 100644 (file)
@@ -31,7 +31,7 @@ int main(void)
        DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
        DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
        DEFINE(THREAD_SR, offsetof(struct thread_struct, sr));
-       DEFINE(THREAD_FS, offsetof(struct thread_struct, fs));
+       DEFINE(THREAD_FC, offsetof(struct thread_struct, fc));
        DEFINE(THREAD_CRP, offsetof(struct thread_struct, crp));
        DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0));
        DEFINE(THREAD_FPREG, offsetof(struct thread_struct, fp));
index 9dd76fb..9434fca 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/setup.h>
-#include <asm/segment.h>
 #include <asm/traps.h>
 #include <asm/unistd.h>
 #include <asm/asm-offsets.h>
@@ -78,20 +77,38 @@ ENTRY(__sys_clone3)
 
 ENTRY(sys_sigreturn)
        SAVE_SWITCH_STACK
-       movel   %sp,%sp@-                 | switch_stack pointer
-       pea     %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
+       movel   %sp,%a1                         | switch_stack pointer
+       lea     %sp@(SWITCH_STACK_SIZE),%a0     | pt_regs pointer
+       lea     %sp@(-84),%sp                   | leave a gap
+       movel   %a1,%sp@-
+       movel   %a0,%sp@-
        jbsr    do_sigreturn
-       addql   #8,%sp
-       RESTORE_SWITCH_STACK
-       rts
+       jra     1f                              | shared with rt_sigreturn()
 
 ENTRY(sys_rt_sigreturn)
        SAVE_SWITCH_STACK
-       movel   %sp,%sp@-                 | switch_stack pointer
-       pea     %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
+       movel   %sp,%a1                         | switch_stack pointer
+       lea     %sp@(SWITCH_STACK_SIZE),%a0     | pt_regs pointer
+       lea     %sp@(-84),%sp                   | leave a gap
+       movel   %a1,%sp@-
+       movel   %a0,%sp@-
+       | stack contents:
+       |   [original pt_regs address] [original switch_stack address]
+       |   [gap] [switch_stack] [pt_regs] [exception frame]
        jbsr    do_rt_sigreturn
-       addql   #8,%sp
+
+1:
+       | stack contents now:
+       |   [original pt_regs address] [original switch_stack address]
+       |   [unused part of the gap] [moved switch_stack] [moved pt_regs]
+       |   [replacement exception frame]
+       | return value of do_{rt_,}sigreturn() points to moved switch_stack.
+
+       movel   %d0,%sp                         | discard the leftover junk
        RESTORE_SWITCH_STACK
+       | stack contents now is just [syscall return address] [pt_regs] [frame]
+       | return pt_regs.d0
+       movel   %sp@(PT_OFF_D0+4),%d0
        rts
 
 ENTRY(buserr)
@@ -182,25 +199,6 @@ do_trace_exit:
        addql   #4,%sp
        jra     .Lret_from_exception
 
-ENTRY(ret_from_signal)
-       movel   %curptr@(TASK_STACK),%a1
-       tstb    %a1@(TINFO_FLAGS+2)
-       jge     1f
-       jbsr    syscall_trace
-1:     RESTORE_SWITCH_STACK
-       addql   #4,%sp
-/* on 68040 complete pending writebacks if any */
-#ifdef CONFIG_M68040
-       bfextu  %sp@(PT_OFF_FORMATVEC){#0,#4},%d0
-       subql   #7,%d0                          | bus error frame ?
-       jbne    1f
-       movel   %sp,%sp@-
-       jbsr    berr_040cleanup
-       addql   #4,%sp
-1:
-#endif
-       jra     .Lret_from_exception
-
 ENTRY(system_call)
        SAVE_ALL_SYS
 
@@ -338,7 +336,7 @@ resume:
 
        /* save fs (sfc,%dfc) (may be pointing to kernel memory) */
        movec   %sfc,%d0
-       movew   %d0,%a0@(TASK_THREAD+THREAD_FS)
+       movew   %d0,%a0@(TASK_THREAD+THREAD_FC)
 
        /* save usp */
        /* it is better to use a movel here instead of a movew 8*) */
@@ -424,7 +422,7 @@ resume:
        movel   %a0,%usp
 
        /* restore fs (sfc,%dfc) */
-       movew   %a1@(TASK_THREAD+THREAD_FS),%a0
+       movew   %a1@(TASK_THREAD+THREAD_FC),%a0
        movec   %a0,%sfc
        movec   %a0,%dfc
 
index db49f90..1ab692b 100644 (file)
@@ -92,7 +92,7 @@ void show_regs(struct pt_regs * regs)
 
 void flush_thread(void)
 {
-       current->thread.fs = __USER_DS;
+       current->thread.fc = USER_DATA;
 #ifdef CONFIG_FPU
        if (!FPU_IS_EMU) {
                unsigned long zero = 0;
@@ -155,7 +155,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
         * Must save the current SFC/DFC value, NOT the value when
         * the parent was last descheduled - RGH  10-08-96
         */
-       p->thread.fs = get_fs().seg;
+       p->thread.fc = USER_DATA;
 
        if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
index 8f215e7..338817d 100644 (file)
@@ -447,7 +447,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs)
 
        if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
                fpu_version = sc->sc_fpstate[0];
-               if (CPU_IS_020_OR_030 &&
+               if (CPU_IS_020_OR_030 && !regs->stkadj &&
                    regs->vector >= (VEC_FPBRUC * 4) &&
                    regs->vector <= (VEC_FPNAN * 4)) {
                        /* Clear pending exception in 68882 idle frame */
@@ -510,7 +510,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs *
                if (!(CPU_IS_060 || CPU_IS_COLDFIRE))
                        context_size = fpstate[1];
                fpu_version = fpstate[0];
-               if (CPU_IS_020_OR_030 &&
+               if (CPU_IS_020_OR_030 && !regs->stkadj &&
                    regs->vector >= (VEC_FPBRUC * 4) &&
                    regs->vector <= (VEC_FPNAN * 4)) {
                        /* Clear pending exception in 68882 idle frame */
@@ -641,56 +641,35 @@ static inline void siginfo_build_tests(void)
 static int mangle_kernel_stack(struct pt_regs *regs, int formatvec,
                               void __user *fp)
 {
-       int fsize = frame_extra_sizes(formatvec >> 12);
-       if (fsize < 0) {
+       int extra = frame_extra_sizes(formatvec >> 12);
+       char buf[sizeof_field(struct frame, un)];
+
+       if (extra < 0) {
                /*
                 * user process trying to return with weird frame format
                 */
                pr_debug("user process returning with weird frame format\n");
-               return 1;
+               return -1;
        }
-       if (!fsize) {
-               regs->format = formatvec >> 12;
-               regs->vector = formatvec & 0xfff;
-       } else {
-               struct switch_stack *sw = (struct switch_stack *)regs - 1;
-               /* yes, twice as much as max(sizeof(frame.un.fmt<x>)) */
-               unsigned long buf[sizeof_field(struct frame, un) / 2];
-
-               /* that'll make sure that expansion won't crap over data */
-               if (copy_from_user(buf + fsize / 4, fp, fsize))
-                       return 1;
-
-               /* point of no return */
-               regs->format = formatvec >> 12;
-               regs->vector = formatvec & 0xfff;
-#define frame_offset (sizeof(struct pt_regs)+sizeof(struct switch_stack))
-               __asm__ __volatile__ (
-#ifdef CONFIG_COLDFIRE
-                        "   movel %0,%/sp\n\t"
-                        "   bra ret_from_signal\n"
-#else
-                        "   movel %0,%/a0\n\t"
-                        "   subl %1,%/a0\n\t"     /* make room on stack */
-                        "   movel %/a0,%/sp\n\t"  /* set stack pointer */
-                        /* move switch_stack and pt_regs */
-                        "1: movel %0@+,%/a0@+\n\t"
-                        "   dbra %2,1b\n\t"
-                        "   lea %/sp@(%c3),%/a0\n\t" /* add offset of fmt */
-                        "   lsrl  #2,%1\n\t"
-                        "   subql #1,%1\n\t"
-                        /* copy to the gap we'd made */
-                        "2: movel %4@+,%/a0@+\n\t"
-                        "   dbra %1,2b\n\t"
-                        "   bral ret_from_signal\n"
+       if (extra && copy_from_user(buf, fp, extra))
+               return -1;
+       regs->format = formatvec >> 12;
+       regs->vector = formatvec & 0xfff;
+       if (extra) {
+               void *p = (struct switch_stack *)regs - 1;
+               struct frame *new = (void *)regs - extra;
+               int size = sizeof(struct pt_regs)+sizeof(struct switch_stack);
+
+               memmove(p - extra, p, size);
+               memcpy(p - extra + size, buf, extra);
+               current->thread.esp0 = (unsigned long)&new->ptregs;
+#ifdef CONFIG_M68040
+               /* on 68040 complete pending writebacks if any */
+               if (new->ptregs.format == 7) // bus error frame
+                       berr_040cleanup(new);
 #endif
-                        : /* no outputs, it doesn't ever return */
-                        : "a" (sw), "d" (fsize), "d" (frame_offset/4-1),
-                          "n" (frame_offset), "a" (buf + fsize/4)
-                        : "a0");
-#undef frame_offset
        }
-       return 0;
+       return extra;
 }
 
 static inline int
@@ -698,7 +677,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
 {
        int formatvec;
        struct sigcontext context;
-       int err = 0;
 
        siginfo_build_tests();
 
@@ -707,7 +685,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
 
        /* get previous context */
        if (copy_from_user(&context, usc, sizeof(context)))
-               goto badframe;
+               return -1;
 
        /* restore passed registers */
        regs->d0 = context.sc_d0;
@@ -720,15 +698,10 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
        wrusp(context.sc_usp);
        formatvec = context.sc_formatvec;
 
-       err = restore_fpu_state(&context);
+       if (restore_fpu_state(&context))
+               return -1;
 
-       if (err || mangle_kernel_stack(regs, formatvec, fp))
-               goto badframe;
-
-       return 0;
-
-badframe:
-       return 1;
+       return mangle_kernel_stack(regs, formatvec, fp);
 }
 
 static inline int
@@ -745,7 +718,7 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
 
        err = __get_user(temp, &uc->uc_mcontext.version);
        if (temp != MCONTEXT_VERSION)
-               goto badframe;
+               return -1;
        /* restore passed registers */
        err |= __get_user(regs->d0, &gregs[0]);
        err |= __get_user(regs->d1, &gregs[1]);
@@ -774,22 +747,17 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
        err |= restore_altstack(&uc->uc_stack);
 
        if (err)
-               goto badframe;
+               return -1;
 
-       if (mangle_kernel_stack(regs, temp, &uc->uc_extra))
-               goto badframe;
-
-       return 0;
-
-badframe:
-       return 1;
+       return mangle_kernel_stack(regs, temp, &uc->uc_extra);
 }
 
-asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
+asmlinkage void *do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
 {
        unsigned long usp = rdusp();
        struct sigframe __user *frame = (struct sigframe __user *)(usp - 4);
        sigset_t set;
+       int size;
 
        if (!access_ok(frame, sizeof(*frame)))
                goto badframe;
@@ -801,20 +769,22 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->sc, frame + 1))
+       size = restore_sigcontext(regs, &frame->sc, frame + 1);
+       if (size < 0)
                goto badframe;
-       return regs->d0;
+       return (void *)sw - size;
 
 badframe:
        force_sig(SIGSEGV);
-       return 0;
+       return sw;
 }
 
-asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
+asmlinkage void *do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
 {
        unsigned long usp = rdusp();
        struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4);
        sigset_t set;
+       int size;
 
        if (!access_ok(frame, sizeof(*frame)))
                goto badframe;
@@ -823,27 +793,34 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
 
        set_current_blocked(&set);
 
-       if (rt_restore_ucontext(regs, sw, &frame->uc))
+       size = rt_restore_ucontext(regs, sw, &frame->uc);
+       if (size < 0)
                goto badframe;
-       return regs->d0;
+       return (void *)sw - size;
 
 badframe:
        force_sig(SIGSEGV);
-       return 0;
+       return sw;
+}
+
+static inline struct pt_regs *rte_regs(struct pt_regs *regs)
+{
+       return (void *)regs + regs->stkadj;
 }
 
 static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
                             unsigned long mask)
 {
+       struct pt_regs *tregs = rte_regs(regs);
        sc->sc_mask = mask;
        sc->sc_usp = rdusp();
        sc->sc_d0 = regs->d0;
        sc->sc_d1 = regs->d1;
        sc->sc_a0 = regs->a0;
        sc->sc_a1 = regs->a1;
-       sc->sc_sr = regs->sr;
-       sc->sc_pc = regs->pc;
-       sc->sc_formatvec = regs->format << 12 | regs->vector;
+       sc->sc_sr = tregs->sr;
+       sc->sc_pc = tregs->pc;
+       sc->sc_formatvec = tregs->format << 12 | tregs->vector;
        save_a5_state(sc, regs);
        save_fpu_state(sc, regs);
 }
@@ -851,6 +828,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs)
 {
        struct switch_stack *sw = (struct switch_stack *)regs - 1;
+       struct pt_regs *tregs = rte_regs(regs);
        greg_t __user *gregs = uc->uc_mcontext.gregs;
        int err = 0;
 
@@ -871,9 +849,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *
        err |= __put_user(sw->a5, &gregs[13]);
        err |= __put_user(sw->a6, &gregs[14]);
        err |= __put_user(rdusp(), &gregs[15]);
-       err |= __put_user(regs->pc, &gregs[16]);
-       err |= __put_user(regs->sr, &gregs[17]);
-       err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec);
+       err |= __put_user(tregs->pc, &gregs[16]);
+       err |= __put_user(tregs->sr, &gregs[17]);
+       err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec);
        err |= rt_save_fpu_state(uc, regs);
        return err;
 }
@@ -890,13 +868,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
                        struct pt_regs *regs)
 {
        struct sigframe __user *frame;
-       int fsize = frame_extra_sizes(regs->format);
+       struct pt_regs *tregs = rte_regs(regs);
+       int fsize = frame_extra_sizes(tregs->format);
        struct sigcontext context;
        int err = 0, sig = ksig->sig;
 
        if (fsize < 0) {
                pr_debug("setup_frame: Unknown frame format %#x\n",
-                        regs->format);
+                        tregs->format);
                return -EFAULT;
        }
 
@@ -907,7 +886,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
 
        err |= __put_user(sig, &frame->sig);
 
-       err |= __put_user(regs->vector, &frame->code);
+       err |= __put_user(tregs->vector, &frame->code);
        err |= __put_user(&frame->sc, &frame->psc);
 
        if (_NSIG_WORDS > 1)
@@ -934,33 +913,27 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
        push_cache ((unsigned long) &frame->retcode);
 
        /*
-        * Set up registers for signal handler.  All the state we are about
-        * to destroy is successfully copied to sigframe.
-        */
-       wrusp ((unsigned long) frame);
-       regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
-       adjustformat(regs);
-
-       /*
         * This is subtle; if we build more than one sigframe, all but the
         * first one will see frame format 0 and have fsize == 0, so we won't
         * screw stkadj.
         */
-       if (fsize)
+       if (fsize) {
                regs->stkadj = fsize;
-
-       /* Prepare to skip over the extra stuff in the exception frame.  */
-       if (regs->stkadj) {
-               struct pt_regs *tregs =
-                       (struct pt_regs *)((ulong)regs + regs->stkadj);
+               tregs = rte_regs(regs);
                pr_debug("Performing stackadjust=%04lx\n", regs->stkadj);
-               /* This must be copied with decreasing addresses to
-                   handle overlaps.  */
                tregs->vector = 0;
                tregs->format = 0;
-               tregs->pc = regs->pc;
                tregs->sr = regs->sr;
        }
+
+       /*
+        * Set up registers for signal handler.  All the state we are about
+        * to destroy is successfully copied to sigframe.
+        */
+       wrusp ((unsigned long) frame);
+       tregs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+       adjustformat(regs);
+
        return 0;
 }
 
@@ -968,7 +941,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
                           struct pt_regs *regs)
 {
        struct rt_sigframe __user *frame;
-       int fsize = frame_extra_sizes(regs->format);
+       struct pt_regs *tregs = rte_regs(regs);
+       int fsize = frame_extra_sizes(tregs->format);
        int err = 0, sig = ksig->sig;
 
        if (fsize < 0) {
@@ -1019,33 +993,26 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
        push_cache ((unsigned long) &frame->retcode);
 
        /*
-        * Set up registers for signal handler.  All the state we are about
-        * to destroy is successfully copied to sigframe.
-        */
-       wrusp ((unsigned long) frame);
-       regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
-       adjustformat(regs);
-
-       /*
         * This is subtle; if we build more than one sigframe, all but the
         * first one will see frame format 0 and have fsize == 0, so we won't
         * screw stkadj.
         */
-       if (fsize)
+       if (fsize) {
                regs->stkadj = fsize;
-
-       /* Prepare to skip over the extra stuff in the exception frame.  */
-       if (regs->stkadj) {
-               struct pt_regs *tregs =
-                       (struct pt_regs *)((ulong)regs + regs->stkadj);
+               tregs = rte_regs(regs);
                pr_debug("Performing stackadjust=%04lx\n", regs->stkadj);
-               /* This must be copied with decreasing addresses to
-                   handle overlaps.  */
                tregs->vector = 0;
                tregs->format = 0;
-               tregs->pc = regs->pc;
                tregs->sr = regs->sr;
        }
+
+       /*
+        * Set up registers for signal handler.  All the state we are about
+        * to destroy is successfully copied to sigframe.
+        */
+       wrusp ((unsigned long) frame);
+       tregs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+       adjustformat(regs);
        return 0;
 }
 
index 5b19fcd..9718ce9 100644 (file)
@@ -181,9 +181,8 @@ static inline void access_error060 (struct frame *fp)
 static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
 {
        unsigned long mmusr;
-       mm_segment_t old_fs = get_fs();
 
-       set_fs(MAKE_MM_SEG(wbs));
+       set_fc(wbs);
 
        if (iswrite)
                asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
@@ -192,7 +191,7 @@ static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
 
        asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
 
-       set_fs(old_fs);
+       set_fc(USER_DATA);
 
        return mmusr;
 }
@@ -201,10 +200,8 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
                                   unsigned long wbd)
 {
        int res = 0;
-       mm_segment_t old_fs = get_fs();
 
-       /* set_fs can not be moved, otherwise put_user() may oops */
-       set_fs(MAKE_MM_SEG(wbs));
+       set_fc(wbs);
 
        switch (wbs & WBSIZ_040) {
        case BA_SIZE_BYTE:
@@ -218,9 +215,7 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
                break;
        }
 
-       /* set_fs can not be moved, otherwise put_user() may oops */
-       set_fs(old_fs);
-
+       set_fc(USER_DATA);
 
        pr_debug("do_040writeback1, res=%d\n", res);
 
index 90f4e9c..4fab347 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <linux/uaccess.h>
 #include <asm/io.h>
-#include <asm/segment.h>
 #include <asm/setup.h>
 #include <asm/macintosh.h>
 #include <asm/mac_via.h>
index b486c08..dde978e 100644 (file)
@@ -49,24 +49,7 @@ static unsigned long virt_to_phys_slow(unsigned long vaddr)
                if (mmusr & MMU_R_040)
                        return (mmusr & PAGE_MASK) | (vaddr & ~PAGE_MASK);
        } else {
-               unsigned short mmusr;
-               unsigned long *descaddr;
-
-               asm volatile ("ptestr %3,%2@,#7,%0\n\t"
-                             "pmove %%psr,%1"
-                             : "=a&" (descaddr), "=m" (mmusr)
-                             : "a" (vaddr), "d" (get_fs().seg));
-               if (mmusr & (MMU_I|MMU_B|MMU_L))
-                       return 0;
-               descaddr = phys_to_virt((unsigned long)descaddr);
-               switch (mmusr & MMU_NUM) {
-               case 1:
-                       return (*descaddr & 0xfe000000) | (vaddr & 0x01ffffff);
-               case 2:
-                       return (*descaddr & 0xfffc0000) | (vaddr & 0x0003ffff);
-               case 3:
-                       return (*descaddr & PAGE_MASK) | (vaddr & ~PAGE_MASK);
-               }
+               WARN_ON_ONCE(!CPU_IS_040_OR_060);
        }
        return 0;
 }
@@ -107,11 +90,9 @@ void flush_icache_user_range(unsigned long address, unsigned long endaddr)
 
 void flush_icache_range(unsigned long address, unsigned long endaddr)
 {
-       mm_segment_t old_fs = get_fs();
-
-       set_fs(KERNEL_DS);
+       set_fc(SUPER_DATA);
        flush_icache_user_range(address, endaddr);
-       set_fs(old_fs);
+       set_fc(USER_DATA);
 }
 EXPORT_SYMBOL(flush_icache_range);
 
index 5d749e1..1b47bec 100644 (file)
@@ -72,12 +72,6 @@ void __init paging_init(void)
        if (!empty_zero_page)
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, PAGE_SIZE, PAGE_SIZE);
-
-       /*
-        * Set up SFC/DFC registers (user data space).
-        */
-       set_fs (USER_DS);
-
        max_zone_pfn[ZONE_DMA] = end_mem >> PAGE_SHIFT;
        free_area_init(max_zone_pfn);
 }
index 1269d51..20ddf71 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/vmalloc.h>
 
 #include <asm/setup.h>
-#include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/io.h>
 #include <asm/tlbflush.h>
index fe75aec..c2c03b0 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
-#include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/traps.h>
 #include <asm/machdep.h>
index 3a653f0..9f3f777 100644 (file)
@@ -467,7 +467,7 @@ void __init paging_init(void)
        /*
         * Set up SFC/DFC registers
         */
-       set_fs(KERNEL_DS);
+       set_fc(USER_DATA);
 
 #ifdef DEBUG
        printk ("before free_area_init\n");
index f7dd472..203f428 100644 (file)
@@ -31,7 +31,6 @@
 #include <asm/intersil.h>
 #include <asm/irq.h>
 #include <asm/sections.h>
-#include <asm/segment.h>
 #include <asm/sun3ints.h>
 
 char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
@@ -89,7 +88,7 @@ void __init sun3_init(void)
        sun3_reserved_pmeg[249] = 1;
        sun3_reserved_pmeg[252] = 1;
        sun3_reserved_pmeg[253] = 1;
-       set_fs(KERNEL_DS);
+       set_fc(USER_DATA);
 }
 
 /* Without this, Bad Things happen when something calls arch_reset. */
index 7aa879b..7ec2081 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/uaccess.h>
 #include <asm/page.h>
 #include <asm/sun3mmu.h>
-#include <asm/segment.h>
 #include <asm/oplib.h>
 #include <asm/mmu_context.h>
 #include <asm/dvma.h>
@@ -191,14 +190,13 @@ void __init mmu_emu_init(unsigned long bootmem_end)
        for(seg = 0; seg < PAGE_OFFSET; seg += SUN3_PMEG_SIZE)
                sun3_put_segmap(seg, SUN3_INVALID_PMEG);
 
-       set_fs(MAKE_MM_SEG(3));
+       set_fc(3);
        for(seg = 0; seg < 0x10000000; seg += SUN3_PMEG_SIZE) {
                i = sun3_get_segmap(seg);
                for(j = 1; j < CONTEXTS_NUM; j++)
                        (*(romvec->pv_setctxt))(j, (void *)seg, i);
        }
-       set_fs(KERNEL_DS);
-
+       set_fc(USER_DATA);
 }
 
 /* erase the mappings for a dead context.  Uses the pg_dir for hints
index 41ae422..36cc280 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-#include <asm/segment.h>
 #include <asm/intersil.h>
 #include <asm/oplib.h>
 #include <asm/sun3ints.h>
index 74d2fe5..64c23bf 100644 (file)
@@ -14,7 +14,6 @@
 #include <asm/traps.h>
 #include <asm/sun3xprom.h>
 #include <asm/idprom.h>
-#include <asm/segment.h>
 #include <asm/sun3ints.h>
 #include <asm/openprom.h>
 #include <asm/machines.h>
index 0af8862..cb6d224 100644 (file)
@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx)
        ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
         func##_positive)
 
+static bool is_bad_offset(int b_off)
+{
+       return b_off > 0x1ffff || b_off < -0x20000;
+}
+
 static int build_body(struct jit_ctx *ctx)
 {
        const struct bpf_prog *prog = ctx->skf;
@@ -728,7 +733,10 @@ load_common:
                        /* Load return register on DS for failures */
                        emit_reg_move(r_ret, r_zero, ctx);
                        /* Return with error */
-                       emit_b(b_imm(prog->len, ctx), ctx);
+                       b_off = b_imm(prog->len, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_LD | BPF_W | BPF_IND:
@@ -775,8 +783,10 @@ load_ind:
                        emit_jalr(MIPS_R_RA, r_s0, ctx);
                        emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
                        /* Check the error value */
-                       emit_bcond(MIPS_COND_NE, r_ret, 0,
-                                  b_imm(prog->len, ctx), ctx);
+                       b_off = b_imm(prog->len, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        /* We are good */
                        /* X <- P[1:K] & 0xf */
@@ -855,8 +865,10 @@ load_ind:
                        /* A /= X */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
-                       emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-                                  b_imm(prog->len, ctx), ctx);
+                       b_off = b_imm(prog->len, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_div(r_A, r_X, ctx);
                        break;
@@ -864,8 +876,10 @@ load_ind:
                        /* A %= X */
                        ctx->flags |= SEEN_X | SEEN_A;
                        /* Check if r_X is zero */
-                       emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-                                  b_imm(prog->len, ctx), ctx);
+                       b_off = b_imm(prog->len, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
                        emit_load_imm(r_ret, 0, ctx); /* delay slot */
                        emit_mod(r_A, r_X, ctx);
                        break;
@@ -926,7 +940,10 @@ load_ind:
                        break;
                case BPF_JMP | BPF_JA:
                        /* pc += K */
-                       emit_b(b_imm(i + k + 1, ctx), ctx);
+                       b_off = b_imm(i + k + 1, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_b(b_off, ctx);
                        emit_nop(ctx);
                        break;
                case BPF_JMP | BPF_JEQ | BPF_K:
@@ -1056,12 +1073,16 @@ jmp_cmp:
                        break;
                case BPF_RET | BPF_A:
                        ctx->flags |= SEEN_A;
-                       if (i != prog->len - 1)
+                       if (i != prog->len - 1) {
                                /*
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
-                               emit_b(b_imm(prog->len, ctx), ctx);
+                               b_off = b_imm(prog->len, ctx);
+                               if (is_bad_offset(b_off))
+                                       return -E2BIG;
+                               emit_b(b_off, ctx);
+                       }
                        emit_reg_move(r_ret, r_A, ctx); /* delay slot */
                        break;
                case BPF_RET | BPF_K:
@@ -1075,7 +1096,10 @@ jmp_cmp:
                                 * If this is not the last instruction
                                 * then jump to the epilogue
                                 */
-                               emit_b(b_imm(prog->len, ctx), ctx);
+                               b_off = b_imm(prog->len, ctx);
+                               if (is_bad_offset(b_off))
+                                       return -E2BIG;
+                               emit_b(b_off, ctx);
                                emit_nop(ctx);
                        }
                        break;
@@ -1133,8 +1157,10 @@ jmp_cmp:
                        /* Load *dev pointer */
                        emit_load_ptr(r_s0, r_skb, off, ctx);
                        /* error (0) in the delay slot */
-                       emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
-                                  b_imm(prog->len, ctx), ctx);
+                       b_off = b_imm(prog->len, ctx);
+                       if (is_bad_offset(b_off))
+                               return -E2BIG;
+                       emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx);
                        emit_reg_move(r_ret, r_zero, ctx);
                        if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
                                BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
        /* Generate the actual JIT code */
        build_prologue(&ctx);
-       build_body(&ctx);
+       if (build_body(&ctx)) {
+               module_memfree(ctx.target);
+               goto out;
+       }
        build_epilogue(&ctx);
 
        /* Update the icache */
index a8bc06e..ca1beb8 100644 (file)
@@ -3,9 +3,10 @@
 config EARLY_PRINTK
        bool "Activate early kernel debugging"
        default y
+       depends on TTY
        select SERIAL_CORE_CONSOLE
        help
-         Enable early printk on console
+         Enable early printk on console.
          This is useful for kernel debugging when your machine crashes very
          early before the console code is initialized.
          You should normally say N here, unless you want to debug such a crash.
index cf8d687..40bc8fb 100644 (file)
@@ -149,8 +149,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low,
 
 void __init setup_arch(char **cmdline_p)
 {
-       int dram_start;
-
        console_verbose();
 
        memory_start = memblock_start_of_DRAM();
index 16256e1..1072245 100644 (file)
@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
-       set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+       set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
-       clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+       clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
index 752a0ff..6a6dd5e 100644 (file)
@@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
                kvm_s390_patch_guest_per_regs(vcpu);
        }
 
-       clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
+       clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
 
        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
index ecd741e..52bc8fb 100644 (file)
@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
 
 static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
 {
-       return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+       return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static inline int kvm_is_ucontrol(struct kvm *kvm)
index fa2c3f5..18d2f51 100644 (file)
@@ -367,10 +367,11 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8)
         *      %rdx: src (1..8 blocks)
         *      %rcx: num blocks (1..8)
         */
-       FRAME_BEGIN
-
        cmpq $5, %rcx;
        jb sm4_aesni_avx_crypt4;
+
+       FRAME_BEGIN
+
        vmovdqu (0 * 16)(%rdx), RA0;
        vmovdqu (1 * 16)(%rdx), RA1;
        vmovdqu (2 * 16)(%rdx), RA2;
index 87bd602..6a5f3ac 100644 (file)
@@ -46,7 +46,7 @@ struct kvm_page_track_notifier_node {
                            struct kvm_page_track_notifier_node *node);
 };
 
-void kvm_page_track_init(struct kvm *kvm);
+int kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
index 2837110..c589ac8 100644 (file)
@@ -4206,7 +4206,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
        u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
 
        if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
-               return emulate_ud(ctxt);
+               return emulate_gp(ctxt, 0);
 
        return X86EMUL_CONTINUE;
 }
index 232a86a..d5124b5 100644 (file)
@@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
        for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
                stimer_init(&hv_vcpu->stimer[i], i);
 
-       hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+       hv_vcpu->vp_index = vcpu->vcpu_idx;
 
        return 0;
 }
@@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
        switch (msr) {
        case HV_X64_MSR_VP_INDEX: {
                struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
-               int vcpu_idx = kvm_vcpu_get_idx(vcpu);
                u32 new_vp_index = (u32)data;
 
                if (!host || new_vp_index >= KVM_MAX_VCPUS)
@@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
                 * VP index is changing, adjust num_mismatched_vp_indexes if
                 * it now matches or no longer matches vcpu_idx.
                 */
-               if (hv_vcpu->vp_index == vcpu_idx)
+               if (hv_vcpu->vp_index == vcpu->vcpu_idx)
                        atomic_inc(&hv->num_mismatched_vp_indexes);
-               else if (new_vp_index == vcpu_idx)
+               else if (new_vp_index == vcpu->vcpu_idx)
                        atomic_dec(&hv->num_mismatched_vp_indexes);
 
                hv_vcpu->vp_index = new_vp_index;
index 730da85..ed1c4e5 100644 (file)
@@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-       return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu);
+       return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx;
 }
 
 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
index ff005fe..8c065da 100644 (file)
@@ -319,8 +319,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
        unsigned index;
        bool mask_before, mask_after;
        union kvm_ioapic_redirect_entry *e;
-       unsigned long vcpu_bitmap;
        int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
+       DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
 
        switch (ioapic->ioregsel) {
        case IOAPIC_REG_VERSION:
@@ -384,9 +384,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                        irq.shorthand = APIC_DEST_NOSHORT;
                        irq.dest_id = e->fields.dest_id;
                        irq.msi_redir_hint = false;
-                       bitmap_zero(&vcpu_bitmap, 16);
+                       bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
                        kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
-                                                &vcpu_bitmap);
+                                                vcpu_bitmap);
                        if (old_dest_mode != e->fields.dest_mode ||
                            old_dest_id != e->fields.dest_id) {
                                /*
@@ -399,10 +399,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                                    kvm_lapic_irq_dest_mode(
                                        !!e->fields.dest_mode);
                                kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
-                                                        &vcpu_bitmap);
+                                                        vcpu_bitmap);
                        }
                        kvm_make_scan_ioapic_request_mask(ioapic->kvm,
-                                                         &vcpu_bitmap);
+                                                         vcpu_bitmap);
                } else {
                        kvm_make_scan_ioapic_request(ioapic->kvm);
                }
index 2d7e611..1a64ba5 100644 (file)
@@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
        } while (!sp->unsync_children);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
-                             struct kvm_mmu_page *parent)
+static int mmu_sync_children(struct kvm_vcpu *vcpu,
+                            struct kvm_mmu_page *parent, bool can_yield)
 {
        int i;
        struct kvm_mmu_page *sp;
@@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
                }
                if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
                        kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+                       if (!can_yield) {
+                               kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+                               return -EINTR;
+                       }
+
                        cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
                        flush = false;
                }
        }
 
        kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+       return 0;
 }
 
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                        kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
 
-               if (sp->unsync_children)
-                       kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
                __clear_sp_write_flooding_count(sp);
 
 trace_get_page:
@@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                write_lock(&vcpu->kvm->mmu_lock);
                kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 
-               mmu_sync_children(vcpu, sp);
+               mmu_sync_children(vcpu, sp, true);
 
                kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
                write_unlock(&vcpu->kvm->mmu_lock);
@@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
                if (IS_VALID_PAE_ROOT(root)) {
                        root &= PT64_BASE_ADDR_MASK;
                        sp = to_shadow_page(root);
-                       mmu_sync_children(vcpu, sp);
+                       mmu_sync_children(vcpu, sp, true);
                }
        }
 
index 269f11f..21427e8 100644 (file)
@@ -164,13 +164,13 @@ void kvm_page_track_cleanup(struct kvm *kvm)
        cleanup_srcu_struct(&head->track_srcu);
 }
 
-void kvm_page_track_init(struct kvm *kvm)
+int kvm_page_track_init(struct kvm *kvm)
 {
        struct kvm_page_track_notifier_head *head;
 
        head = &kvm->arch.track_notifier_head;
-       init_srcu_struct(&head->track_srcu);
        INIT_HLIST_HEAD(&head->track_notifier_list);
+       return init_srcu_struct(&head->track_srcu);
 }
 
 /*
index 7d03e9b..913d52a 100644 (file)
@@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
                if (!is_shadow_present_pte(*it.sptep)) {
                        table_gfn = gw->table_gfn[it.level - 2];
                        access = gw->pt_access[it.level - 2];
-                       sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-                                             false, access);
+                       sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
+                                             it.level-1, false, access);
+                       /*
+                        * We must synchronize the pagetable before linking it
+                        * because the guest doesn't need to flush tlb when
+                        * the gpte is changed from non-present to present.
+                        * Otherwise, the guest may use the wrong mapping.
+                        *
+                        * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+                        * synchronized it transiently via kvm_sync_page().
+                        *
+                        * For higher level pagetable, we synchronize it via
+                        * the slower mmu_sync_children().  If it needs to
+                        * break, some progress has been made; return
+                        * RET_PF_RETRY and retry on the next #PF.
+                        * KVM_REQ_MMU_SYNC is not necessary but it
+                        * expedites the process.
+                        */
+                       if (sp->unsync_children &&
+                           mmu_sync_children(vcpu, sp, false))
+                               return RET_PF_RETRY;
                }
 
                /*
@@ -1047,14 +1066,6 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
  * Using the cached information from sp->gfns is safe because:
  * - The spte has a reference to the struct page, so the pfn for a given gfn
  *   can't change unless all sptes pointing to it are nuked first.
- *
- * Note:
- *   We should flush all tlbs if spte is dropped even though guest is
- *   responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
- *   and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
- *   used by guest then tlbs are not flushed, so guest is allowed to access the
- *   freed pages.
- *   And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1107,13 +1118,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                        return 0;
 
                if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
-                       /*
-                        * Update spte before increasing tlbs_dirty to make
-                        * sure no tlb flush is lost after spte is zapped; see
-                        * the comments in kvm_flush_remote_tlbs().
-                        */
-                       smp_wmb();
-                       vcpu->kvm->tlbs_dirty++;
+                       set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
                        continue;
                }
 
@@ -1128,12 +1133,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
                if (gfn != sp->gfns[i]) {
                        drop_spte(vcpu->kvm, &sp->spt[i]);
-                       /*
-                        * The same as above where we are doing
-                        * prefetch_invalid_gpte().
-                        */
-                       smp_wmb();
-                       vcpu->kvm->tlbs_dirty++;
+                       set_spte_ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
                        continue;
                }
 
index 2545d0c..510b833 100644 (file)
@@ -545,7 +545,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
                (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
                (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
 
-       svm->vmcb->control.virt_ext            = svm->nested.ctl.virt_ext;
        svm->vmcb->control.int_vector          = svm->nested.ctl.int_vector;
        svm->vmcb->control.int_state           = svm->nested.ctl.int_state;
        svm->vmcb->control.event_inj           = svm->nested.ctl.event_inj;
@@ -579,7 +578,7 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
 }
 
 int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
-                        struct vmcb *vmcb12)
+                        struct vmcb *vmcb12, bool from_vmrun)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        int ret;
@@ -609,13 +608,16 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
        nested_vmcb02_prepare_save(svm, vmcb12);
 
        ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
-                                 nested_npt_enabled(svm), true);
+                                 nested_npt_enabled(svm), from_vmrun);
        if (ret)
                return ret;
 
        if (!npt_enabled)
                vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
 
+       if (!from_vmrun)
+               kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
        svm_set_gif(svm, true);
 
        return 0;
@@ -681,7 +683,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 
        svm->nested.nested_run_pending = 1;
 
-       if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
+       if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
                goto out_exit_err;
 
        if (nested_svm_vmrun_msrpm(svm))
index 75e0b21..c36b5fe 100644 (file)
@@ -595,43 +595,50 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
        return 0;
 }
 
-static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+                                   int *error)
 {
-       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
        struct sev_data_launch_update_vmsa vmsa;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       int ret;
+
+       /* Perform some pre-encryption checks against the VMSA */
+       ret = sev_es_sync_vmsa(svm);
+       if (ret)
+               return ret;
+
+       /*
+        * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
+        * the VMSA memory content (i.e. it will write the same memory region
+        * with the guest's key), so invalidate it first.
+        */
+       clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+       vmsa.reserved = 0;
+       vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
+       vmsa.address = __sme_pa(svm->vmsa);
+       vmsa.len = PAGE_SIZE;
+       return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
        struct kvm_vcpu *vcpu;
        int i, ret;
 
        if (!sev_es_guest(kvm))
                return -ENOTTY;
 
-       vmsa.reserved = 0;
-
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               struct vcpu_svm *svm = to_svm(vcpu);
-
-               /* Perform some pre-encryption checks against the VMSA */
-               ret = sev_es_sync_vmsa(svm);
+               ret = mutex_lock_killable(&vcpu->mutex);
                if (ret)
                        return ret;
 
-               /*
-                * The LAUNCH_UPDATE_VMSA command will perform in-place
-                * encryption of the VMSA memory content (i.e it will write
-                * the same memory region with the guest's key), so invalidate
-                * it first.
-                */
-               clflush_cache_range(svm->vmsa, PAGE_SIZE);
+               ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
 
-               vmsa.handle = sev->handle;
-               vmsa.address = __sme_pa(svm->vmsa);
-               vmsa.len = PAGE_SIZE;
-               ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
-                                   &argp->error);
+               mutex_unlock(&vcpu->mutex);
                if (ret)
                        return ret;
-
-               svm->vcpu.arch.guest_state_protected = true;
        }
 
        return 0;
@@ -1397,8 +1404,10 @@ static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
        /* Bind ASID to this guest */
        ret = sev_bind_asid(kvm, start.handle, error);
-       if (ret)
+       if (ret) {
+               sev_decommission(start.handle);
                goto e_free_session;
+       }
 
        params.handle = start.handle;
        if (copy_to_user((void __user *)(uintptr_t)argp->data,
@@ -1464,7 +1473,7 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
        /* Pin guest memory */
        guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
-                                   PAGE_SIZE, &n, 0);
+                                   PAGE_SIZE, &n, 1);
        if (IS_ERR(guest_page)) {
                ret = PTR_ERR(guest_page);
                goto e_free_trans;
@@ -1501,6 +1510,20 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
        return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
 }
 
+static bool cmd_allowed_from_miror(u32 cmd_id)
+{
+       /*
+        * Allow mirror VMs to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
+        * active mirror VMs. Also allow the debugging and status commands.
+        */
+       if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
+           cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
+           cmd_id == KVM_SEV_DBG_ENCRYPT)
+               return true;
+
+       return false;
+}
+
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
        struct kvm_sev_cmd sev_cmd;
@@ -1517,8 +1540,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 
        mutex_lock(&kvm->lock);
 
-       /* enc_context_owner handles all memory enc operations */
-       if (is_mirroring_enc_context(kvm)) {
+       /* Only the enc_context_owner handles some memory enc operations. */
+       if (is_mirroring_enc_context(kvm) &&
+           !cmd_allowed_from_miror(sev_cmd.id)) {
                r = -EINVAL;
                goto out;
        }
@@ -1715,8 +1739,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
 {
        struct file *source_kvm_file;
        struct kvm *source_kvm;
-       struct kvm_sev_info *mirror_sev;
-       unsigned int asid;
+       struct kvm_sev_info source_sev, *mirror_sev;
        int ret;
 
        source_kvm_file = fget(source_fd);
@@ -1739,7 +1762,8 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
                goto e_source_unlock;
        }
 
-       asid = to_kvm_svm(source_kvm)->sev_info.asid;
+       memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
+              sizeof(source_sev));
 
        /*
         * The mirror kvm holds an enc_context_owner ref so its asid can't
@@ -1759,8 +1783,16 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
        /* Set enc_context_owner and copy its encryption context over */
        mirror_sev = &to_kvm_svm(kvm)->sev_info;
        mirror_sev->enc_context_owner = source_kvm;
-       mirror_sev->asid = asid;
        mirror_sev->active = true;
+       mirror_sev->asid = source_sev.asid;
+       mirror_sev->fd = source_sev.fd;
+       mirror_sev->es_active = source_sev.es_active;
+       mirror_sev->handle = source_sev.handle;
+       /*
+        * Do not copy ap_jump_table, since the mirror does not share the same
+        * KVM contexts as the original and the two may have different
+        * memory views.
+        */
 
        mutex_unlock(&kvm->lock);
        return 0;
index 05e8d4d..9896850 100644 (file)
@@ -1566,6 +1566,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
 
                svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
                        V_IRQ_INJECTION_BITS_MASK;
+
+               svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
        }
 
        vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@ -2222,6 +2224,10 @@ static int gp_interception(struct kvm_vcpu *vcpu)
        if (error_code)
                goto reinject;
 
+       /* All SVM instructions expect page aligned RAX */
+       if (svm->vmcb->save.rax & ~PAGE_MASK)
+               goto reinject;
+
        /* Decode the instruction for usage later */
        if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
                goto reinject;
@@ -4285,43 +4291,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
        struct kvm_host_map map_save;
        int ret;
 
-       if (is_guest_mode(vcpu)) {
-               /* FED8h - SVM Guest */
-               put_smstate(u64, smstate, 0x7ed8, 1);
-               /* FEE0h - SVM Guest VMCB Physical Address */
-               put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+       if (!is_guest_mode(vcpu))
+               return 0;
 
-               svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
-               svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
-               svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+       /* FED8h - SVM Guest */
+       put_smstate(u64, smstate, 0x7ed8, 1);
+       /* FEE0h - SVM Guest VMCB Physical Address */
+       put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
 
-               ret = nested_svm_vmexit(svm);
-               if (ret)
-                       return ret;
+       svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+       svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+       svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
 
-               /*
-                * KVM uses VMCB01 to store L1 host state while L2 runs but
-                * VMCB01 is going to be used during SMM and thus the state will
-                * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
-                * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
-                * format of the area is identical to guest save area offsetted
-                * by 0x400 (matches the offset of 'struct vmcb_save_area'
-                * within 'struct vmcb'). Note: HSAVE area may also be used by
-                * L1 hypervisor to save additional host context (e.g. KVM does
-                * that, see svm_prepare_guest_switch()) which must be
-                * preserved.
-                */
-               if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
-                                &map_save) == -EINVAL)
-                       return 1;
+       ret = nested_svm_vmexit(svm);
+       if (ret)
+               return ret;
 
-               BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+       /*
+        * KVM uses VMCB01 to store L1 host state while L2 runs but
+        * VMCB01 is going to be used during SMM and thus the state will
+        * be lost. Temporarily save non-VMLOAD/VMSAVE state to the host save
+        * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+        * format of the area is identical to guest save area offset
+        * by 0x400 (matches the offset of 'struct vmcb_save_area'
+        * within 'struct vmcb'). Note: HSAVE area may also be used by
+        * L1 hypervisor to save additional host context (e.g. KVM does
+        * that, see svm_prepare_guest_switch()) which must be
+        * preserved.
+        */
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+                        &map_save) == -EINVAL)
+               return 1;
 
-               svm_copy_vmrun_state(map_save.hva + 0x400,
-                                    &svm->vmcb01.ptr->save);
+       BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
 
-               kvm_vcpu_unmap(vcpu, &map_save, true);
-       }
+       svm_copy_vmrun_state(map_save.hva + 0x400,
+                            &svm->vmcb01.ptr->save);
+
+       kvm_vcpu_unmap(vcpu, &map_save, true);
        return 0;
 }
 
@@ -4329,50 +4336,54 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_host_map map, map_save;
-       int ret = 0;
+       u64 saved_efer, vmcb12_gpa;
+       struct vmcb *vmcb12;
+       int ret;
 
-       if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
-               u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
-               u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
-               u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
-               struct vmcb *vmcb12;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+               return 0;
 
-               if (guest) {
-                       if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
-                               return 1;
+       /* Non-zero if SMI arrived while vCPU was in guest mode. */
+       if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+               return 0;
 
-                       if (!(saved_efer & EFER_SVME))
-                               return 1;
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+               return 1;
 
-                       if (kvm_vcpu_map(vcpu,
-                                        gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
-                               return 1;
+       saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
+       if (!(saved_efer & EFER_SVME))
+               return 1;
 
-                       if (svm_allocate_nested(svm))
-                               return 1;
+       vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+               return 1;
 
-                       vmcb12 = map.hva;
+       ret = 1;
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+               goto unmap_map;
 
-                       nested_load_control_from_vmcb12(svm, &vmcb12->control);
+       if (svm_allocate_nested(svm))
+               goto unmap_save;
 
-                       ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
-                       kvm_vcpu_unmap(vcpu, &map, true);
+       /*
+        * Restore L1 host state from L1 HSAVE area as VMCB01 was
+        * used during SMM (see svm_enter_smm())
+        */
 
-                       /*
-                        * Restore L1 host state from L1 HSAVE area as VMCB01 was
-                        * used during SMM (see svm_enter_smm())
-                        */
-                       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
-                                        &map_save) == -EINVAL)
-                               return 1;
+       svm_copy_vmrun_state(&svm->vmcb01.ptr->save, map_save.hva + 0x400);
 
-                       svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
-                                            map_save.hva + 0x400);
+       /*
+        * Enter the nested guest now
+        */
 
-                       kvm_vcpu_unmap(vcpu, &map_save, true);
-               }
-       }
+       vmcb12 = map.hva;
+       nested_load_control_from_vmcb12(svm, &vmcb12->control);
+       ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
 
+unmap_save:
+       kvm_vcpu_unmap(vcpu, &map_save, true);
+unmap_map:
+       kvm_vcpu_unmap(vcpu, &map, true);
        return ret;
 }
 
index 524d943..128a54b 100644 (file)
@@ -459,7 +459,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
        return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
 }
 
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
+                        u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
 void svm_leave_nested(struct vcpu_svm *svm);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
index 0dab1b7..ba6f99f 100644 (file)
@@ -353,14 +353,20 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
        switch (msr_index) {
        case MSR_IA32_VMX_EXIT_CTLS:
        case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-               ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+               ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
                break;
        case MSR_IA32_VMX_ENTRY_CTLS:
        case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-               ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+               ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
                break;
        case MSR_IA32_VMX_PROCBASED_CTLS2:
-               ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+               break;
+       case MSR_IA32_VMX_PINBASED_CTLS:
+               ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+               break;
+       case MSR_IA32_VMX_VMFUNC:
+               ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
                break;
        }
 
index ccb03d6..eedcebf 100644 (file)
@@ -2583,8 +2583,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         * Guest state is invalid and unrestricted guest is disabled,
         * which means L1 attempted VMEntry to L2 with invalid state.
         * Fail the VMEntry.
+        *
+        * However, when force loading the guest state (SMM exit or
+        * loading nested state after migration), it is possible to
+        * have invalid guest state now, which will be later fixed by
+        * restoring L2 register state.
         */
-       if (CC(!vmx_guest_state_valid(vcpu))) {
+       if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
                *entry_failure_code = ENTRY_FAIL_DEFAULT;
                return -EINVAL;
        }
@@ -4351,6 +4356,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
                                vmcs12->vm_exit_msr_load_count))
                nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
+
+       to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
 }
 
 static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
@@ -4899,14 +4906,7 @@ out_vmcs02:
        return -ENOMEM;
 }
 
-/*
- * Emulate the VMXON instruction.
- * Currently, we just remember that VMX is active, and do not save or even
- * inspect the argument to VMXON (the so-called "VMXON pointer") because we
- * do not currently need to store anything in that guest-allocated memory
- * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their
- * argument is different from the VMXON pointer (which the spec says they do).
- */
+/* Emulate the VMXON instruction. */
 static int handle_vmon(struct kvm_vcpu *vcpu)
 {
        int ret;
@@ -5903,6 +5903,12 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
        case EXIT_REASON_VMFUNC:
                /* VM functions are emulated through L2->L0 vmexits. */
                return true;
+       case EXIT_REASON_BUS_LOCK:
+               /*
+                * At present, bus lock VM exit is never exposed to L1.
+                * Handle L2's bus locks in L0 directly.
+                */
+               return true;
        default:
                break;
        }
index 0c2c0d5..9ecfcf1 100644 (file)
@@ -1323,7 +1323,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
        vmx_prepare_switch_to_host(to_vmx(vcpu));
 }
 
-static bool emulation_required(struct kvm_vcpu *vcpu)
+bool vmx_emulation_required(struct kvm_vcpu *vcpu)
 {
        return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
 }
@@ -1367,7 +1367,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        vmcs_writel(GUEST_RFLAGS, rflags);
 
        if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
-               vmx->emulation_required = emulation_required(vcpu);
+               vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
@@ -1837,10 +1837,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                                    &msr_info->data))
                        return 1;
                /*
-                * Enlightened VMCS v1 doesn't have certain fields, but buggy
-                * Hyper-V versions are still trying to use corresponding
-                * features when they are exposed. Filter out the essential
-                * minimum.
+                * Enlightened VMCS v1 doesn't have certain VMCS fields but
+                * instead of just ignoring the features, different Hyper-V
+                * versions are either trying to use them and fail or do some
+                * sanity checking and refuse to boot. Filter all unsupported
+                * features out.
                 */
                if (!msr_info->host_initiated &&
                    vmx->nested.enlightened_vmcs_enabled)
@@ -3077,7 +3078,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 
        /* depends on vcpu->arch.cr0 to be set to a new value */
-       vmx->emulation_required = emulation_required(vcpu);
+       vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
 static int vmx_get_max_tdp_level(void)
@@ -3330,7 +3331,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int
 {
        __vmx_set_segment(vcpu, var, seg);
 
-       to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
+       to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -6621,10 +6622,24 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                     vmx->loaded_vmcs->soft_vnmi_blocked))
                vmx->loaded_vmcs->entry_time = ktime_get();
 
-       /* Don't enter VMX if guest state is invalid, let the exit handler
-          start emulation until we arrive back to a valid state */
-       if (vmx->emulation_required)
+       /*
+        * Don't enter VMX if guest state is invalid, let the exit handler
+        * start emulation until we arrive back to a valid state.  Synthesize a
+        * consistency check VM-Exit due to invalid guest state and bail.
+        */
+       if (unlikely(vmx->emulation_required)) {
+
+               /* We don't emulate invalid state of a nested guest */
+               vmx->fail = is_guest_mode(vcpu);
+
+               vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
+               vmx->exit_reason.failed_vmentry = 1;
+               kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1);
+               vmx->exit_qualification = ENTRY_FAIL_DEFAULT;
+               kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2);
+               vmx->exit_intr_info = 0;
                return EXIT_FASTPATH_NONE;
+       }
 
        trace_kvm_entry(vcpu);
 
index 4858c5f..592217f 100644 (file)
@@ -248,12 +248,8 @@ struct vcpu_vmx {
         * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
         * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
         * be loaded into hardware if those conditions aren't met.
-        * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
-        * into hardware when running the guest.  guest_uret_msrs[] is resorted
-        * whenever the number of "active" uret MSRs is modified.
         */
        struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-       int                   nr_active_uret_msrs;
        bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
        u64                   msr_host_kernel_gs_base;
@@ -359,6 +355,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
                        unsigned long fs_base, unsigned long gs_base);
 int vmx_get_cpl(struct kvm_vcpu *vcpu);
+bool vmx_emulation_required(struct kvm_vcpu *vcpu);
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
index 28ef141..aabd3a2 100644 (file)
@@ -1332,6 +1332,13 @@ static const u32 msrs_to_save_all[] = {
        MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
        MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
        MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+
+       MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
+       MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
+       MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
+       MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
+       MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
+       MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
 };
 
 static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
@@ -2969,7 +2976,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
                                       offsetof(struct compat_vcpu_info, time));
        if (vcpu->xen.vcpu_time_info_set)
                kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
-       if (v == kvm_get_vcpu(v->kvm, 0))
+       if (!v->vcpu_idx)
                kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
 }
@@ -7658,6 +7665,13 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 
                /* Process a latched INIT or SMI, if any.  */
                kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+               /*
+                * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
+                * on SMM exit we still need to reload them from
+                * guest memory
+                */
+               vcpu->arch.pdptrs_from_userspace = false;
        }
 
        kvm_mmu_reset_context(vcpu);
@@ -10652,6 +10666,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        int r;
 
        vcpu->arch.last_vmentry_cpu = -1;
+       vcpu->arch.regs_avail = ~0;
+       vcpu->arch.regs_dirty = ~0;
 
        if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -10893,6 +10909,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0xfff0);
 
+       vcpu->arch.cr3 = 0;
+       kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
        /*
         * CR0.CD/NW are set on RESET, preserved on INIT.  Note, some versions
         * of Intel's SDM list CD/NW as being set on INIT, but they contradict
@@ -11139,9 +11158,15 @@ void kvm_arch_free_vm(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+       int ret;
+
        if (type)
                return -EINVAL;
 
+       ret = kvm_page_track_init(kvm);
+       if (ret)
+               return ret;
+
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
@@ -11174,7 +11199,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        kvm_apicv_init(kvm);
        kvm_hv_init_vm(kvm);
-       kvm_page_track_init(kvm);
        kvm_mmu_init_vm(kvm);
        kvm_xen_init_vm(kvm);
 
index 0fe6aac..9ea5738 100644 (file)
@@ -1341,9 +1341,10 @@ st:                      if (is_imm8(insn->off))
                        if (insn->imm == (BPF_AND | BPF_FETCH) ||
                            insn->imm == (BPF_OR | BPF_FETCH) ||
                            insn->imm == (BPF_XOR | BPF_FETCH)) {
-                               u8 *branch_target;
                                bool is64 = BPF_SIZE(insn->code) == BPF_DW;
                                u32 real_src_reg = src_reg;
+                               u32 real_dst_reg = dst_reg;
+                               u8 *branch_target;
 
                                /*
                                 * Can't be implemented with a single x86 insn.
@@ -1354,11 +1355,13 @@ st:                     if (is_imm8(insn->off))
                                emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
                                if (src_reg == BPF_REG_0)
                                        real_src_reg = BPF_REG_AX;
+                               if (dst_reg == BPF_REG_0)
+                                       real_dst_reg = BPF_REG_AX;
 
                                branch_target = prog;
                                /* Load old value */
                                emit_ldx(&prog, BPF_SIZE(insn->code),
-                                        BPF_REG_0, dst_reg, insn->off);
+                                        BPF_REG_0, real_dst_reg, insn->off);
                                /*
                                 * Perform the (commutative) operation locally,
                                 * put the result in the AUX_REG.
@@ -1369,7 +1372,8 @@ st:                       if (is_imm8(insn->off))
                                      add_2reg(0xC0, AUX_REG, real_src_reg));
                                /* Attempt to swap in new value */
                                err = emit_atomic(&prog, BPF_CMPXCHG,
-                                                 dst_reg, AUX_REG, insn->off,
+                                                 real_dst_reg, AUX_REG,
+                                                 insn->off,
                                                  BPF_SIZE(insn->code));
                                if (WARN_ON(err))
                                        return err;
@@ -1383,11 +1387,10 @@ st:                     if (is_imm8(insn->off))
                                /* Restore R0 after clobbering RAX */
                                emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
                                break;
-
                        }
 
                        err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-                                                 insn->off, BPF_SIZE(insn->code));
+                                         insn->off, BPF_SIZE(insn->code));
                        if (err)
                                return err;
                        break;
@@ -1744,7 +1747,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-                          struct bpf_prog *p, int stack_size, bool mod_ret)
+                          struct bpf_prog *p, int stack_size, bool save_ret)
 {
        u8 *prog = *pprog;
        u8 *jmp_insn;
@@ -1777,11 +1780,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
        if (emit_call(&prog, p->bpf_func, prog))
                return -EINVAL;
 
-       /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+       /*
+        * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
         * of the previous call which is then passed on the stack to
         * the next BPF program.
+        *
+        * BPF_TRAMP_FENTRY trampoline may need to return the return
+        * value of BPF_PROG_TYPE_STRUCT_OPS prog.
         */
-       if (mod_ret)
+       if (save_ret)
                emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
        /* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1835,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-                     struct bpf_tramp_progs *tp, int stack_size)
+                     struct bpf_tramp_progs *tp, int stack_size,
+                     bool save_ret)
 {
        int i;
        u8 *prog = *pprog;
 
        for (i = 0; i < tp->nr_progs; i++) {
-               if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+               if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+                                   save_ret))
                        return -EINVAL;
        }
        *pprog = prog;
@@ -1877,6 +1886,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
        return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+       if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+           (flags & BPF_TRAMP_F_SKIP_FRAME))
+               return false;
+
+       /*
+        * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+        * and it must be used alone.
+        */
+       if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+           (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+               return false;
+
+       return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1975,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
        struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
        u8 **branches = NULL;
        u8 *prog;
+       bool save_ret;
 
        /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
        if (nr_args > 6)
                return -ENOTSUPP;
 
-       if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-           (flags & BPF_TRAMP_F_SKIP_FRAME))
+       if (!is_valid_bpf_tramp_flags(flags))
                return -EINVAL;
 
-       if (flags & BPF_TRAMP_F_CALL_ORIG)
-               stack_size += 8; /* room for return value of orig_call */
+       /* room for return value of orig_call or fentry prog */
+       save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+       if (save_ret)
+               stack_size += 8;
 
        if (flags & BPF_TRAMP_F_IP_ARG)
                stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2033,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
        }
 
        if (fentry->nr_progs)
-               if (invoke_bpf(m, &prog, fentry, stack_size))
+               if (invoke_bpf(m, &prog, fentry, stack_size,
+                              flags & BPF_TRAMP_F_RET_FENTRY_RET))
                        return -EINVAL;
 
        if (fmod_ret->nr_progs) {
@@ -2052,7 +2081,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
        }
 
        if (fexit->nr_progs)
-               if (invoke_bpf(m, &prog, fexit, stack_size)) {
+               if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
                        ret = -EINVAL;
                        goto cleanup;
                }
@@ -2072,9 +2101,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
                        ret = -EINVAL;
                        goto cleanup;
                }
-               /* restore original return value back into RAX */
-               emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
        }
+       /* restore return value of orig_call or fentry prog back into RAX */
+       if (save_ret)
+               emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
        EMIT1(0x5B); /* pop rbx */
        EMIT1(0xC9); /* leave */
index bb88198..aa4e1a5 100644 (file)
@@ -778,7 +778,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                                    in_place ? DMA_BIDIRECTIONAL
                                             : DMA_TO_DEVICE);
                if (ret)
-                       goto e_ctx;
+                       goto e_aad;
 
                if (in_place) {
                        dst = src;
@@ -863,7 +863,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        op.u.aes.size = 0;
        ret = cmd_q->ccp->vdata->perform->aes(&op);
        if (ret)
-               goto e_dst;
+               goto e_final_wa;
 
        if (aes->action == CCP_AES_ACTION_ENCRYPT) {
                /* Put the ciphered tag after the ciphertext. */
@@ -873,17 +873,19 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
                                           DMA_BIDIRECTIONAL);
                if (ret)
-                       goto e_tag;
+                       goto e_final_wa;
                ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
-               if (ret)
-                       goto e_tag;
+               if (ret) {
+                       ccp_dm_free(&tag);
+                       goto e_final_wa;
+               }
 
                ret = crypto_memneq(tag.address, final_wa.address,
                                    authsize) ? -EBADMSG : 0;
                ccp_dm_free(&tag);
        }
 
-e_tag:
+e_final_wa:
        ccp_dm_free(&final_wa);
 
 e_dst:
index f5cfc06..8ebf369 100644 (file)
@@ -468,15 +468,8 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off)
        mutex_lock(&chip->i2c_lock);
        ret = regmap_read(chip->regmap, inreg, &reg_val);
        mutex_unlock(&chip->i2c_lock);
-       if (ret < 0) {
-               /*
-                * NOTE:
-                * diagnostic already emitted; that's all we should
-                * do unless gpio_*_value_cansleep() calls become different
-                * from their nonsleeping siblings (and report faults).
-                */
-               return 0;
-       }
+       if (ret < 0)
+               return ret;
 
        return !!(reg_val & bit);
 }
index 3335bd5..ce63cbd 100644 (file)
@@ -689,6 +689,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev)
        struct device_node *pctlnp = of_get_parent(np);
        struct pinctrl_dev *pctldev = NULL;
        struct rockchip_pin_bank *bank = NULL;
+       struct rockchip_pin_output_deferred *cfg;
        static int gpio;
        int id, ret;
 
@@ -716,12 +717,33 @@ static int rockchip_gpio_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       /*
+        * Prevent clashes with a deferred output setting
+        * being added right at this moment.
+        */
+       mutex_lock(&bank->deferred_lock);
+
        ret = rockchip_gpiolib_register(bank);
        if (ret) {
                clk_disable_unprepare(bank->clk);
+               mutex_unlock(&bank->deferred_lock);
                return ret;
        }
 
+       while (!list_empty(&bank->deferred_output)) {
+               cfg = list_first_entry(&bank->deferred_output,
+                                      struct rockchip_pin_output_deferred, head);
+               list_del(&cfg->head);
+
+               ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg);
+               if (ret)
+                       dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin, cfg->arg);
+
+               kfree(cfg);
+       }
+
+       mutex_unlock(&bank->deferred_lock);
+
        platform_set_drvdata(pdev, bank);
        dev_info(dev, "probed %pOF\n", np);
 
index 79b138f..05c007b 100644 (file)
@@ -255,13 +255,13 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        if (!privdata->cl_data)
                return -ENOMEM;
 
-       rc = devm_add_action_or_reset(&pdev->dev, amd_mp2_pci_remove, privdata);
+       mp2_select_ops(privdata);
+
+       rc = amd_sfh_hid_client_init(privdata);
        if (rc)
                return rc;
 
-       mp2_select_ops(privdata);
-
-       return amd_sfh_hid_client_init(privdata);
+       return devm_add_action_or_reset(&pdev->dev, amd_mp2_pci_remove, privdata);
 }
 
 static int __maybe_unused amd_mp2_pci_resume(struct device *dev)
index 833fcf0..6ccfa0c 100644 (file)
@@ -336,12 +336,19 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field,
 
 /*
  * MacBook JIS keyboard has wrong logical maximum
+ * Magic Keyboard JIS has wrong logical maximum
  */
 static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize)
 {
        struct apple_sc *asc = hid_get_drvdata(hdev);
 
+       if(*rsize >=71 && rdesc[70] == 0x65 && rdesc[64] == 0x65) {
+               hid_info(hdev,
+                        "fixing up Magic Keyboard JIS report descriptor\n");
+               rdesc[64] = rdesc[70] = 0xe7;
+       }
+
        if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 &&
                        rdesc[53] == 0x65 && rdesc[59] == 0x65) {
                hid_info(hdev,
index 0790fbd..467d789 100644 (file)
@@ -56,15 +56,22 @@ static int betopff_init(struct hid_device *hid)
 {
        struct betopff_device *betopff;
        struct hid_report *report;
-       struct hid_input *hidinput =
-                       list_first_entry(&hid->inputs, struct hid_input, list);
+       struct hid_input *hidinput;
        struct list_head *report_list =
                        &hid->report_enum[HID_OUTPUT_REPORT].report_list;
-       struct input_dev *dev = hidinput->input;
+       struct input_dev *dev;
        int field_count = 0;
        int error;
        int i, j;
 
+       if (list_empty(&hid->inputs)) {
+               hid_err(hid, "no inputs found\n");
+               return -ENODEV;
+       }
+
+       hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+       dev = hidinput->input;
+
        if (list_empty(report_list)) {
                hid_err(hid, "no output reports found\n");
                return -ENODEV;
index 95e0807..d70cd3d 100644 (file)
@@ -198,7 +198,9 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data,
        }
 
        ret = u2fzero_recv(dev, &req, &resp);
-       if (ret < 0)
+
+       /* ignore errors or packets without data */
+       if (ret < offsetof(struct u2f_hid_msg, init.data))
                return 0;
 
        /* only take the minimum amount of data it is safe to take */
index fd51769..33a6908 100644 (file)
@@ -4746,6 +4746,12 @@ static const struct wacom_features wacom_features_0x393 =
        { "Wacom Intuos Pro S", 31920, 19950, 8191, 63,
          INTUOSP2S_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 7,
          .touch_max = 10 };
+static const struct wacom_features wacom_features_0x3c6 =
+       { "Wacom Intuos BT S", 15200, 9500, 4095, 63,
+         INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 };
+static const struct wacom_features wacom_features_0x3c8 =
+       { "Wacom Intuos BT M", 21600, 13500, 4095, 63,
+         INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 };
 
 static const struct wacom_features wacom_features_HID_ANY_ID =
        { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID };
@@ -4919,6 +4925,8 @@ const struct hid_device_id wacom_ids[] = {
        { USB_DEVICE_WACOM(0x37A) },
        { USB_DEVICE_WACOM(0x37B) },
        { BT_DEVICE_WACOM(0x393) },
+       { BT_DEVICE_WACOM(0x3c6) },
+       { BT_DEVICE_WACOM(0x3c8) },
        { USB_DEVICE_WACOM(0x4001) },
        { USB_DEVICE_WACOM(0x4004) },
        { USB_DEVICE_WACOM(0x5000) },
index c40791b..704ce59 100644 (file)
@@ -1746,15 +1746,16 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
        }
 }
 
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
 {
        struct rdma_id_private *dev_id_priv;
 
+       lockdep_assert_held(&lock);
+
        /*
         * Remove from listen_any_list to prevent added devices from spawning
         * additional listen requests.
         */
-       mutex_lock(&lock);
        list_del(&id_priv->list);
 
        while (!list_empty(&id_priv->listen_list)) {
@@ -1768,6 +1769,12 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
                rdma_destroy_id(&dev_id_priv->id);
                mutex_lock(&lock);
        }
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+       mutex_lock(&lock);
+       _cma_cancel_listens(id_priv);
        mutex_unlock(&lock);
 }
 
@@ -1776,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
 {
        switch (state) {
        case RDMA_CM_ADDR_QUERY:
+               /*
+                * We can avoid doing the rdma_addr_cancel() based on state,
+                * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+                * Notice that the addr_handler work could still be exiting
+                * outside this state, however due to the interaction with the
+                * handler_mutex the work is guaranteed not to touch id_priv
+                * during exit.
+                */
                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
                break;
        case RDMA_CM_ROUTE_QUERY:
@@ -1810,6 +1825,8 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 static void destroy_mc(struct rdma_id_private *id_priv,
                       struct cma_multicast *mc)
 {
+       bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
+
        if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
                ib_sa_free_multicast(mc->sa_mc);
 
@@ -1826,7 +1843,10 @@ static void destroy_mc(struct rdma_id_private *id_priv,
 
                        cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
                                     &mgid);
-                       cma_igmp_send(ndev, &mgid, false);
+
+                       if (!send_only)
+                               cma_igmp_send(ndev, &mgid, false);
+
                        dev_put(ndev);
                }
 
@@ -2574,7 +2594,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv)
        return 0;
 
 err_listen:
-       list_del(&id_priv->list);
+       _cma_cancel_listens(id_priv);
        mutex_unlock(&lock);
        if (to_destroy)
                rdma_destroy_id(&to_destroy->id);
@@ -3413,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                if (dst_addr->sa_family == AF_IB) {
                        ret = cma_resolve_ib_addr(id_priv);
                } else {
+                       /*
+                        * The FSM can return back to RDMA_CM_ADDR_BOUND after
+                        * rdma_resolve_ip() is called, eg through the error
+                        * path in addr_handler(). If this happens the existing
+                        * request must be canceled before issuing a new one.
+                        * Since canceling a request is a bit slow and this
+                        * oddball path is rare, keep track once a request has
+                        * been issued. The track turns out to be a permanent
+                        * state since this is the only cancel as it is
+                        * immediately before rdma_resolve_ip().
+                        */
+                       if (id_priv->used_resolve_ip)
+                               rdma_addr_cancel(&id->route.addr.dev_addr);
+                       else
+                               id_priv->used_resolve_ip = 1;
                        ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
                                              &id->route.addr.dev_addr,
                                              timeout_ms, addr_handler,
@@ -3771,9 +3806,13 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
        int ret;
 
        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+               struct sockaddr_in any_in = {
+                       .sin_family = AF_INET,
+                       .sin_addr.s_addr = htonl(INADDR_ANY),
+               };
+
                /* For a well behaved ULP state will be RDMA_CM_IDLE */
-               id->route.addr.src_addr.ss_family = AF_INET;
-               ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+               ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
                if (ret)
                        return ret;
                if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
index 5c463da..f92f101 100644 (file)
@@ -91,6 +91,7 @@ struct rdma_id_private {
        u8                      afonly;
        u8                      timeout;
        u8                      min_rnr_timer;
+       u8 used_resolve_ip;
        enum ib_gid_type        gid_type;
 
        /*
index e74ddbe..15b0cb0 100644 (file)
@@ -876,14 +876,14 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
        struct hfi1_ipoib_txq *txq = &priv->txqs[q];
        u64 completed = atomic64_read(&txq->complete_txreqs);
 
-       dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n",
-                   (unsigned long long)txq, q,
+       dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
+                   txq, q,
                    __netif_subqueue_stopped(dev, txq->q_idx),
                    atomic_read(&txq->stops),
                    atomic_read(&txq->no_desc),
                    atomic_read(&txq->ring_full));
-       dd_dev_info(priv->dd, "sde %llx engine %u\n",
-                   (unsigned long long)txq->sde,
+       dd_dev_info(priv->dd, "sde %p engine %u\n",
+                   txq->sde,
                    txq->sde ? txq->sde->this_idx : 0);
        dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
        dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
index 1e9c3c5..d763f09 100644 (file)
@@ -326,19 +326,30 @@ static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector,
        INIT_LIST_HEAD(&hr_cq->rq_list);
 }
 
-static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
-                        struct hns_roce_ib_create_cq *ucmd)
+static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+                       struct hns_roce_ib_create_cq *ucmd)
 {
        struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
 
-       if (udata) {
-               if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size))
-                       hr_cq->cqe_size = ucmd->cqe_size;
-               else
-                       hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
-       } else {
+       if (!udata) {
                hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+               return 0;
+       }
+
+       if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) {
+               if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE &&
+                   ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) {
+                       ibdev_err(&hr_dev->ib_dev,
+                                 "invalid cqe size %u.\n", ucmd->cqe_size);
+                       return -EINVAL;
+               }
+
+               hr_cq->cqe_size = ucmd->cqe_size;
+       } else {
+               hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
        }
+
+       return 0;
 }
 
 int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
@@ -366,7 +377,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 
        set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
 
-       set_cqe_size(hr_cq, udata, &ucmd);
+       ret = set_cqe_size(hr_cq, udata, &ucmd);
+       if (ret)
+               return ret;
 
        ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
        if (ret) {
index 5b99531..d5f3faa 100644 (file)
@@ -3299,7 +3299,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
                        dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
                                          hr_cq->ib_cq.cqe);
                        owner_bit = hr_reg_read(dest, CQE_OWNER);
-                       memcpy(dest, cqe, sizeof(*cqe));
+                       memcpy(dest, cqe, hr_cq->cqe_size);
                        hr_reg_write(dest, CQE_OWNER, owner_bit);
                }
        }
@@ -4397,7 +4397,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
        hr_qp->path_mtu = ib_mtu;
 
        mtu = ib_mtu_enum_to_int(ib_mtu);
-       if (WARN_ON(mtu < 0))
+       if (WARN_ON(mtu <= 0))
+               return -EINVAL;
+#define MAX_LP_MSG_LEN 65536
+       /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
+       lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
+       if (WARN_ON(lp_pktn_ini >= 0xF))
                return -EINVAL;
 
        if (attr_mask & IB_QP_PATH_MTU) {
@@ -4405,10 +4410,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
                hr_reg_clear(qpc_mask, QPC_MTU);
        }
 
-#define MAX_LP_MSG_LEN 65536
-       /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */
-       lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
-
        hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
        hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
 
index 6b62299..6dea0a4 100644 (file)
@@ -3496,7 +3496,7 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
             original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
             last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
             last_ae == IRDMA_AE_BAD_CLOSE ||
-            last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->reset)) {
+            last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
                issue_close = 1;
                iwqp->cm_id = NULL;
                qp->term_flags = 0;
@@ -4250,7 +4250,7 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
                                       teardown_entry);
                attr.qp_state = IB_QPS_ERR;
                irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
-               if (iwdev->reset)
+               if (iwdev->rf->reset)
                        irdma_cm_disconn(cm_node->iwqp);
                irdma_rem_ref_cm_node(cm_node);
        }
index 00de5ee..7de525a 100644 (file)
@@ -176,6 +176,14 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
        case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
                qp->flush_code = FLUSH_GENERAL_ERR;
                break;
+       case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+               qp->flush_code = FLUSH_RETRY_EXC_ERR;
+               break;
+       case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+       case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+       case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+               qp->flush_code = FLUSH_MW_BIND_ERR;
+               break;
        default:
                qp->flush_code = FLUSH_FATAL_ERR;
                break;
@@ -1489,7 +1497,7 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi)
 
        irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false);
        if (irdma_initialize_ieq(iwdev)) {
-               iwdev->reset = true;
+               iwdev->rf->reset = true;
                rf->gen_ops.request_reset(rf);
        }
 }
@@ -1632,13 +1640,13 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev)
        case IEQ_CREATED:
                if (!iwdev->roce_mode)
                        irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ,
-                                            iwdev->reset);
+                                            iwdev->rf->reset);
                fallthrough;
        case ILQ_CREATED:
                if (!iwdev->roce_mode)
                        irdma_puda_dele_rsrc(&iwdev->vsi,
                                             IRDMA_PUDA_RSRC_TYPE_ILQ,
-                                            iwdev->reset);
+                                            iwdev->rf->reset);
                break;
        default:
                ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state);
index bddf881..d219f64 100644 (file)
@@ -55,7 +55,7 @@ static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client,
 
        iwdev = to_iwdev(ibdev);
        if (reset)
-               iwdev->reset = true;
+               iwdev->rf->reset = true;
 
        iwdev->iw_status = 0;
        irdma_port_ibevent(iwdev);
index 743d9e1..b678fe7 100644 (file)
@@ -346,7 +346,6 @@ struct irdma_device {
        bool roce_mode:1;
        bool roce_dcqcn_en:1;
        bool dcb:1;
-       bool reset:1;
        bool iw_ooo:1;
        enum init_completion_state init_state;
 
index ff705f3..3dcbb1f 100644 (file)
@@ -102,6 +102,8 @@ enum irdma_flush_opcode {
        FLUSH_REM_OP_ERR,
        FLUSH_LOC_LEN_ERR,
        FLUSH_FATAL_ERR,
+       FLUSH_RETRY_EXC_ERR,
+       FLUSH_MW_BIND_ERR,
 };
 
 enum irdma_cmpl_status {
index e944709..ac91ea5 100644 (file)
@@ -2507,7 +2507,7 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
        struct irdma_qp *qp = sc_qp->qp_uk.back_qp;
        struct ib_qp_attr attr;
 
-       if (qp->iwdev->reset)
+       if (qp->iwdev->rf->reset)
                return;
        attr.qp_state = IB_QPS_ERR;
 
index 4fc3234..7110ebf 100644 (file)
@@ -535,8 +535,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
        irdma_qp_rem_ref(&iwqp->ibqp);
        wait_for_completion(&iwqp->free_qp);
        irdma_free_lsmm_rsrc(iwqp);
-       if (!iwdev->reset)
-               irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+       irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
 
        if (!iwqp->user_mode) {
                if (iwqp->iwscq) {
@@ -2035,7 +2034,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
                /* Kmode allocations */
                int rsize;
 
-               if (entries > rf->max_cqe) {
+               if (entries < 1 || entries > rf->max_cqe) {
                        err_code = -EINVAL;
                        goto cq_free_rsrc;
                }
@@ -3353,6 +3352,10 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
                return IB_WC_LOC_LEN_ERR;
        case FLUSH_GENERAL_ERR:
                return IB_WC_WR_FLUSH_ERR;
+       case FLUSH_RETRY_EXC_ERR:
+               return IB_WC_RETRY_EXC_ERR;
+       case FLUSH_MW_BIND_ERR:
+               return IB_WC_MW_BIND_ERR;
        case FLUSH_FATAL_ERR:
        default:
                return IB_WC_FATAL_ERR;
index 452e235..0a3b281 100644 (file)
@@ -403,7 +403,7 @@ static ssize_t diagc_attr_store(struct ib_device *ibdev, u32 port_num,
 }
 
 #define QIB_DIAGC_ATTR(N)                                                      \
-       static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL);    \
+       static_assert(__same_type(((struct qib_ibport *)0)->rvp.n_##N, u64));  \
        static struct qib_diagc_attr qib_diagc_attr_##N = {                    \
                .attr = __ATTR(N, 0664, diagc_attr_show, diagc_attr_store),    \
                .counter =                                                     \
index 84dd682..b350081 100644 (file)
@@ -90,7 +90,7 @@ struct usnic_ib_dev {
 
 struct usnic_ib_vf {
        struct usnic_ib_dev             *pf;
-       spinlock_t                      lock;
+       struct mutex                    lock;
        struct usnic_vnic               *vnic;
        unsigned int                    qp_grp_ref_cnt;
        struct usnic_ib_pd              *pd;
index 228e9a3..d346dd4 100644 (file)
@@ -572,7 +572,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
        }
 
        vf->pf = pf;
-       spin_lock_init(&vf->lock);
+       mutex_init(&vf->lock);
        mutex_lock(&pf->usdev_lock);
        list_add_tail(&vf->link, &pf->vf_dev_list);
        /*
index 06a4e9d..756a83b 100644 (file)
@@ -196,7 +196,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
                for (i = 0; dev_list[i]; i++) {
                        dev = dev_list[i];
                        vf = dev_get_drvdata(dev);
-                       spin_lock(&vf->lock);
+                       mutex_lock(&vf->lock);
                        vnic = vf->vnic;
                        if (!usnic_vnic_check_room(vnic, res_spec)) {
                                usnic_dbg("Found used vnic %s from %s\n",
@@ -208,10 +208,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
                                                             vf, pd, res_spec,
                                                             trans_spec);
 
-                               spin_unlock(&vf->lock);
+                               mutex_unlock(&vf->lock);
                                goto qp_grp_check;
                        }
-                       spin_unlock(&vf->lock);
+                       mutex_unlock(&vf->lock);
 
                }
                usnic_uiom_free_dev_list(dev_list);
@@ -220,7 +220,7 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
 
        /* Try to find resources on an unused vf */
        list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
-               spin_lock(&vf->lock);
+               mutex_lock(&vf->lock);
                vnic = vf->vnic;
                if (vf->qp_grp_ref_cnt == 0 &&
                    usnic_vnic_check_room(vnic, res_spec) == 0) {
@@ -228,10 +228,10 @@ find_free_vf_and_create_qp_grp(struct ib_qp *qp,
                                                     vf, pd, res_spec,
                                                     trans_spec);
 
-                       spin_unlock(&vf->lock);
+                       mutex_unlock(&vf->lock);
                        goto qp_grp_check;
                }
-               spin_unlock(&vf->lock);
+               mutex_unlock(&vf->lock);
        }
 
        usnic_info("No free qp grp found on %s\n",
@@ -253,9 +253,9 @@ static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
 
        WARN_ON(qp_grp->state != IB_QPS_RESET);
 
-       spin_lock(&vf->lock);
+       mutex_lock(&vf->lock);
        usnic_ib_qp_grp_destroy(qp_grp);
-       spin_unlock(&vf->lock);
+       mutex_unlock(&vf->lock);
 }
 
 static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
index 559db92..fdfa39e 100644 (file)
@@ -183,7 +183,6 @@ struct apple_dart_master_cfg {
 
 static struct platform_driver apple_dart_driver;
 static const struct iommu_ops apple_dart_iommu_ops;
-static const struct iommu_flush_ops apple_dart_tlb_ops;
 
 static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom)
 {
@@ -338,22 +337,6 @@ static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
        apple_dart_domain_flush_tlb(to_dart_domain(domain));
 }
 
-static void apple_dart_tlb_flush_all(void *cookie)
-{
-       apple_dart_domain_flush_tlb(cookie);
-}
-
-static void apple_dart_tlb_flush_walk(unsigned long iova, size_t size,
-                                     size_t granule, void *cookie)
-{
-       apple_dart_domain_flush_tlb(cookie);
-}
-
-static const struct iommu_flush_ops apple_dart_tlb_ops = {
-       .tlb_flush_all = apple_dart_tlb_flush_all,
-       .tlb_flush_walk = apple_dart_tlb_flush_walk,
-};
-
 static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
                                           dma_addr_t iova)
 {
@@ -435,7 +418,6 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
                .ias = 32,
                .oas = 36,
                .coherent_walk = 1,
-               .tlb = &apple_dart_tlb_ops,
                .iommu_dev = dart->dev,
        };
 
@@ -661,16 +643,34 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
        return -EINVAL;
 }
 
+static DEFINE_MUTEX(apple_dart_groups_lock);
+
+static void apple_dart_release_group(void *iommu_data)
+{
+       int i, sid;
+       struct apple_dart_stream_map *stream_map;
+       struct apple_dart_master_cfg *group_master_cfg = iommu_data;
+
+       mutex_lock(&apple_dart_groups_lock);
+
+       for_each_stream_map(i, group_master_cfg, stream_map)
+               for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+                       stream_map->dart->sid2group[sid] = NULL;
+
+       kfree(iommu_data);
+       mutex_unlock(&apple_dart_groups_lock);
+}
+
 static struct iommu_group *apple_dart_device_group(struct device *dev)
 {
-       static DEFINE_MUTEX(lock);
        int i, sid;
        struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
        struct apple_dart_stream_map *stream_map;
+       struct apple_dart_master_cfg *group_master_cfg;
        struct iommu_group *group = NULL;
        struct iommu_group *res = ERR_PTR(-EINVAL);
 
-       mutex_lock(&lock);
+       mutex_lock(&apple_dart_groups_lock);
 
        for_each_stream_map(i, cfg, stream_map) {
                for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) {
@@ -698,6 +698,20 @@ static struct iommu_group *apple_dart_device_group(struct device *dev)
 #endif
                group = generic_device_group(dev);
 
+       res = ERR_PTR(-ENOMEM);
+       if (!group)
+               goto out;
+
+       group_master_cfg = kzalloc(sizeof(*group_master_cfg), GFP_KERNEL);
+       if (!group_master_cfg) {
+               iommu_group_put(group);
+               goto out;
+       }
+
+       memcpy(group_master_cfg, cfg, sizeof(*group_master_cfg));
+       iommu_group_set_iommudata(group, group_master_cfg,
+               apple_dart_release_group);
+
        for_each_stream_map(i, cfg, stream_map)
                for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
                        stream_map->dart->sid2group[sid] = group;
@@ -705,7 +719,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev)
        res = group;
 
 out:
-       mutex_unlock(&lock);
+       mutex_unlock(&apple_dart_groups_lock);
        return res;
 }
 
index 0ec5514..b7708b9 100644 (file)
@@ -1942,18 +1942,18 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
        reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
        if (fault_type == INTR_REMAP)
-               pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
+               pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr >> 48,
                       fault_reason, reason);
        else if (pasid == INVALID_IOASID)
-               pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+               pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
                       type ? "DMA Read" : "DMA Write",
                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr,
                       fault_reason, reason);
        else
-               pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+               pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
                       type ? "DMA Read" : "DMA Write", pasid,
                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
                       PCI_FUNC(source_id & 0xFF), addr,
index d402e45..7d0ab19 100644 (file)
@@ -1140,8 +1140,8 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result,
                        continue;
                length = 0;
                switch (c) {
-               /* SOF0: baseline JPEG */
-               case SOF0:
+               /* JPEG_MARKER_SOF0: baseline JPEG */
+               case JPEG_MARKER_SOF0:
                        if (get_word_be(&jpeg_buffer, &word))
                                break;
                        length = (long)word - 2;
@@ -1172,7 +1172,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result,
                        notfound = 0;
                        break;
 
-               case DQT:
+               case JPEG_MARKER_DQT:
                        if (get_word_be(&jpeg_buffer, &word))
                                break;
                        length = (long)word - 2;
@@ -1185,7 +1185,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result,
                        skip(&jpeg_buffer, length);
                        break;
 
-               case DHT:
+               case JPEG_MARKER_DHT:
                        if (get_word_be(&jpeg_buffer, &word))
                                break;
                        length = (long)word - 2;
@@ -1198,15 +1198,15 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result,
                        skip(&jpeg_buffer, length);
                        break;
 
-               case SOS:
+               case JPEG_MARKER_SOS:
                        sos = jpeg_buffer.curr - 2; /* 0xffda */
                        break;
 
                /* skip payload-less markers */
-               case RST ... RST + 7:
-               case SOI:
-               case EOI:
-               case TEM:
+               case JPEG_MARKER_RST ... JPEG_MARKER_RST + 7:
+               case JPEG_MARKER_SOI:
+               case JPEG_MARKER_EOI:
+               case JPEG_MARKER_TEM:
                        break;
 
                /* skip uninteresting payload markers */
index a77d93c..8473a01 100644 (file)
 #define EXYNOS3250_IRQ_TIMEOUT         0x10000000
 
 /* a selection of JPEG markers */
-#define TEM                            0x01
-#define SOF0                           0xc0
-#define DHT                            0xc4
-#define RST                            0xd0
-#define SOI                            0xd8
-#define EOI                            0xd9
-#define        SOS                             0xda
-#define DQT                            0xdb
-#define DHP                            0xde
+#define JPEG_MARKER_TEM                                0x01
+#define JPEG_MARKER_SOF0                               0xc0
+#define JPEG_MARKER_DHT                                0xc4
+#define JPEG_MARKER_RST                                0xd0
+#define JPEG_MARKER_SOI                                0xd8
+#define JPEG_MARKER_EOI                                0xd9
+#define        JPEG_MARKER_SOS                         0xda
+#define JPEG_MARKER_DQT                                0xdb
+#define JPEG_MARKER_DHP                                0xde
 
 /* Flags that indicate a format can be used for capture/output */
 #define SJPEG_FMT_FLAG_ENC_CAPTURE     (1 << 0)
@@ -187,11 +187,11 @@ struct s5p_jpeg_marker {
  * @fmt:       driver-specific format of this queue
  * @w:         image width
  * @h:         image height
- * @sos:       SOS marker's position relative to the buffer beginning
- * @dht:       DHT markers' positions relative to the buffer beginning
- * @dqt:       DQT markers' positions relative to the buffer beginning
- * @sof:       SOF0 marker's position relative to the buffer beginning
- * @sof_len:   SOF0 marker's payload length (without length field itself)
+ * @sos:       JPEG_MARKER_SOS's position relative to the buffer beginning
+ * @dht:       JPEG_MARKER_DHT' positions relative to the buffer beginning
+ * @dqt:       JPEG_MARKER_DQT' positions relative to the buffer beginning
+ * @sof:       JPEG_MARKER_SOF0's position relative to the buffer beginning
+ * @sof_len:   JPEG_MARKER_SOF0's payload length (without length field itself)
  * @size:      image buffer size in bytes
  */
 struct s5p_jpeg_q_data {
index 3e729a1..48d52ba 100644 (file)
@@ -24,6 +24,7 @@ static const u8 COMMAND_VERSION[] = { 'v' };
 // End transmit and repeat reset command so we exit sump mode
 static const u8 COMMAND_RESET[] = { 0xff, 0xff, 0, 0, 0, 0, 0 };
 static const u8 COMMAND_SMODE_ENTER[] = { 's' };
+static const u8 COMMAND_SMODE_EXIT[] = { 0 };
 static const u8 COMMAND_TXSTART[] = { 0x26, 0x24, 0x25, 0x03 };
 
 #define REPLY_XMITCOUNT 't'
@@ -309,12 +310,30 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count)
                buf[i] = cpu_to_be16(v);
        }
 
-       buf[count] = cpu_to_be16(0xffff);
+       buf[count] = 0xffff;
 
        irtoy->tx_buf = buf;
        irtoy->tx_len = size;
        irtoy->emitted = 0;
 
+       // There is an issue where if the unit is receiving IR while the
+       // first TXSTART command is sent, the device might end up hanging
+       // with its led on. It does not respond to any command when this
+       // happens. To work around this, re-enter sample mode.
+       err = irtoy_command(irtoy, COMMAND_SMODE_EXIT,
+                           sizeof(COMMAND_SMODE_EXIT), STATE_RESET);
+       if (err) {
+               dev_err(irtoy->dev, "exit sample mode: %d\n", err);
+               return err;
+       }
+
+       err = irtoy_command(irtoy, COMMAND_SMODE_ENTER,
+                           sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND);
+       if (err) {
+               dev_err(irtoy->dev, "enter sample mode: %d\n", err);
+               return err;
+       }
+
        err = irtoy_command(irtoy, COMMAND_TXSTART, sizeof(COMMAND_TXSTART),
                            STATE_TX);
        kfree(buf);
index 6578cc6..380f9aa 100644 (file)
@@ -1802,10 +1802,15 @@ static enum hrtimer_restart dw_mci_fault_timer(struct hrtimer *t)
 
        spin_lock_irqsave(&host->irq_lock, flags);
 
-       if (!host->data_status)
+       /*
+        * Only inject an error if we haven't already got an error or data over
+        * interrupt.
+        */
+       if (!host->data_status) {
                host->data_status = SDMMC_INT_DCRC;
-       set_bit(EVENT_DATA_ERROR, &host->pending_events);
-       tasklet_schedule(&host->tasklet);
+               set_bit(EVENT_DATA_ERROR, &host->pending_events);
+               tasklet_schedule(&host->tasklet);
+       }
 
        spin_unlock_irqrestore(&host->irq_lock, flags);
 
@@ -2721,12 +2726,16 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
                }
 
                if (pending & DW_MCI_DATA_ERROR_FLAGS) {
+                       spin_lock(&host->irq_lock);
+
                        /* if there is an error report DATA_ERROR */
                        mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS);
                        host->data_status = pending;
                        smp_wmb(); /* drain writebuffer */
                        set_bit(EVENT_DATA_ERROR, &host->pending_events);
                        tasklet_schedule(&host->tasklet);
+
+                       spin_unlock(&host->irq_lock);
                }
 
                if (pending & SDMMC_INT_DATA_OVER) {
index 6fc4cf3..a4407f3 100644 (file)
@@ -561,6 +561,8 @@ static void renesas_sdhi_reset(struct tmio_mmc_host *host)
                /* Unknown why but without polling reset status, it will hang */
                read_poll_timeout(reset_control_status, ret, ret == 0, 1, 100,
                                  false, priv->rstc);
+               /* At least SDHI_VER_GEN2_SDR50 needs manual release of reset */
+               sd_ctrl_write16(host, CTL_RESET_SD, 0x0001);
                priv->needs_adjust_hs400 = false;
                renesas_sdhi_set_clock(host, host->clk_cache);
        } else if (priv->scc_ctl) {
index 8ab0be7..03744d1 100644 (file)
@@ -2834,8 +2834,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        if (err)
                return err;
 
-       /* Port Control 2: don't force a good FCS, set the maximum frame size to
-        * 10240 bytes, disable 802.1q tags checking, don't discard tagged or
+       /* Port Control 2: don't force a good FCS, set the MTU size to
+        * 10222 bytes, disable 802.1q tags checking, don't discard tagged or
         * untagged frames on this port, do a destination address lookup on all
         * received packets as usual, disable ARP mirroring and don't send a
         * copy of all transmitted/received frames on this port to the CPU.
@@ -2854,7 +2854,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                return err;
 
        if (chip->info->ops->port_set_jumbo_size) {
-               err = chip->info->ops->port_set_jumbo_size(chip, port, 10240);
+               err = chip->info->ops->port_set_jumbo_size(chip, port, 10218);
                if (err)
                        return err;
        }
@@ -2944,10 +2944,10 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
        struct mv88e6xxx_chip *chip = ds->priv;
 
        if (chip->info->ops->port_set_jumbo_size)
-               return 10240;
+               return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
        else if (chip->info->ops->set_max_frame_size)
-               return 1632;
-       return 1522;
+               return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+       return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
 }
 
 static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
@@ -2955,6 +2955,9 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
        struct mv88e6xxx_chip *chip = ds->priv;
        int ret = 0;
 
+       if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+               new_mtu += EDSA_HLEN;
+
        mv88e6xxx_reg_lock(chip);
        if (chip->info->ops->port_set_jumbo_size)
                ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
@@ -3725,7 +3728,6 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
        .port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
        .port_set_ether_type = mv88e6351_port_set_ether_type,
-       .port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
        .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
        .port_pause_limit = mv88e6097_port_pause_limit,
        .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
@@ -3750,6 +3752,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .avb_ops = &mv88e6165_avb_ops,
        .ptp_ops = &mv88e6165_ptp_ops,
        .phylink_validate = mv88e6185_phylink_validate,
+       .set_max_frame_size = mv88e6185_g1_set_max_frame_size,
 };
 
 static const struct mv88e6xxx_ops mv88e6165_ops = {
index 675b1f3..59f316c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/timecounter.h>
 #include <net/dsa.h>
 
+#define EDSA_HLEN              8
 #define MV88E6XXX_N_FID                4096
 
 /* PVT limits for 4-bit port and 5-bit switch */
index 815b0f6..5848112 100644 (file)
@@ -232,6 +232,8 @@ int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu)
        u16 val;
        int err;
 
+       mtu += ETH_HLEN + ETH_FCS_LEN;
+
        err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val);
        if (err)
                return err;
index f77e2ee..451028c 100644 (file)
@@ -1277,6 +1277,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
        u16 reg;
        int err;
 
+       size += VLAN_ETH_HLEN + ETH_FCS_LEN;
+
        err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, &reg);
        if (err)
                return err;
index 4ab5bf6..df8ff83 100644 (file)
@@ -192,6 +192,9 @@ static int bgmac_probe(struct platform_device *pdev)
        bgmac->dma_dev = &pdev->dev;
 
        ret = of_get_mac_address(np, bgmac->net_dev->dev_addr);
+       if (ret == -EPROBE_DEFER)
+               return ret;
+
        if (ret)
                dev_warn(&pdev->dev,
                         "MAC address not present in device tree\n");
index 60d94e0..4c977df 100644 (file)
@@ -541,8 +541,7 @@ static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode)
 
        if (phy_interface_mode_is_rgmii(phy_mode)) {
                val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
-               val &= ~ENETC_PM0_IFM_EN_AUTO;
-               val &= ENETC_PM0_IFM_IFMODE_MASK;
+               val &= ~(ENETC_PM0_IFM_EN_AUTO | ENETC_PM0_IFM_IFMODE_MASK);
                val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG;
                enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
        }
index 546a605..8ba21d6 100644 (file)
@@ -752,7 +752,6 @@ struct hnae3_tc_info {
        u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
        u16 tqp_count[HNAE3_MAX_TC];
        u16 tqp_offset[HNAE3_MAX_TC];
-       unsigned long tc_en; /* bitmap of TC enabled */
        u8 num_tc; /* Total number of enabled TCs */
        bool mqprio_active;
 };
index adc54a7..468b8f0 100644 (file)
@@ -623,13 +623,9 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
                        return ret;
                }
 
-               for (i = 0; i < HNAE3_MAX_TC; i++) {
-                       if (!test_bit(i, &tc_info->tc_en))
-                               continue;
-
+               for (i = 0; i < tc_info->num_tc; i++)
                        netdev_set_tc_queue(netdev, i, tc_info->tqp_count[i],
                                            tc_info->tqp_offset[i]);
-               }
        }
 
        ret = netif_set_real_num_tx_queues(netdev, queue_size);
@@ -779,6 +775,11 @@ static int hns3_nic_net_open(struct net_device *netdev)
        if (hns3_nic_resetting(netdev))
                return -EBUSY;
 
+       if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) {
+               netdev_warn(netdev, "net open repeatedly!\n");
+               return 0;
+       }
+
        netif_carrier_off(netdev);
 
        ret = hns3_nic_set_real_num_queue(netdev);
@@ -4865,12 +4866,9 @@ static void hns3_init_tx_ring_tc(struct hns3_nic_priv *priv)
        struct hnae3_tc_info *tc_info = &kinfo->tc_info;
        int i;
 
-       for (i = 0; i < HNAE3_MAX_TC; i++) {
+       for (i = 0; i < tc_info->num_tc; i++) {
                int j;
 
-               if (!test_bit(i, &tc_info->tc_en))
-                       continue;
-
                for (j = 0; j < tc_info->tqp_count[i]; j++) {
                        struct hnae3_queue *q;
 
index 7ea511d..5ebd96f 100644 (file)
@@ -334,7 +334,8 @@ static void hns3_selftest_prepare(struct net_device *ndev,
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
        /* Disable the vlan filter for selftest does not support it */
-       if (h->ae_algo->ops->enable_vlan_filter)
+       if (h->ae_algo->ops->enable_vlan_filter &&
+           ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
                h->ae_algo->ops->enable_vlan_filter(h, false);
 #endif
 
@@ -359,7 +360,8 @@ static void hns3_selftest_restore(struct net_device *ndev, bool if_running)
                h->ae_algo->ops->halt_autoneg(h, false);
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
-       if (h->ae_algo->ops->enable_vlan_filter)
+       if (h->ae_algo->ops->enable_vlan_filter &&
+           ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
                h->ae_algo->ops->enable_vlan_filter(h, true);
 #endif
 
index ac9b695..9c2eeaa 100644 (file)
@@ -467,7 +467,7 @@ err_csq:
        return ret;
 }
 
-static int hclge_firmware_compat_config(struct hclge_dev *hdev)
+static int hclge_firmware_compat_config(struct hclge_dev *hdev, bool en)
 {
        struct hclge_firmware_compat_cmd *req;
        struct hclge_desc desc;
@@ -475,13 +475,16 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_IMP_COMPAT_CFG, false);
 
-       req = (struct hclge_firmware_compat_cmd *)desc.data;
+       if (en) {
+               req = (struct hclge_firmware_compat_cmd *)desc.data;
 
-       hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
-       hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
-       if (hnae3_dev_phy_imp_supported(hdev))
-               hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
-       req->compat = cpu_to_le32(compat);
+               hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
+               hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
+               if (hnae3_dev_phy_imp_supported(hdev))
+                       hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
+
+               req->compat = cpu_to_le32(compat);
+       }
 
        return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -538,7 +541,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
        /* ask the firmware to enable some features, driver can work without
         * it.
         */
-       ret = hclge_firmware_compat_config(hdev);
+       ret = hclge_firmware_compat_config(hdev, true);
        if (ret)
                dev_warn(&hdev->pdev->dev,
                         "Firmware compatible features not enabled(%d).\n",
@@ -568,6 +571,8 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
 
 void hclge_cmd_uninit(struct hclge_dev *hdev)
 {
+       hclge_firmware_compat_config(hdev, false);
+
        set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
        /* wait to ensure that the firmware completes the possible left
         * over commands.
index 4a619e5..307c9e8 100644 (file)
@@ -247,6 +247,10 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
        }
 
        hclge_tm_schd_info_update(hdev, num_tc);
+       if (num_tc > 1)
+               hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+       else
+               hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 
        ret = hclge_ieee_ets_to_tm_info(hdev, ets);
        if (ret)
@@ -306,8 +310,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
        u8 i, j, pfc_map, *prio_tc;
        int ret;
 
-       if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
-           hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+       if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
                return -EINVAL;
 
        if (pfc->pfc_en == hdev->tm_info.pfc_en)
@@ -441,8 +444,6 @@ static int hclge_mqprio_qopt_check(struct hclge_dev *hdev,
 static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info,
                                   struct tc_mqprio_qopt_offload *mqprio_qopt)
 {
-       int i;
-
        memset(tc_info, 0, sizeof(*tc_info));
        tc_info->num_tc = mqprio_qopt->qopt.num_tc;
        memcpy(tc_info->prio_tc, mqprio_qopt->qopt.prio_tc_map,
@@ -451,9 +452,6 @@ static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info,
               sizeof_field(struct hnae3_tc_info, tqp_count));
        memcpy(tc_info->tqp_offset, mqprio_qopt->qopt.offset,
               sizeof_field(struct hnae3_tc_info, tqp_offset));
-
-       for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
-               set_bit(tc_info->prio_tc[i], &tc_info->tc_en);
 }
 
 static int hclge_config_tc(struct hclge_dev *hdev,
@@ -519,12 +517,17 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        return hclge_notify_init_up(hdev);
 
 err_out:
-       /* roll-back */
-       memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info));
-       if (hclge_config_tc(hdev, &kinfo->tc_info))
-               dev_err(&hdev->pdev->dev,
-                       "failed to roll back tc configuration\n");
-
+       if (!tc) {
+               dev_warn(&hdev->pdev->dev,
+                        "failed to destroy mqprio, will active after reset, ret = %d\n",
+                        ret);
+       } else {
+               /* roll-back */
+               memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info));
+               if (hclge_config_tc(hdev, &kinfo->tc_info))
+                       dev_err(&hdev->pdev->dev,
+                               "failed to roll back tc configuration\n");
+       }
        hclge_notify_init_up(hdev);
 
        return ret;
index 87d96f8..32f62cd 100644 (file)
@@ -719,9 +719,9 @@ static void hclge_dbg_fill_shaper_content(struct hclge_tm_shaper_para *para,
        sprintf(result[(*index)++], "%6u", para->rate);
 }
 
-static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
+static int __hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *data_str,
+                                 char *buf, int len)
 {
-       char data_str[ARRAY_SIZE(tm_pg_items)][HCLGE_DBG_DATA_STR_LEN];
        struct hclge_tm_shaper_para c_shaper_para, p_shaper_para;
        char *result[ARRAY_SIZE(tm_pg_items)], *sch_mode_str;
        u8 pg_id, sch_mode, weight, pri_bit_map, i, j;
@@ -729,8 +729,10 @@ static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
        int pos = 0;
        int ret;
 
-       for (i = 0; i < ARRAY_SIZE(tm_pg_items); i++)
-               result[i] = &data_str[i][0];
+       for (i = 0; i < ARRAY_SIZE(tm_pg_items); i++) {
+               result[i] = data_str;
+               data_str += HCLGE_DBG_DATA_STR_LEN;
+       }
 
        hclge_dbg_fill_content(content, sizeof(content), tm_pg_items,
                               NULL, ARRAY_SIZE(tm_pg_items));
@@ -781,6 +783,24 @@ static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
        return 0;
 }
 
+static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
+{
+       char *data_str;
+       int ret;
+
+       data_str = kcalloc(ARRAY_SIZE(tm_pg_items),
+                          HCLGE_DBG_DATA_STR_LEN, GFP_KERNEL);
+
+       if (!data_str)
+               return -ENOMEM;
+
+       ret = __hclge_dbg_dump_tm_pg(hdev, data_str, buf, len);
+
+       kfree(data_str);
+
+       return ret;
+}
+
 static int hclge_dbg_dump_tm_port(struct hclge_dev *hdev,  char *buf, int len)
 {
        struct hclge_tm_shaper_para shaper_para;
index 47fea89..f5b8d1f 100644 (file)
@@ -8708,15 +8708,8 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
        }
 
        /* check if we just hit the duplicate */
-       if (!ret) {
-               dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n",
-                        vport->vport_id, addr);
-               return 0;
-       }
-
-       dev_err(&hdev->pdev->dev,
-               "PF failed to add unicast entry(%pM) in the MAC table\n",
-               addr);
+       if (!ret)
+               return -EEXIST;
 
        return ret;
 }
@@ -8868,7 +8861,13 @@ static void hclge_sync_vport_mac_list(struct hclge_vport *vport,
                } else {
                        set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
                                &vport->state);
-                       break;
+
+                       /* If one unicast mac address is existing in hardware,
+                        * we need to try whether other unicast mac addresses
+                        * are new addresses that can be added.
+                        */
+                       if (ret != -EEXIST)
+                               break;
                }
        }
 }
@@ -12797,8 +12796,12 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
                        continue;
 
                if (vport->vf_info.trusted) {
-                       uc_en = vport->vf_info.request_uc_en > 0;
-                       mc_en = vport->vf_info.request_mc_en > 0;
+                       uc_en = vport->vf_info.request_uc_en > 0 ||
+                               vport->overflow_promisc_flags &
+                               HNAE3_OVERFLOW_UPE;
+                       mc_en = vport->vf_info.request_mc_en > 0 ||
+                               vport->overflow_promisc_flags &
+                               HNAE3_OVERFLOW_MPE;
                }
                bc_en = vport->vf_info.request_bc_en > 0;
 
index 44618cc..f314dbd 100644 (file)
@@ -687,12 +687,10 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 
        for (i = 0; i < HNAE3_MAX_TC; i++) {
                if (hdev->hw_tc_map & BIT(i) && i < kinfo->tc_info.num_tc) {
-                       set_bit(i, &kinfo->tc_info.tc_en);
                        kinfo->tc_info.tqp_offset[i] = i * kinfo->rss_size;
                        kinfo->tc_info.tqp_count[i] = kinfo->rss_size;
                } else {
                        /* Set to default queue if TC is disable */
-                       clear_bit(i, &kinfo->tc_info.tc_en);
                        kinfo->tc_info.tqp_offset[i] = 0;
                        kinfo->tc_info.tqp_count[i] = 1;
                }
@@ -729,14 +727,6 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
        for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
                hdev->tm_info.prio_tc[i] =
                        (i >= hdev->tm_info.num_tc) ? 0 : i;
-
-       /* DCB is enabled if we have more than 1 TC or pfc_en is
-        * non-zero.
-        */
-       if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
-               hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 }
 
 static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -767,10 +757,10 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
 
 static void hclge_update_fc_mode_by_dcb_flag(struct hclge_dev *hdev)
 {
-       if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+       if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) {
                if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
                        dev_warn(&hdev->pdev->dev,
-                                "DCB is disable, but last mode is FC_PFC\n");
+                                "Only 1 tc used, but last mode is FC_PFC\n");
 
                hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
        } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
@@ -796,7 +786,7 @@ static void hclge_update_fc_mode(struct hclge_dev *hdev)
        }
 }
 
-static void hclge_pfc_info_init(struct hclge_dev *hdev)
+void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
 {
        if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
                hclge_update_fc_mode(hdev);
@@ -812,7 +802,7 @@ static void hclge_tm_schd_info_init(struct hclge_dev *hdev)
 
        hclge_tm_vport_info_update(hdev);
 
-       hclge_pfc_info_init(hdev);
+       hclge_tm_pfc_info_update(hdev);
 }
 
 static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev)
@@ -1558,19 +1548,6 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
        hclge_tm_schd_info_init(hdev);
 }
 
-void hclge_tm_pfc_info_update(struct hclge_dev *hdev)
-{
-       /* DCB is enabled if we have more than 1 TC or pfc_en is
-        * non-zero.
-        */
-       if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en)
-               hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
-
-       hclge_pfc_info_init(hdev);
-}
-
 int hclge_tm_init_hw(struct hclge_dev *hdev, bool init)
 {
        int ret;
@@ -1616,7 +1593,7 @@ int hclge_tm_vport_map_update(struct hclge_dev *hdev)
        if (ret)
                return ret;
 
-       if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE))
+       if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en)
                return 0;
 
        return hclge_tm_bp_setup(hdev);
index 3e54017..07fdab5 100644 (file)
@@ -354,7 +354,7 @@ static int hns_mdio_reset(struct mii_bus *bus)
 
        if (dev_of_node(bus->parent)) {
                if (!mdio_dev->subctrl_vbase) {
-                       dev_err(&bus->dev, "mdio sys ctl reg has not maped\n");
+                       dev_err(&bus->dev, "mdio sys ctl reg has not mapped\n");
                        return -ENODEV;
                }
 
index a4579b3..6aa6ff8 100644 (file)
@@ -4708,14 +4708,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
                return 0;
        }
 
-       if (adapter->failover_pending) {
-               adapter->init_done_rc = -EAGAIN;
-               netdev_dbg(netdev, "Failover pending, ignoring login response\n");
-               complete(&adapter->init_done);
-               /* login response buffer will be released on reset */
-               return 0;
-       }
-
        netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
        netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
index 373eb02..09ae193 100644 (file)
@@ -2437,11 +2437,15 @@ static void e100_get_drvinfo(struct net_device *netdev,
                sizeof(info->bus_info));
 }
 
-#define E100_PHY_REGS 0x1C
+#define E100_PHY_REGS 0x1D
 static int e100_get_regs_len(struct net_device *netdev)
 {
        struct nic *nic = netdev_priv(netdev);
-       return 1 + E100_PHY_REGS + sizeof(nic->mem->dump_buf);
+
+       /* We know the number of registers, and the size of the dump buffer.
+        * Calculate the total size in bytes.
+        */
+       return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf);
 }
 
 static void e100_get_regs(struct net_device *netdev,
@@ -2455,14 +2459,18 @@ static void e100_get_regs(struct net_device *netdev,
        buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 |
                ioread8(&nic->csr->scb.cmd_lo) << 16 |
                ioread16(&nic->csr->scb.status);
-       for (i = E100_PHY_REGS; i >= 0; i--)
-               buff[1 + E100_PHY_REGS - i] =
-                       mdio_read(netdev, nic->mii.phy_id, i);
+       for (i = 0; i < E100_PHY_REGS; i++)
+               /* Note that we read the registers in reverse order. This
+                * ordering is the ABI apparently used by ethtool and other
+                * applications.
+                */
+               buff[1 + i] = mdio_read(netdev, nic->mii.phy_id,
+                                       E100_PHY_REGS - 1 - i);
        memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf));
        e100_exec_cb(nic, NULL, e100_dump);
        msleep(10);
-       memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf,
-               sizeof(nic->mem->dump_buf));
+       memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf,
+              sizeof(nic->mem->dump_buf));
 }
 
 static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
index fc26e4d..beda8e0 100644 (file)
@@ -3208,7 +3208,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter)
                max_combined = ixgbe_max_rss_indices(adapter);
        }
 
-       return max_combined;
+       return min_t(int, max_combined, num_online_cpus());
 }
 
 static void ixgbe_get_channels(struct net_device *dev,
index 24e06ba..13c4782 100644 (file)
@@ -10112,6 +10112,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct bpf_prog *old_prog;
        bool need_reset;
+       int num_queues;
 
        if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
                return -EINVAL;
@@ -10161,11 +10162,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
        /* Kick start the NAPI context if there is an AF_XDP socket open
         * on that queue id. This so that receiving will start.
         */
-       if (need_reset && prog)
-               for (i = 0; i < adapter->num_rx_queues; i++)
+       if (need_reset && prog) {
+               num_queues = min_t(int, adapter->num_rx_queues,
+                                  adapter->num_xdp_queues);
+               for (i = 0; i < num_queues; i++)
                        if (adapter->xdp_ring[i]->xsk_pool)
                                (void)ixgbe_xsk_wakeup(adapter->netdev, i,
                                                       XDP_WAKEUP_RX);
+       }
 
        return 0;
 }
index 5cc00d2..6ecc4eb 100644 (file)
@@ -4,8 +4,6 @@
 #
 
 obj-$(CONFIG_KS8842) += ks8842.o
-obj-$(CONFIG_KS8851) += ks8851.o
-ks8851-objs = ks8851_common.o ks8851_spi.o
-obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o
-ks8851_mll-objs = ks8851_common.o ks8851_par.o
+obj-$(CONFIG_KS8851) += ks8851_common.o ks8851_spi.o
+obj-$(CONFIG_KS8851_MLL) += ks8851_common.o ks8851_par.o
 obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o
index 3f69bb5..a6db1a8 100644 (file)
@@ -1057,6 +1057,7 @@ int ks8851_suspend(struct device *dev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_suspend);
 
 int ks8851_resume(struct device *dev)
 {
@@ -1070,6 +1071,7 @@ int ks8851_resume(struct device *dev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_resume);
 #endif
 
 static int ks8851_register_mdiobus(struct ks8851_net *ks, struct device *dev)
@@ -1243,6 +1245,7 @@ err_reg:
 err_reg_io:
        return ret;
 }
+EXPORT_SYMBOL_GPL(ks8851_probe_common);
 
 int ks8851_remove_common(struct device *dev)
 {
@@ -1261,3 +1264,8 @@ int ks8851_remove_common(struct device *dev)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(ks8851_remove_common);
+
+MODULE_DESCRIPTION("KS8851 Network driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL");
index 58a8546..c14de5f 100644 (file)
@@ -380,15 +380,6 @@ static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf,
                                          &ionic_dbg_intr_stats_desc[i]);
                (*buf)++;
        }
-       for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
-               **buf = IONIC_READ_STAT64(&txqcq->napi_stats,
-                                         &ionic_dbg_napi_stats_desc[i]);
-               (*buf)++;
-       }
-       for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
-               **buf = txqcq->napi_stats.work_done_cntr[i];
-               (*buf)++;
-       }
        for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
                **buf = txstats->sg_cntr[i];
                (*buf)++;
index 553c440..981ccf4 100644 (file)
@@ -486,6 +486,10 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
                timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
                stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
                                     eee_tw_timer);
+               if (priv->hw->xpcs)
+                       xpcs_config_eee(priv->hw->xpcs,
+                                       priv->plat->mult_fact_100ns,
+                                       true);
        }
 
        if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) {
index 309de38..b0d3f9a 100644 (file)
@@ -73,6 +73,7 @@ config CASSINI
 config SUNVNET_COMMON
        tristate "Common routines to support Sun Virtual Networking"
        depends on SUN_LDOMS
+       depends on INET
        default m
 
 config SUNVNET
index f4843f9..441da03 100644 (file)
@@ -48,6 +48,7 @@ config BPQETHER
 config DMASCC
        tristate "High-speed (DMA) SCC driver for AX.25"
        depends on ISA && AX25 && BROKEN_ON_SMP && ISA_DMA_API
+       depends on VIRT_TO_BUS
        help
          This is a driver for high-speed SCC boards, i.e. those supporting
          DMA on one port. You usually use those boards to connect your
index 0d7d3e1..5f4cd24 100644 (file)
@@ -207,6 +207,7 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
 {
        struct ipq4019_mdio_data *priv;
        struct mii_bus *bus;
+       struct resource *res;
        int ret;
 
        bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*priv));
@@ -224,7 +225,10 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
                return PTR_ERR(priv->mdio_clk);
 
        /* The platform resource is provided on the chipset IPQ5018 */
-       priv->eth_ldo_rdy = devm_platform_ioremap_resource(pdev, 1);
+       /* This resource is optional */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (res)
+               priv->eth_ldo_rdy = devm_ioremap_resource(&pdev->dev, res);
 
        bus->name = "ipq4019_mdio";
        bus->read = ipq4019_mdio_read;
index 1ee592d..17f98f6 100644 (file)
@@ -134,8 +134,9 @@ static int mscc_miim_reset(struct mii_bus *bus)
 
 static int mscc_miim_probe(struct platform_device *pdev)
 {
-       struct mii_bus *bus;
        struct mscc_miim_dev *dev;
+       struct resource *res;
+       struct mii_bus *bus;
        int ret;
 
        bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*dev));
@@ -156,10 +157,14 @@ static int mscc_miim_probe(struct platform_device *pdev)
                return PTR_ERR(dev->regs);
        }
 
-       dev->phy_regs = devm_platform_ioremap_resource(pdev, 1);
-       if (IS_ERR(dev->phy_regs)) {
-               dev_err(&pdev->dev, "Unable to map internal phy registers\n");
-               return PTR_ERR(dev->phy_regs);
+       /* This resource is optional */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (res) {
+               dev->phy_regs = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(dev->phy_regs)) {
+                       dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+                       return PTR_ERR(dev->phy_regs);
+               }
        }
 
        ret = of_mdiobus_register(bus, pdev->dev.of_node);
index d127eb6..aaa628f 100644 (file)
@@ -321,7 +321,7 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
        /* Start MHI channels */
        err = mhi_prepare_for_transfer(mhi_dev);
        if (err)
-               goto out_err;
+               return err;
 
        /* Number of transfer descriptors determines size of the queue */
        mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
@@ -331,10 +331,6 @@ static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
                return err;
 
        return 0;
-
-out_err:
-       free_netdev(ndev);
-       return err;
 }
 
 static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
index e79297a..27b6a3f 100644 (file)
 #define MII_BCM7XXX_SHD_2_ADDR_CTRL    0xe
 #define MII_BCM7XXX_SHD_2_CTRL_STAT    0xf
 #define MII_BCM7XXX_SHD_2_BIAS_TRIM    0x1a
+#define MII_BCM7XXX_SHD_3_PCS_CTRL     0x0
+#define MII_BCM7XXX_SHD_3_PCS_STATUS   0x1
+#define MII_BCM7XXX_SHD_3_EEE_CAP      0x2
 #define MII_BCM7XXX_SHD_3_AN_EEE_ADV   0x3
+#define MII_BCM7XXX_SHD_3_EEE_LP       0x4
+#define MII_BCM7XXX_SHD_3_EEE_WK_ERR   0x5
 #define MII_BCM7XXX_SHD_3_PCS_CTRL_2   0x6
 #define  MII_BCM7XXX_PCS_CTRL_2_DEF    0x4400
 #define MII_BCM7XXX_SHD_3_AN_STAT      0xb
@@ -216,25 +221,37 @@ static int bcm7xxx_28nm_resume(struct phy_device *phydev)
        return genphy_config_aneg(phydev);
 }
 
-static int phy_set_clr_bits(struct phy_device *dev, int location,
-                                       int set_mask, int clr_mask)
+static int __phy_set_clr_bits(struct phy_device *dev, int location,
+                             int set_mask, int clr_mask)
 {
        int v, ret;
 
-       v = phy_read(dev, location);
+       v = __phy_read(dev, location);
        if (v < 0)
                return v;
 
        v &= ~clr_mask;
        v |= set_mask;
 
-       ret = phy_write(dev, location, v);
+       ret = __phy_write(dev, location, v);
        if (ret < 0)
                return ret;
 
        return v;
 }
 
+static int phy_set_clr_bits(struct phy_device *dev, int location,
+                           int set_mask, int clr_mask)
+{
+       int ret;
+
+       mutex_lock(&dev->mdio.bus->mdio_lock);
+       ret = __phy_set_clr_bits(dev, location, set_mask, clr_mask);
+       mutex_unlock(&dev->mdio.bus->mdio_lock);
+
+       return ret;
+}
+
 static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev)
 {
        int ret;
@@ -398,6 +415,93 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev)
        return bcm7xxx_28nm_ephy_apd_enable(phydev);
 }
 
+#define MII_BCM7XXX_REG_INVALID        0xff
+
+static u8 bcm7xxx_28nm_ephy_regnum_to_shd(u16 regnum)
+{
+       switch (regnum) {
+       case MDIO_CTRL1:
+               return MII_BCM7XXX_SHD_3_PCS_CTRL;
+       case MDIO_STAT1:
+               return MII_BCM7XXX_SHD_3_PCS_STATUS;
+       case MDIO_PCS_EEE_ABLE:
+               return MII_BCM7XXX_SHD_3_EEE_CAP;
+       case MDIO_AN_EEE_ADV:
+               return MII_BCM7XXX_SHD_3_AN_EEE_ADV;
+       case MDIO_AN_EEE_LPABLE:
+               return MII_BCM7XXX_SHD_3_EEE_LP;
+       case MDIO_PCS_EEE_WK_ERR:
+               return MII_BCM7XXX_SHD_3_EEE_WK_ERR;
+       default:
+               return MII_BCM7XXX_REG_INVALID;
+       }
+}
+
+static bool bcm7xxx_28nm_ephy_dev_valid(int devnum)
+{
+       return devnum == MDIO_MMD_AN || devnum == MDIO_MMD_PCS;
+}
+
+static int bcm7xxx_28nm_ephy_read_mmd(struct phy_device *phydev,
+                                     int devnum, u16 regnum)
+{
+       u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum);
+       int ret;
+
+       if (!bcm7xxx_28nm_ephy_dev_valid(devnum) ||
+           shd == MII_BCM7XXX_REG_INVALID)
+               return -EOPNOTSUPP;
+
+       /* set shadow mode 2 */
+       ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+                                MII_BCM7XXX_SHD_MODE_2, 0);
+       if (ret < 0)
+               return ret;
+
+       /* Access the desired shadow register address */
+       ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       ret = __phy_read(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT);
+
+reset_shadow_mode:
+       /* reset shadow mode 2 */
+       __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+                          MII_BCM7XXX_SHD_MODE_2);
+       return ret;
+}
+
+static int bcm7xxx_28nm_ephy_write_mmd(struct phy_device *phydev,
+                                      int devnum, u16 regnum, u16 val)
+{
+       u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum);
+       int ret;
+
+       if (!bcm7xxx_28nm_ephy_dev_valid(devnum) ||
+           shd == MII_BCM7XXX_REG_INVALID)
+               return -EOPNOTSUPP;
+
+       /* set shadow mode 2 */
+       ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
+                                MII_BCM7XXX_SHD_MODE_2, 0);
+       if (ret < 0)
+               return ret;
+
+       /* Access the desired shadow register address */
+       ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd);
+       if (ret < 0)
+               goto reset_shadow_mode;
+
+       /* Write the desired value in the shadow register */
+       __phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val);
+
+reset_shadow_mode:
+       /* reset shadow mode 2 */
+       return __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0,
+                                 MII_BCM7XXX_SHD_MODE_2);
+}
+
 static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev)
 {
        int ret;
@@ -595,6 +699,8 @@ static void bcm7xxx_28nm_remove(struct phy_device *phydev)
        .get_stats      = bcm7xxx_28nm_get_phy_stats,                   \
        .probe          = bcm7xxx_28nm_probe,                           \
        .remove         = bcm7xxx_28nm_remove,                          \
+       .read_mmd       = bcm7xxx_28nm_ephy_read_mmd,                   \
+       .write_mmd      = bcm7xxx_28nm_ephy_write_mmd,                  \
 }
 
 #define BCM7XXX_40NM_EPHY(_oui, _name)                                 \
index 53f034f..6f4b4e5 100644 (file)
@@ -537,6 +537,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
        err = device_register(&bus->dev);
        if (err) {
                pr_err("mii_bus %s failed to register\n", bus->id);
+               put_device(&bus->dev);
                return -EINVAL;
        }
 
index 2d5d508..5ce1bf0 100644 (file)
@@ -493,6 +493,25 @@ static int gpy_loopback(struct phy_device *phydev, bool enable)
        return ret;
 }
 
+static int gpy115_loopback(struct phy_device *phydev, bool enable)
+{
+       int ret;
+       int fw_minor;
+
+       if (enable)
+               return gpy_loopback(phydev, enable);
+
+       ret = phy_read(phydev, PHY_FWV);
+       if (ret < 0)
+               return ret;
+
+       fw_minor = FIELD_GET(PHY_FWV_MINOR_MASK, ret);
+       if (fw_minor > 0x0076)
+               return gpy_loopback(phydev, 0);
+
+       return genphy_soft_reset(phydev);
+}
+
 static struct phy_driver gpy_drivers[] = {
        {
                PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx),
@@ -527,7 +546,7 @@ static struct phy_driver gpy_drivers[] = {
                .handle_interrupt = gpy_handle_interrupt,
                .set_wol        = gpy_set_wol,
                .get_wol        = gpy_get_wol,
-               .set_loopback   = gpy_loopback,
+               .set_loopback   = gpy115_loopback,
        },
        {
                PHY_ID_MATCH_MODEL(PHY_ID_GPY115C),
@@ -544,7 +563,7 @@ static struct phy_driver gpy_drivers[] = {
                .handle_interrupt = gpy_handle_interrupt,
                .set_wol        = gpy_set_wol,
                .get_wol        = gpy_get_wol,
-               .set_loopback   = gpy_loopback,
+               .set_loopback   = gpy115_loopback,
        },
        {
                .phy_id         = PHY_ID_GPY211B,
index 7d95397..26b1bd8 100644 (file)
@@ -1178,7 +1178,10 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 
 static void smsc95xx_handle_link_change(struct net_device *net)
 {
+       struct usbnet *dev = netdev_priv(net);
+
        phy_print_status(net->phydev);
+       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
 }
 
 static int smsc95xx_start_phy(struct usbnet *dev)
index ffa894f..0adae76 100644 (file)
@@ -1867,8 +1867,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer)
                bcn_int -= data->bcn_delta;
                data->bcn_delta = 0;
        }
-       hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer),
-                       ns_to_ktime(bcn_int * NSEC_PER_USEC));
+       hrtimer_forward_now(&data->beacon_timer,
+                           ns_to_ktime(bcn_int * NSEC_PER_USEC));
        return HRTIMER_RESTART;
 }
 
index 3cbc3ba..295cc79 100644 (file)
@@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu)
                pmu->name, pmu->num_events,
                has_nmi ? ", using NMIs" : "");
 
+       kvm_host_pmu_init(pmu);
+
        return 0;
 
 out_destroy:
index a4ac87c..5082102 100644 (file)
@@ -2306,7 +2306,7 @@ EXPORT_SYMBOL_GPL(devm_pinctrl_register_and_init);
 
 /**
  * devm_pinctrl_unregister() - Resource managed version of pinctrl_unregister().
- * @dev: device for which which resource was allocated
+ * @dev: device for which resource was allocated
  * @pctldev: the pinctrl device to unregister.
  */
 void devm_pinctrl_unregister(struct device *dev, struct pinctrl_dev *pctldev)
index c001f2e..8d0f88e 100644 (file)
@@ -445,6 +445,7 @@ static int amd_gpio_irq_set_wake(struct irq_data *d, unsigned int on)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
        u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3);
+       int err;
 
        raw_spin_lock_irqsave(&gpio_dev->lock, flags);
        pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
@@ -457,6 +458,15 @@ static int amd_gpio_irq_set_wake(struct irq_data *d, unsigned int on)
        writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
+       if (on)
+               err = enable_irq_wake(gpio_dev->irq);
+       else
+               err = disable_irq_wake(gpio_dev->irq);
+
+       if (err)
+               dev_err(&gpio_dev->pdev->dev, "failed to %s wake-up interrupt\n",
+                       on ? "enable" : "disable");
+
        return 0;
 }
 
@@ -902,7 +912,6 @@ static struct pinctrl_desc amd_pinctrl_desc = {
 static int amd_gpio_probe(struct platform_device *pdev)
 {
        int ret = 0;
-       int irq_base;
        struct resource *res;
        struct amd_gpio *gpio_dev;
        struct gpio_irq_chip *girq;
@@ -925,9 +934,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
        if (!gpio_dev->base)
                return -ENOMEM;
 
-       irq_base = platform_get_irq(pdev, 0);
-       if (irq_base < 0)
-               return irq_base;
+       gpio_dev->irq = platform_get_irq(pdev, 0);
+       if (gpio_dev->irq < 0)
+               return gpio_dev->irq;
 
 #ifdef CONFIG_PM_SLEEP
        gpio_dev->saved_regs = devm_kcalloc(&pdev->dev, amd_pinctrl_desc.npins,
@@ -987,7 +996,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
                goto out2;
        }
 
-       ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler,
+       ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
                               IRQF_SHARED, KBUILD_MODNAME, gpio_dev);
        if (ret)
                goto out2;
index 95e7634..1d43170 100644 (file)
@@ -98,6 +98,7 @@ struct amd_gpio {
        struct resource         *res;
        struct platform_device  *pdev;
        u32                     *saved_regs;
+       int                     irq;
 };
 
 /*  KERNCZ configuration*/
index ae33e37..5ce260f 100644 (file)
@@ -2092,6 +2092,23 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl,
        return false;
 }
 
+static int rockchip_pinconf_defer_output(struct rockchip_pin_bank *bank,
+                                        unsigned int pin, u32 arg)
+{
+       struct rockchip_pin_output_deferred *cfg;
+
+       cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+       if (!cfg)
+               return -ENOMEM;
+
+       cfg->pin = pin;
+       cfg->arg = arg;
+
+       list_add_tail(&cfg->head, &bank->deferred_output);
+
+       return 0;
+}
+
 /* set the pin config settings for a specified pin */
 static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
                                unsigned long *configs, unsigned num_configs)
@@ -2136,6 +2153,22 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
                        if (rc != RK_FUNC_GPIO)
                                return -EINVAL;
 
+                       /*
+                        * Check for gpio driver not being probed yet.
+                        * The lock makes sure that either gpio-probe has completed
+                        * or the gpio driver hasn't probed yet.
+                        */
+                       mutex_lock(&bank->deferred_lock);
+                       if (!gpio || !gpio->direction_output) {
+                               rc = rockchip_pinconf_defer_output(bank, pin - bank->pin_base, arg);
+                               mutex_unlock(&bank->deferred_lock);
+                               if (rc)
+                                       return rc;
+
+                               break;
+                       }
+                       mutex_unlock(&bank->deferred_lock);
+
                        rc = gpio->direction_output(gpio, pin - bank->pin_base,
                                                    arg);
                        if (rc)
@@ -2204,6 +2237,11 @@ static int rockchip_pinconf_get(struct pinctrl_dev *pctldev, unsigned int pin,
                if (rc != RK_FUNC_GPIO)
                        return -EINVAL;
 
+               if (!gpio || !gpio->get) {
+                       arg = 0;
+                       break;
+               }
+
                rc = gpio->get(gpio, pin - bank->pin_base);
                if (rc < 0)
                        return rc;
@@ -2450,6 +2488,9 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
                                                pin_bank->name, pin);
                        pdesc++;
                }
+
+               INIT_LIST_HEAD(&pin_bank->deferred_output);
+               mutex_init(&pin_bank->deferred_lock);
        }
 
        ret = rockchip_pinctrl_parse_dt(pdev, info);
@@ -2716,6 +2757,31 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
        return 0;
 }
 
+static int rockchip_pinctrl_remove(struct platform_device *pdev)
+{
+       struct rockchip_pinctrl *info = platform_get_drvdata(pdev);
+       struct rockchip_pin_bank *bank;
+       struct rockchip_pin_output_deferred *cfg;
+       int i;
+
+       of_platform_depopulate(&pdev->dev);
+
+       for (i = 0; i < info->ctrl->nr_banks; i++) {
+               bank = &info->ctrl->pin_banks[i];
+
+               mutex_lock(&bank->deferred_lock);
+               while (!list_empty(&bank->deferred_output)) {
+                       cfg = list_first_entry(&bank->deferred_output,
+                                              struct rockchip_pin_output_deferred, head);
+                       list_del(&cfg->head);
+                       kfree(cfg);
+               }
+               mutex_unlock(&bank->deferred_lock);
+       }
+
+       return 0;
+}
+
 static struct rockchip_pin_bank px30_pin_banks[] = {
        PIN_BANK_IOMUX_FLAGS(0, 32, "gpio0", IOMUX_SOURCE_PMU,
                                             IOMUX_SOURCE_PMU,
@@ -3175,6 +3241,7 @@ static const struct of_device_id rockchip_pinctrl_dt_match[] = {
 
 static struct platform_driver rockchip_pinctrl_driver = {
        .probe          = rockchip_pinctrl_probe,
+       .remove         = rockchip_pinctrl_remove,
        .driver = {
                .name   = "rockchip-pinctrl",
                .pm = &rockchip_pinctrl_dev_pm_ops,
index 589d4d2..91f1027 100644 (file)
@@ -141,6 +141,8 @@ struct rockchip_drv {
  * @toggle_edge_mode: bit mask to toggle (falling/rising) edge mode
  * @recalced_mask: bit mask to indicate a need to recalulate the mask
  * @route_mask: bits describing the routing pins of per bank
+ * @deferred_output: gpio output settings to be done after gpio bank probed
+ * @deferred_lock: mutex for the deferred_output shared btw gpio and pinctrl
  */
 struct rockchip_pin_bank {
        struct device                   *dev;
@@ -169,6 +171,8 @@ struct rockchip_pin_bank {
        u32                             toggle_edge_mode;
        u32                             recalced_mask;
        u32                             route_mask;
+       struct list_head                deferred_output;
+       struct mutex                    deferred_lock;
 };
 
 /**
@@ -243,6 +247,12 @@ struct rockchip_pin_config {
        unsigned int            nconfigs;
 };
 
+struct rockchip_pin_output_deferred {
+       struct list_head head;
+       unsigned int pin;
+       u32 arg;
+};
+
 /**
  * struct rockchip_pin_group: represent group of pins of a pinmux function.
  * @name: name of the pin group, used to lookup the group.
index afddf6d..9017ede 100644 (file)
@@ -1496,6 +1496,7 @@ static const struct of_device_id sc7280_pinctrl_of_match[] = {
 static struct platform_driver sc7280_pinctrl_driver = {
        .driver = {
                .name = "sc7280-pinctrl",
+               .pm = &msm_pinctrl_dev_pm_ops,
                .of_match_table = sc7280_pinctrl_of_match,
        },
        .probe = sc7280_pinctrl_probe,
index 98bf0e2..b2562e8 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2014, 2016-2021 The Linux Foundation. All rights reserved.
  */
 
 #include <linux/gpio/driver.h>
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
+#include <linux/spmi.h>
 #include <linux/types.h>
 
 #include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
@@ -171,6 +172,8 @@ struct pmic_gpio_state {
        struct pinctrl_dev *ctrl;
        struct gpio_chip chip;
        struct irq_chip irq;
+       u8 usid;
+       u8 pid_base;
 };
 
 static const struct pinconf_generic_params pmic_gpio_bindings[] = {
@@ -949,12 +952,36 @@ static int pmic_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
                                           unsigned int *parent_hwirq,
                                           unsigned int *parent_type)
 {
-       *parent_hwirq = child_hwirq + 0xc0;
+       struct pmic_gpio_state *state = gpiochip_get_data(chip);
+
+       *parent_hwirq = child_hwirq + state->pid_base;
        *parent_type = child_type;
 
        return 0;
 }
 
+static void *pmic_gpio_populate_parent_fwspec(struct gpio_chip *chip,
+                                            unsigned int parent_hwirq,
+                                            unsigned int parent_type)
+{
+       struct pmic_gpio_state *state = gpiochip_get_data(chip);
+       struct irq_fwspec *fwspec;
+
+       fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL);
+       if (!fwspec)
+               return NULL;
+
+       fwspec->fwnode = chip->irq.parent_domain->fwnode;
+
+       fwspec->param_count = 4;
+       fwspec->param[0] = state->usid;
+       fwspec->param[1] = parent_hwirq;
+       /* param[2] must be left as 0 */
+       fwspec->param[3] = parent_type;
+
+       return fwspec;
+}
+
 static int pmic_gpio_probe(struct platform_device *pdev)
 {
        struct irq_domain *parent_domain;
@@ -965,6 +992,7 @@ static int pmic_gpio_probe(struct platform_device *pdev)
        struct pmic_gpio_pad *pad, *pads;
        struct pmic_gpio_state *state;
        struct gpio_irq_chip *girq;
+       const struct spmi_device *parent_spmi_dev;
        int ret, npins, i;
        u32 reg;
 
@@ -984,6 +1012,9 @@ static int pmic_gpio_probe(struct platform_device *pdev)
 
        state->dev = &pdev->dev;
        state->map = dev_get_regmap(dev->parent, NULL);
+       parent_spmi_dev = to_spmi_device(dev->parent);
+       state->usid = parent_spmi_dev->usid;
+       state->pid_base = reg >> 8;
 
        pindesc = devm_kcalloc(dev, npins, sizeof(*pindesc), GFP_KERNEL);
        if (!pindesc)
@@ -1059,7 +1090,7 @@ static int pmic_gpio_probe(struct platform_device *pdev)
        girq->fwnode = of_node_to_fwnode(state->dev->of_node);
        girq->parent_domain = parent_domain;
        girq->child_to_parent_hwirq = pmic_gpio_child_to_parent_hwirq;
-       girq->populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_fourcell;
+       girq->populate_parent_alloc_arg = pmic_gpio_populate_parent_fwspec;
        girq->child_offset_to_irq = pmic_gpio_child_offset_to_irq;
        girq->child_irq_domain_ops.translate = pmic_gpio_domain_translate;
 
index 118939a..623d526 100644 (file)
@@ -361,6 +361,7 @@ err_list:
        mutex_lock(&matrix_dev->lock);
        list_del(&matrix_mdev->node);
        mutex_unlock(&matrix_dev->lock);
+       vfio_uninit_group_dev(&matrix_mdev->vdev);
        kfree(matrix_mdev);
 err_dec_available:
        atomic_inc(&matrix_dev->available_instances);
@@ -376,9 +377,10 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
        mutex_lock(&matrix_dev->lock);
        vfio_ap_mdev_reset_queues(matrix_mdev);
        list_del(&matrix_mdev->node);
+       mutex_unlock(&matrix_dev->lock);
+       vfio_uninit_group_dev(&matrix_mdev->vdev);
        kfree(matrix_mdev);
        atomic_inc(&matrix_dev->available_instances);
-       mutex_unlock(&matrix_dev->lock);
 }
 
 static ssize_t name_show(struct mdev_type *mtype,
index 8a2edd6..20e5081 100644 (file)
@@ -919,7 +919,7 @@ static int hantro_probe(struct platform_device *pdev)
                if (!vpu->variant->irqs[i].handler)
                        continue;
 
-               if (vpu->variant->num_clocks > 1) {
+               if (vpu->variant->num_irqs > 1) {
                        irq_name = vpu->variant->irqs[i].name;
                        irq = platform_get_irq_byname(vpu->pdev, irq_name);
                } else {
index c589fe9..825af5f 100644 (file)
@@ -135,7 +135,7 @@ void cedrus_prepare_format(struct v4l2_pix_format *pix_fmt)
                sizeimage = bytesperline * height;
 
                /* Chroma plane size. */
-               sizeimage += bytesperline * height / 2;
+               sizeimage += bytesperline * ALIGN(height, 64) / 2;
 
                break;
 
index 294ba05..bd56de7 100644 (file)
@@ -1714,6 +1714,9 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct mlx5_vdpa_virtqueue *mvq;
 
+       if (!mvdev->actual_features)
+               return;
+
        if (!is_index_valid(mvdev, idx))
                return;
 
@@ -2145,6 +2148,8 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
 
        for (i = 0; i < ndev->mvdev.max_vqs; i++)
                ndev->vqs[i].ready = false;
+
+       ndev->mvdev.cvq.ready = false;
 }
 
 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
index 29a38ec..26e3d90 100644 (file)
@@ -665,13 +665,11 @@ static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
 {
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
-
-       if (vduse_dev_set_status(dev, 0))
-               return -EIO;
+       int ret = vduse_dev_set_status(dev, 0);
 
        vduse_dev_reset(dev);
 
-       return 0;
+       return ret;
 }
 
 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
@@ -1593,8 +1591,10 @@ static int vduse_init(void)
 
        vduse_irq_wq = alloc_workqueue("vduse-irq",
                                WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
-       if (!vduse_irq_wq)
+       if (!vduse_irq_wq) {
+               ret = -ENOMEM;
                goto err_wq;
+       }
 
        ret = vduse_domain_init();
        if (ret)
index 68198e0..a03b5a9 100644 (file)
@@ -565,7 +565,7 @@ static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
 }
 
 struct vfio_pci_walk_info {
-       int (*fn)(struct pci_dev *, void *data);
+       int (*fn)(struct pci_dev *pdev, void *data);
        void *data;
        struct pci_dev *pdev;
        bool slot;
index f41d081..35927ce 100644 (file)
@@ -640,7 +640,7 @@ static int vhost_vdpa_va_map(struct vhost_vdpa *v,
        u64 offset, map_size, map_iova = iova;
        struct vdpa_map_file *map_file;
        struct vm_area_struct *vma;
-       int ret;
+       int ret = 0;
 
        mmap_read_lock(dev->mm);
 
index 588e02f..0a5b540 100644 (file)
@@ -345,8 +345,13 @@ static int virtio_device_of_init(struct virtio_device *dev)
        ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device);
        BUG_ON(ret >= sizeof(compat));
 
+       /*
+        * On powerpc/pseries virtio devices are PCI devices so PCI
+        * vendor/device ids play the role of the "compatible" property.
+        * Simply don't init of_node in this case.
+        */
        if (!of_device_is_compatible(np, compat)) {
-               ret = -EINVAL;
+               ret = 0;
                goto out;
        }
 
index b81fe4f..bf59fae 100644 (file)
@@ -1666,7 +1666,7 @@ config WDT_MTX1
 
 config SIBYTE_WDOG
        tristate "Sibyte SoC hardware watchdog"
-       depends on CPU_SB1 || (MIPS && COMPILE_TEST)
+       depends on CPU_SB1
        help
          Watchdog driver for the built in watchdog hardware in Sibyte
          SoC processors.  There are apparently two watchdog timers
index 4f5e59f..37dd3fe 100644 (file)
 
 #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
 
-#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000')
-#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377')
-#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376')
-#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375')
+static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375";
 
 static int follow_symlinks;
 module_param(follow_symlinks, int, 0444);
@@ -386,12 +383,7 @@ fail_nomem:
 
 static int vboxsf_parse_monolithic(struct fs_context *fc, void *data)
 {
-       unsigned char *options = data;
-
-       if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 &&
-                      options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 &&
-                      options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 &&
-                      options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) {
+       if (data && !memcmp(data, VBSF_MOUNT_SIGNATURE, 4)) {
                vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n");
                return -EINVAL;
        }
index 77e159a..60a4372 100644 (file)
@@ -177,7 +177,7 @@ static int build_merkle_tree(struct file *filp,
         * (level 0) and ascending to the root node (level 'num_levels - 1').
         * Then at the end (level 'num_levels'), calculate the root hash.
         */
-       blocks = (inode->i_size + params->block_size - 1) >>
+       blocks = ((u64)inode->i_size + params->block_size - 1) >>
                 params->log_blocksize;
        for (level = 0; level <= params->num_levels; level++) {
                err = build_merkle_tree_level(filp, level, blocks, params,
index 60ff8af..92df87f 100644 (file)
@@ -89,7 +89,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
         */
 
        /* Compute number of levels and the number of blocks in each level */
-       blocks = (inode->i_size + params->block_size - 1) >> log_blocksize;
+       blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize;
        pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
        while (blocks > 1) {
                if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
index 864b999..90f2189 100644 (file)
@@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
                            struct kvm_device_attr *attr);
 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
-int kvm_pmu_probe_pmuver(void);
 #else
 struct kvm_pmu {
 };
@@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        return 0;
 }
 
-static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
-
 #endif
 
 #endif
index f4c16f1..020a7d5 100644 (file)
@@ -578,11 +578,12 @@ struct btf_func_model {
  * programs only. Should not be used with normal calls and indirect calls.
  */
 #define BPF_TRAMP_F_SKIP_FRAME         BIT(2)
-
 /* Store IP address of the caller on the trampoline stack,
  * so it's available for trampoline's programs.
  */
 #define BPF_TRAMP_F_IP_ARG             BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET     BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
index 041ca7f..0f18df7 100644 (file)
@@ -608,7 +608,6 @@ struct kvm {
        unsigned long mmu_notifier_range_start;
        unsigned long mmu_notifier_range_end;
 #endif
-       long tlbs_dirty;
        struct list_head devices;
        u64 manual_dirty_log_protect;
        struct dentry *debugfs_dentry;
@@ -721,11 +720,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
        return NULL;
 }
 
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
-       return vcpu->vcpu_idx;
-}
-
 #define kvm_for_each_memslot(memslot, slots)                           \
        for (memslot = &slots->memslots[0];                             \
             memslot < slots->memslots + slots->used_slots; memslot++)  \
index 5054802..2512e2f 100644 (file)
@@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
 static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
 #endif
 
+#ifdef CONFIG_KVM
+void kvm_host_pmu_init(struct arm_pmu *pmu);
+#else
+#define kvm_host_pmu_init(x)   do { } while(0)
+#endif
+
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
 struct arm_pmu *armpmu_alloc_atomic(void);
index 21c5386..ab5348e 100644 (file)
@@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
                     u8 rt_family, unsigned char *flags, bool skip_oif);
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
-                   int nh_weight, u8 rt_family);
+                   int nh_weight, u8 rt_family, u32 nh_tclassid);
 #endif  /* _NET_FIB_H */
index af0fc13..618d1f4 100644 (file)
@@ -2818,13 +2818,13 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
  * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag
  * when they are able to replace in-use PTK keys according to the following
  * requirements:
- * 1) They do not hand over frames decrypted with the old key to
-      mac80211 once the call to set_key() with command %DISABLE_KEY has been
-      completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key,
+ * 1) They do not hand over frames decrypted with the old key to mac80211
+      once the call to set_key() with command %DISABLE_KEY has been completed,
    2) either drop or continue to use the old key for any outgoing frames queued
       at the time of the key deletion (including re-transmits),
    3) never send out a frame queued prior to the set_key() %SET_KEY command
-      encrypted with the new key and
+      encrypted with the new key when also needing
+      @IEEE80211_KEY_FLAG_GENERATE_IV and
    4) never send out a frame unencrypted when it should be encrypted.
    Mac80211 will not queue any new frames for a deleted key to the driver.
  */
index 10e1777..28085b9 100644 (file)
@@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
                struct fib_nh_common *nhc = &nhi->fib_nhc;
                int weight = nhg->nh_entries[i].weight;
 
-               if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0)
+               if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
                        return -EMSGSIZE;
        }
 
index 6d7b12c..bf79f3a 100644 (file)
@@ -11,6 +11,7 @@
 #include <uapi/linux/pkt_sched.h>
 
 #define DEFAULT_TX_QUEUE_LEN   1000
+#define STAB_SIZE_LOG_MAX      30
 
 struct qdisc_walker {
        int     stop;
index c005c3c..ae929e2 100644 (file)
@@ -488,8 +488,10 @@ struct sock {
        u8                      sk_prefer_busy_poll;
        u16                     sk_busy_poll_budget;
 #endif
+       spinlock_t              sk_peer_lock;
        struct pid              *sk_peer_pid;
        const struct cred       *sk_peer_cred;
+
        long                    sk_rcvtimeo;
        ktime_t                 sk_stamp;
 #if BITS_PER_LONG==32
@@ -1623,7 +1625,36 @@ void release_sock(struct sock *sk);
                                SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)   spin_unlock(&((__sk)->sk_lock.slock))
 
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
+
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken:
+ *
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ *
+ * return true if slow path is taken:
+ *
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+static inline bool lock_sock_fast(struct sock *sk)
+{
+       /* The sk_lock has mutex_lock() semantics here. */
+       mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
+       return __lock_sock_fast(sk);
+}
+
+/* fast socket lock variant for caller already holding a [different] socket lock */
+static inline bool lock_sock_fast_nested(struct sock *sk)
+{
+       mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+
+       return __lock_sock_fast(sk);
+}
 
 /**
  * unlock_sock_fast - complement of lock_sock_fast
index 989e151..7a08ed2 100644 (file)
@@ -98,6 +98,7 @@ struct snd_rawmidi_file {
        struct snd_rawmidi *rmidi;
        struct snd_rawmidi_substream *input;
        struct snd_rawmidi_substream *output;
+       unsigned int user_pversion;     /* supported protocol version */
 };
 
 struct snd_rawmidi_str {
index 1d84ec9..5859ca0 100644 (file)
@@ -784,6 +784,7 @@ struct snd_rawmidi_status {
 
 #define SNDRV_RAWMIDI_IOCTL_PVERSION   _IOR('W', 0x00, int)
 #define SNDRV_RAWMIDI_IOCTL_INFO       _IOR('W', 0x01, struct snd_rawmidi_info)
+#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
 #define SNDRV_RAWMIDI_IOCTL_PARAMS     _IOWR('W', 0x10, struct snd_rawmidi_params)
 #define SNDRV_RAWMIDI_IOCTL_STATUS     _IOWR('W', 0x20, struct snd_rawmidi_status)
 #define SNDRV_RAWMIDI_IOCTL_DROP       _IOW('W', 0x30, int)
index d6731c3..9abcc33 100644 (file)
@@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
                const struct btf_type *mtype, *ptype;
                struct bpf_prog *prog;
                u32 moff;
+               u32 flags;
 
                moff = btf_member_bit_offset(t, member) / 8;
                ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
                tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
                tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+               flags = st_ops->func_models[i].ret_size > 0 ?
+                       BPF_TRAMP_F_RET_FENTRY_RET : 0;
                err = arch_prepare_bpf_trampoline(NULL, image,
                                                  st_map->image + PAGE_SIZE,
-                                                 &st_ops->func_models[i], 0,
-                                                 tprogs, NULL);
+                                                 &st_ops->func_models[i],
+                                                 flags, tprogs, NULL);
                if (err < 0)
                        goto reset_unlock;
 
index 9f4636d..d6b7dfd 100644 (file)
@@ -827,7 +827,7 @@ int bpf_jit_charge_modmem(u32 pages)
 {
        if (atomic_long_add_return(pages, &bpf_jit_current) >
            (bpf_jit_limit >> PAGE_SHIFT)) {
-               if (!capable(CAP_SYS_ADMIN)) {
+               if (!bpf_capable()) {
                        atomic_long_sub(pages, &bpf_jit_current);
                        return -EPERM;
                }
index 8afa869..570b0c9 100644 (file)
@@ -6574,22 +6574,29 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
 
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 {
-       /* Don't associate the sock with unrelated interrupted task's cgroup. */
-       if (in_interrupt())
-               return;
+       struct cgroup *cgroup;
 
        rcu_read_lock();
+       /* Don't associate the sock with unrelated interrupted task's cgroup. */
+       if (in_interrupt()) {
+               cgroup = &cgrp_dfl_root.cgrp;
+               cgroup_get(cgroup);
+               goto out;
+       }
+
        while (true) {
                struct css_set *cset;
 
                cset = task_css_set(current);
                if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-                       skcd->cgroup = cset->dfl_cgrp;
-                       cgroup_bpf_get(cset->dfl_cgrp);
+                       cgroup = cset->dfl_cgrp;
                        break;
                }
                cpu_relax();
        }
+out:
+       skcd->cgroup = cgroup;
+       cgroup_bpf_get(cgroup);
        rcu_read_unlock();
 }
 
index 2eb0e55..b5f4ef3 100644 (file)
@@ -552,6 +552,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
        __skb->gso_segs = skb_shinfo(skb)->gso_segs;
 }
 
+static struct proto bpf_dummy_proto = {
+       .name   = "bpf_dummy",
+       .owner  = THIS_MODULE,
+       .obj_size = sizeof(struct sock),
+};
+
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
@@ -596,20 +602,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
                break;
        }
 
-       sk = kzalloc(sizeof(struct sock), GFP_USER);
+       sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
        if (!sk) {
                kfree(data);
                kfree(ctx);
                return -ENOMEM;
        }
-       sock_net_set(sk, net);
        sock_init_data(NULL, sk);
 
        skb = build_skb(data, 0);
        if (!skb) {
                kfree(data);
                kfree(ctx);
-               kfree(sk);
+               sk_free(sk);
                return -ENOMEM;
        }
        skb->sk = sk;
@@ -682,8 +687,7 @@ out:
        if (dev && dev != net->loopback_dev)
                dev_put(dev);
        kfree_skb(skb);
-       bpf_sk_storage_free(sk);
-       kfree(sk);
+       sk_free(sk);
        kfree(ctx);
        return ret;
 }
index 3523c8c..f3d7511 100644 (file)
@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
                                        int ifindex,
                                        struct br_ip *saddr)
 {
-       lockdep_assert_held_once(&brmctx->br->multicast_lock);
-
        write_seqcount_begin(&querier->seq);
        querier->port_ifidx = ifindex;
        memcpy(&querier->addr, saddr, sizeof(*saddr));
@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_bridge *br,
 
        brmctx->ip4_other_query.delay_time = 0;
        brmctx->ip4_querier.port_ifidx = 0;
-       seqcount_init(&brmctx->ip4_querier.seq);
+       seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
        brmctx->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
        brmctx->multicast_mld_version = 1;
        brmctx->ip6_other_query.delay_time = 0;
        brmctx->ip6_querier.port_ifidx = 0;
-       seqcount_init(&brmctx->ip6_querier.seq);
+       seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
 #endif
 
        timer_setup(&brmctx->ip4_mc_router_timer,
index b4cef3a..e8136db 100644 (file)
@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
 struct bridge_mcast_querier {
        struct br_ip addr;
        int port_ifidx;
-       seqcount_t seq;
+       seqcount_spinlock_t seq;
 };
 
 /* IGMP/MLD statistics */
index 8c39283..f0cb383 100644 (file)
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
        if (addr_len > MAX_ADDR_LEN)
                return -EINVAL;
 
+       ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+       if (ha && !memcmp(addr, ha->addr, addr_len) &&
+           (!addr_type || addr_type == ha->type))
+               goto found_it;
+
        while (*ins_point) {
                int diff;
 
@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
                } else if (diff > 0) {
                        ins_point = &parent->rb_right;
                } else {
+found_it:
                        if (exclusive)
                                return -EEXIST;
                        if (global) {
index 512e629..c1601f7 100644 (file)
@@ -1376,6 +1376,16 @@ set_sndbuf:
 }
 EXPORT_SYMBOL(sock_setsockopt);
 
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+       const struct cred *cred;
+
+       spin_lock(&sk->sk_peer_lock);
+       cred = get_cred(sk->sk_peer_cred);
+       spin_unlock(&sk->sk_peer_lock);
+
+       return cred;
+}
 
 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
                          struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                struct ucred peercred;
                if (len > sizeof(peercred))
                        len = sizeof(peercred);
+
+               spin_lock(&sk->sk_peer_lock);
                cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+               spin_unlock(&sk->sk_peer_lock);
+
                if (copy_to_user(optval, &peercred, len))
                        return -EFAULT;
                goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
        case SO_PEERGROUPS:
        {
+               const struct cred *cred;
                int ret, n;
 
-               if (!sk->sk_peer_cred)
+               cred = sk_get_peer_cred(sk);
+               if (!cred)
                        return -ENODATA;
 
-               n = sk->sk_peer_cred->group_info->ngroups;
+               n = cred->group_info->ngroups;
                if (len < n * sizeof(gid_t)) {
                        len = n * sizeof(gid_t);
+                       put_cred(cred);
                        return put_user(len, optlen) ? -EFAULT : -ERANGE;
                }
                len = n * sizeof(gid_t);
 
-               ret = groups_to_user((gid_t __user *)optval,
-                                    sk->sk_peer_cred->group_info);
+               ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+               put_cred(cred);
                if (ret)
                        return ret;
                goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
                sk->sk_frag.page = NULL;
        }
 
-       if (sk->sk_peer_cred)
-               put_cred(sk->sk_peer_cred);
+       /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+       put_cred(sk->sk_peer_cred);
        put_pid(sk->sk_peer_pid);
+
        if (likely(sk->sk_net_refcnt))
                put_net(sock_net(sk));
        sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
        sk->sk_peer_pid         =       NULL;
        sk->sk_peer_cred        =       NULL;
+       spin_lock_init(&sk->sk_peer_lock);
+
        sk->sk_write_pending    =       0;
        sk->sk_rcvlowat         =       1;
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
@@ -3210,24 +3230,8 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- *   sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- *   sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
 {
-       /* The sk_lock has mutex_lock() semantics here. */
-       mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
-
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
 
@@ -3256,7 +3260,7 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
        spin_unlock_bh(&sk->sk_lock.slock);
        return true;
 }
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);
 
 int sock_gettstamp(struct socket *sock, void __user *userstamp,
                   bool timeval, bool time32)
index b42c429..3364cb9 100644 (file)
@@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);
 
 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
-                   int nh_weight, u8 rt_family)
+                   int nh_weight, u8 rt_family, u32 nh_tclassid)
 {
        const struct net_device *dev = nhc->nhc_dev;
        struct rtnexthop *rtnh;
@@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
 
        rtnh->rtnh_flags = flags;
 
+       if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
+               goto nla_put_failure;
+
        /* length of rtnetlink header + attributes */
        rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
 
@@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
        }
 
        for_nexthops(fi) {
-               if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
-                                   AF_INET) < 0)
-                       goto nla_put_failure;
+               u32 nh_tclassid = 0;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-               if (nh->nh_tclassid &&
-                   nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
-                       goto nla_put_failure;
+               nh_tclassid = nh->nh_tclassid;
 #endif
+               if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+                                   AF_INET, nh_tclassid) < 0)
+                       goto nla_put_failure;
        } endfor_nexthops(fi);
 
 mp_end:
index b88e0f3..8265c67 100644 (file)
@@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
-static int __net_init iptable_raw_table_init(struct net *net)
+static int iptable_raw_table_init(struct net *net)
 {
        struct ipt_replace *repl;
        const struct xt_table *table = &packet_raw;
index 8851c94..2a7825a 100644 (file)
@@ -1053,7 +1053,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        __be16 dport;
        u8  tos;
        int err, is_udplite = IS_UDPLITE(sk);
-       int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+       int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
        struct sk_buff *skb;
        struct ip_options_data opt_copy;
@@ -1361,7 +1361,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
        }
 
        up->len += size;
-       if (!(up->corkflag || (flags&MSG_MORE)))
+       if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
                ret = udp_push_pending_frames(sk);
        if (!ret)
                ret = size;
@@ -2662,9 +2662,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
        switch (optname) {
        case UDP_CORK:
                if (val != 0) {
-                       up->corkflag = 1;
+                       WRITE_ONCE(up->corkflag, 1);
                } else {
-                       up->corkflag = 0;
+                       WRITE_ONCE(up->corkflag, 0);
                        lock_sock(sk);
                        push_pending_frames(sk);
                        release_sock(sk);
@@ -2787,7 +2787,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 
        switch (optname) {
        case UDP_CORK:
-               val = up->corkflag;
+               val = READ_ONCE(up->corkflag);
                break;
 
        case UDP_ENCAP:
index de2cf39..a579ea1 100644 (file)
@@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
         * things we don't know, ie. tcp syn flag or ports).  If the
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
+       acpar.fragoff = 0;
        acpar.hotdrop = false;
        acpar.state   = state;
 
index dbc2240..9b9ef09 100644 (file)
@@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
                        goto nla_put_failure;
 
                if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
-                                   rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
+                                   rt->fib6_nh->fib_nh_weight, AF_INET6,
+                                   0) < 0)
                        goto nla_put_failure;
 
                list_for_each_entry_safe(sibling, next_sibling,
                                         &rt->fib6_siblings, fib6_siblings) {
                        if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
                                            sibling->fib6_nh->fib_nh_weight,
-                                           AF_INET6) < 0)
+                                           AF_INET6, 0) < 0)
                                goto nla_put_failure;
                }
 
index ea53847..e505bb0 100644 (file)
@@ -1303,7 +1303,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        int addr_len = msg->msg_namelen;
        bool connected = false;
        int ulen = len;
-       int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+       int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
        int err;
        int is_udplite = IS_UDPLITE(sk);
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
index efbefcb..7cab1cf 100644 (file)
@@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void)
        atomic_set(&newtbl->entries,  0);
        spin_lock_init(&newtbl->gates_lock);
        spin_lock_init(&newtbl->walk_lock);
-       rhashtable_init(&newtbl->rhead, &mesh_rht_params);
+       if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+               kfree(newtbl);
+               return NULL;
+       }
 
        return newtbl;
 }
index 204830a..3fbd0b9 100644 (file)
@@ -2,6 +2,7 @@
 /*
  * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
  * Copyright 2012-2013, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
  */
 
 #include "mesh.h"
@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
 
        /* only transmit to PS STA with announced, non-zero awake window */
        if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
-           (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+           (!elems->awake_window || !get_unaligned_le16(elems->awake_window)))
                return;
 
        if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
index e5935e3..8c64161 100644 (file)
@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
        int mcast_rate;
        bool use_basicrate = false;
 
-       if (ieee80211_is_tx_data(txrc->skb) &&
-           info->flags & IEEE80211_TX_CTL_NO_ACK)
-               return false;
-
        if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
                __rate_control_send_low(txrc->hw, sband, pubsta, info,
                                        txrc->rate_idx_mask);
index 99ed68f..c4071b0 100644 (file)
@@ -4131,7 +4131,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
                if (!bssid)
                        return false;
                if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
-                   ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+                   ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+                   !is_valid_ether_addr(hdr->addr2))
                        return false;
                if (ieee80211_is_beacon(hdr->frame_control))
                        return true;
index 2d1193e..8921088 100644 (file)
@@ -2209,7 +2209,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                        }
 
                        vht_mcs = iterator.this_arg[4] >> 4;
+                       if (vht_mcs > 11)
+                               vht_mcs = 0;
                        vht_nss = iterator.this_arg[4] & 0xF;
+                       if (!vht_nss || vht_nss > 8)
+                               vht_nss = 1;
                        break;
 
                /*
@@ -3380,6 +3384,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
        if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
                goto out;
 
+       /* If n == 2, the "while (*frag_tail)" loop above didn't execute
+        * and  frag_tail should be &skb_shinfo(head)->frag_list.
+        * However, ieee80211_amsdu_prepare_head() can reallocate it.
+        * Reload frag_tail to have it pointing to the correct place.
+        */
+       if (n == 2)
+               frag_tail = &skb_shinfo(head)->frag_list;
+
        /*
         * Pad out the previous subframe to a multiple of 4 by adding the
         * padding to the next one, that's being added. Note that head->len
index bca47fa..4eed23e 100644 (file)
@@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
                        return RX_DROP_UNUSABLE;
        }
 
+       /* reload hdr - skb might have been reallocated */
+       hdr = (void *)rx->skb->data;
+
        data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
        if (!rx->sta || data_len < 0)
                return RX_DROP_UNUSABLE;
@@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
                        return RX_DROP_UNUSABLE;
        }
 
+       /* reload hdr - skb might have been reallocated */
+       hdr = (void *)rx->skb->data;
+
        data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
        if (!rx->sta || data_len < 0)
                return RX_DROP_UNUSABLE;
index f48eb63..292374f 100644 (file)
@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
        struct sock *sk;
 
        net = sock_net(in_skb->sk);
-       msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+       msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
        if (!msk)
                goto out_nosk;
 
index c4f9a5c..050eea2 100644 (file)
@@ -1718,9 +1718,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
 
        list_for_each_entry(entry, &pernet->local_addr_list, list) {
                if (addresses_equal(&entry->addr, &addr.addr, true)) {
-                       ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
-                       if (ret)
-                               return ret;
+                       mptcp_nl_addr_backup(net, &entry->addr, bkup);
 
                        if (bkup)
                                entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
index dbcebf5..e5df0b5 100644 (file)
@@ -2735,7 +2735,7 @@ cleanup:
        inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
        mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-               bool slow = lock_sock_fast(ssk);
+               bool slow = lock_sock_fast_nested(ssk);
 
                sock_orphan(ssk);
                unlock_sock_fast(ssk, slow);
index d3e6fd1..dc98467 100644 (file)
@@ -709,7 +709,7 @@ int mptcp_token_new_connect(struct sock *sk);
 void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
                        struct mptcp_sock *msk);
 bool mptcp_token_exists(u32 token);
-struct mptcp_sock *mptcp_token_get_sock(u32 token);
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
 struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
                                         long *s_num);
 void mptcp_token_destroy(struct mptcp_sock *msk);
index 1de7ce8..6172f38 100644 (file)
@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
        struct mptcp_sock *msk;
        int local_id;
 
-       msk = mptcp_token_get_sock(subflow_req->token);
+       msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
        if (!msk) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
                return NULL;
index 3712778..7f22526 100644 (file)
@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
 
        e->valid = 0;
 
-       msk = mptcp_token_get_sock(e->token);
+       msk = mptcp_token_get_sock(net, e->token);
        if (!msk) {
                spin_unlock_bh(&join_entry_locks[i]);
                return false;
        }
 
-       /* If this fails, the token got re-used in the mean time by another
-        * mptcp socket in a different netns, i.e. entry is outdated.
-        */
-       if (!net_eq(sock_net((struct sock *)msk), net))
-               goto err_put;
-
        subflow_req->remote_nonce = e->remote_nonce;
        subflow_req->local_nonce = e->local_nonce;
        subflow_req->backup = e->backup;
@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
        subflow_req->msk = msk;
        spin_unlock_bh(&join_entry_locks[i]);
        return true;
-
-err_put:
-       spin_unlock_bh(&join_entry_locks[i]);
-       sock_put((struct sock *)msk);
-       return false;
 }
 
 void __init mptcp_join_cookie_init(void)
index a98e554..e581b34 100644 (file)
@@ -231,6 +231,7 @@ found:
 
 /**
  * mptcp_token_get_sock - retrieve mptcp connection sock using its token
+ * @net: restrict to this namespace
  * @token: token of the mptcp connection to retrieve
  *
  * This function returns the mptcp connection structure with the given token.
@@ -238,7 +239,7 @@ found:
  *
  * returns NULL if no connection with the given token value exists.
  */
-struct mptcp_sock *mptcp_token_get_sock(u32 token)
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
 {
        struct hlist_nulls_node *pos;
        struct token_bucket *bucket;
@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)
 again:
        sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
                msk = mptcp_sk(sk);
-               if (READ_ONCE(msk->token) != token)
+               if (READ_ONCE(msk->token) != token ||
+                   !net_eq(sock_net(sk), net))
                        continue;
+
                if (!refcount_inc_not_zero(&sk->sk_refcnt))
                        goto not_found;
-               if (READ_ONCE(msk->token) != token) {
+
+               if (READ_ONCE(msk->token) != token ||
+                   !net_eq(sock_net(sk), net)) {
                        sock_put(sk);
                        goto again;
                }
index e1bd6f0..5d984be 100644 (file)
@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
                            GFP_USER);
        KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
        mptcp_token_init_request((struct request_sock *)req);
+       sock_net_set((struct sock *)req, &init_net);
        return req;
 }
 
@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test)
        KUNIT_ASSERT_EQ(test, 0,
                        mptcp_token_new_request((struct request_sock *)req));
        KUNIT_EXPECT_NE(test, 0, (int)req->token);
-       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token));
 
        /* cleanup */
        mptcp_token_destroy_request((struct request_sock *)req);
@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test)
        msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
        KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
        refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+       sock_net_set((struct sock *)msk, &init_net);
        return msk;
 }
 
@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test)
                        mptcp_token_new_connect((struct sock *)icsk));
        KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
        KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
-       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token));
        KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
 
        mptcp_token_destroy(msk);
-       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token));
 }
 
 static void mptcp_token_test_accept(struct kunit *test)
@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test)
                        mptcp_token_new_request((struct request_sock *)req));
        msk->token = req->token;
        mptcp_token_accept(req, msk);
-       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
 
        /* this is now a no-op */
        mptcp_token_destroy_request((struct request_sock *)req);
-       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+       KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
 
        /* cleanup */
        mptcp_token_destroy(msk);
@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test)
 
        /* simulate race on removal */
        refcount_set(&sk->sk_refcnt, 0);
-       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+       KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token));
 
        /* cleanup */
        mptcp_token_destroy(msk);
index 6186358..6e39130 100644 (file)
@@ -130,11 +130,11 @@ htable_size(u8 hbits)
 {
        size_t hsize;
 
-       /* We must fit both into u32 in jhash and size_t */
+       /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
        if (hbits > 31)
                return 0;
        hsize = jhash_size(hbits);
-       if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+       if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
            < hsize)
                return 0;
 
index c100c6b..2c467c4 100644 (file)
@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
        int idx;
 
        /* Compute size and mask */
+       if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+               pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+               ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+       }
        ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
        ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
 
index 94e18fb..770a631 100644 (file)
@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
 #define GC_SCAN_INTERVAL       (120u * HZ)
 #define GC_SCAN_MAX_DURATION   msecs_to_jiffies(10)
 
-#define MAX_CHAINLEN   64u
+#define MIN_CHAINLEN   8u
+#define MAX_CHAINLEN   (32u - MIN_CHAINLEN)
 
 static struct conntrack_gc_work conntrack_gc_work;
 
@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
 static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+                             unsigned int zoneid,
                              const struct net *net)
 {
        struct {
                struct nf_conntrack_man src;
                union nf_inet_addr dst_addr;
+               unsigned int zone;
                u32 net_mix;
                u16 dport;
                u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
        /* The direction must be ignored, so handle usable members manually. */
        combined.src = tuple->src;
        combined.dst_addr = tuple->dst.u3;
+       combined.zone = zoneid;
        combined.net_mix = net_hash_mix(net);
        combined.dport = (__force __u16)tuple->dst.u.all;
        combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
 
 static u32 __hash_conntrack(const struct net *net,
                            const struct nf_conntrack_tuple *tuple,
+                           unsigned int zoneid,
                            unsigned int size)
 {
-       return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+       return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
 }
 
 static u32 hash_conntrack(const struct net *net,
-                         const struct nf_conntrack_tuple *tuple)
+                         const struct nf_conntrack_tuple *tuple,
+                         unsigned int zoneid)
 {
-       return scale_hash(hash_conntrack_raw(tuple, net));
+       return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
 }
 
 static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
        do {
                sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
        clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple)
 {
-       return __nf_conntrack_find_get(net, zone, tuple,
-                                      hash_conntrack_raw(tuple, net));
+       unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       struct nf_conntrack_tuple_hash *thash;
+
+       thash = __nf_conntrack_find_get(net, zone, tuple,
+                                       hash_conntrack_raw(tuple, zone_id, net));
+
+       if (thash)
+               return thash;
+
+       rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+       if (rid != zone_id)
+               return __nf_conntrack_find_get(net, zone, tuple,
+                                              hash_conntrack_raw(tuple, rid, net));
+       return thash;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
+       unsigned int max_chainlen;
        unsigned int chainlen = 0;
        unsigned int sequence;
        int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        do {
                sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
        /* See if there's one in the list already, including reverse */
        hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                    zone, net))
                        goto out;
 
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
@@ -1103,8 +1131,8 @@ drop:
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
 {
+       unsigned int chainlen = 0, sequence, max_chainlen;
        const struct nf_conntrack_zone *zone;
-       unsigned int chainlen = 0, sequence;
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
                hash = scale_hash(hash);
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
        /* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                goto dying;
        }
 
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN) {
+               if (chainlen++ > max_chainlen) {
 chaintoolong:
                        nf_ct_add_to_dying_list(ct);
                        NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
        rcu_read_lock();
  begin:
        nf_conntrack_get_ht(&ct_hash, &hsize);
-       hash = __hash_conntrack(net, tuple, hsize);
+       hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
        struct nf_conntrack_tuple_hash *h;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_zone tmp;
+       u32 hash, zone_id, rid;
        struct nf_conn *ct;
-       u32 hash;
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                             dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
 
        /* look for tuple match */
        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-       hash = hash_conntrack_raw(&tuple, state->net);
+
+       zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       hash = hash_conntrack_raw(&tuple, zone_id, state->net);
        h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+       if (!h) {
+               rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+               if (zone_id != rid) {
+                       u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+                       h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+               }
+       }
+
        if (!h) {
                h = init_conntrack(state->net, tmpl, &tuple,
                                   skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
        spinlock_t *lockp;
 
        for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+               struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+               if (hlist_nulls_empty(hslot))
+                       continue;
+
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
                nf_conntrack_lock(lockp);
-               if (*bucket < nf_conntrack_htable_size) {
-                       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
-                               if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
-                                       continue;
-                               /* All nf_conn objects are added to hash table twice, one
-                                * for original direction tuple, once for the reply tuple.
-                                *
-                                * Exception: In the IPS_NAT_CLASH case, only the reply
-                                * tuple is added (the original tuple already existed for
-                                * a different object).
-                                *
-                                * We only need to call the iterator once for each
-                                * conntrack, so we just use the 'reply' direction
-                                * tuple while iterating.
-                                */
-                               ct = nf_ct_tuplehash_to_ctrack(h);
-                               if (iter(ct, data))
-                                       goto found;
-                       }
+               hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+                       if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+                               continue;
+                       /* All nf_conn objects are added to hash table twice, one
+                        * for original direction tuple, once for the reply tuple.
+                        *
+                        * Exception: In the IPS_NAT_CLASH case, only the reply
+                        * tuple is added (the original tuple already existed for
+                        * a different object).
+                        *
+                        * We only need to call the iterator once for each
+                        * conntrack, so we just use the 'reply' direction
+                        * tuple while iterating.
+                        */
+                       ct = nf_ct_tuplehash_to_ctrack(h);
+                       if (iter(ct, data))
+                               goto found;
                }
                spin_unlock(lockp);
                local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
 static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
                                  void *data, u32 portid, int report)
 {
-       unsigned int bucket = 0, sequence;
+       unsigned int bucket = 0;
        struct nf_conn *ct;
 
        might_sleep();
 
-       for (;;) {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-
-               while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-                       /* Time to push up daises... */
+       mutex_lock(&nf_conntrack_mutex);
+       while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+               /* Time to push up daises... */
 
-                       nf_ct_delete(ct, portid, report);
-                       nf_ct_put(ct);
-                       cond_resched();
-               }
-
-               if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
-                       break;
-               bucket = 0;
+               nf_ct_delete(ct, portid, report);
+               nf_ct_put(ct);
+               cond_resched();
        }
+       mutex_unlock(&nf_conntrack_mutex);
 }
 
 struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
        if (!hash)
                return -ENOMEM;
 
+       mutex_lock(&nf_conntrack_mutex);
        old_size = nf_conntrack_htable_size;
        if (old_size == hashsize) {
+               mutex_unlock(&nf_conntrack_mutex);
                kvfree(hash);
                return 0;
        }
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 
        for (i = 0; i < nf_conntrack_htable_size; i++) {
                while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+                       unsigned int zone_id;
+
                        h = hlist_nulls_entry(nf_conntrack_hash[i].first,
                                              struct nf_conntrack_tuple_hash, hnnode);
                        ct = nf_ct_tuplehash_to_ctrack(h);
                        hlist_nulls_del_rcu(&h->hnnode);
+
+                       zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
                        bucket = __hash_conntrack(nf_ct_net(ct),
-                                                 &h->tuple, hashsize);
+                                                 &h->tuple, zone_id, hashsize);
                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
                }
        }
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
        nf_conntrack_all_unlock();
        local_bh_enable();
 
+       mutex_unlock(&nf_conntrack_mutex);
+
        synchronize_net();
        kvfree(old_hash);
        return 0;
index 7008961..2731176 100644 (file)
@@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net,
+           const struct nf_conntrack_zone *zone,
+           const struct nf_conntrack_tuple *tuple)
 {
        unsigned int hash;
        struct {
                struct nf_conntrack_man src;
                u32 net_mix;
                u32 protonum;
+               u32 zone;
        } __aligned(SIPHASH_ALIGNMENT) combined;
 
        get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 
        /* Original src, to ensure we map it consistently if poss. */
        combined.src = tuple->src;
-       combined.net_mix = net_hash_mix(n);
+       combined.net_mix = net_hash_mix(net);
        combined.protonum = tuple->dst.protonum;
 
+       /* Zone ID can be used provided its valid for both directions */
+       if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+               combined.zone = zone->id;
+
        hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
 
        return reciprocal_scale(hash, nf_nat_htable_size);
@@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
                     struct nf_conntrack_tuple *result,
                     const struct nf_nat_range2 *range)
 {
-       unsigned int h = hash_by_src(net, tuple);
+       unsigned int h = hash_by_src(net, zone, tuple);
        const struct nf_conn *ct;
 
        hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
                unsigned int srchash;
                spinlock_t *lock;
 
-               srchash = hash_by_src(net,
+               srchash = hash_by_src(net, nf_ct_zone(ct),
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
                lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
                spin_lock_bh(lock);
@@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
        unsigned int h;
 
-       h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+       h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
        hlist_del_rcu(&ct->nat_bysource);
        spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
index 8e8a65d..acd73f7 100644 (file)
@@ -9,8 +9,19 @@
 
 #include <net/netfilter/nf_nat_masquerade.h>
 
+struct masq_dev_work {
+       struct work_struct work;
+       struct net *net;
+       union nf_inet_addr addr;
+       int ifindex;
+       int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT  16
+
 static DEFINE_MUTEX(masq_mutex);
 static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
 
-static int device_cmp(struct nf_conn *i, void *ifindex)
+static void iterate_cleanup_work(struct work_struct *work)
+{
+       struct masq_dev_work *w;
+
+       w = container_of(work, struct masq_dev_work, work);
+
+       nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+
+       put_net(w->net);
+       kfree(w);
+       atomic_dec(&masq_worker_count);
+       module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+                                int ifindex,
+                                int (*iter)(struct nf_conn *i, void *data),
+                                gfp_t gfp_flags)
+{
+       struct masq_dev_work *w;
+
+       if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+               return;
+
+       net = maybe_get_net(net);
+       if (!net)
+               return;
+
+       if (!try_module_get(THIS_MODULE))
+               goto err_module;
+
+       w = kzalloc(sizeof(*w), gfp_flags);
+       if (w) {
+               /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+               atomic_inc(&masq_worker_count);
+
+               INIT_WORK(&w->work, iterate_cleanup_work);
+               w->ifindex = ifindex;
+               w->net = net;
+               w->iter = iter;
+               if (addr)
+                       w->addr = *addr;
+               schedule_work(&w->work);
+               return;
+       }
+
+       module_put(THIS_MODULE);
+ err_module:
+       put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
 {
        const struct nf_conn_nat *nat = nfct_nat(i);
+       const struct masq_dev_work *w = arg;
 
        if (!nat)
                return 0;
-       return nat->masq_index == (int)(long)ifindex;
+       return nat->masq_index == w->ifindex;
 }
 
 static int masq_device_event(struct notifier_block *this,
@@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
                 * and forget them.
                 */
 
-               nf_ct_iterate_cleanup_net(net, device_cmp,
-                                         (void *)(long)dev->ifindex, 0, 0);
+               nf_nat_masq_schedule(net, NULL, dev->ifindex,
+                                    device_cmp, GFP_KERNEL);
        }
 
        return NOTIFY_DONE;
@@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,
 
 static int inet_cmp(struct nf_conn *ct, void *ptr)
 {
-       struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-       struct net_device *dev = ifa->ifa_dev->dev;
        struct nf_conntrack_tuple *tuple;
+       struct masq_dev_work *w = ptr;
 
-       if (!device_cmp(ct, (void *)(long)dev->ifindex))
+       if (!device_cmp(ct, ptr))
                return 0;
 
        tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 
-       return ifa->ifa_address == tuple->dst.u3.ip;
+       return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
 }
 
 static int masq_inet_event(struct notifier_block *this,
                           unsigned long event,
                           void *ptr)
 {
-       struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
-       struct net *net = dev_net(idev->dev);
+       const struct in_ifaddr *ifa = ptr;
+       const struct in_device *idev;
+       const struct net_device *dev;
+       union nf_inet_addr addr;
+
+       if (event != NETDEV_DOWN)
+               return NOTIFY_DONE;
 
        /* The masq_dev_notifier will catch the case of the device going
         * down.  So if the inetdev is dead and being destroyed we have
         * no work to do.  Otherwise this is an individual address removal
         * and we have to perform the flush.
         */
+       idev = ifa->ifa_dev;
        if (idev->dead)
                return NOTIFY_DONE;
 
-       if (event == NETDEV_DOWN)
-               nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+       memset(&addr, 0, sizeof(addr));
+
+       addr.ip = ifa->ifa_address;
+
+       dev = idev->dev;
+       nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+                            inet_cmp, GFP_KERNEL);
 
        return NOTIFY_DONE;
 }
@@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-static atomic_t v6_worker_count __read_mostly;
-
 static int
 nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
                       const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
 
-struct masq_dev_work {
-       struct work_struct work;
-       struct net *net;
-       struct in6_addr addr;
-       int ifindex;
-};
-
-static int inet6_cmp(struct nf_conn *ct, void *work)
-{
-       struct masq_dev_work *w = (struct masq_dev_work *)work;
-       struct nf_conntrack_tuple *tuple;
-
-       if (!device_cmp(ct, (void *)(long)w->ifindex))
-               return 0;
-
-       tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
-       return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
-       struct masq_dev_work *w;
-
-       w = container_of(work, struct masq_dev_work, work);
-
-       nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
-
-       put_net(w->net);
-       kfree(w);
-       atomic_dec(&v6_worker_count);
-       module_put(THIS_MODULE);
-}
-
 /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
  *
  * Defer it to the system workqueue.
@@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
 {
        struct inet6_ifaddr *ifa = ptr;
        const struct net_device *dev;
-       struct masq_dev_work *w;
-       struct net *net;
+       union nf_inet_addr addr;
 
-       if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+       if (event != NETDEV_DOWN)
                return NOTIFY_DONE;
 
        dev = ifa->idev->dev;
-       net = maybe_get_net(dev_net(dev));
-       if (!net)
-               return NOTIFY_DONE;
 
-       if (!try_module_get(THIS_MODULE))
-               goto err_module;
+       memset(&addr, 0, sizeof(addr));
 
-       w = kmalloc(sizeof(*w), GFP_ATOMIC);
-       if (w) {
-               atomic_inc(&v6_worker_count);
-
-               INIT_WORK(&w->work, iterate_cleanup_work);
-               w->ifindex = dev->ifindex;
-               w->net = net;
-               w->addr = ifa->addr;
-               schedule_work(&w->work);
+       addr.in6 = ifa->addr;
 
-               return NOTIFY_DONE;
-       }
-
-       module_put(THIS_MODULE);
- err_module:
-       put_net(net);
+       nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+                            GFP_ATOMIC);
        return NOTIFY_DONE;
 }
 
index 081437d..b9546de 100644 (file)
@@ -4336,7 +4336,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
        if (ops->privsize != NULL)
                size = ops->privsize(nla, &desc);
        alloc_size = sizeof(*set) + size + udlen;
-       if (alloc_size < size)
+       if (alloc_size < size || alloc_size > INT_MAX)
                return -ENOMEM;
        set = kvzalloc(alloc_size, GFP_KERNEL);
        if (!set)
@@ -9599,7 +9599,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
                table->use--;
                nf_tables_chain_destroy(&ctx);
        }
-       list_del(&table->list);
        nf_tables_table_destroy(&ctx);
 }
 
@@ -9612,6 +9611,8 @@ static void __nft_release_tables(struct net *net)
                if (nft_table_has_owner(table))
                        continue;
 
+               list_del(&table->list);
+
                __nft_release_table(net, table);
        }
 }
@@ -9619,31 +9620,38 @@ static void __nft_release_tables(struct net *net)
 static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
                            void *ptr)
 {
+       struct nft_table *table, *to_delete[8];
        struct nftables_pernet *nft_net;
        struct netlink_notify *n = ptr;
-       struct nft_table *table, *nt;
        struct net *net = n->net;
-       bool release = false;
+       unsigned int deleted;
+       bool restart = false;
 
        if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
                return NOTIFY_DONE;
 
        nft_net = nft_pernet(net);
+       deleted = 0;
        mutex_lock(&nft_net->commit_mutex);
+again:
        list_for_each_entry(table, &nft_net->tables, list) {
                if (nft_table_has_owner(table) &&
                    n->portid == table->nlpid) {
                        __nft_release_hook(net, table);
-                       release = true;
+                       list_del_rcu(&table->list);
+                       to_delete[deleted++] = table;
+                       if (deleted >= ARRAY_SIZE(to_delete))
+                               break;
                }
        }
-       if (release) {
+       if (deleted) {
+               restart = deleted >= ARRAY_SIZE(to_delete);
                synchronize_rcu();
-               list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
-                       if (nft_table_has_owner(table) &&
-                           n->portid == table->nlpid)
-                               __nft_release_table(net, table);
-               }
+               while (deleted)
+                       __nft_release_table(net, to_delete[--deleted]);
+
+               if (restart)
+                       goto again;
        }
        mutex_unlock(&nft_net->commit_mutex);
 
index 272bcdb..f69cc73 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
 
 /* Used for matches where *info is larger than X byte */
 #define NFT_MATCH_LARGE_THRESH 192
@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        nft_compat_wait_for_destructors();
 
        ret = xt_check_target(&par, size, proto, inv);
-       if (ret < 0)
+       if (ret < 0) {
+               if (ret == -ENOENT) {
+                       const char *modname = NULL;
+
+                       if (strcmp(target->name, "LOG") == 0)
+                               modname = "nf_log_syslog";
+                       else if (strcmp(target->name, "NFLOG") == 0)
+                               modname = "nfnetlink_log";
+
+                       if (modname &&
+                           nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+                               return -EAGAIN;
+               }
+
                return ret;
+       }
 
        /* The standard target cannot be used */
        if (!target->target)
index 2ff75f7..f39244f 100644 (file)
@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int log_tg_check(const struct xt_tgchk_param *par)
 {
        const struct xt_log_info *loginfo = par->targinfo;
+       int ret;
 
        if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
                return -EINVAL;
@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
                return -EINVAL;
        }
 
-       return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+       ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+       if (ret != 0 && !par->nft_compat) {
+               request_module("%s", "nf_log_syslog");
+
+               ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+       }
+
+       return ret;
 }
 
 static void log_tg_destroy(const struct xt_tgdtor_param *par)
index fb57932..e660c37 100644 (file)
@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int nflog_tg_check(const struct xt_tgchk_param *par)
 {
        const struct xt_nflog_info *info = par->targinfo;
+       int ret;
 
        if (info->flags & ~XT_NFLOG_MASK)
                return -EINVAL;
        if (info->prefix[sizeof(info->prefix) - 1] != '\0')
                return -EINVAL;
 
-       return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+       ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+       if (ret != 0 && !par->nft_compat) {
+               request_module("%s", "nfnetlink_log");
+
+               ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+       }
+
+       return ret;
 }
 
 static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
index 23b2125..eb6345a 100644 (file)
@@ -2188,18 +2188,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 
        arg->count = arg->skip;
 
+       rcu_read_lock();
        idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
                /* don't return filters that are being deleted */
                if (!refcount_inc_not_zero(&f->refcnt))
                        continue;
+               rcu_read_unlock();
+
                if (arg->fn(tp, f, arg) < 0) {
                        __fl_put(f);
                        arg->stop = 1;
+                       rcu_read_lock();
                        break;
                }
                __fl_put(f);
                arg->count++;
+               rcu_read_lock();
        }
+       rcu_read_unlock();
        arg->cookie = id;
 }
 
index 5e90e9b..12f39a2 100644 (file)
@@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
                return stab;
        }
 
+       if (s->size_log > STAB_SIZE_LOG_MAX ||
+           s->cell_log > STAB_SIZE_LOG_MAX) {
+               NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+               return ERR_PTR(-EINVAL);
+       }
+
        stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
        if (!stab)
                return ERR_PTR(-ENOMEM);
index 5ef86fd..1f17860 100644 (file)
@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
                ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
 
                /* Break out if chunk length is less then minimal. */
-               if (ntohs(ch->length) < sizeof(_ch))
+               if (!ch || ntohs(ch->length) < sizeof(_ch))
                        break;
 
                ch_end = offset + SCTP_PAD4(ntohs(ch->length));
index 92345c9..efac598 100644 (file)
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
 static void init_peercred(struct sock *sk)
 {
-       put_pid(sk->sk_peer_pid);
-       if (sk->sk_peer_cred)
-               put_cred(sk->sk_peer_cred);
+       const struct cred *old_cred;
+       struct pid *old_pid;
+
+       spin_lock(&sk->sk_peer_lock);
+       old_pid = sk->sk_peer_pid;
+       old_cred = sk->sk_peer_cred;
        sk->sk_peer_pid  = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
+       spin_unlock(&sk->sk_peer_lock);
+
+       put_pid(old_pid);
+       put_cred(old_cred);
 }
 
 static void copy_peercred(struct sock *sk, struct sock *peersk)
 {
-       put_pid(sk->sk_peer_pid);
-       if (sk->sk_peer_cred)
-               put_cred(sk->sk_peer_cred);
+       const struct cred *old_cred;
+       struct pid *old_pid;
+
+       if (sk < peersk) {
+               spin_lock(&sk->sk_peer_lock);
+               spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+       } else {
+               spin_lock(&peersk->sk_peer_lock);
+               spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+       }
+       old_pid = sk->sk_peer_pid;
+       old_cred = sk->sk_peer_cred;
        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+       spin_unlock(&sk->sk_peer_lock);
+       spin_unlock(&peersk->sk_peer_lock);
+
+       put_pid(old_pid);
+       put_cred(old_cred);
 }
 
 static int unix_listen(struct socket *sock, int backlog)
@@ -828,20 +850,25 @@ struct proto unix_stream_proto = {
 
 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 {
-       struct sock *sk = NULL;
        struct unix_sock *u;
+       struct sock *sk;
+       int err;
 
        atomic_long_inc(&unix_nr_socks);
-       if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
-               goto out;
+       if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+               err = -ENFILE;
+               goto err;
+       }
 
        if (type == SOCK_STREAM)
                sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
        else /*dgram and  seqpacket */
                sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
 
-       if (!sk)
-               goto out;
+       if (!sk) {
+               err = -ENOMEM;
+               goto err;
+       }
 
        sock_init_data(sock, sk);
 
@@ -861,20 +888,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        memset(&u->scm_stat, 0, sizeof(struct scm_stat));
        unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
-       if (sk == NULL)
-               atomic_long_dec(&unix_nr_socks);
-       else {
-               local_bh_disable();
-               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-               local_bh_enable();
-       }
+
+       local_bh_disable();
+       sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+       local_bh_enable();
+
        return sk;
+
+err:
+       atomic_long_dec(&unix_nr_socks);
+       return ERR_PTR(err);
 }
 
 static int unix_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
 {
+       struct sock *sk;
+
        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;
 
@@ -901,7 +931,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
                return -ESOCKTNOSUPPORT;
        }
 
-       return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
+       sk = unix_create1(net, sock, kern, sock->type);
+       if (IS_ERR(sk))
+               return PTR_ERR(sk);
+
+       return 0;
 }
 
 static int unix_release(struct socket *sock)
@@ -1314,12 +1348,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
           we will have to recheck all again in any case.
         */
 
-       err = -ENOMEM;
-
        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
-       if (newsk == NULL)
+       if (IS_ERR(newsk)) {
+               err = PTR_ERR(newsk);
+               newsk = NULL;
                goto out;
+       }
+
+       err = -ENOMEM;
 
        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
index 6c0a4a6..6f30231 100644 (file)
@@ -873,12 +873,21 @@ static long snd_rawmidi_ioctl(struct file *file, unsigned int cmd, unsigned long
                        return -EINVAL;
                }
        }
+       case SNDRV_RAWMIDI_IOCTL_USER_PVERSION:
+               if (get_user(rfile->user_pversion, (unsigned int __user *)arg))
+                       return -EFAULT;
+               return 0;
+
        case SNDRV_RAWMIDI_IOCTL_PARAMS:
        {
                struct snd_rawmidi_params params;
 
                if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params)))
                        return -EFAULT;
+               if (rfile->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 2)) {
+                       params.mode = 0;
+                       memset(params.reserved, 0, sizeof(params.reserved));
+               }
                switch (params.stream) {
                case SNDRV_RAWMIDI_STREAM_OUTPUT:
                        if (rfile->output == NULL)
index ed40d0f..773db4b 100644 (file)
@@ -143,7 +143,7 @@ enum hrtimer_restart pcsp_do_timer(struct hrtimer *handle)
        if (pointer_update)
                pcsp_pointer_update(chip);
 
-       hrtimer_forward(handle, hrtimer_get_expires(handle), ns_to_ktime(ns));
+       hrtimer_forward_now(handle, ns_to_ktime(ns));
 
        return HRTIMER_RESTART;
 }
index 5388b85..a18c2c0 100644 (file)
@@ -276,10 +276,11 @@ static void __maybe_unused copy_message(u64 *frames, __be32 *buffer,
 
        /* This is just for v2/v3 protocol. */
        for (i = 0; i < data_blocks; ++i) {
-               *frames = (be32_to_cpu(buffer[1]) << 16) |
-                         (be32_to_cpu(buffer[2]) >> 16);
+               *frames = be32_to_cpu(buffer[1]);
+               *frames <<= 16;
+               *frames |= be32_to_cpu(buffer[2]) >> 16;
+               ++frames;
                buffer += data_block_quadlets;
-               frames++;
        }
 }
 
index cb5b5e3..daf7313 100644 (file)
@@ -184,13 +184,16 @@ static int detect_quirks(struct snd_oxfw *oxfw, const struct ieee1394_device_id
                        model = val;
        }
 
-       /*
-        * Mackie Onyx Satellite with base station has a quirk to report a wrong
-        * value in 'dbs' field of CIP header against its format information.
-        */
-       if (vendor == VENDOR_LOUD && model == MODEL_SATELLITE)
+       if (vendor == VENDOR_LOUD) {
+               // Mackie Onyx Satellite with base station has a quirk to report a wrong
+               // value in 'dbs' field of CIP header against its format information.
                oxfw->quirks |= SND_OXFW_QUIRK_WRONG_DBS;
 
+               // OXFW971-based models may transfer events by blocking method.
+               if (!(oxfw->quirks & SND_OXFW_QUIRK_JUMBO_PAYLOAD))
+                       oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION;
+       }
+
        return 0;
 }
 
index 3aa432d..4777743 100644 (file)
@@ -883,10 +883,11 @@ static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev)
        return azx_get_pos_posbuf(chip, azx_dev);
 }
 
-static void azx_shutdown_chip(struct azx *chip)
+static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset)
 {
        azx_stop_chip(chip);
-       azx_enter_link_reset(chip);
+       if (!skip_link_reset)
+               azx_enter_link_reset(chip);
        azx_clear_irq_pending(chip);
        display_power(chip, false);
 }
@@ -895,6 +896,11 @@ static void azx_shutdown_chip(struct azx *chip)
 static DEFINE_MUTEX(card_list_lock);
 static LIST_HEAD(card_list);
 
+static void azx_shutdown_chip(struct azx *chip)
+{
+       __azx_shutdown_chip(chip, false);
+}
+
 static void azx_add_card_list(struct azx *chip)
 {
        struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
@@ -2357,7 +2363,7 @@ static void azx_shutdown(struct pci_dev *pci)
                return;
        chip = card->private_data;
        if (chip && chip->running)
-               azx_shutdown_chip(chip);
+               __azx_shutdown_chip(chip, true);
 }
 
 /* PCI IDs */
index 3c7ef55..31ff11a 100644 (file)
@@ -1207,6 +1207,9 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac
                snd_hda_jack_add_kctl(codec, DOLPHIN_LO_PIN_NID, "Line Out", true,
                                      SND_JACK_HEADPHONE, NULL);
 
+               snd_hda_jack_add_kctl(codec, DOLPHIN_AMIC_PIN_NID, "Microphone", true,
+                                     SND_JACK_MICROPHONE, NULL);
+
                cs8409_fix_caps(codec, DOLPHIN_HP_PIN_NID);
                cs8409_fix_caps(codec, DOLPHIN_LO_PIN_NID);
                cs8409_fix_caps(codec, DOLPHIN_AMIC_PIN_NID);
index 8b7a389..4407f7d 100644 (file)
@@ -6429,6 +6429,20 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec,
        hda_fixup_thinkpad_acpi(codec, fix, action);
 }
 
+/* Fixup for Lenovo Legion 15IMHg05 speaker output on headset removal. */
+static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec,
+                                                 const struct hda_fixup *fix,
+                                                 int action)
+{
+       struct alc_spec *spec = codec->spec;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               spec->gen.suppress_auto_mute = 1;
+               break;
+       }
+}
+
 /* for alc295_fixup_hp_top_speakers */
 #include "hp_x360_helper.c"
 
@@ -6646,6 +6660,10 @@ enum {
        ALC623_FIXUP_LENOVO_THINKSTATION_P340,
        ALC255_FIXUP_ACER_HEADPHONE_AND_MIC,
        ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST,
+       ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS,
+       ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
+       ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
+       ALC287_FIXUP_13S_GEN2_SPEAKERS
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8236,6 +8254,113 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
        },
+       [ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS] = {
+               .type = HDA_FIXUP_VERBS,
+               //.v.verbs = legion_15imhg05_coefs,
+               .v.verbs = (const struct hda_verb[]) {
+                        // set left speaker Legion 7i.
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x41 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xc },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x1a },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        // set right speaker Legion 7i.
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x42 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xc },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2a },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+                        {}
+               },
+               .chained = true,
+               .chain_id = ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
+       },
+       [ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_legion_15imhg05_speakers,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
+       [ALC287_FIXUP_YOGA7_14ITL_SPEAKERS] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                        // set left speaker Yoga 7i.
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x41 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xc },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x1a },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        // set right speaker Yoga 7i.
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x46 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xc },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2a },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+                        {}
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
+       [ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x41 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+                       { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x42 },
+                       { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x2 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0x0 },
+                       { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 },
+                       {}
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8630,6 +8755,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
        SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
        SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
+       SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
+       SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
index 87d2422..23f253e 100644 (file)
@@ -52,7 +52,7 @@
 #define PCXHR_DSP 2
 
 #if (PCXHR_DSP_OFFSET_MAX > PCXHR_PLX_OFFSET_MIN)
-#undef  PCXHR_REG_TO_PORT(x)
+#error  PCXHR_REG_TO_PORT(x)
 #else
 #define PCXHR_REG_TO_PORT(x)   ((x)>PCXHR_DSP_OFFSET_MAX ? PCXHR_PLX : PCXHR_DSP)
 #endif
index a961f83..bda66b3 100644 (file)
@@ -1073,6 +1073,16 @@ static int fsl_esai_probe(struct platform_device *pdev)
        if (ret < 0)
                goto err_pm_get_sync;
 
+       /*
+        * Register the platform component before registering the CPU DAI, because
+        * snd_soc_add_pcm_runtime() has no probe deferral for the platform component.
+        */
+       ret = imx_pcm_dma_init(pdev, IMX_ESAI_DMABUF_SIZE);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to init imx pcm dma: %d\n", ret);
+               goto err_pm_get_sync;
+       }
+
        ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component,
                                              &fsl_esai_dai, 1);
        if (ret) {
@@ -1082,12 +1092,6 @@ static int fsl_esai_probe(struct platform_device *pdev)
 
        INIT_WORK(&esai_priv->work, fsl_esai_hw_reset);
 
-       ret = imx_pcm_dma_init(pdev, IMX_ESAI_DMABUF_SIZE);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to init imx pcm dma: %d\n", ret);
-               goto err_pm_get_sync;
-       }
-
        return ret;
 
 err_pm_get_sync:
index 8c0c75c..9f90989 100644 (file)
@@ -737,18 +737,23 @@ static int fsl_micfil_probe(struct platform_device *pdev)
        pm_runtime_enable(&pdev->dev);
        regcache_cache_only(micfil->regmap, true);
 
+       /*
+        * Register the platform component before registering the CPU DAI, because
+        * snd_soc_add_pcm_runtime() has no probe deferral for the platform component.
+        */
+       ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to pcm register\n");
+               return ret;
+       }
+
        ret = devm_snd_soc_register_component(&pdev->dev, &fsl_micfil_component,
                                              &fsl_micfil_dai, 1);
        if (ret) {
                dev_err(&pdev->dev, "failed to register component %s\n",
                        fsl_micfil_component.name);
-               return ret;
        }
 
-       ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
-       if (ret)
-               dev_err(&pdev->dev, "failed to pcm register\n");
-
        return ret;
 }
 
index 223fcd1..38f6362 100644 (file)
@@ -1152,11 +1152,10 @@ static int fsl_sai_probe(struct platform_device *pdev)
        if (ret < 0)
                goto err_pm_get_sync;
 
-       ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
-                                             &sai->cpu_dai_drv, 1);
-       if (ret)
-               goto err_pm_get_sync;
-
+       /*
+        * Register the platform component before registering the CPU DAI, because
+        * snd_soc_add_pcm_runtime() has no probe deferral for the platform component.
+        */
        if (sai->soc_data->use_imx_pcm) {
                ret = imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE);
                if (ret)
@@ -1167,6 +1166,11 @@ static int fsl_sai_probe(struct platform_device *pdev)
                        goto err_pm_get_sync;
        }
 
+       ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
+                                             &sai->cpu_dai_drv, 1);
+       if (ret)
+               goto err_pm_get_sync;
+
        return ret;
 
 err_pm_get_sync:
index 8ffb1a6..1c53719 100644 (file)
@@ -1434,16 +1434,20 @@ static int fsl_spdif_probe(struct platform_device *pdev)
        pm_runtime_enable(&pdev->dev);
        regcache_cache_only(spdif_priv->regmap, true);
 
-       ret = devm_snd_soc_register_component(&pdev->dev, &fsl_spdif_component,
-                                             &spdif_priv->cpu_dai_drv, 1);
+       /*
+        * Register the platform component before registering the CPU DAI, because
+        * snd_soc_add_pcm_runtime() has no probe deferral for the platform component.
+        */
+       ret = imx_pcm_dma_init(pdev, IMX_SPDIF_DMABUF_SIZE);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register DAI: %d\n", ret);
+               dev_err_probe(&pdev->dev, ret, "imx_pcm_dma_init failed\n");
                goto err_pm_disable;
        }
 
-       ret = imx_pcm_dma_init(pdev, IMX_SPDIF_DMABUF_SIZE);
+       ret = devm_snd_soc_register_component(&pdev->dev, &fsl_spdif_component,
+                                             &spdif_priv->cpu_dai_drv, 1);
        if (ret) {
-               dev_err_probe(&pdev->dev, ret, "imx_pcm_dma_init failed\n");
+               dev_err(&pdev->dev, "failed to register DAI: %d\n", ret);
                goto err_pm_disable;
        }
 
index 31c5ee6..7ba2fd1 100644 (file)
@@ -1215,18 +1215,23 @@ static int fsl_xcvr_probe(struct platform_device *pdev)
        pm_runtime_enable(dev);
        regcache_cache_only(xcvr->regmap, true);
 
+       /*
+        * Register the platform component before registering the CPU DAI, because
+        * snd_soc_add_pcm_runtime() has no probe deferral for the platform component.
+        */
+       ret = devm_snd_dmaengine_pcm_register(dev, NULL, 0);
+       if (ret) {
+               dev_err(dev, "failed to pcm register\n");
+               return ret;
+       }
+
        ret = devm_snd_soc_register_component(dev, &fsl_xcvr_comp,
                                              &fsl_xcvr_dai, 1);
        if (ret) {
                dev_err(dev, "failed to register component %s\n",
                        fsl_xcvr_comp.name);
-               return ret;
        }
 
-       ret = devm_snd_dmaengine_pcm_register(dev, NULL, 0);
-       if (ret)
-               dev_err(dev, "failed to pcm register\n");
-
        return ret;
 }
 
index 6602eda..6b06248 100644 (file)
@@ -929,6 +929,11 @@ static int create_sdw_dailink(struct snd_soc_card *card,
                              cpus + *cpu_id, cpu_dai_num,
                              codecs, codec_num,
                              NULL, &sdw_ops);
+               /*
+                * SoundWire DAILINKs use 'stream' functions and Bank Switch operations
+                * based on wait_for_completion(), tag them as 'nonatomic'.
+                */
+               dai_links[*be_index].nonatomic = true;
 
                ret = set_codec_init_func(card, link, dai_links + (*be_index)++,
                                          playback, group_id);
index 5a2f466..81ad2dc 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config SND_SOC_MEDIATEK
        tristate
+       select REGMAP_MMIO
 
 config SND_SOC_MT2701
        tristate "ASoC support for Mediatek MT2701 chip"
@@ -188,7 +189,9 @@ config SND_SOC_MT8192_MT6359_RT1015_RT5682
 config SND_SOC_MT8195
        tristate "ASoC support for Mediatek MT8195 chip"
        depends on ARCH_MEDIATEK || COMPILE_TEST
+       depends on COMMON_CLK
        select SND_SOC_MEDIATEK
+       select MFD_SYSCON if SND_SOC_MT6359
        help
          This adds ASoC platform driver support for Mediatek MT8195 chip
          that can be used with other codecs.
index baaa588..e95c7c0 100644 (file)
@@ -334,9 +334,11 @@ int mtk_afe_suspend(struct snd_soc_component *component)
                        devm_kcalloc(dev, afe->reg_back_up_list_num,
                                     sizeof(unsigned int), GFP_KERNEL);
 
-       for (i = 0; i < afe->reg_back_up_list_num; i++)
-               regmap_read(regmap, afe->reg_back_up_list[i],
-                           &afe->reg_back_up[i]);
+       if (afe->reg_back_up) {
+               for (i = 0; i < afe->reg_back_up_list_num; i++)
+                       regmap_read(regmap, afe->reg_back_up_list[i],
+                                   &afe->reg_back_up[i]);
+       }
 
        afe->suspended = true;
        afe->runtime_suspend(dev);
@@ -356,12 +358,13 @@ int mtk_afe_resume(struct snd_soc_component *component)
 
        afe->runtime_resume(dev);
 
-       if (!afe->reg_back_up)
+       if (!afe->reg_back_up) {
                dev_dbg(dev, "%s no reg_backup\n", __func__);
-
-       for (i = 0; i < afe->reg_back_up_list_num; i++)
-               mtk_regmap_write(regmap, afe->reg_back_up_list[i],
-                                afe->reg_back_up[i]);
+       } else {
+               for (i = 0; i < afe->reg_back_up_list_num; i++)
+                       mtk_regmap_write(regmap, afe->reg_back_up_list[i],
+                                        afe->reg_back_up[i]);
+       }
 
        afe->suspended = false;
        return 0;
index c97ace7..de09f67 100644 (file)
@@ -424,8 +424,8 @@ static int mt8195_hdmi_codec_init(struct snd_soc_pcm_runtime *rtd)
        return snd_soc_component_set_jack(cmpnt_codec, &priv->hdmi_jack, NULL);
 }
 
-static int mt8195_hdmitx_dptx_hw_params_fixup(struct snd_soc_pcm_runtime *rtd,
-                                             struct snd_pcm_hw_params *params)
+static int mt8195_dptx_hw_params_fixup(struct snd_soc_pcm_runtime *rtd,
+                                      struct snd_pcm_hw_params *params)
 
 {
        /* fix BE i2s format to 32bit, clean param mask first */
@@ -902,7 +902,7 @@ static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = {
                .no_pcm = 1,
                .dpcm_playback = 1,
                .ops = &mt8195_dptx_ops,
-               .be_hw_params_fixup = mt8195_hdmitx_dptx_hw_params_fixup,
+               .be_hw_params_fixup = mt8195_dptx_hw_params_fixup,
                SND_SOC_DAILINK_REG(DPTX_BE),
        },
        [DAI_LINK_ETDM1_IN_BE] = {
@@ -953,7 +953,6 @@ static struct snd_soc_dai_link mt8195_mt6359_rt1019_rt5682_dai_links[] = {
                        SND_SOC_DAIFMT_NB_NF |
                        SND_SOC_DAIFMT_CBS_CFS,
                .dpcm_playback = 1,
-               .be_hw_params_fixup = mt8195_hdmitx_dptx_hw_params_fixup,
                SND_SOC_DAILINK_REG(ETDM3_OUT_BE),
        },
        [DAI_LINK_PCM1_BE] = {
index 3e4dd4a..59d0d7b 100644 (file)
@@ -371,7 +371,6 @@ int snd_sof_device_remove(struct device *dev)
                        dev_warn(dev, "error: %d failed to prepare DSP for device removal",
                                 ret);
 
-               snd_sof_fw_unload(sdev);
                snd_sof_ipc_free(sdev);
                snd_sof_free_debug(sdev);
                snd_sof_free_trace(sdev);
@@ -394,8 +393,7 @@ int snd_sof_device_remove(struct device *dev)
                snd_sof_remove(sdev);
 
        /* release firmware */
-       release_firmware(pdata->fw);
-       pdata->fw = NULL;
+       snd_sof_fw_unload(sdev);
 
        return 0;
 }
index 12fedf0..7e9723a 100644 (file)
@@ -365,7 +365,14 @@ static int imx8_remove(struct snd_sof_dev *sdev)
 /* on i.MX8 there is 1 to 1 match between type and BAR idx */
 static int imx8_get_bar_index(struct snd_sof_dev *sdev, u32 type)
 {
-       return type;
+       /* Only IRAM and SRAM bars are valid */
+       switch (type) {
+       case SOF_FW_BLK_TYPE_IRAM:
+       case SOF_FW_BLK_TYPE_SRAM:
+               return type;
+       default:
+               return -EINVAL;
+       }
 }
 
 static void imx8_ipc_msg_data(struct snd_sof_dev *sdev,
index cb822d9..892e148 100644 (file)
@@ -228,7 +228,14 @@ static int imx8m_remove(struct snd_sof_dev *sdev)
 /* on i.MX8 there is 1 to 1 match between type and BAR idx */
 static int imx8m_get_bar_index(struct snd_sof_dev *sdev, u32 type)
 {
-       return type;
+       /* Only IRAM and SRAM bars are valid */
+       switch (type) {
+       case SOF_FW_BLK_TYPE_IRAM:
+       case SOF_FW_BLK_TYPE_SRAM:
+               return type;
+       default:
+               return -EINVAL;
+       }
 }
 
 static void imx8m_ipc_msg_data(struct snd_sof_dev *sdev,
index 2b38a77..bb79c77 100644 (file)
@@ -729,10 +729,10 @@ int snd_sof_load_firmware_raw(struct snd_sof_dev *sdev)
        ret = request_firmware(&plat_data->fw, fw_filename, sdev->dev);
 
        if (ret < 0) {
-               dev_err(sdev->dev, "error: request firmware %s failed err: %d\n",
-                       fw_filename, ret);
                dev_err(sdev->dev,
-                       "you may need to download the firmware from https://github.com/thesofproject/sof-bin/\n");
+                       "error: sof firmware file is missing, you might need to\n");
+               dev_err(sdev->dev,
+                       "       download it from https://github.com/thesofproject/sof-bin/\n");
                goto err;
        } else {
                dev_dbg(sdev->dev, "request_firmware %s successful\n",
@@ -880,5 +880,7 @@ EXPORT_SYMBOL(snd_sof_run_firmware);
 void snd_sof_fw_unload(struct snd_sof_dev *sdev)
 {
        /* TODO: support module unloading at runtime */
+       release_firmware(sdev->pdata->fw);
+       sdev->pdata->fw = NULL;
 }
 EXPORT_SYMBOL(snd_sof_fw_unload);
index f72a6e8..58f6ca5 100644 (file)
@@ -530,7 +530,6 @@ void snd_sof_trace_notify_for_error(struct snd_sof_dev *sdev)
                return;
 
        if (sdev->dtrace_is_enabled) {
-               dev_err(sdev->dev, "error: waking up any trace sleepers\n");
                sdev->dtrace_error = true;
                wake_up(&sdev->trace_sleep);
        }
index bbb9a22..f6e3411 100644 (file)
@@ -122,9 +122,9 @@ static void xtensa_stack(struct snd_sof_dev *sdev, void *oops, u32 *stack,
         * 0x0049fbb0: 8000f2d0 0049fc00 6f6c6c61 00632e63
         */
        for (i = 0; i < stack_words; i += 4) {
-               hex_dump_to_buffer(stack + i * 4, 16, 16, 4,
+               hex_dump_to_buffer(stack + i, 16, 16, 4,
                                   buf, sizeof(buf), false);
-               dev_err(sdev->dev, "0x%08x: %s\n", stack_ptr + i, buf);
+               dev_err(sdev->dev, "0x%08x: %s\n", stack_ptr + i * 4, buf);
        }
 }
 
index fd570a4..1764b93 100644 (file)
@@ -1054,7 +1054,7 @@ static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message)
        return 0;
 }
 
-static int __usb_audio_resume(struct usb_interface *intf, bool reset_resume)
+static int usb_audio_resume(struct usb_interface *intf)
 {
        struct snd_usb_audio *chip = usb_get_intfdata(intf);
        struct snd_usb_stream *as;
@@ -1080,7 +1080,7 @@ static int __usb_audio_resume(struct usb_interface *intf, bool reset_resume)
         * we just notify and restart the mixers
         */
        list_for_each_entry(mixer, &chip->mixer_list, list) {
-               err = snd_usb_mixer_resume(mixer, reset_resume);
+               err = snd_usb_mixer_resume(mixer);
                if (err < 0)
                        goto err_out;
        }
@@ -1100,20 +1100,10 @@ err_out:
        atomic_dec(&chip->active); /* allow autopm after this point */
        return err;
 }
-
-static int usb_audio_resume(struct usb_interface *intf)
-{
-       return __usb_audio_resume(intf, false);
-}
-
-static int usb_audio_reset_resume(struct usb_interface *intf)
-{
-       return __usb_audio_resume(intf, true);
-}
 #else
 #define usb_audio_suspend      NULL
 #define usb_audio_resume       NULL
-#define usb_audio_reset_resume NULL
+#define usb_audio_resume       NULL
 #endif         /* CONFIG_PM */
 
 static const struct usb_device_id usb_audio_ids [] = {
@@ -1135,7 +1125,7 @@ static struct usb_driver usb_audio_driver = {
        .disconnect =   usb_audio_disconnect,
        .suspend =      usb_audio_suspend,
        .resume =       usb_audio_resume,
-       .reset_resume = usb_audio_reset_resume,
+       .reset_resume = usb_audio_resume,
        .id_table =     usb_audio_ids,
        .supports_autosuspend = 1,
 };
index 43bc595..a2ce535 100644 (file)
@@ -3653,33 +3653,16 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list)
        return 0;
 }
 
-static int default_mixer_reset_resume(struct usb_mixer_elem_list *list)
-{
-       int err;
-
-       if (list->resume) {
-               err = list->resume(list);
-               if (err < 0)
-                       return err;
-       }
-       return restore_mixer_value(list);
-}
-
-int snd_usb_mixer_resume(struct usb_mixer_interface *mixer, bool reset_resume)
+int snd_usb_mixer_resume(struct usb_mixer_interface *mixer)
 {
        struct usb_mixer_elem_list *list;
-       usb_mixer_elem_resume_func_t f;
        int id, err;
 
        /* restore cached mixer values */
        for (id = 0; id < MAX_ID_ELEMS; id++) {
                for_each_mixer_elem(list, mixer, id) {
-                       if (reset_resume)
-                               f = list->reset_resume;
-                       else
-                               f = list->resume;
-                       if (f) {
-                               err = f(list);
+                       if (list->resume) {
+                               err = list->resume(list);
                                if (err < 0)
                                        return err;
                        }
@@ -3700,7 +3683,6 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list,
        list->id = unitid;
        list->dump = snd_usb_mixer_dump_cval;
 #ifdef CONFIG_PM
-       list->resume = NULL;
-       list->reset_resume = default_mixer_reset_resume;
+       list->resume = restore_mixer_value;
 #endif
 }
index 876bbc9..98ea24d 100644 (file)
@@ -70,7 +70,6 @@ struct usb_mixer_elem_list {
        bool is_std_info;
        usb_mixer_elem_dump_func_t dump;
        usb_mixer_elem_resume_func_t resume;
-       usb_mixer_elem_resume_func_t reset_resume;
 };
 
 /* iterate over mixer element list of the given unit id */
@@ -121,7 +120,7 @@ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 
 #ifdef CONFIG_PM
 int snd_usb_mixer_suspend(struct usb_mixer_interface *mixer);
-int snd_usb_mixer_resume(struct usb_mixer_interface *mixer, bool reset_resume);
+int snd_usb_mixer_resume(struct usb_mixer_interface *mixer);
 #endif
 
 int snd_usb_set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel,
index a66ce03..46082dc 100644 (file)
@@ -151,7 +151,7 @@ static int add_single_ctl_with_resume(struct usb_mixer_interface *mixer,
                *listp = list;
        list->mixer = mixer;
        list->id = id;
-       list->reset_resume = resume;
+       list->resume = resume;
        kctl = snd_ctl_new1(knew, list);
        if (!kctl) {
                kfree(list);
index 10911a8..2df880c 100644 (file)
@@ -1649,11 +1649,17 @@ static bool btf_is_non_static(const struct btf_type *t)
 static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
                             int *out_btf_sec_id, int *out_btf_id)
 {
-       int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
+       int i, j, n, m, btf_id = 0;
        const struct btf_type *t;
        const struct btf_var_secinfo *vi;
        const char *name;
 
+       if (!obj->btf) {
+               pr_warn("failed to find BTF info for object '%s'\n", obj->filename);
+               return -EINVAL;
+       }
+
+       n = btf__get_nr_types(obj->btf);
        for (i = 1; i <= n; i++) {
                t = btf__type_by_id(obj->btf, i);
 
index 52152d1..7993635 100644 (file)
@@ -164,7 +164,7 @@ const char unwinding_data[n]: an array of unwinding data, consisting of the EH F
 The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
 
 
-The EH Frame follows the LSB specicfication as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
+The EH Frame follows the LSB specification as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
 
 
 NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.
index de6beed..3b6a2c8 100644 (file)
@@ -261,7 +261,7 @@ COALESCE
 User can specify how to sort offsets for cacheline.
 
 Following fields are available and governs the final
-output fields set for caheline offsets output:
+output fields set for cacheline offsets output:
 
   tid   - coalesced by process TIDs
   pid   - coalesced by process PIDs
index 184ba62..db465fa 100644 (file)
@@ -883,7 +883,7 @@ and "r" can be combined to get calls and returns.
 
 "Transactions" events correspond to the start or end of transactions. The
 'flags' field can be used in perf script to determine whether the event is a
-tranasaction start, commit or abort.
+transaction start, commit or abort.
 
 Note that "instructions", "branches" and "transactions" events depend on code
 flow packets which can be disabled by using the config term "branch=0".  Refer
index 74d7745..1b4d452 100644 (file)
@@ -44,7 +44,7 @@ COMMON OPTIONS
 
 -f::
 --force::
-       Don't complan, do it.
+       Don't complain, do it.
 
 REPORT OPTIONS
 --------------
index 5a1f681..fa4f39d 100644 (file)
@@ -54,7 +54,7 @@ all sched_wakeup events in the system:
 Traces meant to be processed using a script should be recorded with
 the above option: -a to enable system-wide collection.
 
-The format file for the sched_wakep event defines the following fields
+The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
 
 ----
index 0250dc6..cf4b7f4 100644 (file)
@@ -448,7 +448,7 @@ all sched_wakeup events in the system:
 Traces meant to be processed using a script should be recorded with
 the above option: -a to enable system-wide collection.
 
-The format file for the sched_wakep event defines the following fields
+The format file for the sched_wakeup event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
 
 ----
index 4c9310b..7e6fb7c 100644 (file)
@@ -385,7 +385,7 @@ Aggregate counts per physical processor for system-wide mode measurements.
 Print metrics or metricgroups specified in a comma separated list.
 For a group all metrics from the group are added.
 The events from the metrics are automatically measured.
-See perf list output for the possble metrics and metricgroups.
+See perf list output for the possible metrics and metricgroups.
 
 -A::
 --no-aggr::
index c6302df..a15b93f 100644 (file)
@@ -2,7 +2,7 @@ Using TopDown metrics in user space
 -----------------------------------
 
 Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown
-methology to break down CPU pipeline execution into 4 bottlenecks:
+methodology to break down CPU pipeline execution into 4 bottlenecks:
 frontend bound, backend bound, bad speculation, retiring.
 
 For more details on Topdown see [1][5]
index c7c7ec0..5fc6a2a 100644 (file)
@@ -8,10 +8,10 @@
 #include <linux/coresight-pmu.h>
 #include <linux/zalloc.h>
 
-#include "../../util/auxtrace.h"
-#include "../../util/debug.h"
-#include "../../util/evlist.h"
-#include "../../util/pmu.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/debug.h"
+#include "../../../util/evlist.h"
+#include "../../../util/pmu.h"
 #include "cs-etm.h"
 #include "arm-spe.h"
 
index 515aae4..293a23b 100644 (file)
 #include <linux/zalloc.h>
 
 #include "cs-etm.h"
-#include "../../util/debug.h"
-#include "../../util/record.h"
-#include "../../util/auxtrace.h"
-#include "../../util/cpumap.h"
-#include "../../util/event.h"
-#include "../../util/evlist.h"
-#include "../../util/evsel.h"
-#include "../../util/perf_api_probe.h"
-#include "../../util/evsel_config.h"
-#include "../../util/pmu.h"
-#include "../../util/cs-etm.h"
+#include "../../../util/debug.h"
+#include "../../../util/record.h"
+#include "../../../util/auxtrace.h"
+#include "../../../util/cpumap.h"
+#include "../../../util/event.h"
+#include "../../../util/evlist.h"
+#include "../../../util/evsel.h"
+#include "../../../util/perf_api_probe.h"
+#include "../../../util/evsel_config.h"
+#include "../../../util/pmu.h"
+#include "../../../util/cs-etm.h"
 #include <internal/lib.h> // page_size
-#include "../../util/session.h"
+#include "../../../util/session.h"
 
 #include <errno.h>
 #include <stdlib.h>
index 2864e2e..2833e10 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../../util/perf_regs.h"
+#include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
index bbc297a..b8b23b9 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/string.h>
 
 #include "arm-spe.h"
-#include "../../util/pmu.h"
+#include "../../../util/pmu.h"
 
 struct perf_event_attr
 *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
index 36ba4c6..b7692cb 100644 (file)
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
index 3a55022..438906b 100644 (file)
@@ -3,8 +3,8 @@
 #include <errno.h>
 #include <libunwind.h>
 #include "perf_regs.h"
-#include "../../util/unwind.h"
-#include "../../util/debug.h"
+#include "../../../util/unwind.h"
+#include "../../../util/debug.h"
 
 int libunwind__arch_reg_id(int regnum)
 {
index eeafe97..792cd75 100644 (file)
@@ -432,7 +432,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
        u8 die = ((struct iio_root_port *)evsel->priv)->die;
        struct perf_counts_values *count = perf_counts(evsel->counts, die, 0);
 
-       if (count->run && count->ena) {
+       if (count && count->run && count->ena) {
                if (evsel->prev_raw_counts && !out->force_header) {
                        struct perf_counts_values *prev_count =
                                perf_counts(evsel->prev_raw_counts, die, 0);
index f6e87b7..f0ecfda 100644 (file)
@@ -2408,6 +2408,8 @@ int cmd_stat(int argc, const char **argv)
                        goto out;
                } else if (verbose)
                        iostat_list(evsel_list, &stat_config);
+               if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target))
+                       target.system_wide = true;
        }
 
        if (add_default_attributes())
index 84a0ced..f1f2965 100644 (file)
   {
     "EventCode": "0x4e010",
     "EventName": "PM_GCT_NOSLOT_IC_L3MISS",
-    "BriefDescription": "Gct empty for this thread due to icach l3 miss",
+    "BriefDescription": "Gct empty for this thread due to icache l3 miss",
     "PublicDescription": ""
   },
   {
index 9866cdd..9b4a765 100644 (file)
@@ -229,8 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
                            struct thread *thread, struct state *state)
 {
        struct addr_location al;
-       unsigned char buf1[BUFSZ];
-       unsigned char buf2[BUFSZ];
+       unsigned char buf1[BUFSZ] = {0};
+       unsigned char buf2[BUFSZ] = {0};
        size_t ret_len;
        u64 objdump_addr;
        const char *objdump_name;
index a288035..c756284 100644 (file)
 /* For bsearch. We try to unwind functions in shared object. */
 #include <stdlib.h>
 
+/*
+ * The test will assert frames are on the stack but tail call optimizations lose
+ * the frame of the caller. Clang can disable this optimization on a called
+ * function but GCC currently (11/2020) lacks this attribute. The barrier is
+ * used to inhibit tail calls in these cases.
+ */
+#ifdef __has_attribute
+#if __has_attribute(disable_tail_calls)
+#define NO_TAIL_CALL_ATTRIBUTE __attribute__((disable_tail_calls))
+#define NO_TAIL_CALL_BARRIER
+#endif
+#endif
+#ifndef NO_TAIL_CALL_ATTRIBUTE
+#define NO_TAIL_CALL_ATTRIBUTE
+#define NO_TAIL_CALL_BARRIER __asm__ __volatile__("" : : : "memory");
+#endif
+
 static int mmap_handler(struct perf_tool *tool __maybe_unused,
                        union perf_event *event,
                        struct perf_sample *sample,
@@ -91,7 +108,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
        return strcmp((const char *) symbol, funcs[idx]);
 }
 
-noinline int test_dwarf_unwind__thread(struct thread *thread)
+NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thread)
 {
        struct perf_sample sample;
        unsigned long cnt = 0;
@@ -122,7 +139,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread)
 
 static int global_unwind_retval = -INT_MAX;
 
-noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__compare(void *p1, void *p2)
 {
        /* Any possible value should be 'thread' */
        struct thread *thread = *(struct thread **)p1;
@@ -141,7 +158,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2)
        return p1 - p2;
 }
 
-noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_3(struct thread *thread)
 {
        struct thread *array[2] = {thread, thread};
        void *fp = &bsearch;
@@ -160,14 +177,22 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread)
        return global_unwind_retval;
 }
 
-noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_2(struct thread *thread)
 {
-       return test_dwarf_unwind__krava_3(thread);
+       int ret;
+
+       ret =  test_dwarf_unwind__krava_3(thread);
+       NO_TAIL_CALL_BARRIER;
+       return ret;
 }
 
-noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *thread)
 {
-       return test_dwarf_unwind__krava_2(thread);
+       int ret;
+
+       ret =  test_dwarf_unwind__krava_2(thread);
+       NO_TAIL_CALL_BARRIER;
+       return ret;
 }
 
 int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
index 4fb5e90..60ce590 100644 (file)
@@ -801,7 +801,7 @@ int perf_config_set(struct perf_config_set *set,
                                  section->name, item->name);
                        ret = fn(key, value, data);
                        if (ret < 0) {
-                               pr_err("Error: wrong config key-value pair %s=%s\n",
+                               pr_err("Error in the given config file: wrong config key-value pair %s=%s\n",
                                       key, value);
                                /*
                                 * Can't be just a 'break', as perf_config_set__for_each_entry()
index 866531c..799b881 100644 (file)
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:                         \
                     $(TRUNNER_BPF_PROGS_DIR)/%.c                       \
                     $(TRUNNER_BPF_PROGS_DIR)/*.h                       \
                     $$(INCLUDE_DIR)/vmlinux.h                          \
-                    $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+                    $(wildcard $(BPFDIR)/bpf_*.h)                      \
+                    | $(TRUNNER_OUTPUT) $$(BPFOBJ)
        $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,                      \
                                          $(TRUNNER_BPF_CFLAGS))
 
index 59ea569..b497bb8 100755 (executable)
@@ -112,6 +112,14 @@ setup()
        ip netns add "${NS2}"
        ip netns add "${NS3}"
 
+       # rp_filter gets confused by what these tests are doing, so disable it
+       ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+       ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+       ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+       ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+       ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+       ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
        ip link add veth1 type veth peer name veth2
        ip link add veth3 type veth peer name veth4
        ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
        ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
        ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
 
-       # rp_filter gets confused by what these tests are doing, so disable it
-       ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
-       ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
-       ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
        TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
 
        sleep 1  # reduce flakiness
index 618bf9b..b8dbabe 100644 (file)
@@ -24,6 +24,7 @@
 /x86_64/smm_test
 /x86_64/state_test
 /x86_64/svm_vmcall_test
+/x86_64/svm_int_ctl_test
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
 /x86_64/userspace_msr_exit_test
index 9ac325c..d1774f4 100644 (file)
@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
index 71e277c..5d95113 100644 (file)
@@ -371,9 +371,7 @@ static void help(char *name)
        printf(" -v: specify the number of vCPUs to run.\n");
        printf(" -o: Overlap guest memory accesses instead of partitioning\n"
               "     them into a separate region of memory for each vCPU.\n");
-       printf(" -s: specify the type of memory that should be used to\n"
-              "     back the guest data region.\n\n");
-       backing_src_help();
+       backing_src_help("-s");
        puts("");
        exit(0);
 }
@@ -381,7 +379,7 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
        struct test_params params = {
-               .backing_src = VM_MEM_SRC_ANONYMOUS,
+               .backing_src = DEFAULT_VM_MEM_SRC,
                .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
                .vcpus = 1,
        };
index e79c1b6..1510b21 100644 (file)
@@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
                        return NULL;
                }
 
-               if (!pollfd[0].revents & POLLIN)
+               if (!(pollfd[0].revents & POLLIN))
                        continue;
 
                r = read(uffd, &msg, sizeof(msg));
@@ -416,7 +416,7 @@ static void help(char *name)
 {
        puts("");
        printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
-              "          [-b memory] [-t type] [-v vcpus] [-o]\n", name);
+              "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
        guest_modes_help();
        printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
               "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@@ -426,8 +426,7 @@ static void help(char *name)
        printf(" -b: specify the size of the memory region which should be\n"
               "     demand paged by each vCPU. e.g. 10M or 3G.\n"
               "     Default: 1G\n");
-       printf(" -t: The type of backing memory to use. Default: anonymous\n");
-       backing_src_help();
+       backing_src_help("-s");
        printf(" -v: specify the number of vCPUs to run.\n");
        printf(" -o: Overlap guest memory accesses instead of partitioning\n"
               "     them into a separate region of memory for each vCPU.\n");
@@ -439,14 +438,14 @@ int main(int argc, char *argv[])
 {
        int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
        struct test_params p = {
-               .src_type = VM_MEM_SRC_ANONYMOUS,
+               .src_type = DEFAULT_VM_MEM_SRC,
                .partition_vcpu_memory_access = true,
        };
        int opt;
 
        guest_modes_append_default();
 
-       while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
+       while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
                switch (opt) {
                case 'm':
                        guest_modes_cmdline(optarg);
@@ -465,7 +464,7 @@ int main(int argc, char *argv[])
                case 'b':
                        guest_percpu_mem_size = parse_size(optarg);
                        break;
-               case 't':
+               case 's':
                        p.src_type = parse_backing_src_type(optarg);
                        break;
                case 'v':
@@ -485,7 +484,7 @@ int main(int argc, char *argv[])
 
        if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
            !backing_src_is_shared(p.src_type)) {
-               TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+               TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
        }
 
        for_each_guest_mode(run_test, &p);
index 4798685..7ffab5b 100644 (file)
@@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
        toggle_dirty_logging(vm, slots, false);
 }
 
-static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
-                         uint64_t nr_pages)
+static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
 {
-       uint64_t slot_pages = nr_pages / slots;
        int i;
 
        for (i = 0; i < slots; i++) {
                int slot = PERF_TEST_MEM_SLOT_INDEX + i;
-               unsigned long *slot_bitmap = bitmap + i * slot_pages;
 
-               kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
+               kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
        }
 }
 
-static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
-                           uint64_t nr_pages)
+static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+                           int slots, uint64_t pages_per_slot)
 {
-       uint64_t slot_pages = nr_pages / slots;
        int i;
 
        for (i = 0; i < slots; i++) {
                int slot = PERF_TEST_MEM_SLOT_INDEX + i;
-               unsigned long *slot_bitmap = bitmap + i * slot_pages;
 
-               kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
+               kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
        }
 }
 
+static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+       unsigned long **bitmaps;
+       int i;
+
+       bitmaps = malloc(slots * sizeof(bitmaps[0]));
+       TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+       for (i = 0; i < slots; i++) {
+               bitmaps[i] = bitmap_zalloc(pages_per_slot);
+               TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+       }
+
+       return bitmaps;
+}
+
+static void free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+       int i;
+
+       for (i = 0; i < slots; i++)
+               free(bitmaps[i]);
+
+       free(bitmaps);
+}
+
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
        struct test_params *p = arg;
        pthread_t *vcpu_threads;
        struct kvm_vm *vm;
-       unsigned long *bmap;
+       unsigned long **bitmaps;
        uint64_t guest_num_pages;
        uint64_t host_num_pages;
+       uint64_t pages_per_slot;
        int vcpu_id;
        struct timespec start;
        struct timespec ts_diff;
@@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
        guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
        host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-       bmap = bitmap_zalloc(host_num_pages);
+       pages_per_slot = host_num_pages / p->slots;
+
+       bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
 
        if (dirty_log_manual_caps) {
                cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                        iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
                clock_gettime(CLOCK_MONOTONIC, &start);
-               get_dirty_log(vm, p->slots, bmap, host_num_pages);
+               get_dirty_log(vm, bitmaps, p->slots);
                ts_diff = timespec_elapsed(start);
                get_dirty_log_total = timespec_add(get_dirty_log_total,
                                                   ts_diff);
@@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
                if (dirty_log_manual_caps) {
                        clock_gettime(CLOCK_MONOTONIC, &start);
-                       clear_dirty_log(vm, p->slots, bmap, host_num_pages);
+                       clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
                        ts_diff = timespec_elapsed(start);
                        clear_dirty_log_total = timespec_add(clear_dirty_log_total,
                                                             ts_diff);
@@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                        clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
        }
 
-       free(bmap);
+       free_bitmaps(bitmaps, p->slots);
        free(vcpu_threads);
        perf_test_destroy_vm(vm);
 }
@@ -308,11 +332,9 @@ static void help(char *name)
        printf(" -v: specify the number of vCPUs to run.\n");
        printf(" -o: Overlap guest memory accesses instead of partitioning\n"
               "     them into a separate region of memory for each vCPU.\n");
-       printf(" -s: specify the type of memory that should be used to\n"
-              "     back the guest data region.\n\n");
+       backing_src_help("-s");
        printf(" -x: Split the memory region into this number of memslots.\n"
-              "     (default: 1)");
-       backing_src_help();
+              "     (default: 1)\n");
        puts("");
        exit(0);
 }
@@ -324,7 +346,7 @@ int main(int argc, char *argv[])
                .iterations = TEST_HOST_LOOP_N,
                .wr_fract = 1,
                .partition_vcpu_memory_access = true,
-               .backing_src = VM_MEM_SRC_ANONYMOUS,
+               .backing_src = DEFAULT_VM_MEM_SRC,
                .slots = 1,
        };
        int opt;
index 451fed5..f8fddc8 100644 (file)
@@ -90,6 +90,8 @@ enum vm_mem_backing_src_type {
        NUM_SRC_TYPES,
 };
 
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
 struct vm_mem_backing_src_alias {
        const char *name;
        uint32_t flag;
@@ -102,7 +104,7 @@ size_t get_trans_hugepagesz(void);
 size_t get_def_hugetlb_pagesz(void);
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
 size_t get_backing_src_pagesz(uint32_t i);
-void backing_src_help(void);
+void backing_src_help(const char *flag);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 long get_run_delay(void);
 
index 242ae8e..eba8bd0 100644 (file)
@@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
        }
 }
 
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
+#define GET_XMM(__xmm)                                                 \
+({                                                                     \
+       unsigned long __val;                                            \
+       asm volatile("movq %%"#__xmm", %0" : "=r"(__val) : : #__xmm);   \
+       __val;                                                          \
+})
+
 static inline unsigned long get_xmm(int n)
 {
        assert(n >= 0 && n <= 7);
 
-       register v1di xmm0 __asm__("%xmm0");
-       register v1di xmm1 __asm__("%xmm1");
-       register v1di xmm2 __asm__("%xmm2");
-       register v1di xmm3 __asm__("%xmm3");
-       register v1di xmm4 __asm__("%xmm4");
-       register v1di xmm5 __asm__("%xmm5");
-       register v1di xmm6 __asm__("%xmm6");
-       register v1di xmm7 __asm__("%xmm7");
        switch (n) {
        case 0:
-               return (unsigned long)xmm0;
+               return GET_XMM(xmm0);
        case 1:
-               return (unsigned long)xmm1;
+               return GET_XMM(xmm1);
        case 2:
-               return (unsigned long)xmm2;
+               return GET_XMM(xmm2);
        case 3:
-               return (unsigned long)xmm3;
+               return GET_XMM(xmm3);
        case 4:
-               return (unsigned long)xmm4;
+               return GET_XMM(xmm4);
        case 5:
-               return (unsigned long)xmm5;
+               return GET_XMM(xmm5);
        case 6:
-               return (unsigned long)xmm6;
+               return GET_XMM(xmm6);
        case 7:
-               return (unsigned long)xmm7;
+               return GET_XMM(xmm7);
        }
+
+       /* never reached */
        return 0;
 }
 
index 0d04a7d..36407cb 100644 (file)
@@ -456,10 +456,7 @@ static void help(char *name)
               "     (default: 1G)\n");
        printf(" -v: specify the number of vCPUs to run\n"
               "     (default: 1)\n");
-       printf(" -s: specify the type of memory that should be used to\n"
-              "     back the guest data region.\n"
-              "     (default: anonymous)\n\n");
-       backing_src_help();
+       backing_src_help("-s");
        puts("");
 }
 
@@ -468,7 +465,7 @@ int main(int argc, char *argv[])
        int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
        struct test_params p = {
                .test_mem_size = DEFAULT_TEST_MEM_SIZE,
-               .src_type = VM_MEM_SRC_ANONYMOUS,
+               .src_type = DEFAULT_VM_MEM_SRC,
        };
        int opt;
 
index a9107bf..b724291 100644 (file)
@@ -283,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
        }
 }
 
-void backing_src_help(void)
+static void print_available_backing_src_types(const char *prefix)
 {
        int i;
 
-       printf("Available backing src types:\n");
+       printf("%sAvailable backing src types:\n", prefix);
+
        for (i = 0; i < NUM_SRC_TYPES; i++)
-               printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
+               printf("%s    %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+       printf(" %s: specify the type of memory that should be used to\n"
+              "     back the guest data region. (default: %s)\n",
+              flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+       print_available_backing_src_types("     ");
 }
 
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@@ -300,7 +309,7 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
                if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
                        return i;
 
-       backing_src_help();
+       print_available_backing_src_types("");
        TEST_FAIL("Unknown backing src type: %s", type_name);
        return -1;
 }
index 060538b..c5e0dd6 100644 (file)
@@ -180,6 +180,7 @@ int main(int argc, char *argv[])
         * CPU affinity.
         */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
+       ucall_init(vm, NULL);
 
        pthread_create(&migration_thread, NULL, migration_worker, 0);
 
index 2172d65..62f2eb9 100644 (file)
@@ -116,12 +116,12 @@ struct st_time {
        uint64_t st_time;
 };
 
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
 {
        unsigned long ret;
 
        asm volatile(
-               "mov    x0, %1\n"
+               "mov    w0, %w1\n"
                "mov    x1, %2\n"
                "hvc    #0\n"
                "mov    %0, x0\n"
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644 (file)
index 0000000..df04f56
--- /dev/null
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+#define VCPU_ID                0
+
+static struct kvm_vm *vm;
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+       vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+       x2apic_write_reg(APIC_EOI, 0x00);
+       intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+       /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+        * and since L1 didn't enable virtual interrupt masking,
+        * L2 should receive it and not L1.
+        *
+        * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+        * so it should also receive it after the following 'sti'.
+        */
+       x2apic_write_reg(APIC_ICR,
+               APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       GUEST_ASSERT(vintr_irq_called);
+       GUEST_ASSERT(intr_irq_called);
+
+       __asm__ __volatile__(
+               "vmcall\n"
+       );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+       #define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+       struct vmcb *vmcb = svm->vmcb;
+
+       x2apic_enable();
+
+       /* Prepare for L2 execution. */
+       generic_svm_setup(svm, l2_guest_code,
+                         &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /* No virtual interrupt masking */
+       vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+       /* No intercepts for real and virtual interrupts (each is a bit index) */
+       vmcb->control.intercept &= ~((1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_VINTR));
+
+       /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+       vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+       vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+       run_guest(vmcb, svm->vmcb_gpa);
+       GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t svm_gva;
+
+       nested_svm_check_supported();
+
+       vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+       vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+       vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+       vcpu_alloc_svm(vm, &svm_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc;
+
+       vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_ABORT:
+               TEST_FAIL("%s", (const char *)uc.args[0]);
+               break;
+               /* NOT REACHED */
+       case UCALL_DONE:
+               goto done;
+       default:
+               TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+       }
+done:
+       kvm_vm_free(vm);
+       return 0;
+}
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755 (executable)
index 0000000..b9ab373
--- /dev/null
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+#            veth1-.
+#                  |
+#               NAT Gateway --veth0--> Server
+#                  | |
+#            veth2-' |
+# client2---'        |
+#  ....              |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces.  Each client connects to Server, each with colliding tuples:
+#   clientsaddr:10000 -> serveraddr:dport
+#   NAT Gateway is supposed to do port reallocation for each of the
+#   connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+       ip netns del $gw
+       ip netns del $srv
+       for i in $(seq 1 $maxclients); do
+               ip netns del ns-cl$i-$sfx 2>/dev/null
+       done
+
+       sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+       sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+       sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+       sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+       sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+       sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without conntrack tool"
+       exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+       have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace $gw"
+       exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create server netns $srv"
+       exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+
+  ip netns add "$cl"
+  if [ $? -ne 0 ];then
+     echo "SKIP: Could not create client netns $cl"
+     exit $ksft_skip
+  fi
+  ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+  if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+  fi
+done
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+  echo netns exec "$cl" ip link set lo up
+  echo netns exec "$cl" ip link set eth0 up
+  echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+  echo netns exec "$gw" ip link set veth$i up
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+  # clients have same IP addresses.
+  echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+  echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+  echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+  echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+  # NB: same addresses on client-facing interfaces.
+  echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+  echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+  # gw: policy routing
+  echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+       map iiftomark {
+               type ifname : mark
+       }
+
+       map iiftozone {
+               typeof iifname : ct zone
+       }
+
+       set inicmp {
+               flags dynamic
+               type ipv4_addr . ifname . ipv4_addr
+       }
+       set inflows {
+               flags dynamic
+               type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+       }
+
+       set inflows6 {
+               flags dynamic
+               type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+       }
+
+       chain prerouting {
+               type filter hook prerouting priority -64000; policy accept;
+               ct original zone set meta iifname map @iiftozone
+               meta mark set meta iifname map @iiftomark
+
+               tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+               add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+               ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+       }
+
+       chain nat_postrouting {
+               type nat hook postrouting priority 0; policy accept;
+                ct mark set meta mark meta oifname veth0 masquerade
+       }
+
+       chain mangle_prerouting {
+               type filter hook prerouting priority -100; policy accept;
+               ct direction reply meta mark set ct mark
+       }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+       for i in $(seq 1 $((maxclients-1))); do
+               echo \"veth$i\" : $i,
+       done
+       echo \"veth$maxclients\" : $maxclients \}
+       echo add element inet raw iiftozone \{
+       for i in $(seq 1 $((maxclients-1))); do
+               echo \"veth$i\" : $i,
+       done
+       echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+pids=""
+for i in $(seq 1 $maxclients); do
+  ip netns exec "ns-cl$i-$sfx" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+  pids="$pids $!:ns-cl$i-$sfx"
+done
+
+# "$?" right after "&" is always 0: reap each ping to get its real exit status.
+for p in $pids; do
+  wait "${p%%:*}" && continue
+  echo FAIL: Ping failure from ${p#*:} 1>&2; ret=1
+done
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+      ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+      break
+   fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" 1>&2
+    ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if  [ $ret -eq 0 ]; then
+       echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+       echo "SKIP: iperf3 not installed"
+       if [ $ret -ne 0 ];then
+           exit $ret
+       fi
+       exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+  if [ $ret -ne 0 ]; then
+     break
+  fi
+  cl="ns-cl$i-$sfx"
+  ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+  if [ $? -ne 0 ]; then
+     echo FAIL: Failure to connect for $cl 1>&2
+     ip netns exec $gw conntrack -S 1>&2
+     ret=1
+  fi
+done
+if [ $ret -eq 0 ];then
+       echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+      break
+   fi
+done
+if [ $ret -eq 0 ];then
+       echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755 (executable)
index 0000000..ac64637
--- /dev/null
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+       ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace $ns"
+       exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+       have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+       local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+       map rndzone {
+               typeof numgen inc mod $max_zones : ct zone
+       }
+
+       chain output {
+               type filter hook output priority -64000; policy accept;
+               udp dport 12345  ct zone set numgen inc mod 65536 map @rndzone
+       }
+}
+EOF
+       (
+               echo "add element inet raw rndzone {"
+       for i in $(seq 1 $max_zones);do
+               echo -n "$i : $i"
+               if [ $i -lt $max_zones ]; then
+                       echo ","
+               else
+                       echo "}"
+               fi
+       done
+       ) | ip netns exec $ns nft -f /dev/stdin
+
+       local i=0
+       local j=0
+       local outerstart=$(date +%s%3N)
+       local stop=$outerstart
+
+       while [ $i -lt $max_zones ]; do
+               local start=$(date +%s%3N)
+               i=$((i + 10000))
+               j=$((j + 1))
+               dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+               if [ $? -ne 0 ] ;then
+                       ret=1
+                       break
+               fi
+
+               stop=$(date +%s%3N)
+               local duration=$((stop-start))
+               echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+       done
+
+       if [ $have_ct_tool -eq 1 ]; then
+               local count=$(ip netns exec "$ns" conntrack -C)
+               local duration=$((stop-outerstart))
+
+               if [ $count -eq $max_zones ]; then
+                       echo "PASS: inserted $count entries from packet path in $duration ms total"
+               else
+                       ip netns exec $ns conntrack -S 1>&2
+                       echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+                       ret=1
+               fi
+       fi
+
+       if [ $ret -ne 0 ];then
+               echo "FAIL: insert $max_zones entries from packet path" 1>&2
+       fi
+}
+
+test_conntrack_tool() {
+       local max_zones=$1
+
+       ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+       local outerstart=$(date +%s%3N)
+       local start=$(date +%s%3N)
+       local stop=$start
+       local i=0
+       while [ $i -lt $max_zones ]; do
+               i=$((i + 1))
+               ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+                        --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+               if [ $? -ne 0 ];then
+                       ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+                        --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+                       echo "FAIL: conntrack -I returned an error"
+                       ret=1
+                       break
+               fi
+
+               if [ $((i%10000)) -eq 0 ];then
+                       stop=$(date +%s%3N)
+
+                       local duration=$((stop-start))
+                       echo "PASS: added 10000 entries in $duration ms (now $i total)"
+                       start=$stop
+               fi
+       done
+
+       local count=$(ip netns exec "$ns" conntrack -C)
+       local duration=$((stop-outerstart))
+
+       if [ $count -eq $max_zones ]; then
+               echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+       else
+               ip netns exec $ns conntrack -S 1>&2
+               echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+               ret=1
+       fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+       test_conntrack_tool $zones
+else
+       echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+       if [ $ret -eq 0 ];then
+               exit $ksft_skip
+       fi
+fi
+
+exit $ret
index 439d3b4..7851f3a 100644 (file)
@@ -235,9 +235,13 @@ static void ack_flush(void *_completed)
 {
 }
 
-static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
 {
-       if (unlikely(!cpus))
+       const struct cpumask *cpus;
+
+       if (likely(cpumask_available(tmp)))
+               cpus = tmp;
+       else
                cpus = cpu_online_mask;
 
        if (cpumask_empty(cpus))
@@ -263,14 +267,34 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
                        continue;
 
                kvm_make_request(req, vcpu);
-               cpu = vcpu->cpu;
 
                if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
                        continue;
 
-               if (tmp != NULL && cpu != -1 && cpu != me &&
-                   kvm_request_needs_ipi(vcpu, req))
-                       __cpumask_set_cpu(cpu, tmp);
+               /*
+                * tmp can be "unavailable" if cpumasks are allocated off stack
+                * as allocation of the mask is deliberately not fatal and is
+                * handled by falling back to kicking all online CPUs.
+                */
+               if (!cpumask_available(tmp))
+                       continue;
+
+               /*
+                * Note, the vCPU could get migrated to a different pCPU at any
+                * point after kvm_request_needs_ipi(), which could result in
+                * sending an IPI to the previous pCPU.  But, that's ok because
+                * the purpose of the IPI is to ensure the vCPU returns to
+                * OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates.
+                * Entering READING_SHADOW_PAGE_TABLES after this point is also
+                * ok, as the requirement is only that KVM wait for vCPUs that
+                * were reading SPTEs _before_ any changes were finalized.  See
+                * kvm_vcpu_kick() for more details on handling requests.
+                */
+               if (kvm_request_needs_ipi(vcpu, req)) {
+                       cpu = READ_ONCE(vcpu->cpu);
+                       if (cpu != -1 && cpu != me)
+                               __cpumask_set_cpu(cpu, tmp);
+               }
        }
 
        called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
@@ -302,13 +326,8 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-       /*
-        * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
-        * kvm_make_all_cpus_request.
-        */
-       long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
-
        ++kvm->stat.generic.remote_tlb_flush_requests;
+
        /*
         * We want to publish modifications to the page tables before reading
         * mode. Pairs with a memory barrier in arch-specific code.
@@ -323,7 +342,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        if (!kvm_arch_flush_remote_tlb(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.generic.remote_tlb_flush;
-       cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
@@ -528,7 +546,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
                }
        }
 
-       if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
+       if (range->flush_on_ret && ret)
                kvm_flush_remote_tlbs(kvm);
 
        if (locked)
@@ -3134,15 +3152,19 @@ out:
 
 static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
 {
-       unsigned int old, val, shrink;
+       unsigned int old, val, shrink, grow_start;
 
        old = val = vcpu->halt_poll_ns;
        shrink = READ_ONCE(halt_poll_ns_shrink);
+       grow_start = READ_ONCE(halt_poll_ns_grow_start);
        if (shrink == 0)
                val = 0;
        else
                val /= shrink;
 
+       if (val < grow_start)
+               val = 0;
+
        vcpu->halt_poll_ns = val;
        trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
 }
@@ -3290,16 +3312,24 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
  */
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-       int me;
-       int cpu = vcpu->cpu;
+       int me, cpu;
 
        if (kvm_vcpu_wake_up(vcpu))
                return;
 
+       /*
+        * Note, the vCPU could get migrated to a different pCPU at any point
+        * after kvm_arch_vcpu_should_kick(), which could result in sending an
+        * IPI to the previous pCPU.  But, that's ok because the purpose of the
+        * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
+        * vCPU also requires it to leave IN_GUEST_MODE.
+        */
        me = get_cpu();
-       if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-               if (kvm_arch_vcpu_should_kick(vcpu))
+       if (kvm_arch_vcpu_should_kick(vcpu)) {
+               cpu = READ_ONCE(vcpu->cpu);
+               if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
                        smp_send_reschedule(cpu);
+       }
        put_cpu();
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);