Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author Jakub Kicinski <kuba@kernel.org>
Fri, 11 Feb 2022 01:29:56 +0000 (17:29 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 11 Feb 2022 01:29:56 +0000 (17:29 -0800)
No conflicts.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
328 files changed:
Documentation/arm64/silicon-errata.rst
Documentation/dev-tools/kunit/usage.rst
Documentation/filesystems/netfs_library.rst
Documentation/gpu/todo.rst
Documentation/userspace-api/ioctl/ioctl-number.rst
MAINTAINERS
Makefile
arch/arm/crypto/blake2s-shash.c
arch/arm64/Kconfig
arch/arm64/kernel/cpu_errata.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/tools/cpucaps
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/kvm/mips.c
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_sbi_base.c
arch/s390/kvm/kvm-s390.c
arch/x86/crypto/blake2s-shash.c
arch/x86/events/intel/core.c
arch/x86/events/intel/pt.c
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/xen/hypervisor.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/xen/enlighten_hvm.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/smp_pv.c
block/bio-integrity.c
block/fops.c
crypto/algapi.c
crypto/api.c
crypto/blake2s_generic.c
drivers/acpi/Kconfig
drivers/ata/libata-core.c
drivers/char/random.c
drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
drivers/dma-buf/dma-heap.c
drivers/edac/altera_edac.c
drivers/edac/xgene_edac.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/i915/display/intel_overlay.c
drivers/gpu/drm/i915/display/intel_tc.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/uc/intel_guc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/kmb/kmb_plane.c
drivers/gpu/drm/mxsfb/mxsfb_kms.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/ucma.c
drivers/infiniband/hw/hfi1/ipoib.h
drivers/infiniband/hw/hfi1/ipoib_main.c
drivers/infiniband/hw/hfi1/ipoib_tx.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/siw/siw.h
drivers/infiniband/sw/siw/siw_qp_rx.c
drivers/infiniband/sw/siw/siw_verbs.c
drivers/input/touchscreen/wm97xx-core.c
drivers/iommu/amd/init.c
drivers/iommu/intel/irq_remapping.c
drivers/iommu/ioasid.c
drivers/iommu/iommu.c
drivers/iommu/omap-iommu.c
drivers/md/md.c
drivers/mmc/core/sd.c
drivers/mmc/host/moxart-mmc.c
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sh_mmcif.c
drivers/net/bonding/bond_3ad.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/lantiq_gswip.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/ocelot/seville_vsc9953.c
drivers/net/dsa/qca/ar9331.c
drivers/net/ethernet/amd/xgbe/xgbe-pci.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/google/gve/gve_rx.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_common.c
drivers/net/ethernet/intel/ice/ice_lag.c
drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/litex/Kconfig
drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/mdio/mdio-aspeed.c
drivers/net/phy/marvell.c
drivers/net/usb/qmi_wwan.c
drivers/net/veth.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/pci/controller/cadence/pci-j721e.c
drivers/pci/controller/dwc/pcie-kirin.c
drivers/pci/msi/msi.c
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/pm8001/pm8001_hwi.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/pm8001/pm80xx_hwi.c
drivers/scsi/scsi_scan.c
drivers/video/console/Kconfig
drivers/video/fbdev/core/bitblit.c
drivers/video/fbdev/core/fbcon.c
drivers/video/fbdev/core/fbcon.h
drivers/video/fbdev/core/fbcon_ccw.c
drivers/video/fbdev/core/fbcon_cw.c
drivers/video/fbdev/core/fbcon_rotate.h
drivers/video/fbdev/core/fbcon_ud.c
drivers/video/fbdev/core/tileblit.c
drivers/video/fbdev/skeletonfb.c
fs/9p/fid.c
fs/Kconfig
fs/binfmt_misc.c
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-checker.c
fs/btrfs/tree-log.c
fs/cachefiles/io.c
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/fscache.c
fs/cifs/fscache.h
fs/cifs/inode.c
fs/cifs/sess.c
fs/erofs/data.c
fs/erofs/zdata.c
fs/erofs/zmap.c
fs/ext4/acl.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/fast_commit.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/orphan.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/file_table.c
fs/iomap/buffered-io.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/ksmbd/auth.c
fs/ksmbd/smb2pdu.c
fs/ksmbd/smb_common.c
fs/ksmbd/transport_rdma.c
fs/ksmbd/vfs.h
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/nfs4proc.c
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4xdr.c
fs/nfsd/trace.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/quota/dquot.c
fs/super.c
fs/sync.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_super.c
include/crypto/internal/blake2s.h
include/linux/ata.h
include/linux/ceph/libceph.h
include/linux/ceph/messenger.h
include/linux/fb.h
include/linux/iomap.h
include/linux/jbd2.h
include/linux/kvm_host.h
include/linux/libata.h
include/linux/netfs.h
include/linux/nfs.h
include/linux/nfs_fs.h
include/linux/page_table_check.h
include/linux/pgtable.h
include/net/dst_metadata.h
include/sound/pcm.h
include/uapi/linux/kvm.h
include/uapi/linux/netfilter/nf_conntrack_common.h
include/uapi/linux/perf_event.h
include/uapi/sound/asound.h
include/uapi/xen/gntdev.h
include/xen/xenbus_dev.h
ipc/sem.c
kernel/auditsc.c
kernel/events/core.c
lib/crypto/blake2s.c
mm/debug_vm_pgtable.c
mm/khugepaged.c
mm/kmemleak.c
mm/page_isolation.c
mm/page_table_check.c
net/8021q/vlan.h
net/8021q/vlan_dev.c
net/8021q/vlan_netlink.c
net/ax25/af_ax25.c
net/can/isotp.c
net/ceph/ceph_common.c
net/ceph/messenger.c
net/ceph/messenger_v1.c
net/ceph/messenger_v2.c
net/core/skbuff.c
net/dsa/dsa2.c
net/ipv4/ipmr.c
net/ipv4/tcp.c
net/ipv6/ip6mr.c
net/mpls/af_mpls.c
net/mptcp/pm_netlink.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_payload.c
net/smc/smc_pnet.c
net/sunrpc/sysfs.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtsock.c
net/tipc/link.c
net/tipc/monitor.c
net/tipc/name_distr.c
security/integrity/digsig_asymmetric.c
security/integrity/ima/ima_fs.c
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_template.c
security/integrity/integrity_audit.c
sound/core/pcm_native.c
sound/hda/intel-sdw-acpi.c
sound/pci/hda/hda_auto_parser.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_generic.h
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp/acp-mach-common.c
sound/soc/codecs/cpcap.c
sound/soc/codecs/hdmi-codec.c
sound/soc/codecs/lpass-rx-macro.c
sound/soc/codecs/max9759.c
sound/soc/codecs/rt5682-i2c.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/rt5682.h
sound/soc/codecs/wcd938x.c
sound/soc/fsl/pcm030-audio-fabric.c
sound/soc/generic/simple-card.c
sound/soc/mediatek/Kconfig
sound/soc/qcom/qdsp6/q6apm-dai.c
sound/soc/soc-acpi.c
sound/soc/soc-ops.c
sound/soc/soc-pcm.c
sound/soc/xilinx/xlnx_formatter_pcm.c
sound/usb/mixer.c
sound/usb/quirks-table.h
tools/arch/x86/include/asm/cpufeatures.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/linux/prctl.h
tools/include/uapi/sound/asound.h
tools/lib/perf/mmap.c
tools/lib/perf/tests/test-evsel.c
tools/objtool/check.c
tools/perf/builtin-ftrace.c
tools/perf/trace/beauty/prctl_option.sh
tools/perf/util/annotate.c
tools/perf/util/bpf_counter_cgroup.c
tools/perf/util/machine.c
tools/perf/util/map_symbol.h
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/stat-display.c
tools/perf/util/synthetic-events.c
tools/testing/kunit/run_checks.py
tools/testing/selftests/clone3/clone3.c
tools/testing/selftests/ir/ir_loopback.c
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/perf_events/sigtrap_threads.c
tools/testing/selftests/pidfd/pidfd.h
tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
tools/testing/selftests/pidfd/pidfd_test.c
tools/testing/selftests/pidfd/pidfd_wait.c
tools/testing/selftests/vm/userfaultfd.c

index 0ec7b7f..ea281dd 100644 (file)
@@ -100,6 +100,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
index 76af931..1c83e7d 100644 (file)
@@ -242,7 +242,7 @@ example:
 
        int rectangle_area(struct shape *this)
        {
-               struct rectangle *self = container_of(this, struct shape, parent);
+               struct rectangle *self = container_of(this, struct rectangle, parent);
 
                return self->length * self->width;
        };
index 136f8da..4f373a8 100644 (file)
@@ -462,6 +462,10 @@ operation table looks like the following::
                             struct iov_iter *iter,
                             netfs_io_terminated_t term_func,
                             void *term_func_priv);
+
+               int (*query_occupancy)(struct netfs_cache_resources *cres,
+                                      loff_t start, size_t len, size_t granularity,
+                                      loff_t *_data_start, size_t *_data_len);
        };
 
 With a termination handler function pointer::
@@ -536,6 +540,18 @@ The methods defined in the table are:
    indicating whether the termination is definitely happening in the caller's
    context.
 
+ * ``query_occupancy()``
+
+   [Required] Called to find out where the next piece of data is within a
+   particular region of the cache.  The start and length of the region to be
+   queried are passed in, along with the granularity to which the answer needs
+   to be aligned.  The function passes back the start and length of the data,
+   if any, available within that region.  Note that there may be a hole at the
+   front.
+
+   It returns 0 if some data was found, -ENODATA if there was no usable data
+   within the region or -ENOBUFS if there is no caching on this file.
+
 Note that these methods are passed a pointer to the cache resource structure,
 not the read request structure as they could be used in other situations where
 there isn't a read request structure as well, such as writing dirty data to the
index da138dd..a1212b5 100644 (file)
@@ -300,30 +300,6 @@ Contact: Daniel Vetter, Noralf Tronnes
 
 Level: Advanced
 
-Garbage collect fbdev scrolling acceleration
---------------------------------------------
-
-Scroll acceleration has been disabled in fbcon. Now it works as the old
-SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the hook bmove was
-removed from fbcon_ops.
-Remaining tasks:
-
-- a bunch of the hooks in fbcon_ops could be removed or simplified by calling
-  directly instead of the function table (with a switch on p->rotate)
-
-- fb_copyarea is unused after this, and can be deleted from all drivers
-
-- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as
-  well as cfb_copyarea
-
-Note that not all acceleration code can be deleted, since clearing and cursor
-support is still accelerated, which might be good candidates for further
-deletion projects.
-
-Contact: Daniel Vetter
-
-Level: Intermediate
-
 idr_init_base()
 ---------------
 
index 687efcf..e6fce2c 100644 (file)
@@ -115,6 +115,7 @@ Code  Seq#    Include File                                           Comments
 'B'   00-1F  linux/cciss_ioctl.h                                     conflict!
 'B'   00-0F  include/linux/pmu.h                                     conflict!
 'B'   C0-FF  advanced bbus                                           <mailto:maassen@uni-freiburg.de>
+'B'   00-0F  xen/xenbus_dev.h                                        conflict!
 'C'   all    linux/soundcard.h                                       conflict!
 'C'   01-2F  linux/capi.h                                            conflict!
 'C'   F0-FF  drivers/net/wan/cosa.h                                  conflict!
@@ -134,6 +135,7 @@ Code  Seq#    Include File                                           Comments
 'F'   80-8F  linux/arcfb.h                                           conflict!
 'F'   DD     video/sstfb.h                                           conflict!
 'G'   00-3F  drivers/misc/sgi-gru/grulib.h                           conflict!
+'G'   00-0F  xen/gntalloc.h, xen/gntdev.h                            conflict!
 'H'   00-7F  linux/hiddev.h                                          conflict!
 'H'   00-0F  linux/hidraw.h                                          conflict!
 'H'   01     linux/mei.h                                             conflict!
@@ -176,6 +178,7 @@ Code  Seq#    Include File                                           Comments
 'P'   60-6F  sound/sscape_ioctl.h                                    conflict!
 'P'   00-0F  drivers/usb/class/usblp.c                               conflict!
 'P'   01-09  drivers/misc/pci_endpoint_test.c                        conflict!
+'P'   00-0F  xen/privcmd.h                                           conflict!
 'Q'   all    linux/soundcard.h
 'R'   00-1F  linux/random.h                                          conflict!
 'R'   01     linux/rfkill.h                                          conflict!
index e70f5b2..25eb309 100644 (file)
@@ -10881,6 +10881,12 @@ T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:     drivers/ata/pata_arasan_cf.c
 F:     include/linux/pata_arasan_cf_data.h
 
+LIBATA PATA DRIVERS
+R:     Sergey Shtylyov <s.shtylyov@omp.ru>
+L:     linux-ide@vger.kernel.org
+F:     drivers/ata/ata_*.c
+F:     drivers/ata/pata_*.c
+
 LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
 M:     Linus Walleij <linus.walleij@linaro.org>
 L:     linux-ide@vger.kernel.org
@@ -12401,7 +12407,7 @@ F:      include/uapi/linux/membarrier.h
 F:     kernel/sched/membarrier.c
 
 MEMBLOCK
-M:     Mike Rapoport <rppt@linux.ibm.com>
+M:     Mike Rapoport <rppt@kernel.org>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     Documentation/core-api/boot-time-mm.rst
@@ -13299,8 +13305,8 @@ W:      http://www.iptables.org/
 W:     http://www.nftables.org/
 Q:     http://patchwork.ozlabs.org/project/netfilter-devel/list/
 C:     irc://irc.libera.chat/netfilter
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next.git
 F:     include/linux/netfilter*
 F:     include/linux/netfilter/
 F:     include/net/netfilter/
@@ -13567,7 +13573,7 @@ F:      tools/testing/selftests/nci/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
 M:     Trond Myklebust <trond.myklebust@hammerspace.com>
-M:     Anna Schumaker <anna.schumaker@netapp.com>
+M:     Anna Schumaker <anna@kernel.org>
 L:     linux-nfs@vger.kernel.org
 S:     Maintained
 W:     http://client.linux-nfs.org
@@ -16469,6 +16475,14 @@ F:     Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml
 F:     drivers/i2c/busses/i2c-rcar.c
 F:     drivers/i2c/busses/i2c-sh_mobile.c
 
+RENESAS R-CAR SATA DRIVER
+R:     Sergey Shtylyov <s.shtylyov@omp.ru>
+S:     Supported
+L:     linux-ide@vger.kernel.org
+L:     linux-renesas-soc@vger.kernel.org
+F:     Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+F:     drivers/ata/sata_rcar.c
+
 RENESAS R-CAR THERMAL DRIVERS
 M:     Niklas Söderlund <niklas.soderlund@ragnatech.se>
 L:     linux-renesas-soc@vger.kernel.org
index 1fc3491..ceb987e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Gobble Gobble
 
 # *DOCUMENTATION*
index 17c1c3b..763c73b 100644 (file)
 static int crypto_blake2s_update_arm(struct shash_desc *desc,
                                     const u8 *in, unsigned int inlen)
 {
-       return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+       return crypto_blake2s_update(desc, in, inlen, false);
 }
 
 static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
 {
-       return crypto_blake2s_final(desc, out, blake2s_compress);
+       return crypto_blake2s_final(desc, out, false);
 }
 
 #define BLAKE2S_ALG(name, driver_name, digest_size)                    \
index f2b5a4a..cbcd42d 100644 (file)
@@ -680,6 +680,22 @@ config ARM64_ERRATUM_2051678
 
          If unsure, say Y.
 
+config ARM64_ERRATUM_2077057
+       bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
+       help
+         This option adds the workaround for ARM Cortex-A510 erratum 2077057.
+         Affected Cortex-A510 may corrupt SPSR_EL2 when a step exception is
+         expected, but a Pointer Authentication trap is taken instead. The
+         erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+         EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
+
+         This can only happen when EL2 is stepping EL1.
+
+         When these conditions occur, the SPSR_EL2 value is unchanged from the
+         previous guest entry, and can be restored from the in-memory copy.
+
+         If unsure, say Y.
+
 config ARM64_ERRATUM_2119858
        bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
        default y
index 0660981..b217941 100644 (file)
@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
        },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+       {
+               .desc = "ARM erratum 2077057",
+               .capability = ARM64_WORKAROUND_2077057,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+       },
+#endif
 #ifdef CONFIG_ARM64_ERRATUM_2064142
        {
                .desc = "ARM erratum 2064142",
index a4a0063..ecc5958 100644 (file)
@@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
                        xfer_to_guest_mode_work_pending();
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+       int ret;
+
+       guest_state_enter_irqoff();
+       ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+       guest_state_exit_irqoff();
+
+       return ret;
+}
+
 /**
  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
  * @vcpu:      The VCPU pointer
@@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 * Enter the guest
                 */
                trace_kvm_entry(*vcpu_pc(vcpu));
-               guest_enter_irqoff();
+               guest_timing_enter_irqoff();
 
-               ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+               ret = kvm_arm_vcpu_enter_exit(vcpu);
 
                vcpu->mode = OUTSIDE_GUEST_MODE;
                vcpu->stat.exits++;
@@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                kvm_arch_vcpu_ctxsync_fp(vcpu);
 
                /*
-                * We may have taken a host interrupt in HYP mode (ie
-                * while executing the guest). This interrupt is still
-                * pending, as we haven't serviced it yet!
+                * We must ensure that any pending interrupts are taken before
+                * we exit guest timing so that timer ticks are accounted as
+                * guest time. Transiently unmask interrupts so that any
+                * pending interrupts are taken.
                 *
-                * We're now back in SVC mode, with interrupts
-                * disabled.  Enabling the interrupts now will have
-                * the effect of taking the interrupt again, in SVC
-                * mode this time.
+                * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+                * context synchronization event) is necessary to ensure that
+                * pending interrupts are taken.
                 */
                local_irq_enable();
+               isb();
+               local_irq_disable();
+
+               guest_timing_exit_irqoff();
+
+               local_irq_enable();
 
-               /*
-                * We do local_irq_enable() before calling guest_exit() so
-                * that if a timer interrupt hits while running the guest we
-                * account that tick as being spent in the guest.  We enable
-                * preemption after calling guest_exit() so that if we get
-                * preempted we make sure ticks after that is not counted as
-                * guest time.
-                */
-               guest_exit();
                trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
                /* Exit types that need handling before we can be preempted */
index fd2dd26..e3140ab 100644 (file)
@@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 {
        struct kvm_run *run = vcpu->run;
 
+       if (ARM_SERROR_PENDING(exception_index)) {
+               /*
+                * The SError is handled by handle_exit_early(). If the guest
+                * survives it will re-execute the original instruction.
+                */
+               return 1;
+       }
+
        exception_index = ARM_EXCEPTION_CODE(exception_index);
 
        switch (exception_index) {
index 58e14f8..701cfb9 100644 (file)
@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
        return false;
 }
 
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+       /*
+        * Check for the conditions of Cortex-A510's #2077057. When these occur
+        * SPSR_EL2 can't be trusted, but isn't needed either as it is
+        * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+        * Are we single-stepping the guest, and took a PAC exception from the
+        * active-not-pending state?
+        */
+       if (cpus_have_final_cap(ARM64_WORKAROUND_2077057)               &&
+           vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP                 &&
+           *vcpu_cpsr(vcpu) & DBG_SPSR_SS                              &&
+           ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+               write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+       vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the
@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
         * Save PSTATE early so that we can evaluate the vcpu mode
         * early on.
         */
-       vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+       synchronize_vcpu_pstate(vcpu, exit_code);
 
        /*
         * Check whether we want to repaint the state one way or
@@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
        if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
                vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
-       if (ARM_SERROR_PENDING(*exit_code)) {
+       if (ARM_SERROR_PENDING(*exit_code) &&
+           ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
                u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
 
                /*
index e7719e8..9c65b1e 100644 (file)
@@ -55,9 +55,10 @@ WORKAROUND_1418040
 WORKAROUND_1463225
 WORKAROUND_1508412
 WORKAROUND_1542419
-WORKAROUND_2064142
-WORKAROUND_2038923
 WORKAROUND_1902691
+WORKAROUND_2038923
+WORKAROUND_2064142
+WORKAROUND_2077057
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
index 3e336b3..ab6e3dc 100644 (file)
@@ -83,6 +83,8 @@
                label = "HDMI OUT";
                type = "a";
 
+               ddc-en-gpios = <&gpa 25 GPIO_ACTIVE_HIGH>;
+
                port {
                        hdmi_con: endpoint {
                                remote-endpoint = <&dw_hdmi_out>;
                gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
                enable-active-high;
        };
-
-       hdmi_power: fixedregulator@3 {
-               compatible = "regulator-fixed";
-
-               regulator-name = "hdmi_power";
-               regulator-min-microvolt = <5000000>;
-               regulator-max-microvolt = <5000000>;
-
-               gpio = <&gpa 25 0>;
-               enable-active-high;
-       };
 };
 
 &ext {
        pinctrl-names = "default";
        pinctrl-0 = <&pins_hdmi_ddc>;
 
-       hdmi-5v-supply = <&hdmi_power>;
-
        ports {
                #address-cells = <1>;
                #size-cells = <0>;
index e59cb62..a25e0b7 100644 (file)
@@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        return -ENOIOCTLCMD;
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+       int ret;
+
+       guest_state_enter_irqoff();
+       ret = kvm_mips_callbacks->vcpu_run(vcpu);
+       guest_state_exit_irqoff();
+
+       return ret;
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
        int r = -EINTR;
@@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        lose_fpu(1);
 
        local_irq_disable();
-       guest_enter_irqoff();
+       guest_timing_enter_irqoff();
        trace_kvm_enter(vcpu);
 
        /*
@@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
         */
        smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-       r = kvm_mips_callbacks->vcpu_run(vcpu);
+       r = kvm_mips_vcpu_enter_exit(vcpu);
+
+       /*
+        * We must ensure that any pending interrupts are taken before
+        * we exit guest timing so that timer ticks are accounted as
+        * guest time. Transiently unmask interrupts so that any
+        * pending interrupts are taken.
+        *
+        * TODO: is there a barrier which ensures that pending interrupts are
+        * recognised? Currently this just hopes that the CPU takes any pending
+        * interrupts between the enable and disable.
+        */
+       local_irq_enable();
+       local_irq_disable();
 
        trace_kvm_out(vcpu);
-       guest_exit_irqoff();
+       guest_timing_exit_irqoff();
        local_irq_enable();
 
 out:
@@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void)
 /*
  * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
  */
-int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
        u32 cause = vcpu->arch.host_cp0_cause;
@@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
        return ret;
 }
 
+int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+{
+       int ret;
+
+       guest_state_exit_irqoff();
+       ret = __kvm_mips_handle_exit(vcpu);
+       guest_state_enter_irqoff();
+
+       return ret;
+}
+
 /* Enable FPU for guest and restore context */
 void kvm_own_fpu(struct kvm_vcpu *vcpu)
 {
index 0c5239e..6241660 100644 (file)
@@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpu_context *cntx;
+       struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
 
        /* Mark this VCPU never ran */
        vcpu->arch.ran_atleast_once = false;
@@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        cntx->hstatus |= HSTATUS_SPVP;
        cntx->hstatus |= HSTATUS_SPV;
 
+       /* By default, make CY, TM, and IR counters accessible in VU mode */
+       reset_csr->scounteren = 0x7;
+
        /* Setup VCPU timer */
        kvm_riscv_vcpu_timer_init(vcpu);
 
@@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
        csr_write(CSR_HVIP, csr->hvip);
 }
 
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+       guest_state_enter_irqoff();
+       __kvm_riscv_switch_to(&vcpu->arch);
+       guest_state_exit_irqoff();
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
        int ret;
@@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        continue;
                }
 
-               guest_enter_irqoff();
+               guest_timing_enter_irqoff();
 
-               __kvm_riscv_switch_to(&vcpu->arch);
+               kvm_riscv_vcpu_enter_exit(vcpu);
 
                vcpu->mode = OUTSIDE_GUEST_MODE;
                vcpu->stat.exits++;
@@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                kvm_riscv_vcpu_sync_interrupts(vcpu);
 
                /*
-                * We may have taken a host interrupt in VS/VU-mode (i.e.
-                * while executing the guest). This interrupt is still
-                * pending, as we haven't serviced it yet!
+                * We must ensure that any pending interrupts are taken before
+                * we exit guest timing so that timer ticks are accounted as
+                * guest time. Transiently unmask interrupts so that any
+                * pending interrupts are taken.
                 *
-                * We're now back in HS-mode with interrupts disabled
-                * so enabling the interrupts now will have the effect
-                * of taking the interrupt again, in HS-mode this time.
+                * There's no barrier which ensures that pending interrupts are
+                * recognised, so we just hope that the CPU takes any pending
+                * interrupts between the enable and disable.
                 */
                local_irq_enable();
+               local_irq_disable();
 
-               /*
-                * We do local_irq_enable() before calling guest_exit() so
-                * that if a timer interrupt hits while running the guest
-                * we account that tick as being spent in the guest. We
-                * enable preemption after calling guest_exit() so that if
-                * we get preempted we make sure ticks after that is not
-                * counted as guest time.
-                */
-               guest_exit();
+               guest_timing_exit_irqoff();
+
+               local_irq_enable();
 
                preempt_enable();
 
index 4ecf377..48f4310 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/version.h>
 #include <asm/csr.h>
 #include <asm/sbi.h>
 #include <asm/kvm_vcpu_timer.h>
@@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                *out_val = KVM_SBI_IMPID;
                break;
        case SBI_EXT_BASE_GET_IMP_VERSION:
-               *out_val = 0;
+               *out_val = LINUX_VERSION_CODE;
                break;
        case SBI_EXT_BASE_PROBE_EXT:
                if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
index 577f1ea..2296b1f 100644 (file)
@@ -4667,6 +4667,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
                return -EINVAL;
        if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
                return -E2BIG;
+       if (!kvm_s390_pv_cpu_is_protected(vcpu))
+               return -EINVAL;
 
        switch (mop->op) {
        case KVM_S390_MEMOP_SIDA_READ:
index f9e2fec..59ae28a 100644 (file)
 static int crypto_blake2s_update_x86(struct shash_desc *desc,
                                     const u8 *in, unsigned int inlen)
 {
-       return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+       return crypto_blake2s_update(desc, in, inlen, false);
 }
 
 static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
 {
-       return crypto_blake2s_final(desc, out, blake2s_compress);
+       return crypto_blake2s_final(desc, out, false);
 }
 
 #define BLAKE2S_ALG(name, driver_name, digest_size)                    \
index c914340..a3c7ca8 100644 (file)
@@ -4703,6 +4703,19 @@ static __initconst const struct x86_pmu intel_pmu = {
        .lbr_read               = intel_pmu_lbr_read_64,
        .lbr_save               = intel_pmu_lbr_save,
        .lbr_restore            = intel_pmu_lbr_restore,
+
+       /*
+        * SMM has access to all 4 rings and while traditionally SMM code only
+        * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
+        *
+        * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
+        * between SMM or not, this results in what should be pure userspace
+        * counters including SMM data.
+        *
+        * This is a clear privilege issue, therefore globally disable
+        * counting SMM by default.
+        */
+       .attr_freeze_on_smi     = 1,
 };
 
 static __init void intel_clovertown_quirk(void)
index 7f406c1..2d33bba 100644 (file)
@@ -897,8 +897,9 @@ static void pt_handle_status(struct pt *pt)
                 * means we are already losing data; need to let the decoder
                 * know.
                 */
-               if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
-                   buf->output_off == pt_buffer_region_size(buf)) {
+               if (!buf->single &&
+                   (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+                    buf->output_off == pt_buffer_region_size(buf))) {
                        perf_aux_output_flag(&pt->handle,
                                             PERF_AUX_FLAG_TRUNCATED);
                        advance++;
index 631d504..d39e0de 100644 (file)
@@ -82,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt)
 KVM_X86_OP(load_eoi_exitmap)
 KVM_X86_OP(set_virtual_apic_mode)
 KVM_X86_OP_NULL(set_apic_access_page_addr)
-KVM_X86_OP(deliver_posted_interrupt)
+KVM_X86_OP(deliver_interrupt)
 KVM_X86_OP_NULL(sync_pir_to_irr)
 KVM_X86_OP(set_tss_addr)
 KVM_X86_OP(set_identity_map_addr)
index 6e7c545..6dcccb3 100644 (file)
@@ -1410,7 +1410,8 @@ struct kvm_x86_ops {
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
        void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
-       int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+       void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+                                 int trig_mode, int vector);
        int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
        int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
index 1bf2ad3..16f548a 100644 (file)
@@ -43,20 +43,6 @@ static inline uint32_t xen_cpuid_base(void)
        return hypervisor_cpuid_base("XenVMMXenVMM", 2);
 }
 
-#ifdef CONFIG_XEN
-extern bool __init xen_hvm_need_lapic(void);
-
-static inline bool __init xen_x2apic_para_available(void)
-{
-       return xen_hvm_need_lapic();
-}
-#else
-static inline bool __init xen_x2apic_para_available(void)
-{
-       return (xen_cpuid_base() != 0);
-}
-#endif
-
 struct pci_dev;
 
 #ifdef CONFIG_XEN_PV_DOM0
index 28be02a..494d4d3 100644 (file)
@@ -554,12 +554,13 @@ void kvm_set_cpu_caps(void)
        );
 
        kvm_cpu_cap_mask(CPUID_7_0_EBX,
-               F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-               F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
-               F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
-               F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-               F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
-       );
+               F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
+               F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
+               F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
+               F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
+               F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
+               F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
+               F(AVX512VL));
 
        kvm_cpu_cap_mask(CPUID_7_ECX,
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
index 4662469..d7e6fde 100644 (file)
@@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                                                       apic->regs + APIC_TMR);
                }
 
-               if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
-                       kvm_lapic_set_irr(vector, apic);
-                       kvm_make_request(KVM_REQ_EVENT, vcpu);
-                       kvm_vcpu_kick(vcpu);
-               } else {
-                       trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
-                                                  trig_mode, vector);
-               }
+               static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
+                                                      trig_mode, vector);
                break;
 
        case APIC_DM_REMRD:
index 6d97629..a290efb 100644 (file)
@@ -3291,6 +3291,21 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
                SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
+static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+                                 int trig_mode, int vector)
+{
+       struct kvm_vcpu *vcpu = apic->vcpu;
+
+       if (svm_deliver_avic_intr(vcpu, vector)) {
+               kvm_lapic_set_irr(vector, apic);
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               kvm_vcpu_kick(vcpu);
+       } else {
+               trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+                                          trig_mode, vector);
+       }
+}
+
 static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -3615,7 +3630,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        unsigned long vmcb_pa = svm->current_vmcb->pa;
 
-       kvm_guest_enter_irqoff();
+       guest_state_enter_irqoff();
 
        if (sev_es_guest(vcpu->kvm)) {
                __svm_sev_es_vcpu_run(vmcb_pa);
@@ -3635,7 +3650,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
                vmload(__sme_page_pa(sd->save_area));
        }
 
-       kvm_guest_exit_irqoff();
+       guest_state_exit_irqoff();
 }
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -4545,7 +4560,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .pmu_ops = &amd_pmu_ops,
        .nested_ops = &svm_nested_ops,
 
-       .deliver_posted_interrupt = svm_deliver_avic_intr,
+       .deliver_interrupt = svm_deliver_interrupt,
        .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
        .update_pi_irte = svm_update_pi_irte,
        .setup_mce = svm_setup_mce,
index aca3ae2..6c27bd0 100644 (file)
@@ -4041,6 +4041,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
        return 0;
 }
 
+static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+                                 int trig_mode, int vector)
+{
+       struct kvm_vcpu *vcpu = apic->vcpu;
+
+       if (vmx_deliver_posted_interrupt(vcpu, vector)) {
+               kvm_lapic_set_irr(vector, apic);
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               kvm_vcpu_kick(vcpu);
+       } else {
+               trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+                                          trig_mode, vector);
+       }
+}
+
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -6754,7 +6769,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                        struct vcpu_vmx *vmx)
 {
-       kvm_guest_enter_irqoff();
+       guest_state_enter_irqoff();
 
        /* L1D Flush includes CPU buffer clear to mitigate MDS */
        if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6770,7 +6785,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
        vcpu->arch.cr2 = native_read_cr2();
 
-       kvm_guest_exit_irqoff();
+       guest_state_exit_irqoff();
 }
 
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -7768,7 +7783,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .hwapic_isr_update = vmx_hwapic_isr_update,
        .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
        .sync_pir_to_irr = vmx_sync_pir_to_irr,
-       .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+       .deliver_interrupt = vmx_deliver_interrupt,
        .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
 
        .set_tss_addr = vmx_set_tss_addr,
index 74b53a1..7131d73 100644 (file)
@@ -90,6 +90,8 @@
 u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
 EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
 
+#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
+
 #define emul_to_vcpu(ctxt) \
        ((struct kvm_vcpu *)(ctxt)->vcpu)
 
@@ -4340,7 +4342,7 @@ static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
        void __user *uaddr = (void __user*)(unsigned long)attr->addr;
 
        if ((u64)(unsigned long)uaddr != attr->addr)
-               return ERR_PTR(-EFAULT);
+               return ERR_PTR_USR(-EFAULT);
        return uaddr;
 }
 
@@ -10041,6 +10043,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                set_debugreg(0, 7);
        }
 
+       guest_timing_enter_irqoff();
+
        for (;;) {
                /*
                 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
@@ -10125,7 +10129,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
         * of accounting via context tracking, but the loss of accuracy is
         * acceptable for all known use cases.
         */
-       vtime_account_guest_exit();
+       guest_timing_exit_irqoff();
 
        if (lapic_in_kernel(vcpu)) {
                s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@ -11639,8 +11643,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
        kvm_free_pit(kvm);
 }
 
-#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
-
 /**
  * __x86_set_memory_region: Setup KVM internal memory slot
  *
index 635b75f..767ec7f 100644 (file)
 
 void kvm_spurious_fault(void);
 
-static __always_inline void kvm_guest_enter_irqoff(void)
-{
-       /*
-        * VMENTER enables interrupts (host state), but the kernel state is
-        * interrupts disabled when this is invoked. Also tell RCU about
-        * it. This is the same logic as for exit_to_user_mode().
-        *
-        * This ensures that e.g. latency analysis on the host observes
-        * guest mode as interrupt enabled.
-        *
-        * guest_enter_irqoff() informs context tracking about the
-        * transition to guest mode and if enabled adjusts RCU state
-        * accordingly.
-        */
-       instrumentation_begin();
-       trace_hardirqs_on_prepare();
-       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-       instrumentation_end();
-
-       guest_enter_irqoff();
-       lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static __always_inline void kvm_guest_exit_irqoff(void)
-{
-       /*
-        * VMEXIT disables interrupts (host state), but tracing and lockdep
-        * have them in state 'on' as recorded before entering guest mode.
-        * Same as enter_from_user_mode().
-        *
-        * context_tracking_guest_exit() restores host context and reinstates
-        * RCU if enabled and required.
-        *
-        * This needs to be done immediately after VM-Exit, before any code
-        * that might contain tracepoints or call out to the greater world,
-        * e.g. before x86_spec_ctrl_restore_host().
-        */
-       lockdep_hardirqs_off(CALLER_ADDR0);
-       context_tracking_guest_exit();
-
-       instrumentation_begin();
-       trace_hardirqs_off_finish();
-       instrumentation_end();
-}
-
 #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)                \
 ({                                                                     \
        bool failed = (consistency_check);                              \
index 4230094..6448c50 100644 (file)
@@ -9,6 +9,7 @@
 #include <xen/events.h>
 #include <xen/interface/memory.h>
 
+#include <asm/apic.h>
 #include <asm/cpu.h>
 #include <asm/smp.h>
 #include <asm/io_apic.h>
@@ -242,15 +243,9 @@ static __init int xen_parse_no_vector_callback(char *arg)
 }
 early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
 
-bool __init xen_hvm_need_lapic(void)
+static __init bool xen_x2apic_available(void)
 {
-       if (xen_pv_domain())
-               return false;
-       if (!xen_hvm_domain())
-               return false;
-       if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
-               return false;
-       return true;
+       return x2apic_supported();
 }
 
 static __init void xen_hvm_guest_late_init(void)
@@ -312,7 +307,7 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
        .detect                 = xen_platform_hvm,
        .type                   = X86_HYPER_XEN_HVM,
        .init.init_platform     = xen_hvm_guest_init,
-       .init.x2apic_available  = xen_x2apic_para_available,
+       .init.x2apic_available  = xen_x2apic_available,
        .init.init_mem_mapping  = xen_hvm_init_mem_mapping,
        .init.guest_late_init   = xen_hvm_guest_late_init,
        .runtime.pin_vcpu       = xen_pin_vcpu,
index 5004feb..d47c3d1 100644 (file)
@@ -1341,10 +1341,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
                xen_acpi_sleep_register();
 
-               /* Avoid searching for BIOS MP tables */
-               x86_init.mpparse.find_smp_config = x86_init_noop;
-               x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
                xen_boot_params_init_edd();
 
 #ifdef CONFIG_ACPI
index 6a8f3b5..4a60192 100644 (file)
@@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
        return rc;
 }
 
-static void __init xen_fill_possible_map(void)
-{
-       int i, rc;
-
-       if (xen_initial_domain())
-               return;
-
-       for (i = 0; i < nr_cpu_ids; i++) {
-               rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-               if (rc >= 0) {
-                       num_processors++;
-                       set_cpu_possible(i, true);
-               }
-       }
-}
-
-static void __init xen_filter_cpu_maps(void)
+static void __init _get_smp_config(unsigned int early)
 {
        int i, rc;
        unsigned int subtract = 0;
 
-       if (!xen_initial_domain())
+       if (early)
                return;
 
        num_processors = 0;
@@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
                 * sure the old memory can be recycled. */
                make_lowmem_page_readwrite(xen_initial_gdt);
 
-       xen_filter_cpu_maps();
        xen_setup_vcpu_info_placement();
 
        /*
@@ -476,5 +459,8 @@ static const struct smp_ops xen_smp_ops __initconst = {
 void __init xen_smp_init(void)
 {
        smp_ops = xen_smp_ops;
-       xen_fill_possible_map();
+
+       /* Avoid searching for BIOS MP tables */
+       x86_init.mpparse.find_smp_config = x86_init_noop;
+       x86_init.mpparse.get_smp_config = _get_smp_config;
 }
index d251147..0827b19 100644 (file)
@@ -373,7 +373,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
        struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
        unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
-       bip->bip_iter.bi_sector += bytes_done >> 9;
+       bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
        bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 
index 26bf15c..4f59e0f 100644 (file)
@@ -566,34 +566,37 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct block_device *bdev = iocb->ki_filp->private_data;
        loff_t size = bdev_nr_bytes(bdev);
-       size_t count = iov_iter_count(to);
        loff_t pos = iocb->ki_pos;
        size_t shorted = 0;
        ssize_t ret = 0;
+       size_t count;
 
-       if (unlikely(pos + count > size)) {
+       if (unlikely(pos + iov_iter_count(to) > size)) {
                if (pos >= size)
                        return 0;
                size -= pos;
-               if (count > size) {
-                       shorted = count - size;
-                       iov_iter_truncate(to, size);
-               }
+               shorted = iov_iter_count(to) - size;
+               iov_iter_truncate(to, size);
        }
 
+       count = iov_iter_count(to);
+       if (!count)
+               goto reexpand; /* skip atime */
+
        if (iocb->ki_flags & IOCB_DIRECT) {
                struct address_space *mapping = iocb->ki_filp->f_mapping;
 
                if (iocb->ki_flags & IOCB_NOWAIT) {
-                       if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
-                                               iocb->ki_pos + count - 1))
-                               return -EAGAIN;
+                       if (filemap_range_needs_writeback(mapping, pos,
+                                                         pos + count - 1)) {
+                               ret = -EAGAIN;
+                               goto reexpand;
+                       }
                } else {
-                       ret = filemap_write_and_wait_range(mapping,
-                                               iocb->ki_pos,
-                                               iocb->ki_pos + count - 1);
+                       ret = filemap_write_and_wait_range(mapping, pos,
+                                                          pos + count - 1);
                        if (ret < 0)
-                               return ret;
+                               goto reexpand;
                }
 
                file_accessed(iocb->ki_filp);
@@ -603,12 +606,14 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
                        iocb->ki_pos += ret;
                        count -= ret;
                }
+               iov_iter_revert(to, count - iov_iter_count(to));
                if (ret < 0 || !count)
-                       return ret;
+                       goto reexpand;
        }
 
        ret = filemap_read(iocb, to, ret);
 
+reexpand:
        if (unlikely(shorted))
                iov_iter_reexpand(to, iov_iter_count(to) + shorted);
        return ret;
index a366cb3..76fdaa1 100644 (file)
@@ -1324,3 +1324,4 @@ module_exit(crypto_algapi_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cryptographic algorithms API");
+MODULE_SOFTDEP("pre: cryptomgr");
index cf0869d..7ddfe94 100644 (file)
@@ -643,4 +643,3 @@ EXPORT_SYMBOL_GPL(crypto_req_done);
 
 MODULE_DESCRIPTION("Cryptographic core API");
 MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: cryptomgr");
index 72fe480..5f96a21 100644 (file)
 static int crypto_blake2s_update_generic(struct shash_desc *desc,
                                         const u8 *in, unsigned int inlen)
 {
-       return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic);
+       return crypto_blake2s_update(desc, in, inlen, true);
 }
 
 static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
 {
-       return crypto_blake2s_final(desc, out, blake2s_compress_generic);
+       return crypto_blake2s_final(desc, out, true);
 }
 
 #define BLAKE2S_ALG(name, driver_name, digest_size)                    \
index ba45541..273741d 100644 (file)
@@ -11,6 +11,7 @@ menuconfig ACPI
        depends on ARCH_SUPPORTS_ACPI
        select PNP
        select NLS
+       select CRC32
        default y if X86
        help
          Advanced Configuration and Power Interface (ACPI) support for 
index 67f8802..ba9273f 100644 (file)
@@ -2007,6 +2007,9 @@ static bool ata_log_supported(struct ata_device *dev, u8 log)
 {
        struct ata_port *ap = dev->link->ap;
 
+       if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
+               return false;
+
        if (ata_read_log_page(dev, ATA_LOG_DIRECTORY, 0, ap->sector_buf, 1))
                return false;
        return get_unaligned_le16(&ap->sector_buf[log * 2]) ? true : false;
@@ -2445,23 +2448,21 @@ static void ata_dev_config_cpr(struct ata_device *dev)
        struct ata_cpr_log *cpr_log = NULL;
        u8 *desc, *buf = NULL;
 
-       if (!ata_identify_page_supported(dev,
-                                ATA_LOG_CONCURRENT_POSITIONING_RANGES))
+       if (ata_id_major_version(dev->id) < 11 ||
+           !ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES))
                goto out;
 
        /*
-        * Read IDENTIFY DEVICE data log, page 0x47
-        * (concurrent positioning ranges). We can have at most 255 32B range
-        * descriptors plus a 64B header.
+        * Read the concurrent positioning ranges log (0x47). We can have at
+        * most 255 32B range descriptors plus a 64B header.
         */
        buf_len = (64 + 255 * 32 + 511) & ~511;
        buf = kzalloc(buf_len, GFP_KERNEL);
        if (!buf)
                goto out;
 
-       err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE,
-                                    ATA_LOG_CONCURRENT_POSITIONING_RANGES,
-                                    buf, buf_len >> 9);
+       err_mask = ata_read_log_page(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES,
+                                    0, buf, buf_len >> 9);
        if (err_mask)
                goto out;
 
@@ -4073,6 +4074,13 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "WDC WD3000JD-*",             NULL,   ATA_HORKAGE_WD_BROKEN_LPM },
        { "WDC WD3200JD-*",             NULL,   ATA_HORKAGE_WD_BROKEN_LPM },
 
+       /*
+        * This sata dom device goes on a walkabout when the ATA_LOG_DIRECTORY
+        * log page is accessed. Ensure we never ask for this log page with
+        * these devices.
+        */
+       { "SATADOM-ML 3ME",             NULL,   ATA_HORKAGE_NO_LOG_DIR },
+
        /* End Marker */
        { }
 };
index 68613f0..3404a91 100644 (file)
@@ -762,7 +762,7 @@ static bool crng_init_try_arch(struct crng_state *crng)
        return arch_init;
 }
 
-static bool __init crng_init_try_arch_early(struct crng_state *crng)
+static bool __init crng_init_try_arch_early(void)
 {
        int i;
        bool arch_init = true;
@@ -774,7 +774,7 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng)
                        rv = random_get_entropy();
                        arch_init = false;
                }
-               crng->state[i] ^= rv;
+               primary_crng.state[i] ^= rv;
        }
 
        return arch_init;
@@ -788,22 +788,20 @@ static void crng_initialize_secondary(struct crng_state *crng)
        crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 }
 
-static void __init crng_initialize_primary(struct crng_state *crng)
+static void __init crng_initialize_primary(void)
 {
-       _extract_entropy(&crng->state[4], sizeof(u32) * 12);
-       if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) {
+       _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12);
+       if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) {
                invalidate_batched_entropy();
                numa_crng_init();
                crng_init = 2;
                pr_notice("crng init done (trusting CPU's manufacturer)\n");
        }
-       crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
+       primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 }
 
-static void crng_finalize_init(struct crng_state *crng)
+static void crng_finalize_init(void)
 {
-       if (crng != &primary_crng || crng_init >= 2)
-               return;
        if (!system_wq) {
                /* We can't call numa_crng_init until we have workqueues,
                 * so mark this for processing later. */
@@ -814,6 +812,7 @@ static void crng_finalize_init(struct crng_state *crng)
        invalidate_batched_entropy();
        numa_crng_init();
        crng_init = 2;
+       crng_need_final_init = false;
        process_random_ready_list();
        wake_up_interruptible(&crng_init_wait);
        kill_fasync(&fasync, SIGIO, POLL_IN);
@@ -980,7 +979,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool)
        memzero_explicit(&buf, sizeof(buf));
        WRITE_ONCE(crng->init_time, jiffies);
        spin_unlock_irqrestore(&crng->lock, flags);
-       crng_finalize_init(crng);
+       if (crng == &primary_crng && crng_init < 2)
+               crng_finalize_init();
 }
 
 static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE])
@@ -1697,8 +1697,8 @@ int __init rand_initialize(void)
 {
        init_std_data();
        if (crng_need_final_init)
-               crng_finalize_init(&primary_crng);
-       crng_initialize_primary(&primary_crng);
+               crng_finalize_init();
+       crng_initialize_primary();
        crng_global_init_time = jiffies;
        if (ratelimit_disable) {
                urandom_warning.interval = 0;
@@ -1856,7 +1856,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
-               input_pool.entropy_count = 0;
+               if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) {
+                       wake_up_interruptible(&random_write_wait);
+                       kill_fasync(&fasync, SIGIO, POLL_OUT);
+               }
                return 0;
        case RNDRESEEDCRNG:
                if (!capable(CAP_SYS_ADMIN))
@@ -2205,13 +2208,15 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
                        return;
        }
 
-       /* Suspend writing if we're above the trickle threshold.
+       /* Throttle writing if we're above the trickle threshold.
         * We'll be woken up again once below random_write_wakeup_thresh,
-        * or when the calling thread is about to terminate.
+        * when the calling thread is about to terminate, or once
+        * CRNG_RESEED_INTERVAL has lapsed.
         */
-       wait_event_interruptible(random_write_wait,
+       wait_event_interruptible_timeout(random_write_wait,
                        !system_wq || kthread_should_stop() ||
-                       POOL_ENTROPY_BITS() <= random_write_wakeup_bits);
+                       POOL_ENTROPY_BITS() <= random_write_wakeup_bits,
+                       CRNG_RESEED_INTERVAL);
        mix_pool_bytes(buffer, count);
        credit_entropy_bits(entropy);
 }
index 4c8ebdf..1b4d425 100644 (file)
@@ -1753,7 +1753,6 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf)
        char engs_info[2 * OTX2_CPT_NAME_LENGTH];
        struct otx2_cpt_eng_grp_info *grp;
        struct otx2_cpt_engs_rsvd *engs;
-       u32 mask[4];
        int i, j;
 
        pr_debug("Engine groups global info");
@@ -1785,6 +1784,8 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf)
                for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) {
                        engs = &grp->engs[j];
                        if (engs->type) {
+                               u32 mask[5] = { };
+
                                get_engs_info(grp, engs_info,
                                              2 * OTX2_CPT_NAME_LENGTH, j);
                                pr_debug("Slot%d: %s", j, engs_info);
index 56bf5ad..8f5848a 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/xarray.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/dma-heap.h>
@@ -135,6 +136,7 @@ static long dma_heap_ioctl(struct file *file, unsigned int ucmd,
        if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds))
                return -EINVAL;
 
+       nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds));
        /* Get the kernel ioctl cmd that matches */
        kcmd = dma_heap_ioctl_cmds[nr];
 
index 3a6d241..5dd2978 100644 (file)
@@ -350,7 +350,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
        if (irq < 0) {
                edac_printk(KERN_ERR, EDAC_MC,
                            "No irq %d in DT\n", irq);
-               return -ENODEV;
+               return irq;
        }
 
        /* Arria10 has a 2nd IRQ */
index 2ccd1db..7197f9f 100644 (file)
@@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev)
                        irq = platform_get_irq_optional(pdev, i);
                        if (irq < 0) {
                                dev_err(&pdev->dev, "No IRQ resource\n");
-                               rc = -EINVAL;
+                               rc = irq;
                                goto out_err;
                        }
                        rc = devm_request_irq(&pdev->dev, irq,
index d8b854f..9a53a4d 100644 (file)
@@ -1408,12 +1408,10 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta
 int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
 
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
-bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
 void amdgpu_acpi_detect(void);
 #else
 static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
-static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
 static inline void amdgpu_acpi_detect(void) { }
 static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
 static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
@@ -1422,6 +1420,14 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
                                                 enum amdgpu_ss ss_state) { return 0; }
 #endif
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+#else
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+#endif
+
 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                           uint64_t addr, struct amdgpu_bo **bo,
                           struct amdgpu_bo_va_mapping **mapping);
index 4811b0f..0e12315 100644 (file)
@@ -1031,6 +1031,20 @@ void amdgpu_acpi_detect(void)
        }
 }
 
+#if IS_ENABLED(CONFIG_SUSPEND)
+/**
+ * amdgpu_acpi_is_s3_active
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * Returns true if not an APU or if the suspend target is PM_SUSPEND_MEM.
+ */
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+{
+       return !(adev->flags & AMD_IS_APU) ||
+               (pm_suspend_target_state == PM_SUSPEND_MEM);
+}
+
 /**
  * amdgpu_acpi_is_s0ix_active
  *
@@ -1040,11 +1054,24 @@ void amdgpu_acpi_detect(void)
  */
 bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
 {
-#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND)
-       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
-               if (adev->flags & AMD_IS_APU)
-                       return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
+       if (!(adev->flags & AMD_IS_APU) ||
+           (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+               return false;
+
+       if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+               dev_warn_once(adev->dev,
+                             "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+                             "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+               return false;
        }
-#endif
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+       dev_warn_once(adev->dev,
+                     "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
        return false;
+#else
+       return true;
+#endif /* CONFIG_AMD_PMC */
 }
+
+#endif /* CONFIG_SUSPEND */
index 4c83f1d..63a0899 100644 (file)
@@ -2246,13 +2246,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
 static int amdgpu_pmops_prepare(struct device *dev)
 {
        struct drm_device *drm_dev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
 
        /* Return a positive number here so
         * DPM_FLAG_SMART_SUSPEND works properly
         */
        if (amdgpu_device_supports_boco(drm_dev))
-               return pm_runtime_suspended(dev) &&
-                       pm_suspend_via_firmware();
+               return pm_runtime_suspended(dev);
+
+       /* If neither S0ix (s2idle) nor S3 is active for the device,
+        * skip suspend.
+        */
+       if (!amdgpu_acpi_is_s0ix_active(adev) &&
+           !amdgpu_acpi_is_s3_active(adev))
+               return 1;
 
        return 0;
 }
index 5c3f240..4655702 100644 (file)
@@ -1904,7 +1904,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        unsigned i;
        int r;
 
-       if (direct_submit && !ring->sched.ready) {
+       if (!direct_submit && !ring->sched.ready) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }
index 38bb427..a2f8ed0 100644 (file)
@@ -1140,6 +1140,9 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3))
+               return;
+
        adev->mmhub.funcs->get_clockgating(adev, flags);
 
        if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
index 48005de..bc4ddc3 100644 (file)
@@ -570,32 +570,32 @@ static struct wm_table lpddr5_wm_table = {
                        .wm_inst = WM_A,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 7.95,
-                       .sr_enter_plus_exit_time_us = 9,
+                       .sr_exit_time_us = 13.5,
+                       .sr_enter_plus_exit_time_us = 16.5,
                        .valid = true,
                },
                {
                        .wm_inst = WM_B,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 9.82,
-                       .sr_enter_plus_exit_time_us = 11.196,
+                       .sr_exit_time_us = 13.5,
+                       .sr_enter_plus_exit_time_us = 16.5,
                        .valid = true,
                },
                {
                        .wm_inst = WM_C,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 9.89,
-                       .sr_enter_plus_exit_time_us = 11.24,
+                       .sr_exit_time_us = 13.5,
+                       .sr_enter_plus_exit_time_us = 16.5,
                        .valid = true,
                },
                {
                        .wm_inst = WM_D,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.65333,
-                       .sr_exit_time_us = 9.748,
-                       .sr_enter_plus_exit_time_us = 11.102,
+                       .sr_exit_time_us = 13.5,
+                       .sr_enter_plus_exit_time_us = 16.5,
                        .valid = true,
                },
        }
index 4162ce4..9d17c5a 100644 (file)
@@ -329,38 +329,38 @@ static struct clk_bw_params dcn31_bw_params = {
 
 };
 
-static struct wm_table ddr4_wm_table = {
+static struct wm_table ddr5_wm_table = {
        .entries = {
                {
                        .wm_inst = WM_A,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 6.09,
-                       .sr_enter_plus_exit_time_us = 7.14,
+                       .sr_exit_time_us = 9,
+                       .sr_enter_plus_exit_time_us = 11,
                        .valid = true,
                },
                {
                        .wm_inst = WM_B,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 9,
+                       .sr_enter_plus_exit_time_us = 11,
                        .valid = true,
                },
                {
                        .wm_inst = WM_C,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 9,
+                       .sr_enter_plus_exit_time_us = 11,
                        .valid = true,
                },
                {
                        .wm_inst = WM_D,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 9,
+                       .sr_enter_plus_exit_time_us = 11,
                        .valid = true,
                },
        }
@@ -687,7 +687,7 @@ void dcn31_clk_mgr_construct(
                if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) {
                        dcn31_bw_params.wm_table = lpddr5_wm_table;
                } else {
-                       dcn31_bw_params.wm_table = ddr4_wm_table;
+                       dcn31_bw_params.wm_table = ddr5_wm_table;
                }
                /* Saved clocks configured at boot for debug purposes */
                 dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
index 4c3ab25..61b8f29 100644 (file)
@@ -5597,6 +5597,26 @@ static bool retrieve_link_cap(struct dc_link *link)
                dp_hw_fw_revision.ieee_fw_rev,
                sizeof(dp_hw_fw_revision.ieee_fw_rev));
 
+       /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */
+       {
+               uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 };
+               uint8_t fwrev_mbp_2018[] = { 7, 4 };
+               uint8_t fwrev_mbp_2018_vega[] = { 8, 4 };
+
+               /* We also check for the firmware revision as 16,1 models have an
+                * identical device id and are incorrectly quirked otherwise.
+                */
+               if ((link->dpcd_caps.sink_dev_id == 0x0010fa) &&
+                   !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018,
+                            sizeof(str_mbp_2018)) &&
+                   (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018,
+                            sizeof(fwrev_mbp_2018)) ||
+                   !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega,
+                            sizeof(fwrev_mbp_2018_vega)))) {
+                       link->reported_link_cap.link_rate = LINK_RATE_RBR2;
+               }
+       }
+
        memset(&link->dpcd_caps.dsc_caps, '\0',
                        sizeof(link->dpcd_caps.dsc_caps));
        memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap));
index f3ff141..26ec69b 100644 (file)
@@ -1608,11 +1608,6 @@ static enum dc_status apply_single_controller_ctx_to_hw(
                        pipe_ctx->stream_res.stream_enc,
                        pipe_ctx->stream_res.tg->inst);
 
-       if (dc_is_embedded_signal(pipe_ctx->stream->signal) &&
-               pipe_ctx->stream_res.stream_enc->funcs->reset_fifo)
-               pipe_ctx->stream_res.stream_enc->funcs->reset_fifo(
-                       pipe_ctx->stream_res.stream_enc);
-
        if (dc_is_dp_signal(pipe_ctx->stream->signal))
                dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
 
index bf4436d..b0c08ee 100644 (file)
@@ -902,19 +902,6 @@ void enc1_stream_encoder_stop_dp_info_packets(
 
 }
 
-void enc1_stream_encoder_reset_fifo(
-       struct stream_encoder *enc)
-{
-       struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
-
-       /* set DIG_START to 0x1 to reset FIFO */
-       REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
-       udelay(100);
-
-       /* write 0 to take the FIFO out of reset */
-       REG_UPDATE(DIG_FE_CNTL, DIG_START, 0);
-}
-
 void enc1_stream_encoder_dp_blank(
        struct dc_link *link,
        struct stream_encoder *enc)
@@ -1600,8 +1587,6 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
                enc1_stream_encoder_send_immediate_sdp_message,
        .stop_dp_info_packets =
                enc1_stream_encoder_stop_dp_info_packets,
-       .reset_fifo =
-               enc1_stream_encoder_reset_fifo,
        .dp_blank =
                enc1_stream_encoder_dp_blank,
        .dp_unblank =
index a146a41..687d7e4 100644 (file)
@@ -626,9 +626,6 @@ void enc1_stream_encoder_send_immediate_sdp_message(
 void enc1_stream_encoder_stop_dp_info_packets(
        struct stream_encoder *enc);
 
-void enc1_stream_encoder_reset_fifo(
-       struct stream_encoder *enc);
-
 void enc1_stream_encoder_dp_blank(
        struct dc_link *link,
        struct stream_encoder *enc);
index 8a70f92..aab25ca 100644 (file)
@@ -593,8 +593,6 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
                enc1_stream_encoder_send_immediate_sdp_message,
        .stop_dp_info_packets =
                enc1_stream_encoder_stop_dp_info_packets,
-       .reset_fifo =
-               enc1_stream_encoder_reset_fifo,
        .dp_blank =
                enc1_stream_encoder_dp_blank,
        .dp_unblank =
index 8daa127..a04ca4a 100644 (file)
@@ -789,8 +789,6 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = {
                enc3_stream_encoder_update_dp_info_packets,
        .stop_dp_info_packets =
                enc1_stream_encoder_stop_dp_info_packets,
-       .reset_fifo =
-               enc1_stream_encoder_reset_fifo,
        .dp_blank =
                enc1_stream_encoder_dp_blank,
        .dp_unblank =
index 073f8b6..c88e113 100644 (file)
@@ -164,10 +164,6 @@ struct stream_encoder_funcs {
        void (*stop_dp_info_packets)(
                struct stream_encoder *enc);
 
-       void (*reset_fifo)(
-               struct stream_encoder *enc
-       );
-
        void (*dp_blank)(
                struct dc_link *link,
                struct stream_encoder *enc);
index 777f717..a420729 100644 (file)
@@ -3696,14 +3696,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
 
 static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu)
 {
-       struct smu_table_context *table_context = &smu->smu_table;
-       PPTable_t *smc_pptable = table_context->driver_pptable;
+       uint16_t *mgpu_fan_boost_limit_rpm;
 
+       GET_PPTABLE_MEMBER(MGpuFanBoostLimitRpm, &mgpu_fan_boost_limit_rpm);
        /*
         * Skip the MGpuFanBoost setting for those ASICs
         * which do not support it
         */
-       if (!smc_pptable->MGpuFanBoostLimitRpm)
+       if (*mgpu_fan_boost_limit_rpm == 0)
                return 0;
 
        return smu_cmn_send_smc_msg_with_param(smu,
index 1a376e9..d610e48 100644 (file)
@@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay,
        const struct intel_crtc_state *pipe_config =
                overlay->crtc->config;
 
+       if (rec->dst_height == 0 || rec->dst_width == 0)
+               return -EINVAL;
+
        if (rec->dst_x < pipe_config->pipe_src_w &&
            rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w &&
            rec->dst_y < pipe_config->pipe_src_h &&
index 40faa18..dbd7d0d 100644 (file)
@@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port)
 static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port)
 {
        struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+       enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port);
        struct intel_uncore *uncore = &i915->uncore;
        u32 val;
 
-       val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx));
+       val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port));
        if (val == 0xffffffff) {
                drm_dbg_kms(&i915->drm,
                            "Port %s: PHY in TCCOLD, assuming not complete\n",
index 3a5b247..1736efa 100644 (file)
@@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
                                      timeout) < 0) {
                        i915_request_put(rq);
 
-                       tl = intel_context_timeline_lock(ce);
+                       /*
+                        * Error path, cannot use intel_context_timeline_lock as
+                        * that is user interruptible and this cleanup step
+                        * must be done.
+                        */
+                       mutex_lock(&ce->timeline->mutex);
                        intel_context_exit(ce);
-                       intel_context_timeline_unlock(tl);
+                       mutex_unlock(&ce->timeline->mutex);
 
                        if (nonblock)
                                return -EWOULDBLOCK;
index f9240d4..3aabe16 100644 (file)
@@ -206,6 +206,11 @@ struct intel_guc {
                 * context usage for overflows.
                 */
                struct delayed_work work;
+
+               /**
+                * @shift: Right shift value for the gpm timestamp
+                */
+               u32 shift;
        } timestamp;
 
 #ifdef CONFIG_DRM_I915_SELFTEST
index e751720..154ad72 100644 (file)
@@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
        if (new_start == lower_32_bits(*prev_start))
                return;
 
+       /*
+        * When gt is unparked, we update the gt timestamp and start the ping
+        * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
+        * is unparked, all switched in contexts will have a start time that is
+        * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
+        *
+        * If neither gt_stamp nor new_start has rolled over, then the
+        * gt_stamp_hi does not need to be adjusted, however if one of them has
+        * rolled over, we need to adjust gt_stamp_hi accordingly.
+        *
+        * The below conditions address the cases of new_start rollover and
+        * gt_stamp_last rollover respectively.
+        */
        if (new_start < gt_stamp_last &&
            (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
                gt_stamp_hi++;
@@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
        *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
 }
 
-static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+/*
+ * GuC updates shared memory and KMD reads it. Since this is not synchronized,
+ * we run into a race where the value read is inconsistent. Sometimes the
+ * inconsistency is in reading the upper MSB bytes of the last_in value when
+ * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
+ * 24 bits are zero. Since these are non-zero values, it is non-trivial to
+ * determine validity of these values. Instead we read the values multiple times
+ * until they are consistent. In test runs, 3 attempts result in consistent
+ * values. The upper bound is set to 6 attempts and may need to be tuned as per
+ * any new occurrences.
+ */
+static void __get_engine_usage_record(struct intel_engine_cs *engine,
+                                     u32 *last_in, u32 *id, u32 *total)
 {
        struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+       int i = 0;
+
+       do {
+               *last_in = READ_ONCE(rec->last_switch_in_stamp);
+               *id = READ_ONCE(rec->current_context_index);
+               *total = READ_ONCE(rec->total_runtime);
+
+               if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
+                   READ_ONCE(rec->current_context_index) == *id &&
+                   READ_ONCE(rec->total_runtime) == *total)
+                       break;
+       } while (++i < 6);
+}
+
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
        struct intel_engine_guc_stats *stats = &engine->stats.guc;
        struct intel_guc *guc = &engine->gt->uc.guc;
-       u32 last_switch = rec->last_switch_in_stamp;
-       u32 ctx_id = rec->current_context_index;
-       u32 total = rec->total_runtime;
+       u32 last_switch, ctx_id, total;
 
        lockdep_assert_held(&guc->timestamp.lock);
 
+       __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
+
        stats->running = ctx_id != ~0U && last_switch;
        if (stats->running)
                __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
@@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
        }
 }
 
-static void guc_update_pm_timestamp(struct intel_guc *guc,
-                                   struct intel_engine_cs *engine,
-                                   ktime_t *now)
+static u32 gpm_timestamp_shift(struct intel_gt *gt)
+{
+       intel_wakeref_t wakeref;
+       u32 reg, shift;
+
+       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+               reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+       shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+               GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;
+
+       return 3 - shift;
+}
+
+static u64 gpm_timestamp(struct intel_gt *gt)
+{
+       u32 lo, hi, old_hi, loop = 0;
+
+       hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       do {
+               lo = intel_uncore_read(gt->uncore, MISC_STATUS0);
+               old_hi = hi;
+               hi = intel_uncore_read(gt->uncore, MISC_STATUS1);
+       } while (old_hi != hi && loop++ < 2);
+
+       return ((u64)hi << 32) | lo;
+}
+
+static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
 {
-       u32 gt_stamp_now, gt_stamp_hi;
+       struct intel_gt *gt = guc_to_gt(guc);
+       u32 gt_stamp_lo, gt_stamp_hi;
+       u64 gpm_ts;
 
        lockdep_assert_held(&guc->timestamp.lock);
 
        gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
-       gt_stamp_now = intel_uncore_read(engine->uncore,
-                                        RING_TIMESTAMP(engine->mmio_base));
+       gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift;
+       gt_stamp_lo = lower_32_bits(gpm_ts);
        *now = ktime_get();
 
-       if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+       if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp))
                gt_stamp_hi++;
 
-       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+       guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
 }
 
 /*
@@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
        if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
                stats_saved = *stats;
                gt_stamp_saved = guc->timestamp.gt_stamp;
+               /*
+                * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
+                * start_gt_clk' calculation below for active engines.
+                */
                guc_update_engine_gt_clks(engine);
-               guc_update_pm_timestamp(guc, engine, now);
+               guc_update_pm_timestamp(guc, now);
                intel_gt_pm_put_async(gt);
                if (i915_reset_count(gpu_error) != reset_count) {
                        *stats = stats_saved;
@@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
 
        spin_lock_irqsave(&guc->timestamp.lock, flags);
 
+       guc_update_pm_timestamp(guc, &unused);
        for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
                guc_update_engine_gt_clks(engine);
                engine->stats.guc.prev_total = 0;
        }
@@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
        ktime_t unused;
 
        spin_lock_irqsave(&guc->timestamp.lock, flags);
-       for_each_engine(engine, gt, id) {
-               guc_update_pm_timestamp(guc, engine, &unused);
+
+       guc_update_pm_timestamp(guc, &unused);
+       for_each_engine(engine, gt, id)
                guc_update_engine_gt_clks(engine);
-       }
+
        spin_unlock_irqrestore(&guc->timestamp.lock, flags);
 }
 
@@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 void intel_guc_busyness_unpark(struct intel_gt *gt)
 {
        struct intel_guc *guc = &gt->uc.guc;
+       unsigned long flags;
+       ktime_t unused;
 
        if (!guc_submission_initialized(guc))
                return;
 
+       spin_lock_irqsave(&guc->timestamp.lock, flags);
+       guc_update_pm_timestamp(guc, &unused);
+       spin_unlock_irqrestore(&guc->timestamp.lock, flags);
        mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
                         guc->timestamp.ping_delay);
 }
@@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
        spin_lock_init(&guc->timestamp.lock);
        INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
        guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+       guc->timestamp.shift = gpm_timestamp_shift(gt);
 
        return 0;
 }
index 5ae812d..0633888 100644 (file)
@@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine,
        struct i915_request *rq = NULL;
        unsigned long flags;
 
-       ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
+       ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL);
        if (!ee)
                return NULL;
 
index c32420c..c2bb33f 100644 (file)
@@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   RING_WAIT            (1 << 11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE  (1 << 10) /* gen6+ */
 
-#define GUCPMTIMESTAMP          _MMIO(0xC3E8)
+#define MISC_STATUS0           _MMIO(0xA500)
+#define MISC_STATUS1           _MMIO(0xA504)
 
 /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
 #define GEN8_RING_CS_GPR(base, n)      _MMIO((base) + 0x600 + (n) * 8)
index 00404ba..2735b8e 100644 (file)
@@ -158,12 +158,6 @@ static void kmb_plane_atomic_disable(struct drm_plane *plane,
        case LAYER_1:
                kmb->plane_status[plane_id].ctrl = LCD_CTRL_VL2_ENABLE;
                break;
-       case LAYER_2:
-               kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL1_ENABLE;
-               break;
-       case LAYER_3:
-               kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL2_ENABLE;
-               break;
        }
 
        kmb->plane_status[plane_id].disable = true;
index 0655582..4cfb6c0 100644 (file)
@@ -361,7 +361,11 @@ static void mxsfb_crtc_atomic_enable(struct drm_crtc *crtc,
                bridge_state =
                        drm_atomic_get_new_bridge_state(state,
                                                        mxsfb->bridge);
-               bus_format = bridge_state->input_bus_cfg.format;
+               if (!bridge_state)
+                       bus_format = MEDIA_BUS_FMT_FIXED;
+               else
+                       bus_format = bridge_state->input_bus_cfg.format;
+
                if (bus_format == MEDIA_BUS_FMT_FIXED) {
                        dev_warn_once(drm->dev,
                                      "Bridge does not provide bus format, assuming MEDIA_BUS_FMT_RGB888_1X24.\n"
index d0f52d5..64e423d 100644 (file)
@@ -38,7 +38,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
                *addr += bios->imaged_addr;
        }
 
-       if (unlikely(*addr + size >= bios->size)) {
+       if (unlikely(*addr + size > bios->size)) {
                nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
                return false;
        }
index c903b74..35f0d5e 100644 (file)
@@ -3322,7 +3322,7 @@ static int cm_lap_handler(struct cm_work *work)
        ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
        if (ret) {
                rdma_destroy_ah_attr(&ah_attr);
-               return -EINVAL;
+               goto deref;
        }
 
        spin_lock_irq(&cm_id_priv->lock);
index 27a00ce..c447526 100644 (file)
@@ -67,8 +67,8 @@ static const char * const cma_events[] = {
        [RDMA_CM_EVENT_TIMEWAIT_EXIT]    = "timewait exit",
 };
 
-static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
-                        union ib_gid *mgid);
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+                             enum ib_gid_type gid_type);
 
 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
@@ -1846,17 +1846,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
                if (dev_addr->bound_dev_if)
                        ndev = dev_get_by_index(dev_addr->net,
                                                dev_addr->bound_dev_if);
-               if (ndev) {
+               if (ndev && !send_only) {
+                       enum ib_gid_type gid_type;
                        union ib_gid mgid;
 
-                       cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
-                                    &mgid);
-
-                       if (!send_only)
-                               cma_igmp_send(ndev, &mgid, false);
-
-                       dev_put(ndev);
+                       gid_type = id_priv->cma_dev->default_gid_type
+                                          [id_priv->id.port_num -
+                                           rdma_start_port(
+                                                   id_priv->cma_dev->device)];
+                       cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+                                         gid_type);
+                       cma_igmp_send(ndev, &mgid, false);
                }
+               dev_put(ndev);
 
                cancel_work_sync(&mc->iboe_join.work);
        }
index 2b72c4f..9d6ac9d 100644 (file)
@@ -95,6 +95,7 @@ struct ucma_context {
        u64                     uid;
 
        struct list_head        list;
+       struct list_head        mc_list;
        struct work_struct      close_work;
 };
 
@@ -105,6 +106,7 @@ struct ucma_multicast {
 
        u64                     uid;
        u8                      join_state;
+       struct list_head        list;
        struct sockaddr_storage addr;
 };
 
@@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 
        INIT_WORK(&ctx->close_work, ucma_close_id);
        init_completion(&ctx->comp);
+       INIT_LIST_HEAD(&ctx->mc_list);
        /* So list_del() will work if we don't do ucma_finish_ctx() */
        INIT_LIST_HEAD(&ctx->list);
        ctx->file = file;
@@ -484,19 +487,19 @@ err1:
 
 static void ucma_cleanup_multicast(struct ucma_context *ctx)
 {
-       struct ucma_multicast *mc;
-       unsigned long index;
+       struct ucma_multicast *mc, *tmp;
 
-       xa_for_each(&multicast_table, index, mc) {
-               if (mc->ctx != ctx)
-                       continue;
+       xa_lock(&multicast_table);
+       list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+               list_del(&mc->list);
                /*
                 * At this point mc->ctx->ref is 0 so the mc cannot leave the
                 * lock on the reader and this is enough serialization
                 */
-               xa_erase(&multicast_table, index);
+               __xa_erase(&multicast_table, mc->id);
                kfree(mc);
        }
+       xa_unlock(&multicast_table);
 }
 
 static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
@@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file,
        mc->uid = cmd->uid;
        memcpy(&mc->addr, addr, cmd->addr_size);
 
-       if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+       xa_lock(&multicast_table);
+       if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
                     GFP_KERNEL)) {
                ret = -ENOMEM;
                goto err_free_mc;
        }
 
+       list_add_tail(&mc->list, &ctx->mc_list);
+       xa_unlock(&multicast_table);
+
        mutex_lock(&ctx->mutex);
        ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
                                  join_state, mc);
@@ -1500,8 +1507,11 @@ err_leave_multicast:
        mutex_unlock(&ctx->mutex);
        ucma_cleanup_mc_events(mc);
 err_xa_erase:
-       xa_erase(&multicast_table, mc->id);
+       xa_lock(&multicast_table);
+       list_del(&mc->list);
+       __xa_erase(&multicast_table, mc->id);
 err_free_mc:
+       xa_unlock(&multicast_table);
        kfree(mc);
 err_put_ctx:
        ucma_put_ctx(ctx);
@@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
                mc = ERR_PTR(-EINVAL);
        else if (!refcount_inc_not_zero(&mc->ctx->ref))
                mc = ERR_PTR(-ENXIO);
-       else
-               __xa_erase(&multicast_table, mc->id);
-       xa_unlock(&multicast_table);
 
        if (IS_ERR(mc)) {
+               xa_unlock(&multicast_table);
                ret = PTR_ERR(mc);
                goto out;
        }
 
+       list_del(&mc->list);
+       __xa_erase(&multicast_table, mc->id);
+       xa_unlock(&multicast_table);
+
        mutex_lock(&mc->ctx->mutex);
        rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
        mutex_unlock(&mc->ctx->mutex);
index 9091229..aec60d4 100644 (file)
@@ -55,7 +55,7 @@ union hfi1_ipoib_flow {
  */
 struct ipoib_txreq {
        struct sdma_txreq           txreq;
-       struct hfi1_sdma_header     sdma_hdr;
+       struct hfi1_sdma_header     *sdma_hdr;
        int                         sdma_status;
        int                         complete;
        struct hfi1_ipoib_dev_priv *priv;
index e1a2b02..5d814af 100644 (file)
@@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
        int ret;
 
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!dev->tstats)
+               return -ENOMEM;
 
        ret = priv->netdev_ops->ndo_init(dev);
        if (ret)
-               return ret;
+               goto out_ret;
 
        ret = hfi1_netdev_add_data(priv->dd,
                                   qpn_from_mac(priv->netdev->dev_addr),
                                   dev);
        if (ret < 0) {
                priv->netdev_ops->ndo_uninit(dev);
-               return ret;
+               goto out_ret;
        }
 
        return 0;
+out_ret:
+       free_percpu(dev->tstats);
+       dev->tstats = NULL;
+       return ret;
 }
 
 static void hfi1_ipoib_dev_uninit(struct net_device *dev)
 {
        struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
 
+       free_percpu(dev->tstats);
+       dev->tstats = NULL;
+
        hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
 
        priv->netdev_ops->ndo_uninit(dev);
@@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
        hfi1_ipoib_rxq_deinit(priv->netdev);
 
        free_percpu(dev->tstats);
-}
-
-static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
-{
-       hfi1_ipoib_netdev_dtor(dev);
-       free_netdev(dev);
+       dev->tstats = NULL;
 }
 
 static void hfi1_ipoib_set_id(struct net_device *dev, int id)
@@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
        priv->port_num = port_num;
        priv->netdev_ops = netdev->netdev_ops;
 
-       netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
-
        ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
 
        rc = hfi1_ipoib_txreq_init(priv);
        if (rc) {
                dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
-               hfi1_ipoib_free_rdma_netdev(netdev);
                return rc;
        }
 
        rc = hfi1_ipoib_rxq_init(netdev);
        if (rc) {
                dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
-               hfi1_ipoib_free_rdma_netdev(netdev);
+               hfi1_ipoib_txreq_deinit(priv);
                return rc;
        }
 
+       netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
        netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
        netdev->needs_free_netdev = true;
 
index f401089..d6bbdb8 100644 (file)
@@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
                dd_dev_warn(priv->dd,
                            "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
                            __func__, tx->sdma_status,
-                           le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+                           le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
                            tx->txq->sde->this_idx);
        }
 
@@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
 {
        struct hfi1_devdata *dd = txp->dd;
        struct sdma_txreq *txreq = &tx->txreq;
-       struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+       struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        u16 pkt_bytes =
                sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
        int ret;
@@ -256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
                                           struct ipoib_txparms *txp)
 {
        struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
-       struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+       struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
        struct sk_buff *skb = tx->skb;
        struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
        struct rdma_ah_attr *ah_attr = txp->ah_attr;
@@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
        if (likely(!ret)) {
 tx_ok:
                trace_sdma_output_ibhdr(txq->priv->dd,
-                                       &tx->sdma_hdr.hdr,
+                                       &tx->sdma_hdr->hdr,
                                        ib_is_sc5(txp->flow.sc5));
                hfi1_ipoib_check_queue_depth(txq);
                return NETDEV_TX_OK;
@@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
        hfi1_ipoib_check_queue_depth(txq);
 
        trace_sdma_output_ibhdr(txq->priv->dd,
-                               &tx->sdma_hdr.hdr,
+                               &tx->sdma_hdr->hdr,
                                ib_is_sc5(txp->flow.sc5));
 
        if (!netdev_xmit_more())
@@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
 {
        struct net_device *dev = priv->netdev;
        u32 tx_ring_size, tx_item_size;
-       int i;
+       struct hfi1_ipoib_circ_buf *tx_ring;
+       int i, j;
 
        /*
         * Ring holds 1 less than tx_ring_size
@@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
 
        for (i = 0; i < dev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+               struct ipoib_txreq *tx;
 
+               tx_ring = &txq->tx_ring;
                iowait_init(&txq->wait,
                            0,
                            hfi1_ipoib_flush_txq,
@@ -725,14 +728,19 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
                                             priv->dd->node);
 
                txq->tx_ring.items =
-                       kcalloc_node(tx_ring_size, tx_item_size,
-                                    GFP_KERNEL, priv->dd->node);
+                       kvzalloc_node(array_size(tx_ring_size, tx_item_size),
+                                     GFP_KERNEL, priv->dd->node);
                if (!txq->tx_ring.items)
                        goto free_txqs;
 
                txq->tx_ring.max_items = tx_ring_size;
-               txq->tx_ring.shift = ilog2(tx_ring_size);
+               txq->tx_ring.shift = ilog2(tx_item_size);
                txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
+               tx_ring = &txq->tx_ring;
+               for (j = 0; j < tx_ring_size; j++)
+                       hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
+                               kzalloc_node(sizeof(*tx->sdma_hdr),
+                                            GFP_KERNEL, priv->dd->node);
 
                netif_tx_napi_add(dev, &txq->napi,
                                  hfi1_ipoib_poll_tx_ring,
@@ -746,7 +754,10 @@ free_txqs:
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
 
                netif_napi_del(&txq->napi);
-               kfree(txq->tx_ring.items);
+               tx_ring = &txq->tx_ring;
+               for (j = 0; j < tx_ring_size; j++)
+                       kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+               kvfree(tx_ring->items);
        }
 
        kfree(priv->txqs);
@@ -780,17 +791,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
 
 void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
 {
-       int i;
+       int i, j;
 
        for (i = 0; i < priv->netdev->num_tx_queues; i++) {
                struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+               struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
 
                iowait_cancel_work(&txq->wait);
                iowait_sdma_drain(&txq->wait);
                hfi1_ipoib_drain_tx_list(txq);
                netif_napi_del(&txq->napi);
                hfi1_ipoib_drain_tx_ring(txq);
-               kfree(txq->tx_ring.items);
+               for (j = 0; j < tx_ring->max_items; j++)
+                       kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+               kvfree(tx_ring->items);
        }
 
        kfree(priv->txqs);
index 1c3d972..93b1650 100644 (file)
@@ -3237,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
        case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
                ew = kmalloc(sizeof *ew, GFP_ATOMIC);
                if (!ew)
-                       break;
+                       return;
 
                INIT_WORK(&ew->work, handle_port_mgmt_change_event);
                memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
index 3305f27..ae50b56 100644 (file)
@@ -3073,6 +3073,8 @@ do_write:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
+               if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1)))
+                       goto inv_err;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
                                          wqe->atomic_wr.remote_addr,
                                          wqe->atomic_wr.rkey,
index 368959a..df03d84 100644 (file)
@@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
        return &qp->orq[qp->orq_get % qp->attrs.orq_size];
 }
 
-static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
-{
-       return &qp->orq[qp->orq_put % qp->attrs.orq_size];
-}
-
 static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
 {
-       struct siw_sqe *orq_e = orq_get_tail(qp);
+       struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
 
        if (READ_ONCE(orq_e->flags) == 0)
                return orq_e;
index 60116f2..875ea6f 100644 (file)
@@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
 
        spin_lock_irqsave(&qp->orq_lock, flags);
 
-       rreq = orq_get_current(qp);
-
        /* free current orq entry */
+       rreq = orq_get_current(qp);
        WRITE_ONCE(rreq->flags, 0);
 
+       qp->orq_get++;
+
        if (qp->tx_ctx.orq_fence) {
                if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
                        pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
@@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp)
                        rv = -EPROTO;
                        goto out;
                }
-               /* resume SQ processing */
+               /* resume SQ processing, if possible */
                if (tx_waiting->sqe.opcode == SIW_OP_READ ||
                    tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
-                       rreq = orq_get_tail(qp);
+
+                       /* SQ processing was stopped because of a full ORQ */
+                       rreq = orq_get_free(qp);
                        if (unlikely(!rreq)) {
                                pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
                                rv = -EPROTO;
@@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
                        resume_tx = 1;
 
                } else if (siw_orq_empty(qp)) {
+                       /*
+                        * SQ processing was stopped by fenced work request.
+                        * Resume since all previous Read's are now completed.
+                        */
                        qp->tx_ctx.orq_fence = 0;
                        resume_tx = 1;
-               } else {
-                       pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
-                               qp_id(qp), qp->orq_get, qp->orq_put);
-                       rv = -EPROTO;
                }
        }
-       qp->orq_get++;
 out:
        spin_unlock_irqrestore(&qp->orq_lock, flags);
 
index a3dd2cb..54ef367 100644 (file)
@@ -313,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 
        if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
                siw_dbg(base_dev, "too many QP's\n");
-               return -ENOMEM;
+               rv = -ENOMEM;
+               goto err_atomic;
        }
        if (attrs->qp_type != IB_QPT_RC) {
                siw_dbg(base_dev, "only RC QP's supported\n");
index 78d2ee9..1b58611 100644 (file)
@@ -615,10 +615,9 @@ static int wm97xx_register_touch(struct wm97xx *wm)
         * extensions)
         */
        wm->touch_dev = platform_device_alloc("wm97xx-touch", -1);
-       if (!wm->touch_dev) {
-               ret = -ENOMEM;
-               goto touch_err;
-       }
+       if (!wm->touch_dev)
+               return -ENOMEM;
+
        platform_set_drvdata(wm->touch_dev, wm);
        wm->touch_dev->dev.parent = wm->dev;
        wm->touch_dev->dev.platform_data = pdata;
@@ -629,9 +628,6 @@ static int wm97xx_register_touch(struct wm97xx *wm)
        return 0;
 touch_reg_err:
        platform_device_put(wm->touch_dev);
-touch_err:
-       input_unregister_device(wm->input_dev);
-       wm->input_dev = NULL;
 
        return ret;
 }
@@ -639,8 +635,6 @@ touch_err:
 static void wm97xx_unregister_touch(struct wm97xx *wm)
 {
        platform_device_unregister(wm->touch_dev);
-       input_unregister_device(wm->input_dev);
-       wm->input_dev = NULL;
 }
 
 static int _wm97xx_probe(struct wm97xx *wm)
index dc338ac..b10fb52 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/export.h>
 #include <linux/kmemleak.h>
 #include <linux/cc_platform.h>
+#include <linux/iopoll.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/apic.h>
@@ -834,6 +835,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
                if (status & (MMIO_STATUS_GALOG_RUN_MASK))
                        break;
+               udelay(10);
        }
 
        if (WARN_ON(i >= LOOP_TIMEOUT))
index f912fe4..a673195 100644 (file)
@@ -569,9 +569,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
                                            fn, &intel_ir_domain_ops,
                                            iommu);
        if (!iommu->ir_domain) {
-               irq_domain_free_fwnode(fn);
                pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
-               goto out_free_bitmap;
+               goto out_free_fwnode;
        }
        iommu->ir_msi_domain =
                arch_create_remap_msi_irq_domain(iommu->ir_domain,
@@ -595,7 +594,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 
                if (dmar_enable_qi(iommu)) {
                        pr_err("Failed to enable queued invalidation\n");
-                       goto out_free_bitmap;
+                       goto out_free_ir_domain;
                }
        }
 
@@ -619,6 +618,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 
        return 0;
 
+out_free_ir_domain:
+       if (iommu->ir_msi_domain)
+               irq_domain_remove(iommu->ir_msi_domain);
+       iommu->ir_msi_domain = NULL;
+       irq_domain_remove(iommu->ir_domain);
+       iommu->ir_domain = NULL;
+out_free_fwnode:
+       irq_domain_free_fwnode(fn);
 out_free_bitmap:
        bitmap_free(bitmap);
 out_free_pages:
index 50ee27b..06fee74 100644 (file)
@@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(ioasid_alloc);
 
 /**
  * ioasid_get - obtain a reference to the IOASID
+ * @ioasid: the ID to get
  */
 void ioasid_get(ioasid_t ioasid)
 {
index 8b86406..107dcf5 100644 (file)
@@ -207,9 +207,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
 
 static void dev_iommu_free(struct device *dev)
 {
-       iommu_fwspec_free(dev);
-       kfree(dev->iommu);
+       struct dev_iommu *param = dev->iommu;
+
        dev->iommu = NULL;
+       if (param->fwspec) {
+               fwnode_handle_put(param->fwspec->iommu_fwnode);
+               kfree(param->fwspec);
+       }
+       kfree(param);
 }
 
 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
@@ -980,17 +985,6 @@ static int iommu_group_device_count(struct iommu_group *group)
        return ret;
 }
 
-/**
- * iommu_group_for_each_dev - iterate over each device in the group
- * @group: the group
- * @data: caller opaque data to be passed to callback function
- * @fn: caller supplied callback function
- *
- * This function is called by group users to iterate over group devices.
- * Callers should hold a reference count to the group during callback.
- * The group->mutex is held across callbacks, which will block calls to
- * iommu_group_add/remove_device.
- */
 static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
                                      int (*fn)(struct device *, void *))
 {
@@ -1005,7 +999,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
        return ret;
 }
 
-
+/**
+ * iommu_group_for_each_dev - iterate over each device in the group
+ * @group: the group
+ * @data: caller opaque data to be passed to callback function
+ * @fn: caller supplied callback function
+ *
+ * This function is called by group users to iterate over group devices.
+ * Callers should hold a reference count to the group during callback.
+ * The group->mutex is held across callbacks, which will block calls to
+ * iommu_group_add/remove_device.
+ */
 int iommu_group_for_each_dev(struct iommu_group *group, void *data,
                             int (*fn)(struct device *, void *))
 {
@@ -3032,6 +3036,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
  * iommu_sva_bind_device() - Bind a process address space to a device
  * @dev: the device
  * @mm: the mm to bind, caller must hold a reference to it
+ * @drvdata: opaque data pointer to pass to bind callback
  *
  * Create a bond between device and address space, allowing the device to access
  * the mm using the returned PASID. If a bond already exists between @device and
index 9174965..980e4af 100644 (file)
@@ -1085,7 +1085,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev)
 }
 
 /**
- * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation
+ * omap_iommu_prepare - prepare() dev_pm_ops implementation
  * @dev:       iommu device
  *
  * This function performs the necessary checks to determine if the IOMMU
index 5881d05..4d38bd7 100644 (file)
@@ -5869,10 +5869,6 @@ int md_run(struct mddev *mddev)
                nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev));
        }
 
-       /* Set the NOWAIT flags if all underlying devices support it */
-       if (nowait)
-               blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
-
        if (!bioset_initialized(&mddev->bio_set)) {
                err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
                if (err)
@@ -6010,6 +6006,10 @@ int md_run(struct mddev *mddev)
                else
                        blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
                blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
+
+               /* Set the NOWAIT flags if all underlying devices support it */
+               if (nowait)
+                       blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
        }
        if (pers->sync_request) {
                if (mddev->kobj.sd &&
index 45f5787..bd87012 100644 (file)
@@ -67,7 +67,7 @@ static const unsigned int sd_au_size[] = {
                __res & __mask;                                         \
        })
 
-#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000
+#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 1000
 #define SD_WRITE_EXTR_SINGLE_TIMEOUT_MS 1000
 
 struct sd_busy_data {
@@ -1664,6 +1664,12 @@ static int sd_poweroff_notify(struct mmc_card *card)
                goto out;
        }
 
+       /* Find out when the command is completed. */
+       err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false,
+                               MMC_BUSY_EXTR_SINGLE);
+       if (err)
+               goto out;
+
        cb_data.card = card;
        cb_data.reg_buf = reg_buf;
        err = __mmc_poll_for_busy(card->host, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
index 16d1c7a..b6eb75f 100644 (file)
@@ -705,12 +705,12 @@ static int moxart_remove(struct platform_device *pdev)
        if (!IS_ERR_OR_NULL(host->dma_chan_rx))
                dma_release_channel(host->dma_chan_rx);
        mmc_remove_host(mmc);
-       mmc_free_host(mmc);
 
        writel(0, host->base + REG_INTERRUPT_MASK);
        writel(0, host->base + REG_POWER_CONTROL);
        writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
               host->base + REG_CLOCK_CONTROL);
+       mmc_free_host(mmc);
 
        return 0;
 }
index a593b1f..0f3658b 100644 (file)
@@ -524,12 +524,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask)
 
 static int esdhc_of_enable_dma(struct sdhci_host *host)
 {
+       int ret;
        u32 value;
        struct device *dev = mmc_dev(host->mmc);
 
        if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") ||
-           of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc"))
-               dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+           of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) {
+               ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+               if (ret)
+                       return ret;
+       }
 
        value = sdhci_readl(host, ESDHC_DMA_SYSCTL);
 
index bcc595c..104dcd7 100644 (file)
@@ -405,6 +405,9 @@ static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host,
        struct dma_slave_config cfg = { 0, };
 
        res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
+       if (!res)
+               return -EINVAL;
+
        cfg.direction = direction;
 
        if (direction == DMA_DEV_TO_MEM) {
index 6006c2e..9fd1d6c 100644 (file)
@@ -1021,8 +1021,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
                                if (port->aggregator &&
                                    port->aggregator->is_active &&
                                    !__port_is_enabled(port)) {
-
                                        __enable_port(port);
+                                       *update_slave_arr = true;
                                }
                        }
                        break;
@@ -1779,6 +1779,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
                             port = port->next_port_in_aggregator) {
                                __enable_port(port);
                        }
+                       *update_slave_arr = true;
                }
        }
 
index 9161ce4..cf82b1f 100644 (file)
@@ -621,7 +621,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
        get_device(&priv->master_mii_bus->dev);
        priv->master_mii_dn = dn;
 
-       priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+       priv->slave_mii_bus = mdiobus_alloc();
        if (!priv->slave_mii_bus) {
                of_node_put(dn);
                return -ENOMEM;
@@ -681,8 +681,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
        }
 
        err = mdiobus_register(priv->slave_mii_bus);
-       if (err && dn)
+       if (err && dn) {
+               mdiobus_free(priv->slave_mii_bus);
                of_node_put(dn);
+       }
 
        return err;
 }
@@ -690,6 +692,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
 static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
 {
        mdiobus_unregister(priv->slave_mii_bus);
+       mdiobus_free(priv->slave_mii_bus);
        of_node_put(priv->master_mii_dn);
 }
 
index 46ed953..320ee7f 100644 (file)
@@ -498,8 +498,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg)
 static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
 {
        struct dsa_switch *ds = priv->ds;
+       int err;
 
-       ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev);
+       ds->slave_mii_bus = mdiobus_alloc();
        if (!ds->slave_mii_bus)
                return -ENOMEM;
 
@@ -512,7 +513,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np)
        ds->slave_mii_bus->parent = priv->dev;
        ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
 
-       return of_mdiobus_register(ds->slave_mii_bus, mdio_np);
+       err = of_mdiobus_register(ds->slave_mii_bus, mdio_np);
+       if (err)
+               mdiobus_free(ds->slave_mii_bus);
+
+       return err;
 }
 
 static int gswip_pce_table_entry_read(struct gswip_priv *priv,
@@ -2145,8 +2150,10 @@ disable_switch:
        gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
        dsa_unregister_switch(priv->ds);
 mdio_bus:
-       if (mdio_np)
+       if (mdio_np) {
                mdiobus_unregister(priv->ds->slave_mii_bus);
+               mdiobus_free(priv->ds->slave_mii_bus);
+       }
 put_mdio_node:
        of_node_put(mdio_np);
        for (i = 0; i < priv->num_gphy_fw; i++)
@@ -2169,6 +2176,7 @@ static int gswip_remove(struct platform_device *pdev)
 
        if (priv->ds->slave_mii_bus) {
                mdiobus_unregister(priv->ds->slave_mii_bus);
+               mdiobus_free(priv->ds->slave_mii_bus);
                of_node_put(priv->ds->slave_mii_bus->dev.of_node);
        }
 
index bc77a26..f74f25f 100644 (file)
@@ -2074,7 +2074,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv)
        if (priv->irq)
                mt7530_setup_mdio_irq(priv);
 
-       ret = mdiobus_register(bus);
+       ret = devm_mdiobus_register(dev, bus);
        if (ret) {
                dev_err(dev, "failed to register MDIO bus: %d\n", ret);
                if (priv->irq)
index c54649c..5344d0c 100644 (file)
@@ -3566,7 +3566,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
                        return err;
        }
 
-       bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
+       bus = mdiobus_alloc_size(sizeof(*mdio_bus));
        if (!bus)
                return -ENOMEM;
 
@@ -3591,14 +3591,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
        if (!external) {
                err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
                if (err)
-                       return err;
+                       goto out;
        }
 
        err = of_mdiobus_register(bus, np);
        if (err) {
                dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
                mv88e6xxx_g2_irq_mdio_free(chip, bus);
-               return err;
+               goto out;
        }
 
        if (external)
@@ -3607,21 +3607,26 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
                list_add(&mdio_bus->list, &chip->mdios);
 
        return 0;
+
+out:
+       mdiobus_free(bus);
+       return err;
 }
 
 static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
 
 {
-       struct mv88e6xxx_mdio_bus *mdio_bus;
+       struct mv88e6xxx_mdio_bus *mdio_bus, *p;
        struct mii_bus *bus;
 
-       list_for_each_entry(mdio_bus, &chip->mdios, list) {
+       list_for_each_entry_safe(mdio_bus, p, &chip->mdios, list) {
                bus = mdio_bus->bus;
 
                if (!mdio_bus->external)
                        mv88e6xxx_g2_irq_mdio_free(chip, bus);
 
                mdiobus_unregister(bus);
+               mdiobus_free(bus);
        }
 }
 
index bf8d382..33f0cea 100644 (file)
@@ -1061,7 +1061,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
                return PTR_ERR(hw);
        }
 
-       bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+       bus = mdiobus_alloc_size(sizeof(*mdio_priv));
        if (!bus)
                return -ENOMEM;
 
@@ -1081,6 +1081,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
        rc = mdiobus_register(bus);
        if (rc < 0) {
                dev_err(dev, "failed to register MDIO bus\n");
+               mdiobus_free(bus);
                return rc;
        }
 
@@ -1132,6 +1133,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
                lynx_pcs_destroy(phylink_pcs);
        }
        mdiobus_unregister(felix->imdio);
+       mdiobus_free(felix->imdio);
 }
 
 static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
index 8c1c9da..f2f1608 100644 (file)
@@ -1029,7 +1029,7 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot)
        }
 
        /* Needed in order to initialize the bus mutex lock */
-       rc = of_mdiobus_register(bus, NULL);
+       rc = devm_of_mdiobus_register(dev, bus, NULL);
        if (rc < 0) {
                dev_err(dev, "failed to register MDIO bus\n");
                return rc;
@@ -1083,7 +1083,8 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot)
                mdio_device_free(mdio_device);
                lynx_pcs_destroy(phylink_pcs);
        }
-       mdiobus_unregister(felix->imdio);
+
+       /* mdiobus_unregister and mdiobus_free handled by devres */
 }
 
 static const struct felix_info seville_info_vsc9953 = {
index 3bda701..e5098cf 100644 (file)
@@ -378,7 +378,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
        if (!mnp)
                return -ENODEV;
 
-       ret = of_mdiobus_register(mbus, mnp);
+       ret = devm_of_mdiobus_register(dev, mbus, mnp);
        of_node_put(mnp);
        if (ret)
                return ret;
@@ -1066,7 +1066,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev)
        }
 
        irq_domain_remove(priv->irqdomain);
-       mdiobus_unregister(priv->mbus);
        dsa_unregister_switch(&priv->ds);
 
        reset_control_assert(priv->sw_reset);
index efdcf48..2af3da4 100644 (file)
@@ -425,6 +425,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev)
 
        pci_free_irq_vectors(pdata->pcidev);
 
+       /* Disable all interrupts in the hardware */
+       XP_IOWRITE(pdata, XP_INT_EN, 0x0);
+
        xgbe_free_pdata(pdata);
 }
 
index 1ce20bf..4c23115 100644 (file)
@@ -4765,7 +4765,7 @@ static int macb_probe(struct platform_device *pdev)
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
-               dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
                bp->hw_dma_cap |= HW_DMA_CAP_64B;
        }
 #endif
index 88534aa..c4a48e6 100644 (file)
@@ -4733,12 +4733,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 #ifdef CONFIG_DEBUG_FS
        dpaa2_dbg_remove(priv);
 #endif
+
+       unregister_netdev(net_dev);
        rtnl_lock();
        dpaa2_eth_disconnect_mac(priv);
        rtnl_unlock();
 
-       unregister_netdev(net_dev);
-
        dpaa2_eth_dl_port_del(priv);
        dpaa2_eth_dl_traps_unregister(priv);
        dpaa2_eth_dl_free(priv);
index 2068199..e4e98aa 100644 (file)
@@ -609,6 +609,7 @@ static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
 
        *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0);
        *work_done = work_cnt;
+       skb_record_rx_queue(skb, rx->q_num);
        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
index bda7a2a..29617a8 100644 (file)
@@ -110,6 +110,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
                                         struct ibmvnic_sub_crq_queue *tx_scrq);
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb);
+static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -1424,7 +1425,7 @@ static int __ibmvnic_open(struct net_device *netdev)
        rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
        if (rc) {
                ibmvnic_napi_disable(adapter);
-               release_resources(adapter);
+               ibmvnic_disable_irqs(adapter);
                return rc;
        }
 
@@ -1474,9 +1475,6 @@ static int ibmvnic_open(struct net_device *netdev)
                rc = init_resources(adapter);
                if (rc) {
                        netdev_err(netdev, "failed to initialize resources\n");
-                       release_resources(adapter);
-                       release_rx_pools(adapter);
-                       release_tx_pools(adapter);
                        goto out;
                }
        }
@@ -1493,6 +1491,13 @@ out:
                adapter->state = VNIC_OPEN;
                rc = 0;
        }
+
+       if (rc) {
+               release_resources(adapter);
+               release_rx_pools(adapter);
+               release_tx_pools(adapter);
+       }
+
        return rc;
 }
 
index 6a710c2..827fcb5 100644 (file)
@@ -487,6 +487,7 @@ enum ice_pf_flags {
        ICE_FLAG_MDD_AUTO_RESET_VF,
        ICE_FLAG_VF_VLAN_PRUNING,
        ICE_FLAG_LINK_LENIENT_MODE_ENA,
+       ICE_FLAG_PLUG_AUX_DEV,
        ICE_PF_FLAGS_NBITS              /* must be last */
 };
 
@@ -891,7 +892,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
        if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
                set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
                set_bit(ICE_FLAG_AUX_ENA, pf->flags);
-               ice_plug_aux_dev(pf);
+               set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
        }
 }
 
index 464ecb7..c57e5fc 100644 (file)
@@ -3381,7 +3381,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
            !ice_fw_supports_report_dflt_cfg(hw)) {
                struct ice_link_default_override_tlv tlv;
 
-               if (ice_get_link_default_override(&tlv, pi))
+               status = ice_get_link_default_override(&tlv, pi);
+               if (status)
                        goto out;
 
                if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
index e375ac8..4f954db 100644 (file)
@@ -204,11 +204,7 @@ ice_lag_unlink(struct ice_lag *lag,
                lag->upper_netdev = NULL;
        }
 
-       if (lag->peer_netdev) {
-               dev_put(lag->peer_netdev);
-               lag->peer_netdev = NULL;
-       }
-
+       lag->peer_netdev = NULL;
        ice_set_sriov_cap(pf);
        ice_set_rdma_cap(pf);
        lag->bonded = false;
@@ -216,6 +212,32 @@ ice_lag_unlink(struct ice_lag *lag,
 }
 
 /**
+ * ice_lag_unregister - handle netdev unregister events
+ * @lag: LAG info struct
+ * @netdev: netdev reporting the event
+ */
+static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev)
+{
+       struct ice_pf *pf = lag->pf;
+
+       /* check to see if this event is for this netdev
+        * check that we are in an aggregate
+        */
+       if (netdev != lag->netdev || !lag->bonded)
+               return;
+
+       if (lag->upper_netdev) {
+               dev_put(lag->upper_netdev);
+               lag->upper_netdev = NULL;
+               ice_set_sriov_cap(pf);
+               ice_set_rdma_cap(pf);
+       }
+       /* perform some cleanup in case we come back */
+       lag->bonded = false;
+       lag->role = ICE_LAG_NONE;
+}
+
+/**
  * ice_lag_changeupper_event - handle LAG changeupper event
  * @lag: LAG info struct
  * @ptr: opaque pointer data
@@ -307,7 +329,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
                ice_lag_info_event(lag, ptr);
                break;
        case NETDEV_UNREGISTER:
-               ice_lag_unlink(lag, ptr);
+               ice_lag_unregister(lag, netdev);
                break;
        default:
                break;
index a1fc676..b3baf7c 100644 (file)
@@ -570,6 +570,7 @@ struct ice_tx_ctx_desc {
                        (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
 
 #define ICE_TXD_CTX_QW1_MSS_S  50
+#define ICE_TXD_CTX_MIN_MSS    64
 
 #define ICE_TXD_CTX_QW1_VSI_S  50
 #define ICE_TXD_CTX_QW1_VSI_M  (0x3FFULL << ICE_TXD_CTX_QW1_VSI_S)
index ec70659..ce90ebf 100644 (file)
@@ -2256,6 +2256,9 @@ static void ice_service_task(struct work_struct *work)
                return;
        }
 
+       if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+               ice_plug_aux_dev(pf);
+
        ice_clean_adminq_subtask(pf);
        ice_check_media_subtask(pf);
        ice_check_for_hang_subtask(pf);
@@ -8724,6 +8727,7 @@ ice_features_check(struct sk_buff *skb,
                   struct net_device __always_unused *netdev,
                   netdev_features_t features)
 {
+       bool gso = skb_is_gso(skb);
        size_t len;
 
        /* No point in doing any of this if neither checksum nor GSO are
@@ -8736,24 +8740,32 @@ ice_features_check(struct sk_buff *skb,
        /* We cannot support GSO if the MSS is going to be less than
         * 64 bytes. If it is then we need to drop support for GSO.
         */
-       if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+       if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
                features &= ~NETIF_F_GSO_MASK;
 
-       len = skb_network_header(skb) - skb->data;
+       len = skb_network_offset(skb);
        if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
                goto out_rm_features;
 
-       len = skb_transport_header(skb) - skb_network_header(skb);
+       len = skb_network_header_len(skb);
        if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
                goto out_rm_features;
 
        if (skb->encapsulation) {
-               len = skb_inner_network_header(skb) - skb_transport_header(skb);
-               if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
-                       goto out_rm_features;
+               /* this must work for VXLAN frames AND IPIP/SIT frames, and in
+                * the case of IPIP frames, the transport header pointer is
+                * after the inner header! So check to make sure that this
+                * is a GRE or UDP_TUNNEL frame before doing that math.
+                */
+               if (gso && (skb_shinfo(skb)->gso_type &
+                           (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
+                       len = skb_inner_network_header(skb) -
+                             skb_transport_header(skb);
+                       if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+                               goto out_rm_features;
+               }
 
-               len = skb_inner_transport_header(skb) -
-                     skb_inner_network_header(skb);
+               len = skb_inner_network_header_len(skb);
                if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
                        goto out_rm_features;
        }
index 7c33be9..17fbc45 100644 (file)
@@ -1984,14 +1984,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
        if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
                return;
 
-       set_ring_build_skb_enabled(rx_ring);
+       if (PAGE_SIZE < 8192)
+               if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB)
+                       set_ring_uses_large_buffer(rx_ring);
 
-       if (PAGE_SIZE < 8192) {
-               if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
-                       return;
+       /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */
+       if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring))
+               return;
 
-               set_ring_uses_large_buffer(rx_ring);
-       }
+       set_ring_build_skb_enabled(rx_ring);
 }
 
 /**
index f99adbf..04345b9 100644 (file)
@@ -17,7 +17,7 @@ if NET_VENDOR_LITEX
 
 config LITEX_LITEETH
        tristate "LiteX Ethernet support"
-       depends on OF
+       depends on OF && HAS_IOMEM
        help
          If you wish to compile a kernel for hardware with a LiteX LiteEth
          device then you should answer Y to this.
index 59783fc..10b866e 100644 (file)
@@ -1103,7 +1103,7 @@ void sparx5_get_stats64(struct net_device *ndev,
        stats->tx_carrier_errors = portstats[spx5_stats_tx_csense_cnt];
        stats->tx_window_errors = portstats[spx5_stats_tx_late_coll_cnt];
        stats->rx_dropped = portstats[spx5_stats_ana_ac_port_stat_lsb_cnt];
-       for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++stats)
+       for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx)
                stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop
                                               + idx];
        stats->tx_dropped = portstats[spx5_stats_tx_local_drop];
index 455293a..e6de865 100644 (file)
@@ -1432,6 +1432,8 @@ static void
 ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
 {
        trap->key_type = OCELOT_VCAP_KEY_IPV4;
+       trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+       trap->key.ipv4.proto.mask[0] = 0xff;
        trap->key.ipv4.dport.value = PTP_EV_PORT;
        trap->key.ipv4.dport.mask = 0xffff;
 }
@@ -1440,6 +1442,8 @@ static void
 ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
 {
        trap->key_type = OCELOT_VCAP_KEY_IPV6;
+       trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+       trap->key.ipv4.proto.mask[0] = 0xff;
        trap->key.ipv6.dport.value = PTP_EV_PORT;
        trap->key.ipv6.dport.mask = 0xffff;
 }
@@ -1448,6 +1452,8 @@ static void
 ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
 {
        trap->key_type = OCELOT_VCAP_KEY_IPV4;
+       trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+       trap->key.ipv4.proto.mask[0] = 0xff;
        trap->key.ipv4.dport.value = PTP_GEN_PORT;
        trap->key.ipv4.dport.mask = 0xffff;
 }
@@ -1456,6 +1462,8 @@ static void
 ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
 {
        trap->key_type = OCELOT_VCAP_KEY_IPV6;
+       trap->key.ipv4.proto.value[0] = IPPROTO_UDP;
+       trap->key.ipv4.proto.mask[0] = 0xff;
        trap->key.ipv6.dport.value = PTP_GEN_PORT;
        trap->key.ipv6.dport.mask = 0xffff;
 }
@@ -1737,12 +1745,11 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
 }
 EXPORT_SYMBOL(ocelot_get_strings);
 
+/* Caller must hold &ocelot->stats_lock */
 static void ocelot_update_stats(struct ocelot *ocelot)
 {
        int i, j;
 
-       mutex_lock(&ocelot->stats_lock);
-
        for (i = 0; i < ocelot->num_phys_ports; i++) {
                /* Configure the port to read the stats from */
                ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
@@ -1761,8 +1768,6 @@ static void ocelot_update_stats(struct ocelot *ocelot)
                                              ~(u64)U32_MAX) + val;
                }
        }
-
-       mutex_unlock(&ocelot->stats_lock);
 }
 
 static void ocelot_check_stats_work(struct work_struct *work)
@@ -1771,7 +1776,9 @@ static void ocelot_check_stats_work(struct work_struct *work)
        struct ocelot *ocelot = container_of(del_work, struct ocelot,
                                             stats_work);
 
+       mutex_lock(&ocelot->stats_lock);
        ocelot_update_stats(ocelot);
+       mutex_unlock(&ocelot->stats_lock);
 
        queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
                           OCELOT_STATS_CHECK_DELAY);
@@ -1781,12 +1788,16 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
 {
        int i;
 
+       mutex_lock(&ocelot->stats_lock);
+
        /* check and update now */
        ocelot_update_stats(ocelot);
 
        /* Copy all counters */
        for (i = 0; i < ocelot->num_stats; i++)
                *data++ = ocelot->stats[port * ocelot->num_stats + i];
+
+       mutex_unlock(&ocelot->stats_lock);
 }
 EXPORT_SYMBOL(ocelot_get_ethtool_stats);
 
index ce865e6..cd50db7 100644 (file)
@@ -1011,6 +1011,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
        struct nfp_flower_repr_priv *repr_priv;
        struct nfp_tun_offloaded_mac *entry;
        struct nfp_repr *repr;
+       u16 nfp_mac_idx;
        int ida_idx;
 
        entry = nfp_tunnel_lookup_offloaded_macs(app, mac);
@@ -1029,8 +1030,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
                entry->bridge_count--;
 
                if (!entry->bridge_count && entry->ref_count) {
-                       u16 nfp_mac_idx;
-
                        nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
                        if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx,
                                                     false)) {
@@ -1046,7 +1045,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
 
        /* If MAC is now used by 1 repr set the offloaded MAC index to port. */
        if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) {
-               u16 nfp_mac_idx;
                int port, err;
 
                repr_priv = list_first_entry(&entry->repr_list,
@@ -1074,8 +1072,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
        WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs,
                                            &entry->ht_node,
                                            offloaded_macs_params));
+
+       if (nfp_flower_is_supported_bridge(netdev))
+               nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT;
+       else
+               nfp_mac_idx = entry->index;
+
        /* If MAC has global ID then extract and free the ida entry. */
-       if (nfp_tunnel_is_mac_idx_global(entry->index)) {
+       if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) {
                ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index);
                ida_simple_remove(&priv->tun.mac_off_ids, ida_idx);
        }
index 966c3b4..e227358 100644 (file)
@@ -148,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = {
        { .compatible = "aspeed,ast2600-mdio", },
        { },
 };
+MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match);
 
 static struct platform_driver aspeed_mdio_driver = {
        .driver = {
index fa71fb7..2429db6 100644 (file)
@@ -553,9 +553,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev)
        else
                mscr = 0;
 
-       return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
-                               MII_88E1121_PHY_MSCR_REG,
-                               MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
+       return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE,
+                                       MII_88E1121_PHY_MSCR_REG,
+                                       MII_88E1121_PHY_MSCR_DELAY_MASK, mscr);
 }
 
 static int m88e1121_config_aneg(struct phy_device *phydev)
@@ -569,11 +569,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
                        return err;
        }
 
+       changed = err;
+
        err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
        if (err < 0)
                return err;
 
-       changed = err;
+       changed |= err;
 
        err = genphy_config_aneg(phydev);
        if (err < 0)
@@ -1213,16 +1215,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev)
 {
        int err;
 
-       err = genphy_soft_reset(phydev);
+       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
        if (err < 0)
                return err;
 
-       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
+       err = genphy_config_aneg(phydev);
        if (err < 0)
                return err;
 
-       err = genphy_config_aneg(phydev);
-       return 0;
+       return genphy_soft_reset(phydev);
 }
 
 static int m88e1118_config_init(struct phy_device *phydev)
index 37e5f34..3353e76 100644 (file)
@@ -1400,6 +1400,8 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x413c, 0x81d7, 0)},    /* Dell Wireless 5821e */
        {QMI_FIXED_INTF(0x413c, 0x81d7, 1)},    /* Dell Wireless 5821e preproduction config */
        {QMI_FIXED_INTF(0x413c, 0x81e0, 0)},    /* Dell Wireless 5821e with eSIM support*/
+       {QMI_FIXED_INTF(0x413c, 0x81e4, 0)},    /* Dell Wireless 5829e with eSIM support*/
+       {QMI_FIXED_INTF(0x413c, 0x81e6, 0)},    /* Dell Wireless 5829e */
        {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},    /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
        {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},    /* HP lt4120 Snapdragon X5 LTE */
        {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
index 354a963..d29fb97 100644 (file)
@@ -265,9 +265,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
 {
        /* Write ptr_ring before reading rx_notify_masked */
        smp_mb();
-       if (!rq->rx_notify_masked) {
-               rq->rx_notify_masked = true;
-               napi_schedule(&rq->xdp_napi);
+       if (!READ_ONCE(rq->rx_notify_masked) &&
+           napi_schedule_prep(&rq->xdp_napi)) {
+               WRITE_ONCE(rq->rx_notify_masked, true);
+               __napi_schedule(&rq->xdp_napi);
        }
 }
 
@@ -912,8 +913,10 @@ static int veth_poll(struct napi_struct *napi, int budget)
                /* Write rx_notify_masked before reading ptr_ring */
                smp_store_mb(rq->rx_notify_masked, false);
                if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
-                       rq->rx_notify_masked = true;
-                       napi_schedule(&rq->xdp_napi);
+                       if (napi_schedule_prep(&rq->xdp_napi)) {
+                               WRITE_ONCE(rq->rx_notify_masked, true);
+                               __napi_schedule(&rq->xdp_napi);
+                       }
                }
        }
 
index 5e0bfda..961a5f8 100644 (file)
@@ -4253,7 +4253,14 @@ static void nvme_async_event_work(struct work_struct *work)
                container_of(work, struct nvme_ctrl, async_event_work);
 
        nvme_aen_uevent(ctrl);
-       ctrl->ops->submit_async_event(ctrl);
+
+       /*
+        * The transport drivers must guarantee AER submission here is safe by
+        * flushing ctrl async_event_work after changing the controller state
+        * from LIVE and before freeing the admin queue.
+       */
+       if (ctrl->state == NVME_CTRL_LIVE)
+               ctrl->ops->submit_async_event(ctrl);
 }
 
 static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
index c3203ff..1e3a09c 100644 (file)
@@ -170,6 +170,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
                        struct nvmf_ctrl_options *opts)
 {
        if (ctrl->state == NVME_CTRL_DELETING ||
+           ctrl->state == NVME_CTRL_DELETING_NOIO ||
            ctrl->state == NVME_CTRL_DEAD ||
            strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
            strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
index 850f84d..9c55e4b 100644 (file)
@@ -1200,6 +1200,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
                        struct nvme_rdma_ctrl, err_work);
 
        nvme_stop_keep_alive(&ctrl->ctrl);
+       flush_work(&ctrl->ctrl.async_event_work);
        nvme_rdma_teardown_io_queues(ctrl, false);
        nvme_start_queues(&ctrl->ctrl);
        nvme_rdma_teardown_admin_queue(ctrl, false);
index 4ceb286..01e24b5 100644 (file)
@@ -2096,6 +2096,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
        struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
 
        nvme_stop_keep_alive(ctrl);
+       flush_work(&ctrl->async_event_work);
        nvme_tcp_teardown_io_queues(ctrl, false);
        /* unquiesce to fail fast pending requests */
        nvme_start_queues(ctrl);
index 489586a..768d33f 100644 (file)
@@ -356,8 +356,8 @@ static int j721e_pcie_probe(struct platform_device *pdev)
        const struct j721e_pcie_data *data;
        struct cdns_pcie *cdns_pcie;
        struct j721e_pcie *pcie;
-       struct cdns_pcie_rc *rc;
-       struct cdns_pcie_ep *ep;
+       struct cdns_pcie_rc *rc = NULL;
+       struct cdns_pcie_ep *ep = NULL;
        struct gpio_desc *gpiod;
        void __iomem *base;
        struct clk *clk;
@@ -376,6 +376,46 @@ static int j721e_pcie_probe(struct platform_device *pdev)
        if (!pcie)
                return -ENOMEM;
 
+       switch (mode) {
+       case PCI_MODE_RC:
+               if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST))
+                       return -ENODEV;
+
+               bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
+               if (!bridge)
+                       return -ENOMEM;
+
+               if (!data->byte_access_allowed)
+                       bridge->ops = &cdns_ti_pcie_host_ops;
+               rc = pci_host_bridge_priv(bridge);
+               rc->quirk_retrain_flag = data->quirk_retrain_flag;
+               rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+
+               cdns_pcie = &rc->pcie;
+               cdns_pcie->dev = dev;
+               cdns_pcie->ops = &j721e_pcie_ops;
+               pcie->cdns_pcie = cdns_pcie;
+               break;
+       case PCI_MODE_EP:
+               if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP))
+                       return -ENODEV;
+
+               ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
+               if (!ep)
+                       return -ENOMEM;
+
+               ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
+
+               cdns_pcie = &ep->pcie;
+               cdns_pcie->dev = dev;
+               cdns_pcie->ops = &j721e_pcie_ops;
+               pcie->cdns_pcie = cdns_pcie;
+               break;
+       default:
+               dev_err(dev, "INVALID device type %d\n", mode);
+               return 0;
+       }
+
        pcie->mode = mode;
        pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
 
@@ -426,28 +466,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 
        switch (mode) {
        case PCI_MODE_RC:
-               if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST)) {
-                       ret = -ENODEV;
-                       goto err_get_sync;
-               }
-
-               bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc));
-               if (!bridge) {
-                       ret = -ENOMEM;
-                       goto err_get_sync;
-               }
-
-               if (!data->byte_access_allowed)
-                       bridge->ops = &cdns_ti_pcie_host_ops;
-               rc = pci_host_bridge_priv(bridge);
-               rc->quirk_retrain_flag = data->quirk_retrain_flag;
-               rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
-
-               cdns_pcie = &rc->pcie;
-               cdns_pcie->dev = dev;
-               cdns_pcie->ops = &j721e_pcie_ops;
-               pcie->cdns_pcie = cdns_pcie;
-
                gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
                if (IS_ERR(gpiod)) {
                        ret = PTR_ERR(gpiod);
@@ -497,23 +515,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 
                break;
        case PCI_MODE_EP:
-               if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP)) {
-                       ret = -ENODEV;
-                       goto err_get_sync;
-               }
-
-               ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
-               if (!ep) {
-                       ret = -ENOMEM;
-                       goto err_get_sync;
-               }
-               ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag;
-
-               cdns_pcie = &ep->pcie;
-               cdns_pcie->dev = dev;
-               cdns_pcie->ops = &j721e_pcie_ops;
-               pcie->cdns_pcie = cdns_pcie;
-
                ret = cdns_pcie_init_phy(dev, cdns_pcie);
                if (ret) {
                        dev_err(dev, "Failed to init phy\n");
@@ -525,8 +526,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
                        goto err_pcie_setup;
 
                break;
-       default:
-               dev_err(dev, "INVALID device type %d\n", mode);
        }
 
        return 0;
index fa6886d..c625fc6 100644 (file)
@@ -756,22 +756,28 @@ static int __exit kirin_pcie_remove(struct platform_device *pdev)
        return 0;
 }
 
+struct kirin_pcie_data {
+       enum pcie_kirin_phy_type        phy_type;
+};
+
+static const struct kirin_pcie_data kirin_960_data = {
+       .phy_type = PCIE_KIRIN_INTERNAL_PHY,
+};
+
+static const struct kirin_pcie_data kirin_970_data = {
+       .phy_type = PCIE_KIRIN_EXTERNAL_PHY,
+};
+
 static const struct of_device_id kirin_pcie_match[] = {
-       {
-               .compatible = "hisilicon,kirin960-pcie",
-               .data = (void *)PCIE_KIRIN_INTERNAL_PHY
-       },
-       {
-               .compatible = "hisilicon,kirin970-pcie",
-               .data = (void *)PCIE_KIRIN_EXTERNAL_PHY
-       },
+       { .compatible = "hisilicon,kirin960-pcie", .data = &kirin_960_data },
+       { .compatible = "hisilicon,kirin970-pcie", .data = &kirin_970_data },
        {},
 };
 
 static int kirin_pcie_probe(struct platform_device *pdev)
 {
-       enum pcie_kirin_phy_type phy_type;
        struct device *dev = &pdev->dev;
+       const struct kirin_pcie_data *data;
        struct kirin_pcie *kirin_pcie;
        struct dw_pcie *pci;
        int ret;
@@ -781,13 +787,12 @@ static int kirin_pcie_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       phy_type = (long)of_device_get_match_data(dev);
-       if (!phy_type) {
+       data = of_device_get_match_data(dev);
+       if (!data) {
                dev_err(dev, "OF data missing\n");
                return -EINVAL;
        }
 
-
        kirin_pcie = devm_kzalloc(dev, sizeof(struct kirin_pcie), GFP_KERNEL);
        if (!kirin_pcie)
                return -ENOMEM;
@@ -800,7 +805,7 @@ static int kirin_pcie_probe(struct platform_device *pdev)
        pci->ops = &kirin_dw_pcie_ops;
        pci->pp.ops = &kirin_pcie_host_ops;
        kirin_pcie->pci = pci;
-       kirin_pcie->type = phy_type;
+       kirin_pcie->type = data->phy_type;
 
        ret = kirin_pcie_get_resource(kirin_pcie, pdev);
        if (ret)
index c19c7ca..9037a78 100644 (file)
@@ -1111,7 +1111,8 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
        if (!desc)
                return cpu_possible_mask;
 
-       if (WARN_ON_ONCE(!desc->affinity))
+       /* MSI[X] interrupts can be allocated without affinity descriptor */
+       if (!desc->affinity)
                return NULL;
 
        /*
index 9be273c..a826456 100644 (file)
@@ -508,7 +508,8 @@ static int bnx2fc_l2_rcv_thread(void *arg)
 
 static void bnx2fc_recv_frame(struct sk_buff *skb)
 {
-       u32 fr_len;
+       u64 crc_err;
+       u32 fr_len, fr_crc;
        struct fc_lport *lport;
        struct fcoe_rcv_info *fr;
        struct fc_stats *stats;
@@ -542,6 +543,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
        skb_pull(skb, sizeof(struct fcoe_hdr));
        fr_len = skb->len - sizeof(struct fcoe_crc_eof);
 
+       stats = per_cpu_ptr(lport->stats, get_cpu());
+       stats->RxFrames++;
+       stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+       put_cpu();
+
        fp = (struct fc_frame *)skb;
        fc_frame_init(fp);
        fr_dev(fp) = lport;
@@ -624,16 +630,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb)
                return;
        }
 
-       stats = per_cpu_ptr(lport->stats, smp_processor_id());
-       stats->RxFrames++;
-       stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+       fr_crc = le32_to_cpu(fr_crc(fp));
 
-       if (le32_to_cpu(fr_crc(fp)) !=
-                       ~crc32(~0, skb->data, fr_len)) {
-               if (stats->InvalidCRCCount < 5)
+       if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) {
+               stats = per_cpu_ptr(lport->stats, get_cpu());
+               crc_err = (stats->InvalidCRCCount++);
+               put_cpu();
+               if (crc_err < 5)
                        printk(KERN_WARNING PFX "dropping frame with "
                               "CRC error\n");
-               stats->InvalidCRCCount++;
                kfree_skb(skb);
                return;
        }
index 2f53a2e..ebf5ec3 100644 (file)
@@ -400,8 +400,7 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba,
                           struct hisi_sas_slot *slot,
                           struct hisi_sas_dq *dq,
                           struct hisi_sas_device *sas_dev,
-                          struct hisi_sas_internal_abort *abort,
-                          struct hisi_sas_tmf_task *tmf)
+                          struct hisi_sas_internal_abort *abort)
 {
        struct hisi_sas_cmd_hdr *cmd_hdr_base;
        int dlvry_queue_slot, dlvry_queue;
@@ -427,8 +426,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba,
        cmd_hdr_base = hisi_hba->cmd_hdr[dlvry_queue];
        slot->cmd_hdr = &cmd_hdr_base[dlvry_queue_slot];
 
-       slot->tmf = tmf;
-       slot->is_internal = tmf;
        task->lldd_task = slot;
 
        memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr));
@@ -587,7 +584,7 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
        slot->is_internal = tmf;
 
        /* protect task_prep and start_delivery sequence */
-       hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL, tmf);
+       hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL);
 
        return 0;
 
@@ -1380,12 +1377,13 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device)
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
        struct device *dev = hisi_hba->dev;
        int s = sizeof(struct host_to_dev_fis);
+       struct hisi_sas_tmf_task tmf = {};
 
        ata_for_each_link(link, ap, EDGE) {
                int pmp = sata_srst_pmp(link);
 
                hisi_sas_fill_ata_reset_cmd(link->device, 1, pmp, fis);
-               rc = hisi_sas_exec_internal_tmf_task(device, fis, s, NULL);
+               rc = hisi_sas_exec_internal_tmf_task(device, fis, s, &tmf);
                if (rc != TMF_RESP_FUNC_COMPLETE)
                        break;
        }
@@ -1396,7 +1394,7 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device)
 
                        hisi_sas_fill_ata_reset_cmd(link->device, 0, pmp, fis);
                        rc = hisi_sas_exec_internal_tmf_task(device, fis,
-                                                            s, NULL);
+                                                            s, &tmf);
                        if (rc != TMF_RESP_FUNC_COMPLETE)
                                dev_err(dev, "ata disk %016llx de-reset failed\n",
                                        SAS_ADDR(device->sas_addr));
@@ -2067,7 +2065,7 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
        slot->port = port;
        slot->is_internal = true;
 
-       hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort, NULL);
+       hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort);
 
        return 0;
 
index c814e50..9ec310b 100644 (file)
@@ -2692,7 +2692,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
        u32 tag = le32_to_cpu(psataPayload->tag);
        u32 port_id = le32_to_cpu(psataPayload->port_id);
        u32 dev_id = le32_to_cpu(psataPayload->device_id);
-       unsigned long flags;
 
        if (event)
                pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event);
@@ -2724,8 +2723,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
                ts->resp = SAS_TASK_COMPLETE;
                ts->stat = SAS_DATA_OVERRUN;
                ts->residual = 0;
-               if (pm8001_dev)
-                       atomic_dec(&pm8001_dev->running_req);
                break;
        case IO_XFER_ERROR_BREAK:
                pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
@@ -2767,7 +2764,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
                                IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
                        ts->resp = SAS_TASK_COMPLETE;
                        ts->stat = SAS_QUEUE_FULL;
-                       pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
                        return;
                }
                break;
@@ -2853,20 +2849,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
                ts->stat = SAS_OPEN_TO;
                break;
        }
-       spin_lock_irqsave(&t->task_state_lock, flags);
-       t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
-       t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
-       t->task_state_flags |= SAS_TASK_STATE_DONE;
-       if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
-               spin_unlock_irqrestore(&t->task_state_lock, flags);
-               pm8001_dbg(pm8001_ha, FAIL,
-                          "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
-                          t, event, ts->resp, ts->stat);
-               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
-       } else {
-               spin_unlock_irqrestore(&t->task_state_lock, flags);
-               pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
-       }
 }
 
 /*See the comments for mpi_ssp_completion */
index 160ee8b..32edda3 100644 (file)
@@ -769,8 +769,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
                res = -TMF_RESP_FUNC_FAILED;
                /* Even TMF timed out, return direct. */
                if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+                       struct pm8001_ccb_info *ccb = task->lldd_task;
+
                        pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
                                   tmf->tmf);
+
+                       if (ccb)
+                               ccb->task = NULL;
                        goto ex_err;
                }
 
index 2530d13..9d20f80 100644 (file)
@@ -2185,9 +2185,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
                pm8001_dbg(pm8001_ha, FAIL,
                           "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
                           t, status, ts->resp, ts->stat);
+               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
                if (t->slow_task)
                        complete(&t->slow_task->completion);
-               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
        } else {
                spin_unlock_irqrestore(&t->task_state_lock, flags);
                pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
@@ -2794,9 +2794,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
                pm8001_dbg(pm8001_ha, FAIL,
                           "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
                           t, status, ts->resp, ts->stat);
+               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
                if (t->slow_task)
                        complete(&t->slow_task->completion);
-               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
        } else {
                spin_unlock_irqrestore(&t->task_state_lock, flags);
                spin_unlock_irqrestore(&circularQ->oq_lock,
@@ -2821,7 +2821,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
        u32 tag = le32_to_cpu(psataPayload->tag);
        u32 port_id = le32_to_cpu(psataPayload->port_id);
        u32 dev_id = le32_to_cpu(psataPayload->device_id);
-       unsigned long flags;
 
        if (event)
                pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event);
@@ -2854,8 +2853,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
                ts->resp = SAS_TASK_COMPLETE;
                ts->stat = SAS_DATA_OVERRUN;
                ts->residual = 0;
-               if (pm8001_dev)
-                       atomic_dec(&pm8001_dev->running_req);
                break;
        case IO_XFER_ERROR_BREAK:
                pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n");
@@ -2904,11 +2901,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
                                IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS);
                        ts->resp = SAS_TASK_COMPLETE;
                        ts->stat = SAS_QUEUE_FULL;
-                       spin_unlock_irqrestore(&circularQ->oq_lock,
-                                       circularQ->lock_flags);
-                       pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
-                       spin_lock_irqsave(&circularQ->oq_lock,
-                                       circularQ->lock_flags);
                        return;
                }
                break;
@@ -3008,24 +3000,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
                ts->stat = SAS_OPEN_TO;
                break;
        }
-       spin_lock_irqsave(&t->task_state_lock, flags);
-       t->task_state_flags &= ~SAS_TASK_STATE_PENDING;
-       t->task_state_flags &= ~SAS_TASK_AT_INITIATOR;
-       t->task_state_flags |= SAS_TASK_STATE_DONE;
-       if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) {
-               spin_unlock_irqrestore(&t->task_state_lock, flags);
-               pm8001_dbg(pm8001_ha, FAIL,
-                          "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n",
-                          t, event, ts->resp, ts->stat);
-               pm8001_ccb_task_free(pm8001_ha, t, ccb, tag);
-       } else {
-               spin_unlock_irqrestore(&t->task_state_lock, flags);
-               spin_unlock_irqrestore(&circularQ->oq_lock,
-                               circularQ->lock_flags);
-               pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag);
-               spin_lock_irqsave(&circularQ->oq_lock,
-                               circularQ->lock_flags);
-       }
 }
 
 /*See the comments for mpi_ssp_completion */
@@ -3931,6 +3905,7 @@ static int ssp_coalesced_comp_resp(struct pm8001_hba_info *pm8001_ha,
 /**
  * process_one_iomb - process one outbound Queue memory block
  * @pm8001_ha: our hba card information
+ * @circularQ: outbound circular queue
  * @piomb: IO message buffer
  */
 static void process_one_iomb(struct pm8001_hba_info *pm8001_ha,
index 3520b93..f4e6c68 100644 (file)
@@ -214,6 +214,48 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
                         SCSI_TIMEOUT, 3, NULL);
 }
 
+static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
+                                       unsigned int depth)
+{
+       int new_shift = sbitmap_calculate_shift(depth);
+       bool need_alloc = !sdev->budget_map.map;
+       bool need_free = false;
+       int ret;
+       struct sbitmap sb_backup;
+
+       /*
+        * Reallocate if the new shift differs from the current one; this is
+        * caused by a new default queue depth set up after ->slave_configure
+        */
+       if (!need_alloc && new_shift != sdev->budget_map.shift)
+               need_alloc = need_free = true;
+
+       if (!need_alloc)
+               return 0;
+
+       /*
+        * The request queue has to be frozen to reallocate the budget map;
+        * the disk isn't added yet at this point, so freezing is pretty fast
+        */
+       if (need_free) {
+               blk_mq_freeze_queue(sdev->request_queue);
+               sb_backup = sdev->budget_map;
+       }
+       ret = sbitmap_init_node(&sdev->budget_map,
+                               scsi_device_max_queue_depth(sdev),
+                               new_shift, GFP_KERNEL,
+                               sdev->request_queue->node, false, true);
+       if (need_free) {
+               if (ret)
+                       sdev->budget_map = sb_backup;
+               else
+                       sbitmap_free(&sb_backup);
+               ret = 0;
+               blk_mq_unfreeze_queue(sdev->request_queue);
+       }
+       return ret;
+}
+
 /**
  * scsi_alloc_sdev - allocate and setup a scsi_Device
  * @starget: which target to allocate a &scsi_device for
@@ -306,11 +348,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
         * default device queue depth to figure out sbitmap shift
         * since we use this queue depth most of times.
         */
-       if (sbitmap_init_node(&sdev->budget_map,
-                               scsi_device_max_queue_depth(sdev),
-                               sbitmap_calculate_shift(depth),
-                               GFP_KERNEL, sdev->request_queue->node,
-                               false, true)) {
+       if (scsi_realloc_sdev_budget_map(sdev, depth)) {
                put_device(&starget->dev);
                kfree(sdev);
                goto out;
@@ -1017,6 +1055,13 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
                        }
                        return SCSI_SCAN_NO_RESPONSE;
                }
+
+               /*
+                * The queue_depth is often changed in ->slave_configure.
+                * Set up budget map again since memory consumption of
+                * the map depends on actual queue depth.
+                */
+               scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth);
        }
 
        if (sdev->scsi_level >= SCSI_3)
index 840d981..fcc4638 100644 (file)
@@ -78,6 +78,26 @@ config FRAMEBUFFER_CONSOLE
        help
          Low-level framebuffer-based console driver.
 
+config FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+       bool "Enable legacy fbcon hardware acceleration code"
+       depends on FRAMEBUFFER_CONSOLE
+       default y if PARISC
+       default n
+       help
+         This option enables the fbcon (framebuffer text-based) hardware
+         acceleration for graphics drivers which were written for the fbdev
+         graphics interface.
+
+         On modern or mainstream machines (like x86-64), or when using a
+         modern Linux distribution, those fbdev drivers usually aren't used.
+         Enabling this option then has no effect, so you should disable it
+         on such newer machines.
+
+         If you compile this kernel for older machines which still require the
+         fbdev drivers, you may want to say Y.
+
+         If unsure, select n.
+
 config FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
        bool "Map the console to the primary display device"
        depends on FRAMEBUFFER_CONSOLE
index 01fae2c..f98e8f2 100644 (file)
@@ -43,6 +43,21 @@ static void update_attr(u8 *dst, u8 *src, int attribute,
        }
 }
 
+static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+                     int sx, int dy, int dx, int height, int width)
+{
+       struct fb_copyarea area;
+
+       area.sx = sx * vc->vc_font.width;
+       area.sy = sy * vc->vc_font.height;
+       area.dx = dx * vc->vc_font.width;
+       area.dy = dy * vc->vc_font.height;
+       area.height = height * vc->vc_font.height;
+       area.width = width * vc->vc_font.width;
+
+       info->fbops->fb_copyarea(info, &area);
+}
+
 static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy,
                      int sx, int height, int width)
 {
@@ -378,6 +393,7 @@ static int bit_update_start(struct fb_info *info)
 
 void fbcon_set_bitops(struct fbcon_ops *ops)
 {
+       ops->bmove = bit_bmove;
        ops->clear = bit_clear;
        ops->putcs = bit_putcs;
        ops->clear_margins = bit_clear_margins;
index 99ecd9a..f36829e 100644 (file)
@@ -173,6 +173,8 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s,
                        int count, int ypos, int xpos);
 static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
 static void fbcon_cursor(struct vc_data *vc, int mode);
+static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
+                       int height, int width);
 static int fbcon_switch(struct vc_data *vc);
 static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch);
 static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
@@ -180,8 +182,16 @@ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
 /*
  *  Internal routines
  */
+static __inline__ void ywrap_up(struct vc_data *vc, int count);
+static __inline__ void ywrap_down(struct vc_data *vc, int count);
+static __inline__ void ypan_up(struct vc_data *vc, int count);
+static __inline__ void ypan_down(struct vc_data *vc, int count);
+static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
+                           int dy, int dx, int height, int width, u_int y_break);
 static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
                           int unit);
+static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
+                             int line, int count, int dy);
 static void fbcon_modechanged(struct fb_info *info);
 static void fbcon_set_all_vcs(struct fb_info *info);
 static void fbcon_start(void);
@@ -1015,7 +1025,7 @@ static void fbcon_init(struct vc_data *vc, int init)
        struct vc_data *svc = *default_mode;
        struct fbcon_display *t, *p = &fb_display[vc->vc_num];
        int logo = 1, new_rows, new_cols, rows, cols;
-       int ret;
+       int cap, ret;
 
        if (WARN_ON(info_idx == -1))
            return;
@@ -1024,6 +1034,7 @@ static void fbcon_init(struct vc_data *vc, int init)
                con2fb_map[vc->vc_num] = info_idx;
 
        info = registered_fb[con2fb_map[vc->vc_num]];
+       cap = info->flags;
 
        if (logo_shown < 0 && console_loglevel <= CONSOLE_LOGLEVEL_QUIET)
                logo_shown = FBCON_LOGO_DONTSHOW;
@@ -1125,6 +1136,14 @@ static void fbcon_init(struct vc_data *vc, int init)
 
        ops->graphics = 0;
 
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+       if ((cap & FBINFO_HWACCEL_COPYAREA) &&
+           !(cap & FBINFO_HWACCEL_DISABLED))
+               p->scrollmode = SCROLL_MOVE;
+       else /* default to something safe */
+               p->scrollmode = SCROLL_REDRAW;
+#endif
+
        /*
         *  ++guenther: console.c:vc_allocate() relies on initializing
         *  vc_{cols,rows}, but we must not set those if we are only
@@ -1211,13 +1230,14 @@ finished:
  *  This system is now divided into two levels because of complications
  *  caused by hardware scrolling. Top level functions:
  *
- *     fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+ *     fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
  *
  *  handles y values in range [0, scr_height-1] that correspond to real
  *  screen positions. y_wrap shift means that first line of bitmap may be
  *  anywhere on this display. These functions convert lineoffsets to
  *  bitmap offsets and deal with the wrap-around case by splitting blits.
  *
+ *     fbcon_bmove_physical_8()    -- These are fast implementations
  *     fbcon_clear_physical_8()    -- of original fbcon_XXX fns.
  *     fbcon_putc_physical_8()     -- (font width != 8) may be added later
  *
@@ -1390,6 +1410,224 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
        }
 }
 
+static __inline__ void ywrap_up(struct vc_data *vc, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+
+       p->yscroll += count;
+       if (p->yscroll >= p->vrows)     /* Deal with wrap */
+               p->yscroll -= p->vrows;
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode |= FB_VMODE_YWRAP;
+       ops->update_start(info);
+       scrollback_max += count;
+       if (scrollback_max > scrollback_phys_max)
+               scrollback_max = scrollback_phys_max;
+       scrollback_current = 0;
+}
+
+static __inline__ void ywrap_down(struct vc_data *vc, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+
+       p->yscroll -= count;
+       if (p->yscroll < 0)     /* Deal with wrap */
+               p->yscroll += p->vrows;
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode |= FB_VMODE_YWRAP;
+       ops->update_start(info);
+       scrollback_max -= count;
+       if (scrollback_max < 0)
+               scrollback_max = 0;
+       scrollback_current = 0;
+}
+
+static __inline__ void ypan_up(struct vc_data *vc, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+       struct fbcon_ops *ops = info->fbcon_par;
+
+       p->yscroll += count;
+       if (p->yscroll > p->vrows - vc->vc_rows) {
+               ops->bmove(vc, info, p->vrows - vc->vc_rows,
+                           0, 0, 0, vc->vc_rows, vc->vc_cols);
+               p->yscroll -= p->vrows - vc->vc_rows;
+       }
+
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode &= ~FB_VMODE_YWRAP;
+       ops->update_start(info);
+       fbcon_clear_margins(vc, 1);
+       scrollback_max += count;
+       if (scrollback_max > scrollback_phys_max)
+               scrollback_max = scrollback_phys_max;
+       scrollback_current = 0;
+}
+
+static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+
+       p->yscroll += count;
+
+       if (p->yscroll > p->vrows - vc->vc_rows) {
+               p->yscroll -= p->vrows - vc->vc_rows;
+               fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t);
+       }
+
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode &= ~FB_VMODE_YWRAP;
+       ops->update_start(info);
+       fbcon_clear_margins(vc, 1);
+       scrollback_max += count;
+       if (scrollback_max > scrollback_phys_max)
+               scrollback_max = scrollback_phys_max;
+       scrollback_current = 0;
+}
+
+static __inline__ void ypan_down(struct vc_data *vc, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+       struct fbcon_ops *ops = info->fbcon_par;
+
+       p->yscroll -= count;
+       if (p->yscroll < 0) {
+               ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows,
+                           0, vc->vc_rows, vc->vc_cols);
+               p->yscroll += p->vrows - vc->vc_rows;
+       }
+
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode &= ~FB_VMODE_YWRAP;
+       ops->update_start(info);
+       fbcon_clear_margins(vc, 1);
+       scrollback_max -= count;
+       if (scrollback_max < 0)
+               scrollback_max = 0;
+       scrollback_current = 0;
+}
+
+static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+
+       p->yscroll -= count;
+
+       if (p->yscroll < 0) {
+               p->yscroll += p->vrows - vc->vc_rows;
+               fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count);
+       }
+
+       ops->var.xoffset = 0;
+       ops->var.yoffset = p->yscroll * vc->vc_font.height;
+       ops->var.vmode &= ~FB_VMODE_YWRAP;
+       ops->update_start(info);
+       fbcon_clear_margins(vc, 1);
+       scrollback_max -= count;
+       if (scrollback_max < 0)
+               scrollback_max = 0;
+       scrollback_current = 0;
+}
+
+static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
+                             int line, int count, int dy)
+{
+       unsigned short *s = (unsigned short *)
+               (vc->vc_origin + vc->vc_size_row * line);
+
+       while (count--) {
+               unsigned short *start = s;
+               unsigned short *le = advance_row(s, 1);
+               unsigned short c;
+               int x = 0;
+               unsigned short attr = 1;
+
+               do {
+                       c = scr_readw(s);
+                       if (attr != (c & 0xff00)) {
+                               attr = c & 0xff00;
+                               if (s > start) {
+                                       fbcon_putcs(vc, start, s - start,
+                                                   dy, x);
+                                       x += s - start;
+                                       start = s;
+                               }
+                       }
+                       console_conditional_schedule();
+                       s++;
+               } while (s < le);
+               if (s > start)
+                       fbcon_putcs(vc, start, s - start, dy, x);
+               console_conditional_schedule();
+               dy++;
+       }
+}
+
+static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
+                       struct fbcon_display *p, int line, int count, int ycount)
+{
+       int offset = ycount * vc->vc_cols;
+       unsigned short *d = (unsigned short *)
+           (vc->vc_origin + vc->vc_size_row * line);
+       unsigned short *s = d + offset;
+       struct fbcon_ops *ops = info->fbcon_par;
+
+       while (count--) {
+               unsigned short *start = s;
+               unsigned short *le = advance_row(s, 1);
+               unsigned short c;
+               int x = 0;
+
+               do {
+                       c = scr_readw(s);
+
+                       if (c == scr_readw(d)) {
+                               if (s > start) {
+                                       ops->bmove(vc, info, line + ycount, x,
+                                                  line, x, 1, s-start);
+                                       x += s - start + 1;
+                                       start = s + 1;
+                               } else {
+                                       x++;
+                                       start++;
+                               }
+                       }
+
+                       scr_writew(c, d);
+                       console_conditional_schedule();
+                       s++;
+                       d++;
+               } while (s < le);
+               if (s > start)
+                       ops->bmove(vc, info, line + ycount, x, line, x, 1,
+                                  s-start);
+               console_conditional_schedule();
+               if (ycount > 0)
+                       line++;
+               else {
+                       line--;
+                       /* NOTE: We subtract two lines from these pointers */
+                       s -= vc->vc_size_row;
+                       d -= vc->vc_size_row;
+               }
+       }
+}
+
 static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
                         int line, int count, int offset)
 {
@@ -1450,6 +1688,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 {
        struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
        struct fbcon_display *p = &fb_display[vc->vc_num];
+       int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
 
        if (fbcon_is_inactive(vc, info))
                return true;
@@ -1466,32 +1705,291 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
        case SM_UP:
                if (count > vc->vc_rows)        /* Maximum realistic size */
                        count = vc->vc_rows;
-               fbcon_redraw(vc, p, t, b - t - count,
-                            count * vc->vc_cols);
-               fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-               scr_memsetw((unsigned short *) (vc->vc_origin +
-                                               vc->vc_size_row *
-                                               (b - count)),
-                           vc->vc_video_erase_char,
-                           vc->vc_size_row * count);
-               return true;
+               if (logo_shown >= 0)
+                       goto redraw_up;
+               switch (fb_scrollmode(p)) {
+               case SCROLL_MOVE:
+                       fbcon_redraw_blit(vc, info, p, t, b - t - count,
+                                    count);
+                       fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+                       scr_memsetw((unsigned short *) (vc->vc_origin +
+                                                       vc->vc_size_row *
+                                                       (b - count)),
+                                   vc->vc_video_erase_char,
+                                   vc->vc_size_row * count);
+                       return true;
+
+               case SCROLL_WRAP_MOVE:
+                       if (b - t - count > 3 * vc->vc_rows >> 2) {
+                               if (t > 0)
+                                       fbcon_bmove(vc, 0, 0, count, 0, t,
+                                                   vc->vc_cols);
+                               ywrap_up(vc, count);
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_bmove(vc, b - count, 0, b, 0,
+                                                   vc->vc_rows - b,
+                                                   vc->vc_cols);
+                       } else if (info->flags & FBINFO_READS_FAST)
+                               fbcon_bmove(vc, t + count, 0, t, 0,
+                                           b - t - count, vc->vc_cols);
+                       else
+                               goto redraw_up;
+                       fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_PAN_REDRAW:
+                       if ((p->yscroll + count <=
+                            2 * (p->vrows - vc->vc_rows))
+                           && ((!scroll_partial && (b - t == vc->vc_rows))
+                               || (scroll_partial
+                                   && (b - t - count >
+                                       3 * vc->vc_rows >> 2)))) {
+                               if (t > 0)
+                                       fbcon_redraw_move(vc, p, 0, t, count);
+                               ypan_up_redraw(vc, t, count);
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_redraw_move(vc, p, b,
+                                                         vc->vc_rows - b, b);
+                       } else
+                               fbcon_redraw_move(vc, p, t + count, b - t - count, t);
+                       fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_PAN_MOVE:
+                       if ((p->yscroll + count <=
+                            2 * (p->vrows - vc->vc_rows))
+                           && ((!scroll_partial && (b - t == vc->vc_rows))
+                               || (scroll_partial
+                                   && (b - t - count >
+                                       3 * vc->vc_rows >> 2)))) {
+                               if (t > 0)
+                                       fbcon_bmove(vc, 0, 0, count, 0, t,
+                                                   vc->vc_cols);
+                               ypan_up(vc, count);
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_bmove(vc, b - count, 0, b, 0,
+                                                   vc->vc_rows - b,
+                                                   vc->vc_cols);
+                       } else if (info->flags & FBINFO_READS_FAST)
+                               fbcon_bmove(vc, t + count, 0, t, 0,
+                                           b - t - count, vc->vc_cols);
+                       else
+                               goto redraw_up;
+                       fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_REDRAW:
+                     redraw_up:
+                       fbcon_redraw(vc, p, t, b - t - count,
+                                    count * vc->vc_cols);
+                       fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+                       scr_memsetw((unsigned short *) (vc->vc_origin +
+                                                       vc->vc_size_row *
+                                                       (b - count)),
+                                   vc->vc_video_erase_char,
+                                   vc->vc_size_row * count);
+                       return true;
+               }
+               break;
 
        case SM_DOWN:
                if (count > vc->vc_rows)        /* Maximum realistic size */
                        count = vc->vc_rows;
-               fbcon_redraw(vc, p, b - 1, b - t - count,
-                            -count * vc->vc_cols);
-               fbcon_clear(vc, t, 0, count, vc->vc_cols);
-               scr_memsetw((unsigned short *) (vc->vc_origin +
-                                               vc->vc_size_row *
-                                               t),
-                           vc->vc_video_erase_char,
-                           vc->vc_size_row * count);
-               return true;
+               if (logo_shown >= 0)
+                       goto redraw_down;
+               switch (fb_scrollmode(p)) {
+               case SCROLL_MOVE:
+                       fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
+                                    -count);
+                       fbcon_clear(vc, t, 0, count, vc->vc_cols);
+                       scr_memsetw((unsigned short *) (vc->vc_origin +
+                                                       vc->vc_size_row *
+                                                       t),
+                                   vc->vc_video_erase_char,
+                                   vc->vc_size_row * count);
+                       return true;
+
+               case SCROLL_WRAP_MOVE:
+                       if (b - t - count > 3 * vc->vc_rows >> 2) {
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_bmove(vc, b, 0, b - count, 0,
+                                                   vc->vc_rows - b,
+                                                   vc->vc_cols);
+                               ywrap_down(vc, count);
+                               if (t > 0)
+                                       fbcon_bmove(vc, count, 0, 0, 0, t,
+                                                   vc->vc_cols);
+                       } else if (info->flags & FBINFO_READS_FAST)
+                               fbcon_bmove(vc, t, 0, t + count, 0,
+                                           b - t - count, vc->vc_cols);
+                       else
+                               goto redraw_down;
+                       fbcon_clear(vc, t, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_PAN_MOVE:
+                       if ((count - p->yscroll <= p->vrows - vc->vc_rows)
+                           && ((!scroll_partial && (b - t == vc->vc_rows))
+                               || (scroll_partial
+                                   && (b - t - count >
+                                       3 * vc->vc_rows >> 2)))) {
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_bmove(vc, b, 0, b - count, 0,
+                                                   vc->vc_rows - b,
+                                                   vc->vc_cols);
+                               ypan_down(vc, count);
+                               if (t > 0)
+                                       fbcon_bmove(vc, count, 0, 0, 0, t,
+                                                   vc->vc_cols);
+                       } else if (info->flags & FBINFO_READS_FAST)
+                               fbcon_bmove(vc, t, 0, t + count, 0,
+                                           b - t - count, vc->vc_cols);
+                       else
+                               goto redraw_down;
+                       fbcon_clear(vc, t, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_PAN_REDRAW:
+                       if ((count - p->yscroll <= p->vrows - vc->vc_rows)
+                           && ((!scroll_partial && (b - t == vc->vc_rows))
+                               || (scroll_partial
+                                   && (b - t - count >
+                                       3 * vc->vc_rows >> 2)))) {
+                               if (vc->vc_rows - b > 0)
+                                       fbcon_redraw_move(vc, p, b, vc->vc_rows - b,
+                                                         b - count);
+                               ypan_down_redraw(vc, t, count);
+                               if (t > 0)
+                                       fbcon_redraw_move(vc, p, count, t, 0);
+                       } else
+                               fbcon_redraw_move(vc, p, t, b - t - count, t + count);
+                       fbcon_clear(vc, t, 0, count, vc->vc_cols);
+                       break;
+
+               case SCROLL_REDRAW:
+                     redraw_down:
+                       fbcon_redraw(vc, p, b - 1, b - t - count,
+                                    -count * vc->vc_cols);
+                       fbcon_clear(vc, t, 0, count, vc->vc_cols);
+                       scr_memsetw((unsigned short *) (vc->vc_origin +
+                                                       vc->vc_size_row *
+                                                       t),
+                                   vc->vc_video_erase_char,
+                                   vc->vc_size_row * count);
+                       return true;
+               }
        }
        return false;
 }
 
+/*
+ * fbcon_bmove - move a rectangle of text within console @vc.
+ *
+ * @sy, @sx:        first row / first column of the source rectangle
+ * @dy, @dx:        first row / first column of the destination rectangle
+ * @height, @width: size of the rectangle
+ *
+ * Returns without doing anything when fbcon_is_inactive() reports the
+ * console as inactive or when the rectangle is empty.  Otherwise the move
+ * is delegated to fbcon_bmove_rec() with the break row
+ * p->vrows - p->yscroll.
+ *
+ * NOTE(review): the coordinates appear to be in character cells (the
+ * rotated bmove back-ends multiply them by the font size) - confirm for
+ * every ops->bmove implementation.
+ */
+static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
+                       int height, int width)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_display *p = &fb_display[vc->vc_num];
+
+       if (fbcon_is_inactive(vc, info))
+               return;
+
+       if (!width || !height)
+               return;
+
+       /*  Split blits that cross physical y_wrap case.
+        *  Pathological case involves 4 blits, better to use recursive
+        *  code rather than unrolled case
+        *
+        *  Recursive invocations don't need to erase the cursor over and
+        *  over again, so we use fbcon_bmove_rec()
+        */
+       fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width,
+                       p->vrows - p->yscroll);
+}
+/*
+ * fbcon_bmove_rec - worker for fbcon_bmove() that splits a move at @y_break.
+ *
+ * If the source rows [sy, sy + height) straddle @y_break, or failing that
+ * the destination rows [dy, dy + height) do, the rectangle is cut into the
+ * b rows before the break and the remaining height - b rows, and each
+ * part is handled by a recursive call.  When dy < sy the first part is
+ * moved first, otherwise the second part is moved first.  Once neither
+ * range straddles the break, both start rows are translated with real_y()
+ * and the move is issued through ops->bmove().
+ *
+ * NOTE(review): sy and dy are int while y_break is u_int, so the
+ * comparisons against y_break are performed unsigned; presumably callers
+ * never pass negative rows - confirm.
+ */
+static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
+                           int dy, int dx, int height, int width, u_int y_break)
+{
+       struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
+       struct fbcon_ops *ops = info->fbcon_par;
+       u_int b;
+
+       if (sy < y_break && sy + height > y_break) {
+               b = y_break - sy;
+               if (dy < sy) {  /* Avoid trashing self */
+                       fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+                                       y_break);
+                       fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+                                       height - b, width, y_break);
+               } else {
+                       fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+                                       height - b, width, y_break);
+                       fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+                                       y_break);
+               }
+               return;
+       }
+
+       if (dy < y_break && dy + height > y_break) {
+               b = y_break - dy;
+               if (dy < sy) {  /* Avoid trashing self */
+                       fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+                                       y_break);
+                       fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+                                       height - b, width, y_break);
+               } else {
+                       fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
+                                       height - b, width, y_break);
+                       fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
+                                       y_break);
+               }
+               return;
+       }
+       ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
+                  height, width);
+}
+/*
+ * updatescrollmode_accel - choose p->scrollmode from the driver's flags.
+ *
+ * Only has an effect when CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+ * is set; otherwise the whole body is compiled out.
+ *
+ * Selection, as implemented below:
+ *   - panning or wrapping usable (good_pan / good_wrap):
+ *       fast reads or accelerated copyarea -> SCROLL_WRAP_MOVE when
+ *       wrapping is usable, else SCROLL_PAN_MOVE;
+ *       otherwise -> SCROLL_REDRAW when wrapping is usable, else
+ *       SCROLL_PAN_REDRAW.
+ *   - neither usable:
+ *       fast reads, or accelerated copyarea without accelerated
+ *       imageblit -> SCROLL_MOVE; otherwise -> SCROLL_REDRAW.
+ *
+ * "Accelerated" here means the matching FBINFO_HWACCEL_* flag is set and
+ * FBINFO_HWACCEL_DISABLED is clear.
+ *
+ * NOTE(review): FBCON_SWAP() and divides() are defined elsewhere;
+ * presumably FBCON_SWAP() picks the x or y variant according to
+ * ops->rotate - confirm.
+ */
+static void updatescrollmode_accel(struct fbcon_display *p,
+                                       struct fb_info *info,
+                                       struct vc_data *vc)
+{
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+       struct fbcon_ops *ops = info->fbcon_par;
+       int cap = info->flags;
+       u16 t = 0;
+       int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep,
+                                 info->fix.xpanstep);
+       int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t);
+       int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
+       int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual,
+                                  info->var.xres_virtual);
+       int good_pan = (cap & FBINFO_HWACCEL_YPAN) &&
+               divides(ypan, vc->vc_font.height) && vyres > yres;
+       int good_wrap = (cap & FBINFO_HWACCEL_YWRAP) &&
+               divides(ywrap, vc->vc_font.height) &&
+               divides(vc->vc_font.height, vyres) &&
+               divides(vc->vc_font.height, yres);
+       int reading_fast = cap & FBINFO_READS_FAST;
+       int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) &&
+               !(cap & FBINFO_HWACCEL_DISABLED);
+       int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) &&
+               !(cap & FBINFO_HWACCEL_DISABLED);
+
+       if (good_wrap || good_pan) {
+               if (reading_fast || fast_copyarea)
+                       p->scrollmode = good_wrap ?
+                               SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE;
+               else
+                       p->scrollmode = good_wrap ? SCROLL_REDRAW :
+                               SCROLL_PAN_REDRAW;
+       } else {
+               if (reading_fast || (fast_copyarea && !fast_imageblit))
+                       p->scrollmode = SCROLL_MOVE;
+               else
+                       p->scrollmode = SCROLL_REDRAW;
+       }
+#endif
+}
+
 static void updatescrollmode(struct fbcon_display *p,
                                        struct fb_info *info,
                                        struct vc_data *vc)
@@ -1507,6 +2005,9 @@ static void updatescrollmode(struct fbcon_display *p,
                p->vrows -= (yres - (fh * vc->vc_rows)) / fh;
        if ((yres % fh) && (vyres % fh < yres % fh))
                p->vrows--;
+
+       /* update scrollmode in case hardware acceleration is used */
+       updatescrollmode_accel(p, info, vc);
 }
 
 #define PITCH(w) (((w) + 7) >> 3)
@@ -1664,7 +2165,21 @@ static int fbcon_switch(struct vc_data *vc)
 
        updatescrollmode(p, info, vc);
 
-       scrollback_phys_max = 0;
+       switch (fb_scrollmode(p)) {
+       case SCROLL_WRAP_MOVE:
+               scrollback_phys_max = p->vrows - vc->vc_rows;
+               break;
+       case SCROLL_PAN_MOVE:
+       case SCROLL_PAN_REDRAW:
+               scrollback_phys_max = p->vrows - 2 * vc->vc_rows;
+               if (scrollback_phys_max < 0)
+                       scrollback_phys_max = 0;
+               break;
+       default:
+               scrollback_phys_max = 0;
+               break;
+       }
+
        scrollback_max = 0;
        scrollback_current = 0;
 
index a00603b..969d41e 100644 (file)
@@ -29,6 +29,9 @@ struct fbcon_display {
     /* Filled in by the low-level console driver */
     const u_char *fontdata;
     int userfont;                   /* != 0 if fontdata kmalloc()ed */
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+    u_short scrollmode;             /* Scroll Method, use fb_scrollmode() */
+#endif
     u_short inverse;                /* != 0 text black on white as default */
     short yscroll;                  /* Hardware scrolling */
     int vrows;                      /* number of virtual rows */
@@ -51,6 +54,8 @@ struct fbcon_display {
 };
 
 struct fbcon_ops {
+       void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy,
+                     int sx, int dy, int dx, int height, int width);
        void (*clear)(struct vc_data *vc, struct fb_info *info, int sy,
                      int sx, int height, int width);
        void (*putcs)(struct vc_data *vc, struct fb_info *info,
@@ -149,6 +154,73 @@ static inline int attr_col_ec(int shift, struct vc_data *vc,
 #define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
 #define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
 
+    /*
+     *  Scroll Method
+     */
+
+/* There are several methods fbcon can use to move text around the screen:
+ *
+ *                     Operation   Pan    Wrap
+ *---------------------------------------------
+ * SCROLL_MOVE         copyarea    No     No
+ * SCROLL_PAN_MOVE     copyarea    Yes    No
+ * SCROLL_WRAP_MOVE    copyarea    No     Yes
+ * SCROLL_REDRAW       imageblit   No     No
+ * SCROLL_PAN_REDRAW   imageblit   Yes    No
+ * SCROLL_WRAP_REDRAW  imageblit   No     Yes
+ *
+ * (SCROLL_WRAP_REDRAW is not implemented yet)
+ *
+ * In general, fbcon will choose the best scrolling
+ * method based on the rule below:
+ *
+ * Pan/Wrap > accel imageblit > accel copyarea >
+ * soft imageblit > (soft copyarea)
+ *
+ * Exception to the rule: Pan + accel copyarea is
+ * preferred over Pan + accel imageblit.
+ *
+ * The above is typical for PCI/AGP cards. Unless
+ * overridden, fbcon will never use soft copyarea.
+ *
+ * If you need to override the above rule, set the
+ * appropriate flags in fb_info->flags.  For example,
+ * to prefer copyarea over imageblit, set
+ * FBINFO_READS_FAST.
+ *
+ * Other notes:
+ * + use the hardware engine to move the text
+ *    (hw-accelerated copyarea() and fillrect())
+ * + use hardware-supported panning on a large virtual screen
+ * + amifb can not only pan, but also wrap the display by N lines
+ *    (i.e. visible line i = physical line (i+N) % yres).
+ * + read what's already rendered on the screen and
+ *     write it in a different place (this is cfb_copyarea())
+ * + re-render the text to the screen
+ *
+ * Whether to use wrapping or panning can only be figured out at
+ * runtime (when we know whether our font height is a multiple
+ * of the pan/wrap step)
+ *
+ */
+
+#define SCROLL_MOVE       0x001
+#define SCROLL_PAN_MOVE           0x002
+#define SCROLL_WRAP_MOVE   0x003
+#define SCROLL_REDRAW     0x004
+#define SCROLL_PAN_REDRAW  0x005
+/*
+ * fb_scrollmode - return the scroll method (one of the SCROLL_* values)
+ * for display @fb.
+ */
+static inline u_short fb_scrollmode(struct fbcon_display *fb)
+{
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION
+       return fb->scrollmode;
+#else
+       /*
+        * Legacy acceleration is compiled out, so there is no per-display
+        * scrollmode to read: always report SCROLL_REDRAW.
+        */
+       return SCROLL_REDRAW;
+#endif
+}
+
+
 #ifdef CONFIG_FB_TILEBLITTING
 extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
 #endif
index ffa7893..2789ace 100644 (file)
@@ -59,12 +59,31 @@ static void ccw_update_attr(u8 *dst, u8 *src, int attribute,
        }
 }
 
+/*
+ * ccw_bmove - bmove hook installed by fbcon_rotate_ccw().
+ *
+ * Translates a move of a console rectangle into an fb_copyarea request:
+ * console rows (@sy, @dy, @height) are scaled by the font height and land
+ * on the framebuffer x axis, while console columns (@sx, @dx, @width) are
+ * scaled by the font width and measured back from vyres on the y axis.
+ * vyres comes from GETVYRES(), i.e. var.yres or var.yres_virtual
+ * depending on the scroll mode.  The copy itself is carried out by the
+ * driver's fb_copyarea().
+ */
+static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+                    int sx, int dy, int dx, int height, int width)
+{
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fb_copyarea area;
+       u32 vyres = GETVYRES(ops->p, info);
+
+       area.sx = sy * vc->vc_font.height;
+       area.sy = vyres - ((sx + width) * vc->vc_font.width);
+       area.dx = dy * vc->vc_font.height;
+       area.dy = vyres - ((dx + width) * vc->vc_font.width);
+       area.width = height * vc->vc_font.height;
+       area.height  = width * vc->vc_font.width;
+
+       info->fbops->fb_copyarea(info, &area);
+}
+
 static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
                     int sx, int height, int width)
 {
+       struct fbcon_ops *ops = info->fbcon_par;
        struct fb_fillrect region;
        int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-       u32 vyres = info->var.yres;
+       u32 vyres = GETVYRES(ops->p, info);
 
        region.color = attr_bgcol_ec(bgshift,vc,info);
        region.dx = sy * vc->vc_font.height;
@@ -121,7 +140,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info,
        u32 cnt, pitch, size;
        u32 attribute = get_attribute(info, scr_readw(s));
        u8 *dst, *buf = NULL;
-       u32 vyres = info->var.yres;
+       u32 vyres = GETVYRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -210,7 +229,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
        int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
        int err = 1, dx, dy;
        char *src;
-       u32 vyres = info->var.yres;
+       u32 vyres = GETVYRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -368,7 +387,7 @@ static int ccw_update_start(struct fb_info *info)
 {
        struct fbcon_ops *ops = info->fbcon_par;
        u32 yoffset;
-       u32 vyres = info->var.yres;
+       u32 vyres = GETVYRES(ops->p, info);
        int err;
 
        yoffset = (vyres - info->var.yres) - ops->var.xoffset;
@@ -383,6 +402,7 @@ static int ccw_update_start(struct fb_info *info)
 
 void fbcon_rotate_ccw(struct fbcon_ops *ops)
 {
+       ops->bmove = ccw_bmove;
        ops->clear = ccw_clear;
        ops->putcs = ccw_putcs;
        ops->clear_margins = ccw_clear_margins;
index 92e5b7f..86a254c 100644 (file)
@@ -44,12 +44,31 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute,
        }
 }
 
+/*
+ * cw_bmove - bmove hook installed by fbcon_rotate_cw().
+ *
+ * Translates a move of a console rectangle into an fb_copyarea request:
+ * console rows (@sy, @dy, @height) are scaled by the font height and
+ * measured back from vxres on the framebuffer x axis, while console
+ * columns (@sx, @dx, @width) are scaled by the font width and land on the
+ * y axis.  vxres comes from GETVXRES(), i.e. var.xres or
+ * var.xres_virtual.  The copy itself is carried out by the driver's
+ * fb_copyarea().
+ */
+static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+                    int sx, int dy, int dx, int height, int width)
+{
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fb_copyarea area;
+       u32 vxres = GETVXRES(ops->p, info);
+
+       area.sx = vxres - ((sy + height) * vc->vc_font.height);
+       area.sy = sx * vc->vc_font.width;
+       area.dx = vxres - ((dy + height) * vc->vc_font.height);
+       area.dy = dx * vc->vc_font.width;
+       area.width = height * vc->vc_font.height;
+       area.height  = width * vc->vc_font.width;
+
+       info->fbops->fb_copyarea(info, &area);
+}
+
 static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
                     int sx, int height, int width)
 {
+       struct fbcon_ops *ops = info->fbcon_par;
        struct fb_fillrect region;
        int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-       u32 vxres = info->var.xres;
+       u32 vxres = GETVXRES(ops->p, info);
 
        region.color = attr_bgcol_ec(bgshift,vc,info);
        region.dx = vxres - ((sy + height) * vc->vc_font.height);
@@ -106,7 +125,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info,
        u32 cnt, pitch, size;
        u32 attribute = get_attribute(info, scr_readw(s));
        u8 *dst, *buf = NULL;
-       u32 vxres = info->var.xres;
+       u32 vxres = GETVXRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -193,7 +212,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
        int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
        int err = 1, dx, dy;
        char *src;
-       u32 vxres = info->var.xres;
+       u32 vxres = GETVXRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -350,7 +369,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
 static int cw_update_start(struct fb_info *info)
 {
        struct fbcon_ops *ops = info->fbcon_par;
-       u32 vxres = info->var.xres;
+       u32 vxres = GETVXRES(ops->p, info);
        u32 xoffset;
        int err;
 
@@ -366,6 +385,7 @@ static int cw_update_start(struct fb_info *info)
 
 void fbcon_rotate_cw(struct fbcon_ops *ops)
 {
+       ops->bmove = cw_bmove;
        ops->clear = cw_clear;
        ops->putcs = cw_putcs;
        ops->clear_margins = cw_clear_margins;
index b528b2e..01cbe30 100644 (file)
 #ifndef _FBCON_ROTATE_H
 #define _FBCON_ROTATE_H
 
+#define GETVYRES(s,i) ({                           \
+        (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE) ? \
+        (i)->var.yres : (i)->var.yres_virtual; })
+/*
+ * GETVYRES() above yields (i)->var.yres when fb_scrollmode(s) is
+ * SCROLL_REDRAW or SCROLL_MOVE and (i)->var.yres_virtual otherwise.
+ * GETVXRES() below does the same for the x resolution and additionally
+ * yields (i)->var.xres whenever (i)->fix.xpanstep is zero.  Both macros
+ * expand their `s` argument more than once.
+ */
+#define GETVXRES(s,i) ({                           \
+        (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
+        (i)->var.xres : (i)->var.xres_virtual; })
+
+
 static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
 {
        u32 tmp = (y * pitch) + x, index = tmp / 8,  bit = tmp % 8;
index 09619bd..23bc045 100644 (file)
@@ -44,13 +44,33 @@ static void ud_update_attr(u8 *dst, u8 *src, int attribute,
        }
 }
 
+/*
+ * ud_bmove - bmove hook installed by fbcon_rotate_ud().
+ *
+ * Translates a move of a console rectangle into an fb_copyarea request
+ * with both axes mirrored: console rows (@sy, @dy, @height) are scaled by
+ * the font height and measured back from vyres on the y axis, and console
+ * columns (@sx, @dx, @width) are scaled by the font width and measured
+ * back from vxres on the x axis.  vyres and vxres come from GETVYRES() and
+ * GETVXRES().  The copy itself is carried out by the driver's
+ * fb_copyarea().
+ */
+static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+                    int sx, int dy, int dx, int height, int width)
+{
+       struct fbcon_ops *ops = info->fbcon_par;
+       struct fb_copyarea area;
+       u32 vyres = GETVYRES(ops->p, info);
+       u32 vxres = GETVXRES(ops->p, info);
+
+       area.sy = vyres - ((sy + height) * vc->vc_font.height);
+       area.sx = vxres - ((sx + width) * vc->vc_font.width);
+       area.dy = vyres - ((dy + height) * vc->vc_font.height);
+       area.dx = vxres - ((dx + width) * vc->vc_font.width);
+       area.height = height * vc->vc_font.height;
+       area.width  = width * vc->vc_font.width;
+
+       info->fbops->fb_copyarea(info, &area);
+}
+
 static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
                     int sx, int height, int width)
 {
+       struct fbcon_ops *ops = info->fbcon_par;
        struct fb_fillrect region;
        int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-       u32 vyres = info->var.yres;
-       u32 vxres = info->var.xres;
+       u32 vyres = GETVYRES(ops->p, info);
+       u32 vxres = GETVXRES(ops->p, info);
 
        region.color = attr_bgcol_ec(bgshift,vc,info);
        region.dy = vyres - ((sy + height) * vc->vc_font.height);
@@ -142,8 +162,8 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info,
        u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
        u32 attribute = get_attribute(info, scr_readw(s));
        u8 *dst, *buf = NULL;
-       u32 vyres = info->var.yres;
-       u32 vxres = info->var.xres;
+       u32 vyres = GETVYRES(ops->p, info);
+       u32 vxres = GETVXRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -239,8 +259,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode,
        int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
        int err = 1, dx, dy;
        char *src;
-       u32 vyres = info->var.yres;
-       u32 vxres = info->var.xres;
+       u32 vyres = GETVYRES(ops->p, info);
+       u32 vxres = GETVXRES(ops->p, info);
 
        if (!ops->fontbuffer)
                return;
@@ -390,8 +410,8 @@ static int ud_update_start(struct fb_info *info)
 {
        struct fbcon_ops *ops = info->fbcon_par;
        int xoffset, yoffset;
-       u32 vyres = info->var.yres;
-       u32 vxres = info->var.xres;
+       u32 vyres = GETVYRES(ops->p, info);
+       u32 vxres = GETVXRES(ops->p, info);
        int err;
 
        xoffset = vxres - info->var.xres - ops->var.xoffset;
@@ -409,6 +429,7 @@ static int ud_update_start(struct fb_info *info)
 
 void fbcon_rotate_ud(struct fbcon_ops *ops)
 {
+       ops->bmove = ud_bmove;
        ops->clear = ud_clear;
        ops->putcs = ud_putcs;
        ops->clear_margins = ud_clear_margins;
index 72af950..2768eff 100644 (file)
 #include <asm/types.h>
 #include "fbcon.h"
 
+static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+                      int sx, int dy, int dx, int height, int width)
+{
+       struct fb_tilearea area;        /* request handed to fb_tilecopy() */
+
+       area.sx = sx;   /* positions and sizes are forwarded unscaled */
+       area.sy = sy;
+       area.dx = dx;
+       area.dy = dy;
+       area.height = height;
+       area.width = width;
+
+       info->tileops->fb_tilecopy(info, &area);        /* @vc is not used */
+}
+
 static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy,
                       int sx, int height, int width)
 {
@@ -118,6 +133,7 @@ void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info)
        struct fb_tilemap map;
        struct fbcon_ops *ops = info->fbcon_par;
 
+       ops->bmove = tile_bmove;
        ops->clear = tile_clear;
        ops->putcs = tile_putcs;
        ops->clear_margins = tile_clear_margins;
index 0fe922f..bcacfb6 100644 (file)
@@ -505,15 +505,15 @@ void xxxfb_fillrect(struct fb_info *p, const struct fb_fillrect *region)
 }
 
 /**
- *      xxxfb_copyarea - OBSOLETE function.
+ *      xxxfb_copyarea - REQUIRED function. Can use generic routines if
+ *                       non-accelerated hardware and packed pixel based.
  *                       Copies one area of the screen to another area.
- *                       Will be deleted in a future version
  *
  *      @info: frame buffer structure that represents a single frame buffer
  *      @area: Structure providing the data to copy the framebuffer contents
  *            from one region to another.
  *
- *      This drawing operation copied a rectangular area from one area of the
+ *      This drawing operation copies a rectangular area from one area of the
  *     screen to another area.
  */
 void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) 
@@ -645,9 +645,9 @@ static const struct fb_ops xxxfb_ops = {
        .fb_setcolreg   = xxxfb_setcolreg,
        .fb_blank       = xxxfb_blank,
        .fb_pan_display = xxxfb_pan_display,
-       .fb_fillrect    = xxxfb_fillrect,       /* Needed !!!   */
-       .fb_copyarea    = xxxfb_copyarea,       /* Obsolete     */
-       .fb_imageblit   = xxxfb_imageblit,      /* Needed !!!   */
+       .fb_fillrect    = xxxfb_fillrect,       /* Needed !!! */
+       .fb_copyarea    = xxxfb_copyarea,       /* Needed !!! */
+       .fb_imageblit   = xxxfb_imageblit,      /* Needed !!! */
        .fb_cursor      = xxxfb_cursor,         /* Optional !!! */
        .fb_sync        = xxxfb_sync,
        .fb_ioctl       = xxxfb_ioctl,
index 6aab046..79df61f 100644 (file)
@@ -96,12 +96,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
                 dentry, dentry, from_kuid(&init_user_ns, uid),
                 any);
        ret = NULL;
-
-       if (d_inode(dentry))
-               ret = v9fs_fid_find_inode(d_inode(dentry), uid);
-
        /* we'll recheck under lock if there's anything to look in */
-       if (!ret && dentry->d_fsdata) {
+       if (dentry->d_fsdata) {
                struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
 
                spin_lock(&dentry->d_lock);
@@ -113,6 +109,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
                        }
                }
                spin_unlock(&dentry->d_lock);
+       } else {
+               if (dentry->d_inode)
+                       ret = v9fs_fid_find_inode(dentry->d_inode, uid);
        }
 
        return ret;
index 7a2b11c..6c7dc13 100644 (file)
@@ -369,8 +369,8 @@ source "fs/ksmbd/Kconfig"
 
 config SMBFS_COMMON
        tristate
-       default y if CIFS=y
-       default m if CIFS=m
+       default y if CIFS=y || SMB_SERVER=y
+       default m if CIFS=m || SMB_SERVER=m
 
 source "fs/coda/Kconfig"
 source "fs/afs/Kconfig"
index c07f357..e1eae7e 100644 (file)
@@ -817,20 +817,16 @@ static struct file_system_type bm_fs_type = {
 };
 MODULE_ALIAS_FS("binfmt_misc");
 
-static struct ctl_table_header *binfmt_misc_header;
-
 static int __init init_misc_binfmt(void)
 {
        int err = register_filesystem(&bm_fs_type);
        if (!err)
                insert_binfmt(&misc_format);
-       binfmt_misc_header = register_sysctl_mount_point("fs/binfmt_misc");
-       return 0;
+       return err;
 }
 
 static void __exit exit_misc_binfmt(void)
 {
-       unregister_sysctl_table(binfmt_misc_header);
        unregister_binfmt(&misc_format);
        unregister_filesystem(&bm_fs_type);
 }
index 1db24e6..8202ad6 100644 (file)
@@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
 {
        if (refcount_dec_and_test(&cache->refs)) {
                WARN_ON(cache->pinned > 0);
-               WARN_ON(cache->reserved > 0);
+               /*
+                * If there was a failure to cleanup a log tree, very likely due
+                * to an IO failure on a writeback attempt of one or more of its
+                * extent buffers, we could not do proper (and cheap) unaccounting
+                * of their reserved space, so don't warn on reserved > 0 in that
+                * case.
+                */
+               if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                   !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
+                       WARN_ON(cache->reserved > 0);
 
                /*
                 * A block_group shouldn't be on the discard_list anymore.
@@ -2544,6 +2553,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
        int ret;
        bool dirty_bg_running;
 
+       /*
+        * This can only happen when we are doing a read-only scrub on a
+        * read-only mount.
+        * In that case we should not start a new transaction on read-only fs.
+        * Thus here we skip all chunk allocations.
+        */
+       if (sb_rdonly(fs_info->sb)) {
+               mutex_lock(&fs_info->ro_block_group_mutex);
+               ret = inc_block_group_ro(cache, 0);
+               mutex_unlock(&fs_info->ro_block_group_mutex);
+               return ret;
+       }
+
        do {
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
@@ -3974,9 +3996,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                 * important and indicates a real bug if this happens.
                 */
                if (WARN_ON(space_info->bytes_pinned > 0 ||
-                           space_info->bytes_reserved > 0 ||
                            space_info->bytes_may_use > 0))
                        btrfs_dump_space_info(info, space_info, 0, 0);
+
+               /*
+                * If there was a failure to cleanup a log tree, very likely due
+                * to an IO failure on a writeback attempt of one or more of its
+                * extent buffers, we could not do proper (and cheap) unaccounting
+                * of their reserved space, so don't warn on bytes_reserved > 0 in
+                * that case.
+                */
+               if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                   !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+                       if (WARN_ON(space_info->bytes_reserved > 0))
+                               btrfs_dump_space_info(info, space_info, 0, 0);
+               }
+
                WARN_ON(space_info->reclaim_size > 0);
                list_del(&space_info->list);
                btrfs_sysfs_remove_space_info(space_info);
index b4a9b1c..8992e00 100644 (file)
@@ -145,6 +145,9 @@ enum {
        BTRFS_FS_STATE_DUMMY_FS_INFO,
 
        BTRFS_FS_STATE_NO_CSUMS,
+
+       /* Indicates there was an error cleaning up a log tree. */
+       BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
 };
 
 #define BTRFS_BACKREF_REV_MAX          256
@@ -3593,6 +3596,9 @@ do {                                                              \
 
 #define BTRFS_FS_ERROR(fs_info)        (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
                                                   &(fs_info)->fs_state)))
+#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)                            \
+       (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,            \
+                          &(fs_info)->fs_state)))
 
 __printf(5, 6)
 __cold
index d8af662..33eda39 100644 (file)
@@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                goto fail;
        }
 
-       spin_lock(&fs_info->trans_lock);
-       list_add(&pending_snapshot->list,
-                &trans->transaction->pending_snapshots);
-       spin_unlock(&fs_info->trans_lock);
+       trans->pending_snapshot = pending_snapshot;
 
        ret = btrfs_commit_transaction(trans);
        if (ret)
@@ -3354,7 +3351,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
        struct block_device *bdev = NULL;
        fmode_t mode;
        int ret;
-       bool cancel;
+       bool cancel = false;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
index 8928275..f12dc68 100644 (file)
@@ -1185,9 +1185,24 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        struct btrfs_trans_handle *trans = NULL;
        int ret = 0;
 
+       /*
+        * We need to have subvol_sem write locked, to prevent races between
+        * concurrent tasks trying to disable quotas, because we will unlock
+        * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+        */
+       lockdep_assert_held_write(&fs_info->subvol_sem);
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
+
+       /*
+        * Request the qgroup rescan worker to complete and wait for it. This
+        * wait must be done before the quota disable transaction starts, since
+        * it may deadlock with a transaction of the qgroup rescan worker.
+        */
+       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
        /*
@@ -1205,14 +1220,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                trans = NULL;
+               set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
                goto out;
        }
 
        if (!fs_info->quota_root)
                goto out;
 
-       clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-       btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
@@ -3383,6 +3397,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                        btrfs_warn(fs_info,
                        "qgroup rescan init failed, qgroup is not enabled");
                        ret = -EINVAL;
+               } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+                       /* Quota disable is in progress */
+                       ret = -EBUSY;
                }
 
                if (ret) {
index 03de89b..c43bbc7 100644 (file)
@@ -2000,6 +2000,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
                btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 }
 
+/*
+ * Add a pending snapshot associated with the given transaction handle to the
+ * pending snapshots list of its transaction. This must be called after the
+ * transaction commit started and while holding fs_info->trans_lock.
+ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
+ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
+ * returns an error.
+ */
+static void add_pending_snapshot(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+
+       if (!trans->pending_snapshot)
+               return;
+
+       lockdep_assert_held(&trans->fs_info->trans_lock);
+       ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+
+       list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -2073,6 +2094,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
        if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
                enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
 
+               add_pending_snapshot(trans);
+
                spin_unlock(&fs_info->trans_lock);
                refcount_inc(&cur_trans->use_count);
 
@@ -2163,6 +2186,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
         */
        spin_lock(&fs_info->trans_lock);
+       add_pending_snapshot(trans);
        cur_trans->state = TRANS_STATE_COMMIT_DOING;
        spin_unlock(&fs_info->trans_lock);
        wait_event(cur_trans->writer_wait,
index 1852ed9..9402d8d 100644 (file)
@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
        struct btrfs_transaction *transaction;
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_block_rsv *orig_rsv;
+       /* Set by a task that wants to create a snapshot. */
+       struct btrfs_pending_snapshot *pending_snapshot;
        refcount_t use_count;
        unsigned int type;
        /*
index 72e1c94..9fd145f 100644 (file)
@@ -965,6 +965,7 @@ static int check_dev_item(struct extent_buffer *leaf,
                          struct btrfs_key *key, int slot)
 {
        struct btrfs_dev_item *ditem;
+       const u32 item_size = btrfs_item_size(leaf, slot);
 
        if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
                dev_item_err(leaf, slot,
@@ -972,6 +973,13 @@ static int check_dev_item(struct extent_buffer *leaf,
                             key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
                return -EUCLEAN;
        }
+
+       if (unlikely(item_size != sizeof(*ditem))) {
+               dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
+                            item_size, sizeof(*ditem));
+               return -EUCLEAN;
+       }
+
        ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
        if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
                dev_item_err(leaf, slot,
@@ -1007,6 +1015,7 @@ static int check_inode_item(struct extent_buffer *leaf,
        struct btrfs_inode_item *iitem;
        u64 super_gen = btrfs_super_generation(fs_info->super_copy);
        u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+       const u32 item_size = btrfs_item_size(leaf, slot);
        u32 mode;
        int ret;
        u32 flags;
@@ -1016,6 +1025,12 @@ static int check_inode_item(struct extent_buffer *leaf,
        if (unlikely(ret < 0))
                return ret;
 
+       if (unlikely(item_size != sizeof(*iitem))) {
+               generic_err(leaf, slot, "invalid item size: has %u expect %zu",
+                           item_size, sizeof(*iitem));
+               return -EUCLEAN;
+       }
+
        iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
 
        /* Here we use super block generation + 1 to handle log tree */
index c1ddbe8..3ee014c 100644 (file)
@@ -3414,6 +3414,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
        if (log->node) {
                ret = walk_log_tree(trans, log, &wc);
                if (ret) {
+                       /*
+                        * We weren't able to traverse the entire log tree; the
+                        * typical scenario is getting an -EIO when reading an
+                        * extent buffer of the tree, due to a previous writeback
+                        * failure of it.
+                        */
+                       set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+                               &log->fs_info->fs_state);
+
+                       /*
+                        * Some extent buffers of the log tree may still be dirty
+                        * and not yet written back to storage, because we may
+                        * have updates to a log tree without syncing a log tree,
+                        * such as during rename and link operations. So flush
+                        * them out and wait for their writeback to complete, so
+                        * that we properly cleanup their state and pages.
+                        */
+                       btrfs_write_marked_extents(log->fs_info,
+                                                  &log->dirty_log_pages,
+                                                  EXTENT_DIRTY | EXTENT_NEW);
+                       btrfs_wait_tree_log_extents(log,
+                                                   EXTENT_DIRTY | EXTENT_NEW);
+
                        if (trans)
                                btrfs_abort_transaction(trans, ret);
                        else
index 04eb527..753986e 100644 (file)
@@ -192,6 +192,64 @@ presubmission_error:
 }
 
 /*
+ * Query the occupancy of the cache in a region, returning where the next chunk
+ * of data starts and how long it is.
+ */
+static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
+                                     loff_t start, size_t len, size_t granularity,
+                                     loff_t *_data_start, size_t *_data_len)
+{
+       struct cachefiles_object *object;
+       struct file *file;
+       loff_t off, off2;
+
+       *_data_start = -1;
+       *_data_len = 0;
+
+       if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
+               return -ENOBUFS;
+
+       object = cachefiles_cres_object(cres);
+       file = cachefiles_cres_file(cres);
+       granularity = max_t(size_t, object->volume->cache->bsize, granularity);
+
+       _enter("%pD,%li,%llx,%zx/%llx",
+              file, file_inode(file)->i_ino, start, len,
+              i_size_read(file_inode(file)));
+
+       off = cachefiles_inject_read_error();
+       if (off == 0)
+               off = vfs_llseek(file, start, SEEK_DATA);
+       if (off == -ENXIO)
+               return -ENODATA; /* Beyond EOF */
+       if (off < 0 && off >= (loff_t)-MAX_ERRNO)
+               return -ENOBUFS; /* Error. */
+       if (round_up(off, granularity) >= start + len)
+               return -ENODATA; /* No data in range */
+
+       off2 = cachefiles_inject_read_error();
+       if (off2 == 0)
+               off2 = vfs_llseek(file, off, SEEK_HOLE);
+       if (off2 == -ENXIO)
+               return -ENODATA; /* Beyond EOF */
+       if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
+               return -ENOBUFS; /* Error. */
+
+       /* Round away partial blocks */
+       off = round_up(off, granularity);
+       off2 = round_down(off2, granularity);
+       if (off2 <= off)
+               return -ENODATA;
+
+       *_data_start = off;
+       if (off2 > start + len)
+               *_data_len = len;
+       else
+               *_data_len = off2 - off;
+       return 0;
+}
+
+/*
  * Handle completion of a write to the cache.
  */
 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
@@ -545,6 +603,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
        .write                  = cachefiles_write,
        .prepare_read           = cachefiles_prepare_read,
        .prepare_write          = cachefiles_prepare_write,
+       .query_occupancy        = cachefiles_query_occupancy,
 };
 
 /*
index 11a22a3..0b742bd 100644 (file)
@@ -162,7 +162,7 @@ static void cifs_resolve_server(struct work_struct *work)
        mutex_unlock(&server->srv_mutex);
 }
 
-/**
+/*
  * Mark all sessions and tcons for reconnect.
  *
  * @server needs to be previously set to CifsNeedReconnect.
@@ -1831,13 +1831,9 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
                int i;
 
                for (i = 1; i < chan_count; i++) {
-                       /*
-                        * note: for now, we're okay accessing ses->chans
-                        * without chan_lock. But when chans can go away, we'll
-                        * need to introduce ref counting to make sure that chan
-                        * is not freed from under us.
-                        */
+                       spin_unlock(&ses->chan_lock);
                        cifs_put_tcp_session(ses->chans[i].server, 0);
+                       spin_lock(&ses->chan_lock);
                        ses->chans[i].server = NULL;
                }
        }
@@ -1981,6 +1977,19 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
                }
        }
 
+       ctx->workstation_name = kstrdup(ses->workstation_name, GFP_KERNEL);
+       if (!ctx->workstation_name) {
+               cifs_dbg(FYI, "Unable to allocate memory for workstation_name\n");
+               rc = -ENOMEM;
+               kfree(ctx->username);
+               ctx->username = NULL;
+               kfree_sensitive(ctx->password);
+               ctx->password = NULL;
+               kfree(ctx->domainname);
+               ctx->domainname = NULL;
+               goto out_key_put;
+       }
+
 out_key_put:
        up_read(&key->sem);
        key_put(key);
index 59334be..e7af802 100644 (file)
@@ -4269,8 +4269,6 @@ cifs_readv_complete(struct work_struct *work)
        for (i = 0; i < rdata->nr_pages; i++) {
                struct page *page = rdata->pages[i];
 
-               lru_cache_add(page);
-
                if (rdata->result == 0 ||
                    (rdata->result == -EAGAIN && got_bytes)) {
                        flush_dcache_page(page);
@@ -4278,12 +4276,12 @@ cifs_readv_complete(struct work_struct *work)
                } else
                        SetPageError(page);
 
-               unlock_page(page);
-
                if (rdata->result == 0 ||
                    (rdata->result == -EAGAIN && got_bytes))
                        cifs_readpage_to_fscache(rdata->mapping->host, page);
 
+               unlock_page(page);
+
                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
 
                put_page(page);
@@ -4340,7 +4338,6 @@ readpages_fill_pages(struct TCP_Server_Info *server,
                         * fill them until the writes are flushed.
                         */
                        zero_user(page, 0, PAGE_SIZE);
-                       lru_cache_add(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                        unlock_page(page);
@@ -4350,7 +4347,6 @@ readpages_fill_pages(struct TCP_Server_Info *server,
                        continue;
                } else {
                        /* no need to hold page hostage */
-                       lru_cache_add(page);
                        unlock_page(page);
                        put_page(page);
                        rdata->pages[i] = NULL;
@@ -4393,92 +4389,20 @@ cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
        return readpages_fill_pages(server, rdata, iter, iter->count);
 }
 
-static int
-readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
-                   unsigned int rsize, struct list_head *tmplist,
-                   unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
-{
-       struct page *page, *tpage;
-       unsigned int expected_index;
-       int rc;
-       gfp_t gfp = readahead_gfp_mask(mapping);
-
-       INIT_LIST_HEAD(tmplist);
-
-       page = lru_to_page(page_list);
-
-       /*
-        * Lock the page and put it in the cache. Since no one else
-        * should have access to this page, we're safe to simply set
-        * PG_locked without checking it first.
-        */
-       __SetPageLocked(page);
-       rc = add_to_page_cache_locked(page, mapping,
-                                     page->index, gfp);
-
-       /* give up if we can't stick it in the cache */
-       if (rc) {
-               __ClearPageLocked(page);
-               return rc;
-       }
-
-       /* move first page to the tmplist */
-       *offset = (loff_t)page->index << PAGE_SHIFT;
-       *bytes = PAGE_SIZE;
-       *nr_pages = 1;
-       list_move_tail(&page->lru, tmplist);
-
-       /* now try and add more pages onto the request */
-       expected_index = page->index + 1;
-       list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
-               /* discontinuity ? */
-               if (page->index != expected_index)
-                       break;
-
-               /* would this page push the read over the rsize? */
-               if (*bytes + PAGE_SIZE > rsize)
-                       break;
-
-               __SetPageLocked(page);
-               rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
-               if (rc) {
-                       __ClearPageLocked(page);
-                       break;
-               }
-               list_move_tail(&page->lru, tmplist);
-               (*bytes) += PAGE_SIZE;
-               expected_index++;
-               (*nr_pages)++;
-       }
-       return rc;
-}
-
-static int cifs_readpages(struct file *file, struct address_space *mapping,
-       struct list_head *page_list, unsigned num_pages)
+static void cifs_readahead(struct readahead_control *ractl)
 {
        int rc;
-       int err = 0;
-       struct list_head tmplist;
-       struct cifsFileInfo *open_file = file->private_data;
-       struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+       struct cifsFileInfo *open_file = ractl->file->private_data;
+       struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
        struct TCP_Server_Info *server;
        pid_t pid;
-       unsigned int xid;
+       unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
+       pgoff_t next_cached = ULONG_MAX;
+       bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
+               cifs_inode_cookie(ractl->mapping->host)->cache_priv;
+       bool check_cache = caching;
 
        xid = get_xid();
-       /*
-        * Reads as many pages as possible from fscache. Returns -ENOBUFS
-        * immediately if the cookie is negative
-        *
-        * After this point, every page in the list might have PG_fscache set,
-        * so we will need to clean that up off of every page we don't use.
-        */
-       rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
-                                        &num_pages);
-       if (rc == 0) {
-               free_xid(xid);
-               return rc;
-       }
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
@@ -4489,39 +4413,73 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
 
        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
-                __func__, file, mapping, num_pages);
+                __func__, ractl->file, ractl->mapping, readahead_count(ractl));
 
        /*
-        * Start with the page at end of list and move it to private
-        * list. Do the same with any following pages until we hit
-        * the rsize limit, hit an index discontinuity, or run out of
-        * pages. Issue the async read and then start the loop again
-        * until the list is empty.
-        *
-        * Note that list order is important. The page_list is in
-        * the order of declining indexes. When we put the pages in
-        * the rdata->pages, then we want them in increasing order.
+        * Chop the readahead request up into rsize-sized read requests.
         */
-       while (!list_empty(page_list) && !err) {
-               unsigned int i, nr_pages, bytes, rsize;
-               loff_t offset;
-               struct page *page, *tpage;
+       while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
+               unsigned int i, got, rsize;
+               struct page *page;
                struct cifs_readdata *rdata;
                struct cifs_credits credits_on_stack;
                struct cifs_credits *credits = &credits_on_stack;
+               pgoff_t index = readahead_index(ractl) + last_batch_size;
+
+               /*
+                * Find out if we have anything cached in the range of
+                * interest, and if so, where the next chunk of cached data is.
+                */
+               if (caching) {
+                       if (check_cache) {
+                               rc = cifs_fscache_query_occupancy(
+                                       ractl->mapping->host, index, nr_pages,
+                                       &next_cached, &cache_nr_pages);
+                               if (rc < 0)
+                                       caching = false;
+                               check_cache = false;
+                       }
+
+                       if (index == next_cached) {
+                               /*
+                                * TODO: Send a whole batch of pages to be read
+                                * by the cache.
+                                */
+                               page = readahead_page(ractl);
+                               last_batch_size = 1 << thp_order(page);
+                               if (cifs_readpage_from_fscache(ractl->mapping->host,
+                                                              page) < 0) {
+                                       /*
+                                        * TODO: Deal with cache read failure
+                                        * here, but for the moment, delegate
+                                        * that to readpage.
+                                        */
+                                       caching = false;
+                               }
+                               unlock_page(page);
+                               next_cached++;
+                               cache_nr_pages--;
+                               if (cache_nr_pages == 0)
+                                       check_cache = true;
+                               continue;
+                       }
+               }
 
                if (open_file->invalidHandle) {
                        rc = cifs_reopen_file(open_file, true);
-                       if (rc == -EAGAIN)
-                               continue;
-                       else if (rc)
+                       if (rc) {
+                               if (rc == -EAGAIN)
+                                       continue;
                                break;
+                       }
                }
 
                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;
+               nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
+               nr_pages = min_t(size_t, nr_pages, next_cached - index);
 
                /*
                 * Give up immediately if rsize is too small to read an entire
@@ -4529,16 +4487,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                 * reach this point however since we set ra_pages to 0 when the
                 * rsize is smaller than a cache page.
                 */
-               if (unlikely(rsize < PAGE_SIZE)) {
-                       add_credits_and_wake_if(server, credits, 0);
-                       free_xid(xid);
-                       return 0;
-               }
-
-               nr_pages = 0;
-               err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
-                                        &nr_pages, &offset, &bytes);
-               if (!nr_pages) {
+               if (unlikely(!nr_pages)) {
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }
@@ -4546,36 +4495,31 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
-                       list_for_each_entry_safe(page, tpage, &tmplist, lru) {
-                               list_del(&page->lru);
-                               lru_cache_add(page);
-                               unlock_page(page);
-                               put_page(page);
-                       }
-                       rc = -ENOMEM;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }
 
-               rdata->cfile = cifsFileInfo_get(open_file);
-               rdata->server = server;
-               rdata->mapping = mapping;
-               rdata->offset = offset;
-               rdata->bytes = bytes;
-               rdata->pid = pid;
-               rdata->pagesz = PAGE_SIZE;
-               rdata->tailsz = PAGE_SIZE;
+               got = __readahead_batch(ractl, rdata->pages, nr_pages);
+               if (got != nr_pages) {
+                       pr_warn("__readahead_batch() returned %u/%u\n",
+                               got, nr_pages);
+                       nr_pages = got;
+               }
+
+               rdata->nr_pages = nr_pages;
+               rdata->bytes    = readahead_batch_length(ractl);
+               rdata->cfile    = cifsFileInfo_get(open_file);
+               rdata->server   = server;
+               rdata->mapping  = ractl->mapping;
+               rdata->offset   = readahead_pos(ractl);
+               rdata->pid      = pid;
+               rdata->pagesz   = PAGE_SIZE;
+               rdata->tailsz   = PAGE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;
                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
-               rdata->credits = credits_on_stack;
-
-               list_for_each_entry_safe(page, tpage, &tmplist, lru) {
-                       list_del(&page->lru);
-                       rdata->pages[rdata->nr_pages++] = page;
-               }
+               rdata->credits  = credits_on_stack;
 
                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
-
                if (!rc) {
                        if (rdata->cfile->invalidHandle)
                                rc = -EAGAIN;
@@ -4587,7 +4531,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                        add_credits_and_wake_if(server, &rdata->credits, 0);
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
-                               lru_cache_add(page);
                                unlock_page(page);
                                put_page(page);
                        }
@@ -4597,10 +4540,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                }
 
                kref_put(&rdata->refcount, cifs_readdata_release);
+               last_batch_size = nr_pages;
        }
 
        free_xid(xid);
-       return rc;
 }
 
 /*
@@ -4924,7 +4867,7 @@ oplock_break_done:
  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
  * so this method should never be called.
  *
- * Direct IO is not yet supported in the cached mode. 
+ * Direct IO is not yet supported in the cached mode.
  */
 static ssize_t
 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
@@ -5006,7 +4949,7 @@ static int cifs_set_page_dirty(struct page *page)
 
 const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
-       .readpages = cifs_readpages,
+       .readahead = cifs_readahead,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
index efaac4d..33af72e 100644 (file)
@@ -134,37 +134,127 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
        }
 }
 
+static inline void fscache_end_operation(struct netfs_cache_resources *cres)
+{
+       const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+
+       if (ops)
+               ops->end_operation(cres);
+}
+
 /*
- * Retrieve a page from FS-Cache
+ * Fallback page reading interface.
  */
-int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+static int fscache_fallback_read_page(struct inode *inode, struct page *page)
 {
-       cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
-                __func__, CIFS_I(inode)->fscache, page, inode);
-       return -ENOBUFS; // Needs conversion to using netfslib
+       struct netfs_cache_resources cres;
+       struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+       struct iov_iter iter;
+       struct bio_vec bvec[1];
+       int ret;
+
+       memset(&cres, 0, sizeof(cres));
+       bvec[0].bv_page         = page;
+       bvec[0].bv_offset       = 0;
+       bvec[0].bv_len          = PAGE_SIZE;
+       iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+
+       ret = fscache_begin_read_operation(&cres, cookie);
+       if (ret < 0)
+               return ret;
+
+       ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL,
+                          NULL, NULL);
+       fscache_end_operation(&cres);
+       return ret;
 }
 
 /*
- * Retrieve a set of pages from FS-Cache
+ * Fallback page writing interface.
  */
-int __cifs_readpages_from_fscache(struct inode *inode,
-                               struct address_space *mapping,
-                               struct list_head *pages,
-                               unsigned *nr_pages)
+static int fscache_fallback_write_page(struct inode *inode, struct page *page,
+                                      bool no_space_allocated_yet)
 {
-       cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
-                __func__, CIFS_I(inode)->fscache, *nr_pages, inode);
-       return -ENOBUFS; // Needs conversion to using netfslib
+       struct netfs_cache_resources cres;
+       struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+       struct iov_iter iter;
+       struct bio_vec bvec[1];
+       loff_t start = page_offset(page);
+       size_t len = PAGE_SIZE;
+       int ret;
+
+       memset(&cres, 0, sizeof(cres));
+       bvec[0].bv_page         = page;
+       bvec[0].bv_offset       = 0;
+       bvec[0].bv_len          = PAGE_SIZE;
+       iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+
+       ret = fscache_begin_write_operation(&cres, cookie);
+       if (ret < 0)
+               return ret;
+
+       ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+                                     no_space_allocated_yet);
+       if (ret == 0)
+               ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL);
+       fscache_end_operation(&cres);
+       return ret;
 }
 
-void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+/*
+ * Retrieve a page from FS-Cache
+ */
+int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
 {
-       struct cifsInodeInfo *cifsi = CIFS_I(inode);
+       int ret;
 
-       WARN_ON(!cifsi->fscache);
+       cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
+                __func__, cifs_inode_cookie(inode), page, inode);
 
+       ret = fscache_fallback_read_page(inode, page);
+       if (ret < 0)
+               return ret;
+
+       /* Read completed synchronously */
+       SetPageUptodate(page);
+       return 0;
+}
+
+void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
        cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
-                __func__, cifsi->fscache, page, inode);
+                __func__, cifs_inode_cookie(inode), page, inode);
+
+       fscache_fallback_write_page(inode, page, true);
+}
+
+/*
+ * Query which part of [first, first + nr_pages) the cache holds data for.
+ */
+int __cifs_fscache_query_occupancy(struct inode *inode,
+                                  pgoff_t first, unsigned int nr_pages,
+                                  pgoff_t *_data_first,
+                                  unsigned int *_data_nr_pages)
+{
+       struct netfs_cache_resources cres;
+       struct fscache_cookie *cookie = cifs_inode_cookie(inode);
+       loff_t start, data_start;
+       size_t len, data_len;
+       int ret;
 
-       // Needs conversion to using netfslib
+       ret = fscache_begin_read_operation(&cres, cookie);
+       if (ret < 0)
+               return ret;
+
+       start = (loff_t)first * PAGE_SIZE;      /* widen before multiplying */
+       len = nr_pages * PAGE_SIZE;
+       ret = cres.ops->query_occupancy(&cres, start, len, PAGE_SIZE,
+                                       &data_start, &data_len);
+       if (ret == 0) {
+               *_data_first = data_start / PAGE_SIZE;
+               *_data_nr_pages = data_len / PAGE_SIZE; /* extent found, not extent asked for */
+       }
+
+       fscache_end_operation(&cres);
+       return ret;
 }
index c6ca49a..5512990 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef _CIFS_FSCACHE_H
 #define _CIFS_FSCACHE_H
 
+#include <linux/swap.h>
 #include <linux/fscache.h>
 
 #include "cifsglob.h"
@@ -58,14 +59,6 @@ void cifs_fscache_fill_coherency(struct inode *inode,
 }
 
 
-extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
-extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
-extern int __cifs_readpages_from_fscache(struct inode *,
-                                        struct address_space *,
-                                        struct list_head *,
-                                        unsigned *);
-extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-
 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
 {
        return CIFS_I(inode)->fscache;
@@ -80,33 +73,52 @@ static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags
                           i_size_read(inode), flags);
 }
 
-static inline int cifs_readpage_from_fscache(struct inode *inode,
-                                            struct page *page)
-{
-       if (CIFS_I(inode)->fscache)
-               return __cifs_readpage_from_fscache(inode, page);
+extern int __cifs_fscache_query_occupancy(struct inode *inode,
+                                         pgoff_t first, unsigned int nr_pages,
+                                         pgoff_t *_data_first,
+                                         unsigned int *_data_nr_pages);
 
-       return -ENOBUFS;
+static inline int cifs_fscache_query_occupancy(struct inode *inode,
+                                              pgoff_t first, unsigned int nr_pages,
+                                              pgoff_t *_data_first,
+                                              unsigned int *_data_nr_pages)
+{
+       if (!cifs_inode_cookie(inode))
+               return -ENOBUFS;
+       return __cifs_fscache_query_occupancy(inode, first, nr_pages,
+                                             _data_first, _data_nr_pages);
 }
 
-static inline int cifs_readpages_from_fscache(struct inode *inode,
-                                             struct address_space *mapping,
-                                             struct list_head *pages,
-                                             unsigned *nr_pages)
+extern int __cifs_readpage_from_fscache(struct inode *pinode, struct page *ppage);
+extern void __cifs_readpage_to_fscache(struct inode *pinode, struct page *ppage);
+
+
+static inline int cifs_readpage_from_fscache(struct inode *inode,
+                                            struct page *page)
 {
-       if (CIFS_I(inode)->fscache)
-               return __cifs_readpages_from_fscache(inode, mapping, pages,
-                                                    nr_pages);
+       if (cifs_inode_cookie(inode))
+               return __cifs_readpage_from_fscache(inode, page);
        return -ENOBUFS;
 }
 
 static inline void cifs_readpage_to_fscache(struct inode *inode,
                                            struct page *page)
 {
-       if (PageFsCache(page))
+       if (cifs_inode_cookie(inode))
                __cifs_readpage_to_fscache(inode, page);
 }
 
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+       if (PageFsCache(page)) {
+               if (current_is_kswapd() || !(gfp & __GFP_FS))
+                       return false;
+               wait_on_page_fscache(page);
+               fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
+       }
+       return true;
+}
+
 #else /* CONFIG_CIFS_FSCACHE */
 static inline
 void cifs_fscache_fill_coherency(struct inode *inode,
@@ -123,22 +135,29 @@ static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool upd
 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
 static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
 
-static inline int
-cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+static inline int cifs_fscache_query_occupancy(struct inode *inode,
+                                              pgoff_t first, unsigned int nr_pages,
+                                              pgoff_t *_data_first,
+                                              unsigned int *_data_nr_pages)
 {
+       *_data_first = ULONG_MAX;
+       *_data_nr_pages = 0;
        return -ENOBUFS;
 }
 
-static inline int cifs_readpages_from_fscache(struct inode *inode,
-                                             struct address_space *mapping,
-                                             struct list_head *pages,
-                                             unsigned *nr_pages)
+static inline int
+cifs_readpage_from_fscache(struct inode *inode, struct page *page)
 {
        return -ENOBUFS;
 }
 
-static inline void cifs_readpage_to_fscache(struct inode *inode,
-                       struct page *page) {}
+static inline
+void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {}
+
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+       return true; /* No fscache built in: page may always be released */
+}
 
 #endif /* CONFIG_CIFS_FSCACHE */
 
index 7d8b3ce..60d853c 100644 (file)
@@ -83,6 +83,7 @@ static void cifs_set_ops(struct inode *inode)
 static void
 cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
 {
+       struct cifs_fscache_inode_coherency_data cd;
        struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 
        cifs_dbg(FYI, "%s: revalidating inode %llu\n",
@@ -113,6 +114,9 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
        cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n",
                 __func__, cifs_i->uniqueid);
        set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags);
+       /* Invalidate fscache cookie */
+       cifs_fscache_fill_coherency(&cifs_i->vfs_inode, &cd);
+       fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
 }
 
 /*
@@ -2261,8 +2265,6 @@ cifs_dentry_needs_reval(struct dentry *dentry)
 int
 cifs_invalidate_mapping(struct inode *inode)
 {
-       struct cifs_fscache_inode_coherency_data cd;
-       struct cifsInodeInfo *cifsi = CIFS_I(inode);
        int rc = 0;
 
        if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
@@ -2272,8 +2274,6 @@ cifs_invalidate_mapping(struct inode *inode)
                                 __func__, inode);
        }
 
-       cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
-       fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
        return rc;
 }
 
index dc3b16d..5723d50 100644 (file)
@@ -713,7 +713,11 @@ static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
        else
                sz += sizeof(__le16);
 
-       sz += sizeof(__le16) * strnlen(ses->workstation_name, CIFS_MAX_WORKSTATION_LEN);
+       if (ses->workstation_name)
+               sz += sizeof(__le16) * strnlen(ses->workstation_name,
+                       CIFS_MAX_WORKSTATION_LEN);
+       else
+               sz += sizeof(__le16);
 
        return sz;
 }
index fa7ddb7..226a57c 100644 (file)
@@ -252,12 +252,10 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                return ret;
 
        iomap->offset = map.m_la;
-       if (flags & IOMAP_DAX) {
+       if (flags & IOMAP_DAX)
                iomap->dax_dev = mdev.m_daxdev;
-               iomap->offset += mdev.m_dax_part_off;
-       } else {
+       else
                iomap->bdev = mdev.m_bdev;
-       }
        iomap->length = map.m_llen;
        iomap->flags = 0;
        iomap->private = NULL;
@@ -284,6 +282,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
        } else {
                iomap->type = IOMAP_MAPPED;
                iomap->addr = mdev.m_pa;
+               if (flags & IOMAP_DAX)
+                       iomap->addr += mdev.m_dax_part_off;
        }
        return 0;
 }
index 498b766..423bc1a 100644 (file)
@@ -810,68 +810,11 @@ static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
        return false;
 }
 
-static void z_erofs_decompressqueue_work(struct work_struct *work);
-static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
-                                      bool sync, int bios)
-{
-       struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
-
-       /* wake up the caller thread for sync decompression */
-       if (sync) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&io->u.wait.lock, flags);
-               if (!atomic_add_return(bios, &io->pending_bios))
-                       wake_up_locked(&io->u.wait);
-               spin_unlock_irqrestore(&io->u.wait.lock, flags);
-               return;
-       }
-
-       if (atomic_add_return(bios, &io->pending_bios))
-               return;
-       /* Use workqueue and sync decompression for atomic contexts only */
-       if (in_atomic() || irqs_disabled()) {
-               queue_work(z_erofs_workqueue, &io->u.work);
-               /* enable sync decompression for readahead */
-               if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
-                       sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
-               return;
-       }
-       z_erofs_decompressqueue_work(&io->u.work);
-}
-
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
        return !page->mapping && !z_erofs_is_shortlived_page(page);
 }
 
-static void z_erofs_decompressqueue_endio(struct bio *bio)
-{
-       tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
-       struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
-       blk_status_t err = bio->bi_status;
-       struct bio_vec *bvec;
-       struct bvec_iter_all iter_all;
-
-       bio_for_each_segment_all(bvec, bio, iter_all) {
-               struct page *page = bvec->bv_page;
-
-               DBG_BUGON(PageUptodate(page));
-               DBG_BUGON(z_erofs_page_is_invalidated(page));
-
-               if (err)
-                       SetPageError(page);
-
-               if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
-                       if (!err)
-                               SetPageUptodate(page);
-                       unlock_page(page);
-               }
-       }
-       z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
-       bio_put(bio);
-}
-
 static int z_erofs_decompress_pcluster(struct super_block *sb,
                                       struct z_erofs_pcluster *pcl,
                                       struct page **pagepool)
@@ -1123,6 +1066,35 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
        kvfree(bgq);
 }
 
+static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+                                      bool sync, int bios)
+{
+       struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
+       /* wake up the caller thread for sync decompression */
+       if (sync) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&io->u.wait.lock, flags);
+               if (!atomic_add_return(bios, &io->pending_bios))
+                       wake_up_locked(&io->u.wait);
+               spin_unlock_irqrestore(&io->u.wait.lock, flags);
+               return;
+       }
+
+       if (atomic_add_return(bios, &io->pending_bios))
+               return;
+       /* Use workqueue and sync decompression for atomic contexts only */
+       if (in_atomic() || irqs_disabled()) {
+               queue_work(z_erofs_workqueue, &io->u.work);
+               /* enable sync decompression for readahead */
+               if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
+                       sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
+               return;
+       }
+       z_erofs_decompressqueue_work(&io->u.work);
+}
+
 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
                                               unsigned int nr,
                                               struct page **pagepool,
@@ -1300,6 +1272,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
        qtail[JQ_BYPASS] = &pcl->next;
 }
 
+static void z_erofs_decompressqueue_endio(struct bio *bio)
+{
+       tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
+       struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+       blk_status_t err = bio->bi_status;
+       struct bio_vec *bvec;
+       struct bvec_iter_all iter_all;
+
+       bio_for_each_segment_all(bvec, bio, iter_all) {
+               struct page *page = bvec->bv_page;
+
+               DBG_BUGON(PageUptodate(page));
+               DBG_BUGON(z_erofs_page_is_invalidated(page));
+
+               if (err)
+                       SetPageError(page);
+
+               if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
+                       if (!err)
+                               SetPageUptodate(page);
+                       unlock_page(page);
+               }
+       }
+       z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
+       bio_put(bio);
+}
+
 static void z_erofs_submit_queue(struct super_block *sb,
                                 struct z_erofs_decompress_frontend *f,
                                 struct page **pagepool,
index 18d7fd1..361b1d6 100644 (file)
@@ -630,6 +630,13 @@ static int z_erofs_do_map_blocks(struct inode *inode,
                if (endoff >= m.clusterofs) {
                        m.headtype = m.type;
                        map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+                       /*
+                        * For ztailpacking files, in order to inline data more
+                        * effectively, special EOF lclusters are now supported
+                        * which can have three parts at most.
+                        */
+                       if (ztailpacking && end > inode->i_size)
+                               end = inode->i_size;
                        break;
                }
                /* m.lcn should be >= 1 if endoff < m.clusterofs */
index 5a35768..57e82e2 100644 (file)
@@ -139,7 +139,7 @@ fail:
 /*
  * Inode operation get_posix_acl().
  *
- * inode->i_mutex: don't care
+ * inode->i_rwsem: don't care
  */
 struct posix_acl *
 ext4_get_acl(struct inode *inode, int type, bool rcu)
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu)
 /*
  * Set the access or default ACL of an inode.
  *
- * inode->i_mutex: down unless called from ext4_new_inode
+ * inode->i_rwsem: down unless called from ext4_new_inode
  */
 static int
 __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -271,8 +271,8 @@ out_stop:
 /*
  * Initialize the ACLs of a new inode. Called from ext4_new_inode.
  *
- * dir->i_mutex: down
- * inode->i_mutex: up (access to inode is still exclusive)
+ * dir->i_rwsem: down
+ * inode->i_rwsem: up (access to inode is still exclusive)
  */
 int
 ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
index 242e74c..bcd3b9b 100644 (file)
@@ -1028,7 +1028,7 @@ struct ext4_inode_info {
 
        /*
         * Extended attributes can be read independently of the main file
-        * data. Taking i_mutex even when reading would cause contention
+        * data. Taking i_rwsem even when reading would cause contention
         * between readers of EAs and writers of regular file data, so
         * instead we synchronize on xattr_sem when reading or changing
         * EAs.
@@ -1750,6 +1750,7 @@ struct ext4_sb_info {
        spinlock_t s_fc_lock;
        struct buffer_head *s_fc_bh;
        struct ext4_fc_stats s_fc_stats;
+       tid_t s_fc_ineligible_tid;
 #ifdef CONFIG_EXT4_DEBUG
        int s_fc_debug_max_replay;
 #endif
@@ -1795,10 +1796,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 enum {
        EXT4_MF_MNTDIR_SAMPLED,
        EXT4_MF_FS_ABORTED,     /* Fatal error detected */
-       EXT4_MF_FC_INELIGIBLE,  /* Fast commit ineligible */
-       EXT4_MF_FC_COMMITTING   /* File system underoing a fast
-                                * commit.
-                                */
+       EXT4_MF_FC_INELIGIBLE   /* Fast commit ineligible */
 };
 
 static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
@@ -2926,7 +2924,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
                            struct dentry *dentry);
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
 void ext4_fc_start_update(struct inode *inode);
 void ext4_fc_stop_update(struct inode *inode);
 void ext4_fc_del(struct inode *inode);
@@ -2935,6 +2933,9 @@ void ext4_fc_replay_cleanup(struct super_block *sb);
 int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
 int __init ext4_fc_init_dentry_cache(void);
 void ext4_fc_destroy_dentry_cache(void);
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+                          ext4_lblk_t lblk, ext4_fsblk_t pblk,
+                          int len, int replay);
 
 /* mballoc.c */
 extern const struct seq_operations ext4_mb_seq_groups_ops;
@@ -3407,7 +3408,7 @@ do {                                                              \
 #define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
-/* Update i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_disksize. Requires i_rwsem to avoid races with truncate */
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
        WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
@@ -3418,7 +3419,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
        up_write(&EXT4_I(inode)->i_data_sem);
 }
 
-/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */
 static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 {
        int changed = 0;
index 0e4fa64..db2ae4a 100644 (file)
@@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
 /*
  * This function controls whether or not we should try to go down the
  * dioread_nolock code paths, which makes it safe to avoid taking
- * i_mutex for direct I/O reads.  This only works for extent-based
+ * i_rwsem for direct I/O reads.  This only works for extent-based
  * files, and it doesn't work if data journaling is enabled, since the
  * dioread_nolock code uses b_private to pass information back to the
  * I/O completion handler, and this conflicts with the jbd's use of
index 74c91da..c0f3f83 100644 (file)
@@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
         * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
-        * i_mutex. So we can safely drop the i_data_sem here.
+        * i_rwsem. So we can safely drop the i_data_sem here.
         */
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        ext4_discard_preallocations(inode, 0);
@@ -4572,7 +4572,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
        flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
        /* Preallocate the range including the unaligned edges */
@@ -4738,7 +4738,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
@@ -5334,7 +5334,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode, 0);
@@ -5474,7 +5474,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
        /* Expand file to avoid data loss if there is error while shifting */
        inode->i_size += len;
@@ -5571,7 +5571,7 @@ out_mutex:
  * stuff such as page-cache locking consistency, bh mapping consistency or
  * extent's data copying must be performed by caller.
  * Locking:
- *             i_mutex is held for both inodes
+ *             i_rwsem is held for both inodes
  *             i_data_sem is locked for write for both inodes
  * Assumptions:
  *             All pages from requested range are locked for both inodes
@@ -6091,11 +6091,15 @@ int ext4_ext_clear_bb(struct inode *inode)
 
                                        ext4_mb_mark_bb(inode->i_sb,
                                                        path[j].p_block, 1, 0);
+                                       ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+                                                       0, path[j].p_block, 1, 1);
                                }
                                ext4_ext_drop_refs(path);
                                kfree(path);
                        }
                        ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
+                       ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+                                       map.m_lblk, map.m_pblk, map.m_len, 1);
                }
                cur = cur + map.m_len;
        }
index 5ae8026..7964ee3 100644 (file)
@@ -300,18 +300,32 @@ restart:
 }
 
 /*
- * Mark file system as fast commit ineligible. This means that next commit
- * operation would result in a full jbd2 commit.
+ * Mark file system as fast commit ineligible, and record latest
+ * ineligible transaction tid. This means until the recorded
+ * transaction, commit operation would result in a full jbd2 commit.
  */
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+       tid_t tid;
 
        if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
            (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                return;
 
        ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       if (handle && !IS_ERR(handle))
+               tid = handle->h_transaction->t_tid;
+       else {
+               read_lock(&sbi->s_journal->j_state_lock);
+               tid = sbi->s_journal->j_running_transaction ?
+                               sbi->s_journal->j_running_transaction->t_tid : 0;
+               read_unlock(&sbi->s_journal->j_state_lock);
+       }
+       spin_lock(&sbi->s_fc_lock);
+       if (sbi->s_fc_ineligible_tid < tid)
+               sbi->s_fc_ineligible_tid = tid;
+       spin_unlock(&sbi->s_fc_lock);
        WARN_ON(reason >= EXT4_FC_REASON_MAX);
        sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 }
@@ -361,7 +375,8 @@ static int ext4_fc_track_template(
        spin_lock(&sbi->s_fc_lock);
        if (list_empty(&EXT4_I(inode)->i_fc_list))
                list_add_tail(&EXT4_I(inode)->i_fc_list,
-                               (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+                               (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+                                sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
                                &sbi->s_fc_q[FC_Q_STAGING] :
                                &sbi->s_fc_q[FC_Q_MAIN]);
        spin_unlock(&sbi->s_fc_lock);
@@ -387,7 +402,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        mutex_unlock(&ei->i_fc_lock);
        node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
        if (!node) {
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
                mutex_lock(&ei->i_fc_lock);
                return -ENOMEM;
        }
@@ -400,7 +415,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
                if (!node->fcd_name.name) {
                        kmem_cache_free(ext4_fc_dentry_cachep, node);
                        ext4_fc_mark_ineligible(inode->i_sb,
-                               EXT4_FC_REASON_NOMEM);
+                               EXT4_FC_REASON_NOMEM, NULL);
                        mutex_lock(&ei->i_fc_lock);
                        return -ENOMEM;
                }
@@ -414,7 +429,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        node->fcd_name.len = dentry->d_name.len;
 
        spin_lock(&sbi->s_fc_lock);
-       if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+       if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+               sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
                list_add_tail(&node->fcd_list,
                                &sbi->s_fc_dentry_q[FC_Q_STAGING]);
        else
@@ -502,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 
        if (ext4_should_journal_data(inode)) {
                ext4_fc_mark_ineligible(inode->i_sb,
-                                       EXT4_FC_REASON_INODE_JOURNAL_DATA);
+                                       EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
                return;
        }
 
@@ -879,7 +895,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
        int ret = 0;
 
        spin_lock(&sbi->s_fc_lock);
-       ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
        list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
                while (atomic_read(&ei->i_fc_updates)) {
@@ -1179,7 +1194,7 @@ fallback:
  * Fast commit cleanup routine. This is called after every fast commit and
  * full commit. full is true if we are called after a full commit.
  */
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 {
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1197,7 +1212,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
                list_del_init(&iter->i_fc_list);
                ext4_clear_inode_state(&iter->vfs_inode,
                                       EXT4_STATE_FC_COMMITTING);
-               ext4_fc_reset_inode(&iter->vfs_inode);
+               if (iter->i_sync_tid <= tid)
+                       ext4_fc_reset_inode(&iter->vfs_inode);
                /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
                smp_mb();
 #if (BITS_PER_LONG < 64)
@@ -1226,8 +1242,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
        list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
                                &sbi->s_fc_q[FC_Q_MAIN]);
 
-       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
-       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       if (tid >= sbi->s_fc_ineligible_tid) {
+               sbi->s_fc_ineligible_tid = 0;
+               ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       }
 
        if (full)
                sbi->s_fc_bytes = 0;
@@ -1392,14 +1410,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
                if (state->fc_modified_inodes[i] == ino)
                        return 0;
        if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
-               state->fc_modified_inodes_size +=
-                       EXT4_FC_REPLAY_REALLOC_INCREMENT;
                state->fc_modified_inodes = krealloc(
-                                       state->fc_modified_inodes, sizeof(int) *
-                                       state->fc_modified_inodes_size,
-                                       GFP_KERNEL);
+                               state->fc_modified_inodes,
+                               sizeof(int) * (state->fc_modified_inodes_size +
+                               EXT4_FC_REPLAY_REALLOC_INCREMENT),
+                               GFP_KERNEL);
                if (!state->fc_modified_inodes)
                        return -ENOMEM;
+               state->fc_modified_inodes_size +=
+                       EXT4_FC_REPLAY_REALLOC_INCREMENT;
        }
        state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
        return 0;
@@ -1431,7 +1450,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
        }
        inode = NULL;
 
-       ext4_fc_record_modified_inode(sb, ino);
+       ret = ext4_fc_record_modified_inode(sb, ino);
+       if (ret)
+               goto out;
 
        raw_fc_inode = (struct ext4_inode *)
                (val + offsetof(struct ext4_fc_inode, fc_raw_inode));
@@ -1563,16 +1584,23 @@ out:
 }
 
 /*
- * Record physical disk regions which are in use as per fast commit area. Our
- * simple replay phase allocator excludes these regions from allocation.
+ * Record physical disk regions which are in use as per fast commit area,
+ * and used by inodes during replay phase. Our simple replay phase
+ * allocator excludes these regions from allocation.
  */
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
-               ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+               ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
 {
        struct ext4_fc_replay_state *state;
        struct ext4_fc_alloc_region *region;
 
        state = &EXT4_SB(sb)->s_fc_replay_state;
+       /*
+        * During the replay phase, fc_regions_valid may differ from
+        * fc_regions_used; resync fc_regions_used before adding a region.
+        */
+       if (replay && state->fc_regions_used != state->fc_regions_valid)
+               state->fc_regions_used = state->fc_regions_valid;
        if (state->fc_regions_used == state->fc_regions_size) {
                state->fc_regions_size +=
                        EXT4_FC_REPLAY_REALLOC_INCREMENT;
@@ -1590,6 +1618,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
        region->pblk = pblk;
        region->len = len;
 
+       if (replay)
+               state->fc_regions_valid++;
+
        return 0;
 }
 
@@ -1621,6 +1652,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
        }
 
        ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+       if (ret)
+               goto out;
 
        start = le32_to_cpu(ex->ee_block);
        start_pblk = ext4_ext_pblock(ex);
@@ -1638,18 +1671,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
                map.m_pblk = 0;
                ret = ext4_map_blocks(NULL, inode, &map, 0);
 
-               if (ret < 0) {
-                       iput(inode);
-                       return 0;
-               }
+               if (ret < 0)
+                       goto out;
 
                if (ret == 0) {
                        /* Range is not mapped */
                        path = ext4_find_extent(inode, cur, NULL, 0);
-                       if (IS_ERR(path)) {
-                               iput(inode);
-                               return 0;
-                       }
+                       if (IS_ERR(path))
+                               goto out;
                        memset(&newex, 0, sizeof(newex));
                        newex.ee_block = cpu_to_le32(cur);
                        ext4_ext_store_pblock(
@@ -1663,10 +1692,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
                        up_write((&EXT4_I(inode)->i_data_sem));
                        ext4_ext_drop_refs(path);
                        kfree(path);
-                       if (ret) {
-                               iput(inode);
-                               return 0;
-                       }
+                       if (ret)
+                               goto out;
                        goto next;
                }
 
@@ -1679,10 +1706,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
                        ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
                                        ext4_ext_is_unwritten(ex),
                                        start_pblk + cur - start);
-                       if (ret) {
-                               iput(inode);
-                               return 0;
-                       }
+                       if (ret)
+                               goto out;
                        /*
                         * Mark the old blocks as free since they aren't used
                         * anymore. We maintain an array of all the modified
@@ -1702,10 +1727,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
                        ext4_ext_is_unwritten(ex), map.m_pblk);
                ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
                                        ext4_ext_is_unwritten(ex), map.m_pblk);
-               if (ret) {
-                       iput(inode);
-                       return 0;
-               }
+               if (ret)
+                       goto out;
                /*
                 * We may have split the extent tree while toggling the state.
                 * Try to shrink the extent tree now.
@@ -1717,6 +1740,7 @@ next:
        }
        ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
                                        sb->s_blocksize_bits);
+out:
        iput(inode);
        return 0;
 }
@@ -1746,6 +1770,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
        }
 
        ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+       if (ret)
+               goto out;
 
        jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
                        inode->i_ino, le32_to_cpu(lrange.fc_lblk),
@@ -1755,10 +1781,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
                map.m_len = remaining;
 
                ret = ext4_map_blocks(NULL, inode, &map, 0);
-               if (ret < 0) {
-                       iput(inode);
-                       return 0;
-               }
+               if (ret < 0)
+                       goto out;
                if (ret > 0) {
                        remaining -= ret;
                        cur += ret;
@@ -1770,18 +1794,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
        }
 
        down_write(&EXT4_I(inode)->i_data_sem);
-       ret = ext4_ext_remove_space(inode, lrange.fc_lblk,
-                               lrange.fc_lblk + lrange.fc_len - 1);
+       ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
+                               le32_to_cpu(lrange.fc_lblk) +
+                               le32_to_cpu(lrange.fc_len) - 1);
        up_write(&EXT4_I(inode)->i_data_sem);
-       if (ret) {
-               iput(inode);
-               return 0;
-       }
+       if (ret)
+               goto out;
        ext4_ext_replay_shrink_inode(inode,
                i_size_read(inode) >> sb->s_blocksize_bits);
        ext4_mark_inode_dirty(NULL, inode);
+out:
        iput(inode);
-
        return 0;
 }
 
@@ -1937,7 +1960,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
                        ret = ext4_fc_record_regions(sb,
                                le32_to_cpu(ext.fc_ino),
                                le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
-                               ext4_ext_get_actual_len(ex));
+                               ext4_ext_get_actual_len(ex), 0);
                        if (ret < 0)
                                break;
                        ret = JBD2_FC_REPLAY_CONTINUE;
index 89efa78..07a8c75 100644 (file)
@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
         * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
-        * i_mutex. So we can safely drop the i_data_sem here.
+        * i_rwsem. So we can safely drop the i_data_sem here.
         */
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        ext4_discard_preallocations(inode, 0);
index 635bcf6..e429418 100644 (file)
@@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
                                    struct page **pagep,
                                    void **fsdata)
 {
-       int ret, inline_size;
+       int ret;
        handle_t *handle;
        struct page *page;
        struct ext4_iloc iloc;
@@ -928,14 +928,9 @@ retry_journal:
                goto out;
        }
 
-       inline_size = ext4_get_max_inline_size(inode);
-
-       ret = -ENOSPC;
-       if (inline_size >= pos + len) {
-               ret = ext4_prepare_inline_data(handle, inode, pos + len);
-               if (ret && ret != -ENOSPC)
-                       goto out_journal;
-       }
+       ret = ext4_prepare_inline_data(handle, inode, pos + len);
+       if (ret && ret != -ENOSPC)
+               goto out_journal;
 
        /*
         * We cannot recurse into the filesystem as the transaction
@@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
                                     struct ext4_iloc *iloc,
                                     void *buf, int inline_size)
 {
-       ext4_create_inline_data(handle, inode, inline_size);
+       int ret;
+
+       ret = ext4_create_inline_data(handle, inode, inline_size);
+       if (ret) {
+               ext4_msg(inode->i_sb, KERN_EMERG,
+                       "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
+                       inode->i_ino, ret);
+               return;
+       }
        ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
        ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 }
index 5f79d26..01c9e4f 100644 (file)
@@ -338,7 +338,7 @@ stop_handle:
        return;
 no_delete:
        if (!list_empty(&EXT4_I(inode)->i_fc_list))
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
 }
 
@@ -1224,7 +1224,7 @@ retry_journal:
                /*
                 * __block_write_begin may have instantiated a few blocks
                 * outside i_size.  Trim these off again. Don't need
-                * i_size_read because we hold i_mutex.
+                * i_size_read because we hold i_rwsem.
                 *
                 * Add inode to orphan list in case we crash before
                 * truncate finishes
@@ -3979,7 +3979,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
        }
 
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
 
        /*
@@ -4129,7 +4129,7 @@ int ext4_truncate(struct inode *inode)
        /*
         * There is a possibility that we're either freeing the inode
         * or it's a completely new inode. In those cases we might not
-        * have i_mutex locked because it's not necessary.
+        * have i_rwsem locked because it's not necessary.
         */
        if (!(inode->i_state & (I_NEW|I_FREEING)))
                WARN_ON(!inode_is_locked(inode));
@@ -5271,7 +5271,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
  * transaction are already on disk (truncate waits for pages under
  * writeback).
  *
- * Called with inode->i_mutex down.
+ * Called with inode->i_rwsem down.
  */
 int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
                 struct iattr *attr)
@@ -5983,7 +5983,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                return PTR_ERR(handle);
 
        ext4_fc_mark_ineligible(inode->i_sb,
-               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
        err = ext4_mark_inode_dirty(handle, inode);
        ext4_handle_sync(handle);
        ext4_journal_stop(handle);
index bbbedf2..a8022c2 100644 (file)
@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
                err = -EINVAL;
                goto err_out;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
 
        /* Protect extent tree against block allocations via delalloc */
        ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1373,7 +1373,7 @@ mext_out:
 
                err = ext4_resize_fs(sb, n_blocks_count);
                if (EXT4_SB(sb)->s_journal) {
-                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
                        jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
                        err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
index 9f86dd9..67ac95c 100644 (file)
@@ -5753,7 +5753,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
        struct super_block *sb = ar->inode->i_sb;
        ext4_group_t group;
        ext4_grpblk_t blkoff;
-       int i = sb->s_blocksize;
+       ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+       ext4_grpblk_t i = 0;
        ext4_fsblk_t goal, block;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
@@ -5775,19 +5776,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
                ext4_get_group_no_and_offset(sb,
                        max(ext4_group_first_block_no(sb, group), goal),
                        NULL, &blkoff);
-               i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
+               while (1) {
+                       i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
                                                blkoff);
+                       if (i >= max)
+                               break;
+                       if (ext4_fc_replay_check_excluded(sb,
+                               ext4_group_first_block_no(sb, group) + i)) {
+                               blkoff = i + 1;
+                       } else
+                               break;
+               }
                brelse(bitmap_bh);
-               if (i >= sb->s_blocksize)
-                       continue;
-               if (ext4_fc_replay_check_excluded(sb,
-                       ext4_group_first_block_no(sb, group) + i))
-                       continue;
-               break;
+               if (i < max)
+                       break;
        }
 
-       if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
+       if (group >= ext4_get_groups_count(sb) || i >= max) {
+               *errp = -ENOSPC;
                return 0;
+       }
 
        block = ext4_group_first_block_no(sb, group) + i;
        ext4_mb_mark_bb(sb, block, 1, 1);
index ff8916e..7a5353a 100644 (file)
@@ -485,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode)
         * when we add extents we extent the journal
         */
        /*
-        * Even though we take i_mutex we can still cause block
+        * Even though we take i_rwsem we can still cause block
         * allocation via mmap write to holes. If we have allocated
         * new blocks we fail migrate.  New block allocation will
         * clear EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
index 269d2d0..8cf0a92 100644 (file)
@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
                 * dirents in directories.
                 */
                ext4_fc_mark_ineligible(old.inode->i_sb,
-                       EXT4_FC_REASON_RENAME_DIR);
+                       EXT4_FC_REASON_RENAME_DIR, handle);
        } else {
                if (new.inode)
                        ext4_fc_track_unlink(handle, new.dentry);
@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlikely(retval))
                goto end_rename;
        ext4_fc_mark_ineligible(new.inode->i_sb,
-                               EXT4_FC_REASON_CROSS_RENAME);
+                               EXT4_FC_REASON_CROSS_RENAME, handle);
        if (old.dir_bh) {
                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
                if (retval)
index 53adc8f..7de0612 100644 (file)
@@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
  * At filesystem recovery time, we walk this list deleting unlinked
  * inodes and truncating linked inodes in ext4_orphan_cleanup().
  *
- * Orphan list manipulation functions must be called under i_mutex unless
+ * Orphan list manipulation functions must be called under i_rwsem unless
  * we are just creating the inode or deleting it.
  */
 int ext4_orphan_add(handle_t *handle, struct inode *inode)
@@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        /*
         * Orphan handling is only valid for files with data blocks
         * being truncated, or files being unlinked. Note that we either
-        * hold i_mutex, or the inode can not be referenced from outside,
+        * hold i_rwsem, or the inode can not be referenced from outside,
         * so i_nlink should not be bumped due to race
         */
        ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
index ce60b50..c5021ca 100644 (file)
@@ -5082,7 +5082,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
        INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
        sbi->s_fc_bytes = 0;
        ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
-       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+       sbi->s_fc_ineligible_tid = 0;
        spin_lock_init(&sbi->s_fc_lock);
        memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
        sbi->s_fc_replay_state.fc_regions = NULL;
@@ -5540,7 +5540,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
 
        sbi = ext4_alloc_sbi(sb);
        if (!sbi)
-               ret = -ENOMEM;
+               return -ENOMEM;
 
        fc->s_fs_info = sbi;
 
index 1e0fc1e..0423253 100644 (file)
@@ -2408,7 +2408,7 @@ retry_inode:
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
        }
-       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
 
 cleanup:
        brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ retry:
                if (error == 0)
                        error = error2;
        }
-       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
 
        return error;
 }
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
                                         error);
                        goto cleanup;
                }
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
        }
        error = 0;
 cleanup:
index 57edef1..4969021 100644 (file)
@@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
 static int __init init_fs_stat_sysctls(void)
 {
        register_sysctl_init("fs", fs_stat_sysctls);
+       if (IS_ENABLED(CONFIG_BINFMT_MISC))
+               register_sysctl_mount_point("fs/binfmt_misc");
        return 0;
 }
 fs_initcall(init_fs_stat_sysctls);
index c938bba..6c51a75 100644 (file)
@@ -21,6 +21,8 @@
 
 #include "../internal.h"
 
+#define IOEND_BATCH_SIZE       4096
+
 /*
  * Structure allocated for each folio when block size < folio size
  * to track sub-folio uptodate status and I/O completions.
@@ -1039,7 +1041,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
  * state, release holds on bios, and finally free up memory.  Do not use the
  * ioend after this.
  */
-static void
+static u32
 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 {
        struct inode *inode = ioend->io_inode;
@@ -1048,6 +1050,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
        u64 start = bio->bi_iter.bi_sector;
        loff_t offset = ioend->io_offset;
        bool quiet = bio_flagged(bio, BIO_QUIET);
+       u32 folio_count = 0;
 
        for (bio = &ioend->io_inline_bio; bio; bio = next) {
                struct folio_iter fi;
@@ -1062,9 +1065,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
                        next = bio->bi_private;
 
                /* walk all folios in bio, ending page IO on them */
-               bio_for_each_folio_all(fi, bio)
+               bio_for_each_folio_all(fi, bio) {
                        iomap_finish_folio_write(inode, fi.folio, fi.length,
                                        error);
+                       folio_count++;
+               }
                bio_put(bio);
        }
        /* The ioend has been freed by bio_put() */
@@ -1074,20 +1079,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 "%s: writeback error on inode %lu, offset %lld, sector %llu",
                        inode->i_sb->s_id, inode->i_ino, offset, start);
        }
+       return folio_count;
 }
 
+/*
+ * Ioend completion routine for merged bios. This can only be called from task
+ * contexts as merged ioends can be of unbounded length. Hence we have to break up
+ * the writeback completions into manageable chunks to avoid long scheduler
+ * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
+ * good batch processing throughput without creating adverse scheduler latency
+ * conditions.
+ */
 void
 iomap_finish_ioends(struct iomap_ioend *ioend, int error)
 {
        struct list_head tmp;
+       u32 completions;
+
+       might_sleep();
 
        list_replace_init(&ioend->io_list, &tmp);
-       iomap_finish_ioend(ioend, error);
+       completions = iomap_finish_ioend(ioend, error);
 
        while (!list_empty(&tmp)) {
+               if (completions > IOEND_BATCH_SIZE * 8) {
+                       cond_resched();
+                       completions = 0;
+               }
                ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
                list_del_init(&ioend->io_list);
-               iomap_finish_ioend(ioend, error);
+               completions += iomap_finish_ioend(ioend, error);
        }
 }
 EXPORT_SYMBOL_GPL(iomap_finish_ioends);
@@ -1108,6 +1129,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
                return false;
        if (ioend->io_offset + ioend->io_size != next->io_offset)
                return false;
+       /*
+        * Do not merge physically discontiguous ioends. The filesystem
+        * completion functions will have to iterate the physical
+        * discontiguities even if we merge the ioends at a logical level, so
+        * we don't gain anything by merging physical discontiguities here.
+        *
+        * We cannot use bio->bi_iter.bi_sector here as it is modified during
+        * submission so does not point to the start sector of the bio at
+        * completion.
+        */
+       if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
+               return false;
        return true;
 }
 
@@ -1209,8 +1242,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
        ioend->io_flags = wpc->iomap.flags;
        ioend->io_inode = inode;
        ioend->io_size = 0;
+       ioend->io_folios = 0;
        ioend->io_offset = offset;
        ioend->io_bio = bio;
+       ioend->io_sector = sector;
        return ioend;
 }
 
@@ -1251,6 +1286,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
                return false;
        if (sector != bio_end_sector(wpc->ioend->io_bio))
                return false;
+       /*
+        * Limit ioend bio chain lengths to minimise IO completion latency. This
+        * also prevents long tight loops ending page writeback on all the
+        * folios in the ioend.
+        */
+       if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
+               return false;
        return true;
 }
 
@@ -1335,6 +1377,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
                                 &submit_list);
                count++;
        }
+       if (count)
+               wpc->ioend->io_folios++;
 
        WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
        WARN_ON_ONCE(!folio_test_locked(folio));
index 3cc4ab2..5b9408e 100644 (file)
@@ -484,22 +484,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
                                              stats.run.rs_locked);
 
-       spin_lock(&commit_transaction->t_handle_lock);
-       while (atomic_read(&commit_transaction->t_updates)) {
-               DEFINE_WAIT(wait);
+       // waits for any t_updates to finish
+       jbd2_journal_wait_updates(journal);
 
-               prepare_to_wait(&journal->j_wait_updates, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-               if (atomic_read(&commit_transaction->t_updates)) {
-                       spin_unlock(&commit_transaction->t_handle_lock);
-                       write_unlock(&journal->j_state_lock);
-                       schedule();
-                       write_lock(&journal->j_state_lock);
-                       spin_lock(&commit_transaction->t_handle_lock);
-               }
-               finish_wait(&journal->j_wait_updates, &wait);
-       }
-       spin_unlock(&commit_transaction->t_handle_lock);
        commit_transaction->t_state = T_SWITCH;
        write_unlock(&journal->j_state_lock);
 
@@ -817,7 +804,7 @@ start_journal_io:
        commit_transaction->t_state = T_COMMIT_DFLUSH;
        write_unlock(&journal->j_state_lock);
 
-       /* 
+       /*
         * If the journal is not located on the file system device,
         * then we must flush the file system device before we issue
         * the commit record
@@ -1170,7 +1157,7 @@ restart_loop:
        if (journal->j_commit_callback)
                journal->j_commit_callback(journal, commit_transaction);
        if (journal->j_fc_cleanup_callback)
-               journal->j_fc_cleanup_callback(journal, 1);
+               journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
 
        trace_jbd2_end_commit(journal, commit_transaction);
        jbd_debug(1, "JBD2: commit %d complete, head %d\n",
index bf108d4..c2cf74b 100644 (file)
@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
 {
        jbd2_journal_unlock_updates(journal);
        if (journal->j_fc_cleanup_callback)
-               journal->j_fc_cleanup_callback(journal, 0);
+               journal->j_fc_cleanup_callback(journal, 0, tid);
        write_lock(&journal->j_state_lock);
        journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
        if (fallback)
@@ -1287,6 +1287,8 @@ static int jbd2_min_tag_size(void)
 
 /**
  * jbd2_journal_shrink_scan()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
  *
  * Scan the checkpointed buffer on the checkpoint list and release the
  * journal_head.
@@ -1312,6 +1314,8 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
 
 /**
  * jbd2_journal_shrink_count()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
  *
  * Count the number of checkpoint buffers on the checkpoint list.
  */
index 6a3caed..8e2f827 100644 (file)
@@ -449,7 +449,7 @@ repeat:
        }
 
        /* OK, account for the buffers that this operation expects to
-        * use and add the handle to the running transaction. 
+        * use and add the handle to the running transaction.
         */
        update_t_max_wait(transaction, ts);
        handle->h_transaction = transaction;
@@ -836,6 +836,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 }
 EXPORT_SYMBOL(jbd2_journal_restart);
 
+/*
+ * Wait until the running transaction has no outstanding t_updates.
+ * Called with j_state_lock write-held. The lock is dropped around schedule(),
+ * so the running transaction is looked up again on every pass, never cached.
+ */
+void jbd2_journal_wait_updates(journal_t *journal)
+{
+       DEFINE_WAIT(wait);
+
+       while (1) {
+               transaction_t *transaction = journal->j_running_transaction;
+
+               if (!transaction)
+                       break;
+               spin_lock(&transaction->t_handle_lock);
+               prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE);
+               if (!atomic_read(&transaction->t_updates)) {
+                       spin_unlock(&transaction->t_handle_lock);
+                       finish_wait(&journal->j_wait_updates, &wait);
+                       break;
+               }
+               spin_unlock(&transaction->t_handle_lock);
+               write_unlock(&journal->j_state_lock);
+               schedule();
+               finish_wait(&journal->j_wait_updates, &wait);
+               write_lock(&journal->j_state_lock);
+       }
+}
+
 /**
  * jbd2_journal_lock_updates () - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
@@ -863,27 +892,9 @@ void jbd2_journal_lock_updates(journal_t *journal)
                write_lock(&journal->j_state_lock);
        }
 
-       /* Wait until there are no running updates */
-       while (1) {
-               transaction_t *transaction = journal->j_running_transaction;
-
-               if (!transaction)
-                       break;
+       /* Wait until there are no running t_updates */
+       jbd2_journal_wait_updates(journal);
 
-               spin_lock(&transaction->t_handle_lock);
-               prepare_to_wait(&journal->j_wait_updates, &wait,
-                               TASK_UNINTERRUPTIBLE);
-               if (!atomic_read(&transaction->t_updates)) {
-                       spin_unlock(&transaction->t_handle_lock);
-                       finish_wait(&journal->j_wait_updates, &wait);
-                       break;
-               }
-               spin_unlock(&transaction->t_handle_lock);
-               write_unlock(&journal->j_state_lock);
-               schedule();
-               finish_wait(&journal->j_wait_updates, &wait);
-               write_lock(&journal->j_state_lock);
-       }
        write_unlock(&journal->j_state_lock);
 
        /*
index dc3d061..911444d 100644 (file)
@@ -29,6 +29,7 @@
 #include "mgmt/user_config.h"
 #include "crypto_ctx.h"
 #include "transport_ipc.h"
+#include "../smbfs_common/arc4.h"
 
 /*
  * Fixed format data defining GSS header and fixed string
@@ -336,6 +337,29 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
                                nt_len - CIFS_ENCPWD_SIZE,
                                domain_name, conn->ntlmssp.cryptkey);
        kfree(domain_name);
+
+       /* The recovered secondary session key */
+       if (conn->ntlmssp.client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) {
+               struct arc4_ctx *ctx_arc4;
+               unsigned int sess_key_off, sess_key_len;
+
+               sess_key_off = le32_to_cpu(authblob->SessionKey.BufferOffset);
+               sess_key_len = le16_to_cpu(authblob->SessionKey.Length);
+
+               if (blob_len < (u64)sess_key_off + sess_key_len)
+                       return -EINVAL;
+
+               ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);
+               if (!ctx_arc4)
+                       return -ENOMEM;
+
+               cifs_arc4_setkey(ctx_arc4, sess->sess_key,
+                                SMB2_NTLMV2_SESSKEY_SIZE);
+               cifs_arc4_crypt(ctx_arc4, sess->sess_key,
+                               (char *)authblob + sess_key_off, sess_key_len);
+               kfree_sensitive(ctx_arc4);
+       }
+
        return ret;
 }
 
@@ -408,6 +432,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
            (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
                flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
 
+       if (cflags & NTLMSSP_NEGOTIATE_KEY_XCH)
+               flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
        chgblob->NegotiateFlags = cpu_to_le32(flags);
        len = strlen(ksmbd_netbios_name());
        name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
index 1866c81..67e8e28 100644 (file)
@@ -2688,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work)
                                        (struct create_posix *)context;
                                if (le16_to_cpu(context->DataOffset) +
                                    le32_to_cpu(context->DataLength) <
-                                   sizeof(struct create_posix)) {
+                                   sizeof(struct create_posix) - 4) {
                                        rc = -EINVAL;
                                        goto err_out1;
                                }
@@ -3422,9 +3422,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                goto free_conv_name;
        }
 
-       struct_sz = readdir_info_level_struct_sz(info_level);
-       next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
-                                 KSMBD_DIR_INFO_ALIGNMENT);
+       struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
+       next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
+       d_info->last_entry_off_align = next_entry_offset - struct_sz;
 
        if (next_entry_offset > d_info->out_buf_len) {
                d_info->out_buf_len = 0;
@@ -3976,6 +3976,7 @@ int smb2_query_dir(struct ksmbd_work *work)
                ((struct file_directory_info *)
                ((char *)rsp->Buffer + d_info.last_entry_offset))
                ->NextEntryOffset = 0;
+               d_info.data_count -= d_info.last_entry_off_align;
 
                rsp->StructureSize = cpu_to_le16(9);
                rsp->OutputBufferOffset = cpu_to_le16(72);
@@ -6126,13 +6127,26 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
                                        __le16 ChannelInfoOffset,
                                        __le16 ChannelInfoLength)
 {
+       unsigned int i, ch_count;
+
        if (work->conn->dialect == SMB30_PROT_ID &&
            Channel != SMB2_CHANNEL_RDMA_V1)
                return -EINVAL;
 
-       if (ChannelInfoOffset == 0 ||
-           le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
+       ch_count = le16_to_cpu(ChannelInfoLength) / sizeof(*desc);
+       if (ksmbd_debug_types & KSMBD_DEBUG_RDMA) {
+               for (i = 0; i < ch_count; i++) {
+                       pr_info("RDMA r/w request %#x: token %#x, length %#x\n",
+                               i,
+                               le32_to_cpu(desc[i].token),
+                               le32_to_cpu(desc[i].length));
+               }
+       }
+       if (ch_count != 1) {
+               ksmbd_debug(RDMA, "RDMA multiple buffer descriptors %d are not supported yet\n",
+                           ch_count);
                return -EINVAL;
+       }
 
        work->need_invalidate_rkey =
                (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
@@ -6185,9 +6199,15 @@ int smb2_read(struct ksmbd_work *work)
 
        if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
            req->Channel == SMB2_CHANNEL_RDMA_V1) {
+               unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset);
+
+               if (ch_offset < offsetof(struct smb2_read_req, Buffer)) {
+                       err = -EINVAL;
+                       goto out;
+               }
                err = smb2_set_remote_key_for_rdma(work,
                                                   (struct smb2_buffer_desc_v1 *)
-                                                  &req->Buffer[0],
+                                                  ((char *)req + ch_offset),
                                                   req->Channel,
                                                   req->ReadChannelInfoOffset,
                                                   req->ReadChannelInfoLength);
@@ -6428,11 +6448,16 @@ int smb2_write(struct ksmbd_work *work)
 
        if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
            req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
-               if (req->Length != 0 || req->DataOffset != 0)
-                       return -EINVAL;
+               unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset);
+
+               if (req->Length != 0 || req->DataOffset != 0 ||
+                   ch_offset < offsetof(struct smb2_write_req, Buffer)) {
+                       err = -EINVAL;
+                       goto out;
+               }
                err = smb2_set_remote_key_for_rdma(work,
                                                   (struct smb2_buffer_desc_v1 *)
-                                                  &req->Buffer[0],
+                                                  ((char *)req + ch_offset),
                                                   req->Channel,
                                                   req->WriteChannelInfoOffset,
                                                   req->WriteChannelInfoLength);
index ef7f42b..9a7e211 100644 (file)
@@ -308,14 +308,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
        for (i = 0; i < 2; i++) {
                struct kstat kstat;
                struct ksmbd_kstat ksmbd_kstat;
+               struct dentry *dentry;
 
                if (!dir->dot_dotdot[i]) { /* fill dot entry info */
                        if (i == 0) {
                                d_info->name = ".";
                                d_info->name_len = 1;
+                               dentry = dir->filp->f_path.dentry;
                        } else {
                                d_info->name = "..";
                                d_info->name_len = 2;
+                               dentry = dir->filp->f_path.dentry->d_parent;
                        }
 
                        if (!match_pattern(d_info->name, d_info->name_len,
@@ -327,7 +330,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
                        ksmbd_kstat.kstat = &kstat;
                        ksmbd_vfs_fill_dentry_attrs(work,
                                                    user_ns,
-                                                   dir->filp->f_path.dentry->d_parent,
+                                                   dentry,
                                                    &ksmbd_kstat);
                        rc = fn(conn, info_level, d_info, &ksmbd_kstat);
                        if (rc)
index 3c1ec1a..ba5a22b 100644 (file)
@@ -80,7 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
 /*  The maximum single-message size which can be received */
 static int smb_direct_max_receive_size = 8192;
 
-static int smb_direct_max_read_write_size = 1048512;
+static int smb_direct_max_read_write_size = 524224;
 
 static int smb_direct_max_outstanding_rw_ops = 8;
 
index adf94a4..8c37aaf 100644 (file)
@@ -47,6 +47,7 @@ struct ksmbd_dir_info {
        int             last_entry_offset;
        bool            hide_dot_file;
        int             flags;
+       int             last_entry_off_align;
 };
 
 struct ksmbd_readdir_data {
index f18e80f..d1f3422 100644 (file)
@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
        INIT_LIST_HEAD(&clp->cl_superblocks);
        clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
+       clp->cl_flags = cl_init->init_flags;
        clp->cl_proto = cl_init->proto;
        clp->cl_nconnect = cl_init->nconnect;
        clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
@@ -423,7 +424,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
                        list_add_tail(&new->cl_share_link,
                                        &nn->nfs_client_list);
                        spin_unlock(&nn->nfs_client_lock);
-                       new->cl_flags = cl_init->init_flags;
                        return rpc_ops->init_client(new, cl_init);
                }
 
index 848f3b8..7bc7cf6 100644 (file)
@@ -80,6 +80,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
                ctx->dir_cookie = 0;
                ctx->dup_cookie = 0;
                ctx->page_index = 0;
+               ctx->eof = false;
                spin_lock(&dir->i_lock);
                if (list_empty(&nfsi->open_files) &&
                    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -168,6 +169,7 @@ struct nfs_readdir_descriptor {
        unsigned int    cache_entry_index;
        signed char duped;
        bool plus;
+       bool eob;
        bool eof;
 };
 
@@ -867,7 +869,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
                status = nfs_readdir_page_filler(desc, entry, pages, pglen,
                                                 arrays, narrays);
-       } while (!status && nfs_readdir_page_needs_filling(page));
+       } while (!status && nfs_readdir_page_needs_filling(page) &&
+               page_mapping(page));
 
        nfs_readdir_free_pages(pages, array_size);
 out:
@@ -988,7 +991,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
                ent = &array->array[i];
                if (!dir_emit(desc->ctx, ent->name, ent->name_len,
                    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
-                       desc->eof = true;
+                       desc->eob = true;
                        break;
                }
                memcpy(desc->verf, verf, sizeof(desc->verf));
@@ -1004,7 +1007,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
                        desc->duped = 1;
        }
        if (array->page_is_eof)
-               desc->eof = true;
+               desc->eof = !desc->eob;
 
        kunmap(desc->page);
        dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
@@ -1041,12 +1044,13 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
                goto out;
 
        desc->page_index = 0;
+       desc->cache_entry_index = 0;
        desc->last_cookie = desc->dir_cookie;
        desc->duped = 0;
 
        status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
 
-       for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+       for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
                desc->page = arrays[i];
                nfs_do_filldir(desc, verf);
        }
@@ -1105,9 +1109,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
        desc->duped = dir_ctx->duped;
        page_index = dir_ctx->page_index;
        desc->attr_gencount = dir_ctx->attr_gencount;
+       desc->eof = dir_ctx->eof;
        memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
        spin_unlock(&file->f_lock);
 
+       if (desc->eof) {
+               res = 0;
+               goto out_free;
+       }
+
        if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
            list_is_singular(&nfsi->open_files))
                invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
@@ -1141,7 +1151,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 
                nfs_do_filldir(desc, nfsi->cookieverf);
                nfs_readdir_page_unlock_and_put_cached(desc);
-       } while (!desc->eof);
+       } while (!desc->eob && !desc->eof);
 
        spin_lock(&file->f_lock);
        dir_ctx->dir_cookie = desc->dir_cookie;
@@ -1149,9 +1159,10 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
        dir_ctx->duped = desc->duped;
        dir_ctx->attr_gencount = desc->attr_gencount;
        dir_ctx->page_index = desc->page_index;
+       dir_ctx->eof = desc->eof;
        memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
        spin_unlock(&file->f_lock);
-
+out_free:
        kfree(desc);
 
 out:
@@ -1193,6 +1204,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
                if (offset == 0)
                        memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
                dir_ctx->duped = 0;
+               dir_ctx->eof = false;
        }
        spin_unlock(&filp->f_lock);
        return offset;
index b18f31b..f502082 100644 (file)
@@ -8032,7 +8032,8 @@ static int _nfs41_proc_get_locations(struct nfs_server *server,
 
 /**
  * nfs4_proc_get_locations - discover locations for a migrated FSID
- * @inode: inode on FSID that is migrating
+ * @server: pointer to nfs_server to process
+ * @fhandle: pointer to the kernel NFS client file handle
  * @locations: result of query
  * @page: buffer
  * @cred: credential to use for this operation
index 8ef53f6..936eebd 100644 (file)
@@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
        unsigned int len;
        int v;
 
-       argp->count = min_t(u32, argp->count, max_blocksize);
-
        dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
                                SVCFH_fmt(&argp->fh),
                                (unsigned long) argp->count,
                                (unsigned long long) argp->offset);
 
+       argp->count = min_t(u32, argp->count, max_blocksize);
+       if (argp->offset > (u64)OFFSET_MAX)
+               argp->offset = (u64)OFFSET_MAX;
+       if (argp->offset + argp->count > (u64)OFFSET_MAX)
+               argp->count = (u64)OFFSET_MAX - argp->offset;
+
        v = 0;
        len = argp->count;
        resp->pages = rqstp->rq_next_page;
@@ -199,6 +203,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
                                (unsigned long long) argp->offset,
                                argp->stable? " stable" : "");
 
+       resp->status = nfserr_fbig;
+       if (argp->offset > (u64)OFFSET_MAX ||
+           argp->offset + argp->len > (u64)OFFSET_MAX)
+               return rpc_success;
+
        fh_copy(&resp->fh, &argp->fh);
        resp->committed = argp->stable;
        nvecs = svc_fill_write_vector(rqstp, &argp->payload);
@@ -651,15 +660,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
                                argp->count,
                                (unsigned long long) argp->offset);
 
-       if (argp->offset > NFS_OFFSET_MAX) {
-               resp->status = nfserr_inval;
-               goto out;
-       }
-
        fh_copy(&resp->fh, &argp->fh);
        resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
                                   argp->count, resp->verf);
-out:
        return rpc_success;
 }
 
index 7c45ba4..0293b8d 100644 (file)
@@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                if (xdr_stream_decode_u64(xdr, &newsize) < 0)
                        return false;
                iap->ia_valid |= ATTR_SIZE;
-               iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
+               iap->ia_size = newsize;
        }
        if (xdr_stream_decode_u32(xdr, &set_it) < 0)
                return false;
@@ -1060,7 +1060,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
                return false;
        /* cookie */
        resp->cookie_offset = dirlist->len;
-       if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0)
+       if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0)
                return false;
 
        return true;
index ed1ee25..b207c76 100644 (file)
@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        __be32 status;
 
        read->rd_nf = NULL;
-       if (read->rd_offset >= OFFSET_MAX)
-               return nfserr_inval;
 
        trace_nfsd_read_start(rqstp, &cstate->current_fh,
                              read->rd_offset, read->rd_length);
 
+       read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+       if (read->rd_offset > (u64)OFFSET_MAX)
+               read->rd_offset = (u64)OFFSET_MAX;
+       if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+               read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
        /*
         * If we do a zero copy read, then a client will see read data
         * that reflects the state of the file *after* performing the
@@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        unsigned long cnt;
        int nvecs;
 
-       if (write->wr_offset >= OFFSET_MAX)
-               return nfserr_inval;
+       if (write->wr_offset > (u64)OFFSET_MAX ||
+           write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
+               return nfserr_fbig;
 
        cnt = write->wr_buflen;
        trace_nfsd_write_start(rqstp, &cstate->current_fh,
index 899de43..714a3a3 100644 (file)
@@ -3495,7 +3495,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
        p = xdr_reserve_space(xdr, 3*4 + namlen);
        if (!p)
                goto fail;
-       p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
+       p = xdr_encode_hyper(p, OFFSET_MAX);        /* offset of next entry */
        p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
        nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
        }
        xdr_commit_encode(xdr);
 
-       maxcount = svc_max_payload(resp->rqstp);
-       maxcount = min_t(unsigned long, maxcount,
+       maxcount = min_t(unsigned long, read->rd_length,
                         (xdr->buf->buflen - xdr->buf->len));
-       maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
        if (file->f_op->splice_read &&
            test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
                return nfserr_resource;
        xdr_commit_encode(xdr);
 
-       maxcount = svc_max_payload(resp->rqstp);
-       maxcount = min_t(unsigned long, maxcount,
+       maxcount = min_t(unsigned long, read->rd_length,
                         (xdr->buf->buflen - xdr->buf->len));
-       maxcount = min_t(unsigned long, maxcount, read->rd_length);
        count    = maxcount;
 
        eof = read->rd_offset >= i_size_read(file_inode(file));
index c4cf563..5889db6 100644 (file)
@@ -306,14 +306,14 @@ TRACE_EVENT(nfsd_export_update,
 DECLARE_EVENT_CLASS(nfsd_io_class,
        TP_PROTO(struct svc_rqst *rqstp,
                 struct svc_fh  *fhp,
-                loff_t         offset,
-                unsigned long  len),
+                u64            offset,
+                u32            len),
        TP_ARGS(rqstp, fhp, offset, len),
        TP_STRUCT__entry(
                __field(u32, xid)
                __field(u32, fh_hash)
-               __field(loff_t, offset)
-               __field(unsigned long, len)
+               __field(u64, offset)
+               __field(u32, len)
        ),
        TP_fast_assign(
                __entry->xid = be32_to_cpu(rqstp->rq_xid);
@@ -321,7 +321,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
                __entry->offset = offset;
                __entry->len = len;
        ),
-       TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+       TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u",
                  __entry->xid, __entry->fh_hash,
                  __entry->offset, __entry->len)
 )
@@ -330,8 +330,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
 DEFINE_EVENT(nfsd_io_class, nfsd_##name,       \
        TP_PROTO(struct svc_rqst *rqstp,        \
                 struct svc_fh  *fhp,           \
-                loff_t         offset,         \
-                unsigned long  len),           \
+                u64            offset,         \
+                u32            len),           \
        TP_ARGS(rqstp, fhp, offset, len))
 
 DEFINE_NFSD_IO_EVENT(read_start);
index 99c2b9d..91600e7 100644 (file)
@@ -435,6 +435,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                        .ia_size        = iap->ia_size,
                };
 
+               host_err = -EFBIG;
+               if (iap->ia_size < 0)
+                       goto out_unlock;
+
                host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
                if (host_err)
                        goto out_unlock;
@@ -1110,42 +1114,61 @@ out:
 }
 
 #ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
  *
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
  *
  * Unfortunately we cannot lock the file to make sure we return full WCC
  * data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ *   An nfsstat value in network byte order.
  */
 __be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
-               loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+           u32 count, __be32 *verf)
 {
+       u64                     maxbytes;
+       loff_t                  start, end;
        struct nfsd_net         *nn;
        struct nfsd_file        *nf;
-       loff_t                  end = LLONG_MAX;
-       __be32                  err = nfserr_inval;
-
-       if (offset < 0)
-               goto out;
-       if (count != 0) {
-               end = offset + (loff_t)count - 1;
-               if (end < offset)
-                       goto out;
-       }
+       __be32                  err;
 
        err = nfsd_file_acquire(rqstp, fhp,
                        NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
        if (err)
                goto out;
+
+       /*
+        * Convert the client-provided (offset, count) range to a
+        * (start, end) range. If the client-provided range falls
+        * outside the maximum file size of the underlying FS,
+        * clamp the sync range appropriately.
+        */
+       start = 0;
+       end = LLONG_MAX;
+       maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+       if (offset < maxbytes) {
+               start = offset;
+               if (count && (offset + count - 1 < maxbytes))
+                       end = offset + count - 1;
+       }
+
        nn = net_generic(nf->nf_net, nfsd_net_id);
        if (EX_ISSYNC(fhp->fh_export)) {
                errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
                int err2;
 
-               err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+               err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
                switch (err2) {
                case 0:
                        nfsd_copy_write_verifier(verf, nn);
index 9f56dcb..2c43d10 100644 (file)
@@ -74,8 +74,8 @@ __be32                do_nfsd_create(struct svc_rqst *, struct svc_fh *,
                                char *name, int len, struct iattr *attrs,
                                struct svc_fh *res, int createmode,
                                u32 *verifier, bool *truncp, bool *created);
-__be32         nfsd_commit(struct svc_rqst *, struct svc_fh *,
-                               loff_t, unsigned long, __be32 *verf);
+__be32         nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+                               u64 offset, u32 count, __be32 *verf);
 #endif /* CONFIG_NFSD_V3 */
 #ifdef CONFIG_NFSD_V4
 __be32         nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
index 22d904b..a74aef9 100644 (file)
@@ -690,9 +690,14 @@ int dquot_quota_sync(struct super_block *sb, int type)
        /* This is not very clever (and fast) but currently I don't know about
         * any other simple way of getting quota data to disk and we must get
         * them there for userspace to be visible... */
-       if (sb->s_op->sync_fs)
-               sb->s_op->sync_fs(sb, 1);
-       sync_blockdev(sb->s_bdev);
+       if (sb->s_op->sync_fs) {
+               ret = sb->s_op->sync_fs(sb, 1);
+               if (ret)
+                       return ret;
+       }
+       ret = sync_blockdev(sb->s_bdev);
+       if (ret)
+               return ret;
 
        /*
         * Now when everything is written we can discard the pagecache so
index 7af820b..f1d4a19 100644 (file)
@@ -1616,11 +1616,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb)
                percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
 }
 
-static void sb_freeze_unlock(struct super_block *sb)
+static void sb_freeze_unlock(struct super_block *sb, int level)
 {
-       int level;
-
-       for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+       for (level--; level >= 0; level--)
                percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
@@ -1691,7 +1689,14 @@ int freeze_super(struct super_block *sb)
        sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
        /* All writers are done so after syncing there won't be dirty data */
-       sync_filesystem(sb);
+       ret = sync_filesystem(sb);
+       if (ret) {
+               sb->s_writers.frozen = SB_UNFROZEN;
+               sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+               wake_up(&sb->s_writers.wait_unfrozen);
+               deactivate_locked_super(sb);
+               return ret;
+       }
 
        /* Now wait for internal filesystem counter */
        sb->s_writers.frozen = SB_FREEZE_FS;
@@ -1703,7 +1708,7 @@ int freeze_super(struct super_block *sb)
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
                        sb->s_writers.frozen = SB_UNFROZEN;
-                       sb_freeze_unlock(sb);
+                       sb_freeze_unlock(sb, SB_FREEZE_FS);
                        wake_up(&sb->s_writers.wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
@@ -1748,7 +1753,7 @@ static int thaw_super_locked(struct super_block *sb)
        }
 
        sb->s_writers.frozen = SB_UNFROZEN;
-       sb_freeze_unlock(sb);
+       sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
        wake_up(&sb->s_writers.wait_unfrozen);
        deactivate_locked_super(sb);
index 3ce8e21..c769001 100644 (file)
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,7 +29,7 @@
  */
 int sync_filesystem(struct super_block *sb)
 {
-       int ret;
+       int ret = 0;
 
        /*
         * We need to be protected against the filesystem going from
@@ -52,15 +52,21 @@ int sync_filesystem(struct super_block *sb)
         * at a time.
         */
        writeback_inodes_sb(sb, WB_REASON_SYNC);
-       if (sb->s_op->sync_fs)
-               sb->s_op->sync_fs(sb, 0);
+       if (sb->s_op->sync_fs) {
+               ret = sb->s_op->sync_fs(sb, 0);
+               if (ret)
+                       return ret;
+       }
        ret = sync_blockdev_nowait(sb->s_bdev);
-       if (ret < 0)
+       if (ret)
                return ret;
 
        sync_inodes_sb(sb);
-       if (sb->s_op->sync_fs)
-               sb->s_op->sync_fs(sb, 1);
+       if (sb->s_op->sync_fs) {
+               ret = sb->s_op->sync_fs(sb, 1);
+               if (ret)
+                       return ret;
+       }
        return sync_blockdev(sb->s_bdev);
 }
 EXPORT_SYMBOL(sync_filesystem);
index 2705f91..9d6a67c 100644 (file)
@@ -136,7 +136,20 @@ done:
        memalloc_nofs_restore(nofs_flag);
 }
 
-/* Finish all pending io completions. */
+/*
+ * Finish all pending IO completions that require transactional modifications.
+ *
+ * We try to merge physically and logically contiguous ioends before completion
+ * to minimise the number of transactions we need to perform during IO
+ * completion. Both unwritten extent conversion and COW remapping need to
+ * iterate and modify one physical extent at a time, so we gain nothing by
+ * merging physically discontiguous extents here.
+ *
+ * The ioend chain that we can be processing here is largely unbounded in
+ * length and we may have to perform significant amounts of work on each ioend
+ * to complete it. Hence we have to be careful about holding the CPU for too
+ * long in this loop.
+ */
 void
 xfs_end_io(
        struct work_struct      *work)
@@ -157,6 +170,7 @@ xfs_end_io(
                list_del_init(&ioend->io_list);
                iomap_ioend_try_merge(ioend, &tmp);
                xfs_end_ioend(ioend);
+               cond_resched();
        }
 }
 
index d4a387d..eb2e387 100644 (file)
@@ -850,9 +850,6 @@ xfs_alloc_file_space(
                        rblocks = 0;
                }
 
-               /*
-                * Allocate and setup the transaction.
-                */
                error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
                                dblocks, rblocks, false, &tp);
                if (error)
@@ -869,9 +866,9 @@ xfs_alloc_file_space(
                if (error)
                        goto error;
 
-               /*
-                * Complete the transaction
-                */
+               ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
                error = xfs_trans_commit(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
index 22ad207..5bddb1e 100644 (file)
@@ -66,40 +66,6 @@ xfs_is_falloc_aligned(
        return !((pos | len) & mask);
 }
 
-int
-xfs_update_prealloc_flags(
-       struct xfs_inode        *ip,
-       enum xfs_prealloc_flags flags)
-{
-       struct xfs_trans        *tp;
-       int                     error;
-
-       error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
-                       0, 0, 0, &tp);
-       if (error)
-               return error;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
-       if (!(flags & XFS_PREALLOC_INVISIBLE)) {
-               VFS_I(ip)->i_mode &= ~S_ISUID;
-               if (VFS_I(ip)->i_mode & S_IXGRP)
-                       VFS_I(ip)->i_mode &= ~S_ISGID;
-               xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       }
-
-       if (flags & XFS_PREALLOC_SET)
-               ip->i_diflags |= XFS_DIFLAG_PREALLOC;
-       if (flags & XFS_PREALLOC_CLEAR)
-               ip->i_diflags &= ~XFS_DIFLAG_PREALLOC;
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       if (flags & XFS_PREALLOC_SYNC)
-               xfs_trans_set_sync(tp);
-       return xfs_trans_commit(tp);
-}
-
 /*
  * Fsync operations on directories are much simpler than on regular files,
  * as there is no file data to flush, and thus also no need for explicit
@@ -895,6 +861,21 @@ xfs_break_layouts(
        return error;
 }
 
+/* Does this file, inode, or mount want synchronous writes? */
+static inline bool xfs_file_sync_writes(struct file *filp)
+{
+       struct xfs_inode        *ip = XFS_I(file_inode(filp));
+
+       if (xfs_has_wsync(ip->i_mount))
+               return true;
+       if (filp->f_flags & (__O_SYNC | O_DSYNC))
+               return true;
+       if (IS_SYNC(file_inode(filp)))
+               return true;
+
+       return false;
+}
+
 #define        XFS_FALLOC_FL_SUPPORTED                                         \
                (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
                 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
@@ -910,7 +891,6 @@ xfs_file_fallocate(
        struct inode            *inode = file_inode(file);
        struct xfs_inode        *ip = XFS_I(inode);
        long                    error;
-       enum xfs_prealloc_flags flags = 0;
        uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
        loff_t                  new_size = 0;
        bool                    do_file_insert = false;
@@ -955,6 +935,10 @@ xfs_file_fallocate(
                        goto out_unlock;
        }
 
+       error = file_modified(file);
+       if (error)
+               goto out_unlock;
+
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                error = xfs_free_file_space(ip, offset, len);
                if (error)
@@ -1004,8 +988,6 @@ xfs_file_fallocate(
                }
                do_file_insert = true;
        } else {
-               flags |= XFS_PREALLOC_SET;
-
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                    offset + len > i_size_read(inode)) {
                        new_size = offset + len;
@@ -1057,13 +1039,6 @@ xfs_file_fallocate(
                }
        }
 
-       if (file->f_flags & O_DSYNC)
-               flags |= XFS_PREALLOC_SYNC;
-
-       error = xfs_update_prealloc_flags(ip, flags);
-       if (error)
-               goto out_unlock;
-
        /* Change file size if needed */
        if (new_size) {
                struct iattr iattr;
@@ -1082,8 +1057,14 @@ xfs_file_fallocate(
         * leave shifted extents past EOF and hence losing access to
         * the data that is contained within them.
         */
-       if (do_file_insert)
+       if (do_file_insert) {
                error = xfs_insert_file_space(ip, offset, len);
+               if (error)
+                       goto out_unlock;
+       }
+
+       if (xfs_file_sync_writes(file))
+               error = xfs_log_force_inode(ip);
 
 out_unlock:
        xfs_iunlock(ip, iolock);
@@ -1115,21 +1096,6 @@ xfs_file_fadvise(
        return ret;
 }
 
-/* Does this file, inode, or mount want synchronous writes? */
-static inline bool xfs_file_sync_writes(struct file *filp)
-{
-       struct xfs_inode        *ip = XFS_I(file_inode(filp));
-
-       if (xfs_has_wsync(ip->i_mount))
-               return true;
-       if (filp->f_flags & (__O_SYNC | O_DSYNC))
-               return true;
-       if (IS_SYNC(file_inode(filp)))
-               return true;
-
-       return false;
-}
-
 STATIC loff_t
 xfs_file_remap_range(
        struct file             *file_in,
index c447bf0..b7e8f14 100644 (file)
@@ -462,15 +462,6 @@ xfs_itruncate_extents(
 }
 
 /* from xfs_file.c */
-enum xfs_prealloc_flags {
-       XFS_PREALLOC_SET        = (1 << 1),
-       XFS_PREALLOC_CLEAR      = (1 << 2),
-       XFS_PREALLOC_SYNC       = (1 << 3),
-       XFS_PREALLOC_INVISIBLE  = (1 << 4),
-};
-
-int    xfs_update_prealloc_flags(struct xfs_inode *ip,
-                                 enum xfs_prealloc_flags flags);
 int    xfs_break_layouts(struct inode *inode, uint *iolock,
                enum layout_break_reason reason);
 
index 03a6198..2515fe8 100644 (file)
@@ -1464,7 +1464,7 @@ xfs_ioc_getbmap(
 
        if (bmx.bmv_count < 2)
                return -EINVAL;
-       if (bmx.bmv_count > ULONG_MAX / recsize)
+       if (bmx.bmv_count >= INT_MAX / recsize)
                return -ENOMEM;
 
        buf = kvcalloc(bmx.bmv_count, sizeof(*buf), GFP_KERNEL);
index d6334ab..4abe173 100644 (file)
@@ -71,6 +71,40 @@ xfs_fs_get_uuid(
 }
 
 /*
+ * We cannot use file based VFS helpers such as file_modified() to update
+ * inode state as we modify the data/metadata in the inode here. Hence we have
+ * to open code the timestamp updates and SUID/SGID stripping. We also need
+ * to set the inode prealloc flag to ensure that the extents we allocate are not
+ * removed if the inode is reclaimed from memory before xfs_fs_commit_blocks()
+ * is called from the client to indicate that data has been written and the
+ * file size can be extended.
+ */
+static int
+xfs_fs_map_update_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_trans        *tp;
+       int                     error;
+
+       error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
+                       0, 0, 0, &tp);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+       VFS_I(ip)->i_mode &= ~S_ISUID;
+       if (VFS_I(ip)->i_mode & S_IXGRP)
+               VFS_I(ip)->i_mode &= ~S_ISGID;
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       return xfs_trans_commit(tp);
+}
+
+/*
  * Get a layout for the pNFS client.
  */
 int
@@ -164,10 +198,12 @@ xfs_fs_map_blocks(
                 * that the blocks allocated and handed out to the client are
                 * guaranteed to be present even after a server crash.
                 */
-               error = xfs_update_prealloc_flags(ip,
-                               XFS_PREALLOC_SET | XFS_PREALLOC_SYNC);
+               error = xfs_fs_map_update_inode(ip);
+               if (!error)
+                       error = xfs_log_force_inode(ip);
                if (error)
                        goto out_unlock;
+
        } else {
                xfs_iunlock(ip, lock_flags);
        }
@@ -255,7 +291,7 @@ xfs_fs_commit_blocks(
                length = end - start;
                if (!length)
                        continue;
-       
+
                /*
                 * Make sure reads through the pagecache see the new data.
                 */
index e8f37bd..4c0dee7 100644 (file)
@@ -735,6 +735,7 @@ xfs_fs_sync_fs(
        int                     wait)
 {
        struct xfs_mount        *mp = XFS_M(sb);
+       int                     error;
 
        trace_xfs_fs_sync_fs(mp, __return_address);
 
@@ -744,7 +745,10 @@ xfs_fs_sync_fs(
        if (!wait)
                return 0;
 
-       xfs_log_force(mp, XFS_LOG_SYNC);
+       error = xfs_log_force(mp, XFS_LOG_SYNC);
+       if (error)
+               return error;
+
        if (laptop_mode) {
                /*
                 * The disk must be active because we're syncing.
index d39cfa0..52363ee 100644 (file)
@@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state)
        state->f[0] = -1;
 }
 
-typedef void (*blake2s_compress_t)(struct blake2s_state *state,
-                                  const u8 *block, size_t nblocks, u32 inc);
-
 /* Helper functions for BLAKE2s shared by the library and shash APIs */
 
-static inline void __blake2s_update(struct blake2s_state *state,
-                                   const u8 *in, size_t inlen,
-                                   blake2s_compress_t compress)
+static __always_inline void
+__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
+                bool force_generic)
 {
        const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
 
@@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
                return;
        if (inlen > fill) {
                memcpy(state->buf + state->buflen, in, fill);
-               (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+               if (force_generic)
+                       blake2s_compress_generic(state, state->buf, 1,
+                                                BLAKE2S_BLOCK_SIZE);
+               else
+                       blake2s_compress(state, state->buf, 1,
+                                        BLAKE2S_BLOCK_SIZE);
                state->buflen = 0;
                in += fill;
                inlen -= fill;
@@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state,
        if (inlen > BLAKE2S_BLOCK_SIZE) {
                const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
                /* Hash one less (full) block than strictly possible */
-               (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+               if (force_generic)
+                       blake2s_compress_generic(state, in, nblocks - 1,
+                                                BLAKE2S_BLOCK_SIZE);
+               else
+                       blake2s_compress(state, in, nblocks - 1,
+                                        BLAKE2S_BLOCK_SIZE);
                in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
                inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
        }
@@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state,
        state->buflen += inlen;
 }
 
-static inline void __blake2s_final(struct blake2s_state *state, u8 *out,
-                                  blake2s_compress_t compress)
+static __always_inline void
+__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
 {
        blake2s_set_lastblock(state);
        memset(state->buf + state->buflen, 0,
               BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
-       (*compress)(state, state->buf, 1, state->buflen);
+       if (force_generic)
+               blake2s_compress_generic(state, state->buf, 1, state->buflen);
+       else
+               blake2s_compress(state, state->buf, 1, state->buflen);
        cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
        memcpy(out, state->h, state->outlen);
 }
@@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc)
 
 static inline int crypto_blake2s_update(struct shash_desc *desc,
                                        const u8 *in, unsigned int inlen,
-                                       blake2s_compress_t compress)
+                                       bool force_generic)
 {
        struct blake2s_state *state = shash_desc_ctx(desc);
 
-       __blake2s_update(state, in, inlen, compress);
+       __blake2s_update(state, in, inlen, force_generic);
        return 0;
 }
 
 static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
-                                      blake2s_compress_t compress)
+                                      bool force_generic)
 {
        struct blake2s_state *state = shash_desc_ctx(desc);
 
-       __blake2s_final(state, out, compress);
+       __blake2s_final(state, out, force_generic);
        return 0;
 }
 
index 199e47e..21292b5 100644 (file)
@@ -324,12 +324,12 @@ enum {
        ATA_LOG_NCQ_NON_DATA    = 0x12,
        ATA_LOG_NCQ_SEND_RECV   = 0x13,
        ATA_LOG_IDENTIFY_DEVICE = 0x30,
+       ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
 
        /* Identify device log pages: */
        ATA_LOG_SECURITY          = 0x06,
        ATA_LOG_SATA_SETTINGS     = 0x08,
        ATA_LOG_ZONED_INFORMATION = 0x09,
-       ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
 
        /* Identify device SATA settings log:*/
        ATA_LOG_DEVSLP_OFFSET     = 0x30,
index 6a89ea4..edf62ea 100644 (file)
@@ -35,6 +35,7 @@
 #define CEPH_OPT_TCP_NODELAY      (1<<4) /* TCP_NODELAY on TCP sockets */
 #define CEPH_OPT_NOMSGSIGN        (1<<5) /* don't sign msgs (msgr1) */
 #define CEPH_OPT_ABORT_ON_FULL    (1<<6) /* abort w/ ENOSPC when full */
+#define CEPH_OPT_RXBOUNCE         (1<<7) /* double-buffer read data */
 
 #define CEPH_OPT_DEFAULT   (CEPH_OPT_TCP_NODELAY)
 
index ff99ce0..e7f2fb2 100644 (file)
@@ -383,6 +383,10 @@ struct ceph_connection_v2_info {
        struct ceph_gcm_nonce in_gcm_nonce;
        struct ceph_gcm_nonce out_gcm_nonce;
 
+       struct page **in_enc_pages;
+       int in_enc_page_cnt;
+       int in_enc_resid;
+       int in_enc_i;
        struct page **out_enc_pages;
        int out_enc_page_cnt;
        int out_enc_resid;
@@ -457,6 +461,7 @@ struct ceph_connection {
        struct ceph_msg *out_msg;        /* sending message (== tail of
                                            out_sent) */
 
+       struct page *bounce_page;
        u32 in_front_crc, in_middle_crc, in_data_crc;  /* calculated crc */
 
        struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
index 3da9584..02f362c 100644 (file)
@@ -262,7 +262,7 @@ struct fb_ops {
 
        /* Draws a rectangle */
        void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
-       /* Copy data from area to another. Obsolete. */
+       /* Copy data from one area to another */
        void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
        /* Draws a image to the display */
        void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
index b55bd49..97a3a2e 100644 (file)
@@ -263,9 +263,11 @@ struct iomap_ioend {
        struct list_head        io_list;        /* next ioend in chain */
        u16                     io_type;
        u16                     io_flags;       /* IOMAP_F_* */
+       u32                     io_folios;      /* folios added to ioend */
        struct inode            *io_inode;      /* file being written to */
        size_t                  io_size;        /* size of the extent */
        loff_t                  io_offset;      /* offset in the file */
+       sector_t                io_sector;      /* start sector of ioend */
        struct bio              *io_bio;        /* bio being built */
        struct bio              io_inline_bio;  /* MUST BE LAST! */
 };
index fd933c4..9c3ada7 100644 (file)
@@ -594,7 +594,7 @@ struct transaction_s
         */
        unsigned long           t_log_start;
 
-       /* 
+       /*
         * Number of buffers on the t_buffers list [j_list_lock, no locks
         * needed for jbd2 thread]
         */
@@ -1295,7 +1295,7 @@ struct journal_s
         * Clean-up after fast commit or full commit. JBD2 calls this function
         * after every commit operation.
         */
-       void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+       void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
 
        /**
         * @j_fc_replay_callback:
@@ -1419,9 +1419,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
 extern bool __jbd2_journal_refile_buffer(struct journal_head *);
 extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
 extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_free_buffer(struct journal_head *bh);
 extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_clean_data_list(transaction_t *transaction);
 static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
 {
        list_add_tail(&bh->b_assoc_buffers, head);
@@ -1486,9 +1484,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
                                              struct buffer_head **bh_out,
                                              sector_t blocknr);
 
-/* Transaction locking */
-extern void            __wait_on_journal (journal_t *);
-
 /* Transaction cache support */
 extern void jbd2_journal_destroy_transaction_cache(void);
 extern int __init jbd2_journal_init_transaction_cache(void);
@@ -1543,6 +1538,8 @@ extern int         jbd2_journal_flush(journal_t *journal, unsigned int flags);
 extern void     jbd2_journal_lock_updates (journal_t *);
 extern void     jbd2_journal_unlock_updates (journal_t *);
 
+void jbd2_journal_wait_updates(journal_t *);
+
 extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
                                struct block_device *fs_dev,
                                unsigned long long start, int len, int bsize);
@@ -1774,8 +1771,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
 #define BJ_Reserved    4       /* Buffer is reserved for access by journal */
 #define BJ_Types       5
 
-extern int jbd_blocks_per_page(struct inode *inode);
-
 /* JBD uses a CRC32 checksum */
 #define JBD_MAX_CHECKSUM_SIZE 4
 
index 06912d6..f110399 100644 (file)
@@ -29,7 +29,9 @@
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <linux/notifier.h>
+#include <linux/ftrace.h>
 #include <linux/hashtable.h>
+#include <linux/instrumentation.h>
 #include <linux/interval_tree.h>
 #include <linux/rbtree.h>
 #include <linux/xarray.h>
@@ -368,8 +370,11 @@ struct kvm_vcpu {
        u64 last_used_slot_gen;
 };
 
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+/*
+ * Start accounting time towards a guest.
+ * Must be called before entering guest context.
+ */
+static __always_inline void guest_timing_enter_irqoff(void)
 {
        /*
         * This is running in ioctl context so its safe to assume that it's the
@@ -378,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void)
        instrumentation_begin();
        vtime_account_guest_enter();
        instrumentation_end();
+}
 
+/*
+ * Enter guest context and enter an RCU extended quiescent state.
+ *
+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ * unsafe to use any code which may directly or indirectly use RCU, tracing
+ * (including IRQ flag tracing), or lockdep. All code in this period must be
+ * non-instrumentable.
+ */
+static __always_inline void guest_context_enter_irqoff(void)
+{
        /*
         * KVM does not hold any references to rcu protected data when it
         * switches CPU into a guest mode. In fact switching to a guest mode
@@ -394,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void)
        }
 }
 
-static __always_inline void guest_exit_irqoff(void)
+/*
+ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
+ * guest_state_enter_irqoff().
+ */
+static __always_inline void guest_enter_irqoff(void)
+{
+       guest_timing_enter_irqoff();
+       guest_context_enter_irqoff();
+}
+
+/**
+ * guest_state_enter_irqoff - Fixup state when entering a guest
+ *
+ * Entry to a guest will enable interrupts, but the kernel state is interrupts
+ * disabled when this is invoked. Also tell RCU about it.
+ *
+ * 1) Trace interrupts on state
+ * 2) Invoke context tracking if enabled to adjust RCU state
+ * 3) Tell lockdep that interrupts are enabled
+ *
+ * Invoked from architecture specific code before entering a guest.
+ * Must be called with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke guest_timing_enter_irqoff() before this.
+ *
+ * Note: this is analogous to exit_to_user_mode().
+ */
+static __always_inline void guest_state_enter_irqoff(void)
+{
+       instrumentation_begin();
+       trace_hardirqs_on_prepare();
+       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       instrumentation_end();
+
+       guest_context_enter_irqoff();
+       lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+/*
+ * Exit guest context and exit an RCU extended quiescent state.
+ *
+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ * unsafe to use any code which may directly or indirectly use RCU, tracing
+ * (including IRQ flag tracing), or lockdep. All code in this period must be
+ * non-instrumentable.
+ */
+static __always_inline void guest_context_exit_irqoff(void)
 {
        context_tracking_guest_exit();
+}
 
+/*
+ * Stop accounting time towards a guest.
+ * Must be called after exiting guest context.
+ */
+static __always_inline void guest_timing_exit_irqoff(void)
+{
        instrumentation_begin();
        /* Flush the guest cputime we spent on the guest */
        vtime_account_guest_exit();
        instrumentation_end();
 }
 
+/*
+ * Deprecated. Architectures should move to guest_state_exit_irqoff() and
+ * guest_timing_exit_irqoff().
+ */
+static __always_inline void guest_exit_irqoff(void)
+{
+       guest_context_exit_irqoff();
+       guest_timing_exit_irqoff();
+}
+
 static inline void guest_exit(void)
 {
        unsigned long flags;
@@ -413,6 +492,33 @@ static inline void guest_exit(void)
        local_irq_restore(flags);
 }
 
+/**
+ * guest_state_exit_irqoff - Establish state when returning from guest mode
+ *
+ * Entry from a guest disables interrupts, but guest mode is traced as
+ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ *
+ * Invoked from architecture specific code after exiting a guest.
+ * Must be invoked with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke guest_timing_exit_irqoff() after this.
+ *
+ * Note: this is analogous to enter_from_user_mode().
+ */
+static __always_inline void guest_state_exit_irqoff(void)
+{
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       guest_context_exit_irqoff();
+
+       instrumentation_begin();
+       trace_hardirqs_off_finish();
+       instrumentation_end();
+}
+
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 {
        /*
index 605756f..7f99b4d 100644 (file)
@@ -380,6 +380,7 @@ enum {
        ATA_HORKAGE_MAX_TRIM_128M = (1 << 26),  /* Limit max trim size to 128M */
        ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27),  /* Disable NCQ on ATI chipset */
        ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28),  /* Identify device log missing */
+       ATA_HORKAGE_NO_LOG_DIR  = (1 << 29),    /* Do not read log directory */
 
         /* DMA mask for user DMA control: User visible values; DO NOT
            renumber */
index b46c39d..614f222 100644 (file)
@@ -244,6 +244,13 @@ struct netfs_cache_ops {
        int (*prepare_write)(struct netfs_cache_resources *cres,
                             loff_t *_start, size_t *_len, loff_t i_size,
                             bool no_space_allocated_yet);
+
+       /* Query the occupancy of the cache in a region, returning where the
+        * next chunk of data starts and how long it is.
+        */
+       int (*query_occupancy)(struct netfs_cache_resources *cres,
+                              loff_t start, size_t len, size_t granularity,
+                              loff_t *_data_start, size_t *_data_len);
 };
 
 struct readahead_control;
index 0dc7ad3..b06375e 100644 (file)
@@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
        memcpy(target->data, source->data, source->size);
 }
 
-
-/*
- * This is really a general kernel constant, but since nothing like
- * this is defined in the kernel headers, I have to do it here.
- */
-#define NFS_OFFSET_MAX         ((__s64)((~(__u64)0) >> 1))
-
-
 enum nfs3_stable_how {
        NFS_UNSTABLE = 0,
        NFS_DATA_SYNC = 1,
index 02aa493..68f81d8 100644 (file)
@@ -107,6 +107,7 @@ struct nfs_open_dir_context {
        __u64 dup_cookie;
        pgoff_t page_index;
        signed char duped;
+       bool eof;
 };
 
 /*
index 38cace1..01e16c7 100644 (file)
@@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
                                pmd_t *pmdp, pmd_t pmd);
 void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
                                pud_t *pudp, pud_t pud);
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+                                       unsigned long addr,
+                                       pmd_t pmd);
 
 static inline void page_table_check_alloc(struct page *page, unsigned int order)
 {
@@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm,
        __page_table_check_pud_set(mm, addr, pudp, pud);
 }
 
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+                                                   unsigned long addr,
+                                                   pmd_t pmd)
+{
+       if (static_branch_likely(&page_table_check_disabled))
+               return;
+
+       __page_table_check_pte_clear_range(mm, addr, pmd);
+}
+
 #else
 
 static inline void page_table_check_alloc(struct page *page, unsigned int order)
@@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm,
 {
 }
 
+static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
+                                                   unsigned long addr,
+                                                   pmd_t pmd)
+{
+}
+
 #endif /* CONFIG_PAGE_TABLE_CHECK */
 #endif /* __LINUX_PAGE_TABLE_CHECK_H */
index bc8713a..f4f4077 100644 (file)
@@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address)
 {
        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 }
+#define pte_index pte_index
 
 #ifndef pmd_index
 static inline unsigned long pmd_index(unsigned long address)
index 14efa0d..adab27b 100644 (file)
@@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
 
        memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
               sizeof(struct ip_tunnel_info) + md_size);
+#ifdef CONFIG_DST_CACHE
+       /* Unclone the dst cache if there is one */
+       if (new_md->u.tun_info.dst_cache.cache) {
+               int ret;
+
+               ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC);
+               if (ret) {
+                       metadata_dst_free(new_md);
+                       return ERR_PTR(ret);
+               }
+       }
+#endif
+
        skb_dst_drop(skb);
-       dst_hold(&new_md->dst);
        skb_dst_set(skb, &new_md->dst);
        return new_md;
 }
index 9b187d8..36da42c 100644 (file)
@@ -617,6 +617,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream);
 void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream);
 void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream);
 unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream);
+unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream);
 
 /**
  * snd_pcm_stream_lock_irqsave - Lock the PCM stream
@@ -636,6 +637,20 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
                                      unsigned long flags);
 
 /**
+ * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking
+ * @substream: PCM substream
+ * @flags: irq flags
+ *
+ * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with
+ * the single-depth lockdep subclass.
+ */
+#define snd_pcm_stream_lock_irqsave_nested(substream, flags)           \
+       do {                                                            \
+               typecheck(unsigned long, flags);                        \
+               flags = _snd_pcm_stream_lock_irqsave_nested(substream); \
+       } while (0)
+
+/**
  * snd_pcm_group_for_each_entry - iterate over the linked substreams
  * @s: the iterator
  * @substream: the substream
index b46bcdb..5191b57 100644 (file)
@@ -1624,9 +1624,6 @@ struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET  _IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET   _IO(KVMIO,   0xc4)
 
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
-
 struct kvm_s390_pv_sec_parm {
        __u64 origin;
        __u64 length;
@@ -2048,4 +2045,7 @@ struct kvm_stats_desc {
 
 #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
 
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
+
 #endif /* __LINUX_KVM_H */
index 4b33950..2607102 100644 (file)
@@ -106,7 +106,7 @@ enum ip_conntrack_status {
        IPS_NAT_CLASH = IPS_UNTRACKED,
 #endif
 
-       /* Conntrack got a helper explicitly attached via CT target. */
+       /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */
        IPS_HELPER_BIT = 13,
        IPS_HELPER = (1 << IPS_HELPER_BIT),
 
index 1b65042..82858b6 100644 (file)
@@ -465,6 +465,8 @@ struct perf_event_attr {
        /*
         * User provided data if sigtrap=1, passed back to user via
         * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
+        * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be
+        * truncated accordingly on 32 bit architectures.
         */
        __u64   sig_data;
 };
index ef0cafe..2d3e5df 100644 (file)
  *                                                                          *
  ****************************************************************************/
 
+#define AES_IEC958_STATUS_SIZE         24
+
 struct snd_aes_iec958 {
-       unsigned char status[24];       /* AES/IEC958 channel status bits */
+       unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
        unsigned char subcode[147];     /* AES/IEC958 subcode bits */
        unsigned char pad;              /* nothing */
        unsigned char dig_subframe[4];  /* AES/IEC958 subframe bits */
index 9ac5515..7a71453 100644 (file)
@@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref {
 /*
  * Inserts the grant references into the mapping table of an instance
  * of gntdev. N.B. This does not perform the mapping, which is deferred
- * until mmap() is called with @index as the offset.
+ * until mmap() is called with @index as the offset. @index should be
+ * considered opaque to userspace, with one exception: if no grant
+ * references have ever been inserted into the mapping table of this
+ * instance, @index will be set to 0. This is necessary to use gntdev
+ * with userspace APIs that expect a file descriptor that can be
+ * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this
+ * ioctl will fail.
  */
 #define IOCTL_GNTDEV_MAP_GRANT_REF \
 _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
index bbee8c6..4dc45a5 100644 (file)
@@ -1,6 +1,4 @@
 /******************************************************************************
- * evtchn.h
- *
  * Interface to /dev/xen/xenbus_backend.
  *
  * Copyright (c) 2011 Bastian Blank <waldi@debian.org>
index 6693daf..0dbdb98 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
         */
        un = lookup_undo(ulp, semid);
        if (un) {
+               spin_unlock(&ulp->lock);
                kvfree(new);
                goto success;
        }
@@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        ipc_assert_locked_object(&sma->sem_perm);
        list_add(&new->list_id, &sma->list_id);
        un = new;
-
-success:
        spin_unlock(&ulp->lock);
+success:
        sem_unlock(sma, -1);
 out:
        return un;
index fce5d43..a83928c 100644 (file)
@@ -185,7 +185,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
        case AUDITSC_EXECVE:
                return mask & AUDIT_PERM_EXEC;
        case AUDITSC_OPENAT2:
-               return mask & ACC_MODE((u32)((struct open_how *)ctx->argv[2])->flags);
+               return mask & ACC_MODE((u32)ctx->openat2.flags);
        default:
                return 0;
        }
index 76c754e..57c7197 100644 (file)
@@ -3238,6 +3238,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
        return err;
 }
 
+/*
+ * Copy event-type-independent attributes that may be modified.
+ *
+ * At present the only such attribute is sig_data, which is copied from
+ * @from into @to; every other field of @to is left untouched.
+ */
+static void perf_event_modify_copy_attr(struct perf_event_attr *to,
+                                       const struct perf_event_attr *from)
+{
+       to->sig_data = from->sig_data;
+}
+
 static int perf_event_modify_attr(struct perf_event *event,
                                  struct perf_event_attr *attr)
 {
@@ -3260,10 +3269,17 @@ static int perf_event_modify_attr(struct perf_event *event,
        WARN_ON_ONCE(event->ctx->parent_ctx);
 
        mutex_lock(&event->child_mutex);
+       /*
+        * Event-type-independent attributes must be copied before event-type
+        * modification, which will validate that final attributes match the
+        * source attributes after all relevant attributes have been copied.
+        */
+       perf_event_modify_copy_attr(&event->attr, attr);
        err = func(event, attr);
        if (err)
                goto out;
        list_for_each_entry(child, &event->child_list, child_list) {
+               perf_event_modify_copy_attr(&child->attr, attr);
                err = func(child, attr);
                if (err)
                        goto out;
index 9364f79..c71c096 100644 (file)
 
+/*
+ * Exported wrapper: forwards @state, @in and @inlen to __blake2s_update().
+ *
+ * NOTE(review): the trailing `false` is a flag interpreted by
+ * __blake2s_update(), whose definition is not visible here -- confirm its
+ * meaning against that helper.
+ */
 void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
 {
-       __blake2s_update(state, in, inlen, blake2s_compress);
+       __blake2s_update(state, in, inlen, false);
 }
 EXPORT_SYMBOL(blake2s_update);
 
+/*
+ * Exported wrapper: forwards @state and @out to __blake2s_final() and then
+ * wipes the whole *state with memzero_explicit(), so the state holds no
+ * hashing data once this returns.
+ *
+ * A NULL @out only triggers a warning, and only when DEBUG is enabled.
+ *
+ * NOTE(review): the trailing `false` is a flag interpreted by
+ * __blake2s_final(), whose definition is not visible here -- confirm its
+ * meaning against that helper.
+ */
 void blake2s_final(struct blake2s_state *state, u8 *out)
 {
        WARN_ON(IS_ENABLED(DEBUG) && !out);
-       __blake2s_final(state, out, blake2s_compress);
+       __blake2s_final(state, out, false);
        memzero_explicit(state, sizeof(*state));
 }
 EXPORT_SYMBOL(blake2s_final);
index a7ac97c..db2abd9 100644 (file)
@@ -171,6 +171,8 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
        ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
        pte = ptep_get(args->ptep);
        WARN_ON(pte_young(pte));
+
+       ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
 }
 
 static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
index 35f14d0..131492f 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/hashtable.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/page_idle.h>
+#include <linux/page_table_check.h>
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
 
@@ -1416,6 +1417,21 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
        return 0;
 }
 
+/*
+ * Clear the PMD entry at @pmdp and free the PTE page table it referenced.
+ *
+ * The caller must hold the mmap lock of @mm for writing (asserted below).
+ * The PMD is cleared and flushed under its page-table lock; the page-table
+ * accounting, the page_table_check bookkeeping and the actual free happen
+ * after that lock has been dropped, using the PMD value read under the lock.
+ */
+static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+                                 unsigned long addr, pmd_t *pmdp)
+{
+       spinlock_t *pmd_ptl;
+       pmd_t old_pmd;
+
+       mmap_assert_write_locked(mm);
+
+       /* Take the old entry out of the page tables under the PMD lock. */
+       pmd_ptl = pmd_lock(vma->vm_mm, pmdp);
+       old_pmd = pmdp_collapse_flush(vma, addr, pmdp);
+       spin_unlock(pmd_ptl);
+
+       /* One PTE page table fewer is accounted to this mm. */
+       mm_dec_nr_ptes(mm);
+
+       /* Let page_table_check see the range go away before the table is freed. */
+       page_table_check_pte_clear_range(mm, addr, old_pmd);
+       pte_free(mm, pmd_pgtable(old_pmd));
+}
+
 /**
  * collapse_pte_mapped_thp - Try to collapse a pte-mapped THP for mm at
  * address haddr.
@@ -1433,7 +1449,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        struct vm_area_struct *vma = find_vma(mm, haddr);
        struct page *hpage;
        pte_t *start_pte, *pte;
-       pmd_t *pmd, _pmd;
+       pmd_t *pmd;
        spinlock_t *ptl;
        int count = 0;
        int i;
@@ -1509,12 +1525,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        }
 
        /* step 4: collapse pmd */
-       ptl = pmd_lock(vma->vm_mm, pmd);
-       _pmd = pmdp_collapse_flush(vma, haddr, pmd);
-       spin_unlock(ptl);
-       mm_dec_nr_ptes(mm);
-       pte_free(mm, pmd_pgtable(_pmd));
-
+       collapse_and_free_pmd(mm, vma, haddr, pmd);
 drop_hpage:
        unlock_page(hpage);
        put_page(hpage);
@@ -1552,7 +1563,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long addr;
-       pmd_t *pmd, _pmd;
+       pmd_t *pmd;
 
        i_mmap_lock_write(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@@ -1591,14 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
                 * reverse order. Trylock is a way to avoid deadlock.
                 */
                if (mmap_write_trylock(mm)) {
-                       if (!khugepaged_test_exit(mm)) {
-                               spinlock_t *ptl = pmd_lock(mm, pmd);
-                               /* assume page table is clear */
-                               _pmd = pmdp_collapse_flush(vma, addr, pmd);
-                               spin_unlock(ptl);
-                               mm_dec_nr_ptes(mm);
-                               pte_free(mm, pmd_pgtable(_pmd));
-                       }
+                       if (!khugepaged_test_exit(mm))
+                               collapse_and_free_pmd(mm, vma, addr, pmd);
                        mmap_write_unlock(mm);
                } else {
                        /* Try again later */
index dc3758f..7580baa 100644 (file)
@@ -1410,7 +1410,8 @@ static void kmemleak_scan(void)
 {
        unsigned long flags;
        struct kmemleak_object *object;
-       int i;
+       struct zone *zone;
+       int __maybe_unused i;
        int new_leaks = 0;
 
        jiffies_last_scan = jiffies;
@@ -1450,9 +1451,9 @@ static void kmemleak_scan(void)
         * Struct page scanning for each node.
         */
        get_online_mems();
-       for_each_online_node(i) {
-               unsigned long start_pfn = node_start_pfn(i);
-               unsigned long end_pfn = node_end_pfn(i);
+       for_each_populated_zone(zone) {
+               unsigned long start_pfn = zone->zone_start_pfn;
+               unsigned long end_pfn = zone_end_pfn(zone);
                unsigned long pfn;
 
                for (pfn = start_pfn; pfn < end_pfn; pfn++) {
@@ -1461,8 +1462,8 @@ static void kmemleak_scan(void)
                        if (!page)
                                continue;
 
-                       /* only scan pages belonging to this node */
-                       if (page_to_nid(page) != i)
+                       /* only scan pages belonging to this zone */
+                       if (page_zone(page) != zone)
                                continue;
                        /* only scan if page is in use */
                        if (page_count(page) == 0)
index 6a0ddda..f67c4c7 100644 (file)
@@ -115,7 +115,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
         * onlining - just onlined memory won't immediately be considered for
         * allocation.
         */
-       if (!isolated_page && PageBuddy(page)) {
+       if (!isolated_page) {
                nr_pages = move_freepages_block(zone, page, migratetype, NULL);
                __mod_zone_freepage_state(zone, nr_pages, migratetype);
        }
index 7504e7c..3763bd0 100644 (file)
@@ -86,8 +86,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
 {
        struct page_ext *page_ext;
        struct page *page;
+       unsigned long i;
        bool anon;
-       int i;
 
        if (!pfn_valid(pfn))
                return;
@@ -121,8 +121,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
 {
        struct page_ext *page_ext;
        struct page *page;
+       unsigned long i;
        bool anon;
-       int i;
 
        if (!pfn_valid(pfn))
                return;
@@ -152,10 +152,10 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
 void __page_table_check_zero(struct page *page, unsigned int order)
 {
        struct page_ext *page_ext = lookup_page_ext(page);
-       int i;
+       unsigned long i;
 
        BUG_ON(!page_ext);
-       for (i = 0; i < (1 << order); i++) {
+       for (i = 0; i < (1ul << order); i++) {
                struct page_table_check *ptc = get_page_table_check(page_ext);
 
                BUG_ON(atomic_read(&ptc->anon_map_count));
@@ -206,17 +206,10 @@ EXPORT_SYMBOL(__page_table_check_pud_clear);
 void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
                                pte_t *ptep, pte_t pte)
 {
-       pte_t old_pte;
-
        if (&init_mm == mm)
                return;
 
-       old_pte = *ptep;
-       if (pte_user_accessible_page(old_pte)) {
-               page_table_check_clear(mm, addr, pte_pfn(old_pte),
-                                      PAGE_SIZE >> PAGE_SHIFT);
-       }
-
+       __page_table_check_pte_clear(mm, addr, *ptep);
        if (pte_user_accessible_page(pte)) {
                page_table_check_set(mm, addr, pte_pfn(pte),
                                     PAGE_SIZE >> PAGE_SHIFT,
@@ -228,17 +221,10 @@ EXPORT_SYMBOL(__page_table_check_pte_set);
 void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
                                pmd_t *pmdp, pmd_t pmd)
 {
-       pmd_t old_pmd;
-
        if (&init_mm == mm)
                return;
 
-       old_pmd = *pmdp;
-       if (pmd_user_accessible_page(old_pmd)) {
-               page_table_check_clear(mm, addr, pmd_pfn(old_pmd),
-                                      PMD_PAGE_SIZE >> PAGE_SHIFT);
-       }
-
+       __page_table_check_pmd_clear(mm, addr, *pmdp);
        if (pmd_user_accessible_page(pmd)) {
                page_table_check_set(mm, addr, pmd_pfn(pmd),
                                     PMD_PAGE_SIZE >> PAGE_SHIFT,
@@ -250,17 +236,10 @@ EXPORT_SYMBOL(__page_table_check_pmd_set);
 void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
                                pud_t *pudp, pud_t pud)
 {
-       pud_t old_pud;
-
        if (&init_mm == mm)
                return;
 
-       old_pud = *pudp;
-       if (pud_user_accessible_page(old_pud)) {
-               page_table_check_clear(mm, addr, pud_pfn(old_pud),
-                                      PUD_PAGE_SIZE >> PAGE_SHIFT);
-       }
-
+       __page_table_check_pud_clear(mm, addr, *pudp);
        if (pud_user_accessible_page(pud)) {
                page_table_check_set(mm, addr, pud_pfn(pud),
                                     PUD_PAGE_SIZE >> PAGE_SHIFT,
@@ -268,3 +247,23 @@ void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
        }
 }
 EXPORT_SYMBOL(__page_table_check_pud_set);
+
+/*
+ * Run the page_table_check "clear" accounting for PTRS_PER_PTE consecutive
+ * entries of the PTE table referenced by @pmd.
+ *
+ * @mm:   address space the table belongs to; nothing is done for init_mm.
+ * @addr: virtual address of the first entry visited; it is advanced by
+ *        PAGE_SIZE for each entry.  NOTE(review): the walk starts at the
+ *        slot for @addr and covers PTRS_PER_PTE entries, so callers are
+ *        presumably expected to pass the address of the table's first
+ *        entry -- confirm.
+ * @pmd:  PMD value, passed by value; nothing is done when it is bad or a
+ *        leaf entry.
+ */
+void __page_table_check_pte_clear_range(struct mm_struct *mm,
+                                       unsigned long addr,
+                                       pmd_t pmd)
+{
+       if (&init_mm == mm)
+               return;
+
+       if (!pmd_bad(pmd) && !pmd_leaf(pmd)) {
+               pte_t *ptep = pte_offset_map(&pmd, addr);
+               unsigned long i;
+
+               for (i = 0; i < PTRS_PER_PTE; i++) {
+                       __page_table_check_pte_clear(mm, addr, *ptep);
+                       addr += PAGE_SIZE;
+                       ptep++;
+               }
+               /*
+                * Unmap only after the last dereference: the entries must
+                * not be read once the mapping has been dropped.  ptep has
+                * walked PTRS_PER_PTE entries forward, so rewind it to the
+                * pointer that pte_offset_map() returned.
+                */
+               pte_unmap(ptep - PTRS_PER_PTE);
+       }
+}
index 1a705a4..5eaf388 100644 (file)
@@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
                                   u32 skb_prio, u16 vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
                                 u32 skb_prio, u16 vlan_prio);
+void vlan_dev_free_egress_priority(const struct net_device *dev);
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
                               size_t size);
@@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev,
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
-void vlan_dev_uninit(struct net_device *dev);
 bool vlan_dev_inherit_address(struct net_device *dev,
                              struct net_device *real_dev);
 
index 26d031a..d190282 100644 (file)
@@ -622,7 +622,7 @@ static int vlan_dev_init(struct net_device *dev)
 }
 
 /* Note: this function might be called multiple times for the same device. */
-void vlan_dev_uninit(struct net_device *dev)
+void vlan_dev_free_egress_priority(const struct net_device *dev)
 {
        struct vlan_priority_tci_mapping *pm;
        struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -636,6 +636,16 @@ void vlan_dev_uninit(struct net_device *dev)
        }
 }
 
+/*
+ * Uninit hook for a VLAN device: calls vlan_dev_free_egress_priority() and
+ * then drops the tracked reference the vlan holds on its real device.
+ */
+static void vlan_dev_uninit(struct net_device *dev)
+{
+       struct vlan_dev_priv *priv;
+
+       vlan_dev_free_egress_priority(dev);
+
+       /* Release the vlan's reference to real_dev */
+       priv = vlan_dev_priv(dev);
+       dev_put_track(priv->real_dev, &priv->dev_tracker);
+}
+
 static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
        netdev_features_t features)
 {
@@ -846,9 +856,6 @@ static void vlan_dev_free(struct net_device *dev)
 
        free_percpu(vlan->vlan_pcpu_stats);
        vlan->vlan_pcpu_stats = NULL;
-
-       /* Get rid of the vlan's reference to real_dev */
-       dev_put_track(vlan->real_dev, &vlan->dev_tracker);
 }
 
 void vlan_setup(struct net_device *dev)
index 0db85ae..53b1955 100644 (file)
@@ -183,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
                return -EINVAL;
 
        err = vlan_changelink(dev, tb, data, extack);
-       if (!err)
-               err = register_vlan_dev(dev, extack);
        if (err)
-               vlan_dev_uninit(dev);
+               return err;
+       err = register_vlan_dev(dev, extack);
+       if (err)
+               vlan_dev_free_egress_priority(dev);
        return err;
 }
 
index 3e49d28..d53cbb4 100644 (file)
@@ -91,9 +91,10 @@ again:
                        spin_unlock_bh(&ax25_list_lock);
                        lock_sock(sk);
                        s->ax25_dev = NULL;
+                       dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
                        ax25_dev_put(ax25_dev);
-                       release_sock(sk);
                        ax25_disconnect(s, ENETUNREACH);
+                       release_sock(sk);
                        spin_lock_bh(&ax25_list_lock);
                        sock_put(sk);
                        /* The entry could have been deleted from the
@@ -1116,8 +1117,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                }
        }
 
-       if (ax25_dev != NULL)
+       if (ax25_dev) {
                ax25_fillin_cb(ax25, ax25_dev);
+               dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
+       }
 
 done:
        ax25_cb_add(ax25);
index 02cbcb2..d2a430b 100644 (file)
@@ -56,6 +56,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/spinlock.h>
 #include <linux/hrtimer.h>
 #include <linux/wait.h>
 #include <linux/uio.h>
@@ -145,6 +146,7 @@ struct isotp_sock {
        struct tpcon rx, tx;
        struct list_head notifier;
        wait_queue_head_t wait;
+       spinlock_t rx_lock; /* protect single thread state machine */
 };
 
 static LIST_HEAD(isotp_notifier_list);
@@ -615,11 +617,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
 
        n_pci_type = cf->data[ae] & 0xF0;
 
+       /* Make sure the state changes and data structures stay consistent at
+        * CAN frame reception time. This locking is not needed in real world
+        * use cases but the inconsistency can be triggered with syzkaller.
+        */
+       spin_lock(&so->rx_lock);
+
        if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
                /* check rx/tx path half duplex expectations */
                if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
                    (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
-                       return;
+                       goto out_unlock;
        }
 
        switch (n_pci_type) {
@@ -668,6 +676,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data)
                isotp_rcv_cf(sk, cf, ae, skb);
                break;
        }
+
+out_unlock:
+       spin_unlock(&so->rx_lock);
 }
 
 static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
@@ -876,7 +887,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
        if (!size || size > MAX_MSG_LENGTH) {
                err = -EINVAL;
-               goto err_out;
+               goto err_out_drop;
        }
 
        /* take care of a potential SF_DL ESC offset for TX_DL > 8 */
@@ -886,24 +897,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
            (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
                err = -EINVAL;
-               goto err_out;
+               goto err_out_drop;
        }
 
        err = memcpy_from_msg(so->tx.buf, msg, size);
        if (err < 0)
-               goto err_out;
+               goto err_out_drop;
 
        dev = dev_get_by_index(sock_net(sk), so->ifindex);
        if (!dev) {
                err = -ENXIO;
-               goto err_out;
+               goto err_out_drop;
        }
 
        skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
                                  msg->msg_flags & MSG_DONTWAIT, &err);
        if (!skb) {
                dev_put(dev);
-               goto err_out;
+               goto err_out_drop;
        }
 
        can_skb_reserve(skb);
@@ -965,7 +976,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        if (err) {
                pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
                               __func__, ERR_PTR(err));
-               goto err_out;
+               goto err_out_drop;
        }
 
        if (wait_tx_done) {
@@ -978,6 +989,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
        return size;
 
+err_out_drop:
+       /* drop this PDU and unlock a potential wait queue */
+       old_state = ISOTP_IDLE;
 err_out:
        so->tx.state = old_state;
        if (so->tx.state == ISOTP_IDLE)
@@ -1444,6 +1458,7 @@ static int isotp_init(struct sock *sk)
        so->txtimer.function = isotp_tx_timer_handler;
 
        init_waitqueue_head(&so->wait);
+       spin_lock_init(&so->rx_lock);
 
        spin_lock(&isotp_notifier_lock);
        list_add_tail(&so->notifier, &isotp_notifier_list);
index ecc400a..4c64415 100644 (file)
@@ -246,6 +246,7 @@ enum {
        Opt_cephx_sign_messages,
        Opt_tcp_nodelay,
        Opt_abort_on_full,
+       Opt_rxbounce,
 };
 
 enum {
@@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
        fsparam_u32     ("osdkeepalive",                Opt_osdkeepalivetimeout),
        fsparam_enum    ("read_from_replica",           Opt_read_from_replica,
                         ceph_param_read_from_replica),
+       fsparam_flag    ("rxbounce",                    Opt_rxbounce),
        fsparam_enum    ("ms_mode",                     Opt_ms_mode,
                         ceph_param_ms_mode),
        fsparam_string  ("secret",                      Opt_secret),
@@ -584,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
        case Opt_abort_on_full:
                opt->flags |= CEPH_OPT_ABORT_ON_FULL;
                break;
+       case Opt_rxbounce:
+               opt->flags |= CEPH_OPT_RXBOUNCE;
+               break;
 
        default:
                BUG();
@@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
                seq_puts(m, "notcp_nodelay,");
        if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
                seq_puts(m, "abort_on_full,");
+       if (opt->flags & CEPH_OPT_RXBOUNCE)
+               seq_puts(m, "rxbounce,");
 
        if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
                seq_printf(m, "mount_timeout=%d,",
index 45eba2d..d3bb656 100644 (file)
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
                ceph_msg_put(con->out_msg);
                con->out_msg = NULL;
        }
+       if (con->bounce_page) {
+               __free_page(con->bounce_page);
+               con->bounce_page = NULL;
+       }
 
        if (ceph_msgr2(from_msgr(con->msgr)))
                ceph_con_v2_reset_protocol(con);
index 2cb5ffd..6b014ec 100644 (file)
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
 
 static int read_partial_msg_data(struct ceph_connection *con)
 {
-       struct ceph_msg *msg = con->in_msg;
-       struct ceph_msg_data_cursor *cursor = &msg->cursor;
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
        bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
        struct page *page;
        size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
        u32 crc = 0;
        int ret;
 
-       if (!msg->num_data_items)
-               return -EIO;
-
        if (do_datacrc)
                crc = con->in_data_crc;
        while (cursor->total_resid) {
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
        return 1;       /* must return > 0 to indicate success */
 }
 
+/*
+ * Receive the data portion of con->in_msg through the connection's bounce
+ * page.
+ *
+ * Each chunk is received into con->bounce_page (allocated on first use),
+ * folded into the running data CRC from there, and only then copied to its
+ * place in the message, so the CRC covers exactly the bytes that were copied.
+ *
+ * The running CRC is written back to con->in_data_crc on every exit taken
+ * after the loop has been entered.
+ *
+ * Returns 1 once the cursor has no data left, the (<= 0) result of
+ * ceph_tcp_recvpage() when a receive does not make progress, or -ENOMEM if
+ * the bounce page cannot be allocated.
+ */
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+       struct page *dst_page;
+       size_t dst_off, chunk_len;
+       u32 data_crc;
+       int nread = 1;
+
+       if (unlikely(!con->bounce_page)) {
+               con->bounce_page = alloc_page(GFP_NOIO);
+               if (!con->bounce_page) {
+                       pr_err("failed to allocate bounce page\n");
+                       return -ENOMEM;
+               }
+       }
+
+       data_crc = con->in_data_crc;
+       while (cursor->total_resid) {
+               if (!cursor->resid) {
+                       /* nothing left in the current piece; advance by 0 */
+                       ceph_msg_data_advance(cursor, 0);
+                       continue;
+               }
+
+               dst_page = ceph_msg_data_next(cursor, &dst_off, &chunk_len,
+                                             NULL);
+               nread = ceph_tcp_recvpage(con->sock, con->bounce_page, 0,
+                                         chunk_len);
+               if (nread <= 0)
+                       break;
+
+               /* checksum first, then copy the very same bytes out */
+               data_crc = crc32c(data_crc, page_address(con->bounce_page),
+                                 nread);
+               memcpy_to_page(dst_page, dst_off,
+                              page_address(con->bounce_page), nread);
+
+               ceph_msg_data_advance(cursor, nread);
+       }
+       con->in_data_crc = data_crc;
+
+       /* must return > 0 to indicate success */
+       return nread <= 0 ? nread : 1;
+}
+
 /*
  * read (part of) a message.
  */
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
 
        /* (page) data */
        if (data_len) {
-               ret = read_partial_msg_data(con);
+               if (!m->num_data_items)
+                       return -EIO;
+
+               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+                       ret = read_partial_msg_data_bounce(con);
+               else
+                       ret = read_partial_msg_data(con);
                if (ret <= 0)
                        return ret;
        }
index c4099b6..c81379f 100644 (file)
@@ -57,8 +57,9 @@
 #define IN_S_HANDLE_CONTROL_REMAINDER  3
 #define IN_S_PREPARE_READ_DATA         4
 #define IN_S_PREPARE_READ_DATA_CONT    5
-#define IN_S_HANDLE_EPILOGUE           6
-#define IN_S_FINISH_SKIP               7
+#define IN_S_PREPARE_READ_ENC_PAGE     6
+#define IN_S_HANDLE_EPILOGUE           7
+#define IN_S_FINISH_SKIP               8
 
 #define OUT_S_QUEUE_DATA               1
 #define OUT_S_QUEUE_DATA_CONT          2
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
                         padded_len(rem_len) + CEPH_GCM_TAG_LEN);
 }
 
-static int decrypt_message(struct ceph_connection *con)
+/*
+ * Decrypt the tail of the incoming message (con->in_msg).
+ *
+ * One sg table is built over the pages in con->v2.in_enc_pages, covering
+ * tail_len bytes, and a second one over the message itself via
+ * setup_message_sgs(); both are handed to gcm_crypt().
+ * NOTE(review): presumably enc_sgt is the ciphertext source and sgt the
+ * plaintext destination -- confirm against gcm_crypt().
+ *
+ * On success the in_enc_pages vector is released and its pointer and count
+ * are reset.  On failure this function leaves in_enc_pages in place.  Both
+ * sg tables are freed on every path.
+ *
+ * Returns 0 on success, or the error from the step that failed.
+ */
+static int decrypt_tail(struct ceph_connection *con)
 {
+       struct sg_table enc_sgt = {};
        struct sg_table sgt = {};
+       int tail_len;
        int ret;
 
+       tail_len = tail_onwire_len(con->in_msg, true);
+       ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+                                       con->v2.in_enc_page_cnt, 0, tail_len,
+                                       GFP_NOIO);
+       if (ret)
+               goto out;
+
        ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
                        MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
                        con->v2.in_buf, true);
        if (ret)
                goto out;
 
-       ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
-                       tail_onwire_len(con->in_msg, true));
+       dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+            con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+       ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+       if (ret)
+               goto out;
+
+       /* the encrypted pages are released only after a successful decrypt */
+       WARN_ON(!con->v2.in_enc_page_cnt);
+       ceph_release_page_vector(con->v2.in_enc_pages,
+                                con->v2.in_enc_page_cnt);
+       con->v2.in_enc_pages = NULL;
+       con->v2.in_enc_page_cnt = 0;
 
 out:
        sg_free_table(&sgt);
+       sg_free_table(&enc_sgt);
        return ret;
 }
 
@@ -1733,54 +1753,157 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
        return 0;
 }
 
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
 {
        struct bio_vec bv;
 
-       if (!con_secure(con))
-               con->in_data_crc = -1;
+       con->in_data_crc = -1;
        ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
                                  data_len(con->in_msg));
 
        get_bvec_at(&con->v2.in_cursor, &bv);
-       set_in_bvec(con, &bv);
+       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+               if (unlikely(!con->bounce_page)) {
+                       con->bounce_page = alloc_page(GFP_NOIO);
+                       if (!con->bounce_page) {
+                               pr_err("failed to allocate bounce page\n");
+                               return -ENOMEM;
+                       }
+               }
+
+               bv.bv_page = con->bounce_page;
+               bv.bv_offset = 0;
+               set_in_bvec(con, &bv);
+       } else {
+               set_in_bvec(con, &bv);
+       }
        con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+       return 0;
 }
 
 static void prepare_read_data_cont(struct ceph_connection *con)
 {
        struct bio_vec bv;
 
-       if (!con_secure(con))
+       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+               con->in_data_crc = crc32c(con->in_data_crc,
+                                         page_address(con->bounce_page),
+                                         con->v2.in_bvec.bv_len);
+
+               get_bvec_at(&con->v2.in_cursor, &bv);
+               memcpy_to_page(bv.bv_page, bv.bv_offset,
+                              page_address(con->bounce_page),
+                              con->v2.in_bvec.bv_len);
+       } else {
                con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
                                                    con->v2.in_bvec.bv_page,
                                                    con->v2.in_bvec.bv_offset,
                                                    con->v2.in_bvec.bv_len);
+       }
 
        ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
        if (con->v2.in_cursor.total_resid) {
                get_bvec_at(&con->v2.in_cursor, &bv);
-               set_in_bvec(con, &bv);
+               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+                       bv.bv_page = con->bounce_page;
+                       bv.bv_offset = 0;
+                       set_in_bvec(con, &bv);
+               } else {
+                       set_in_bvec(con, &bv);
+               }
                WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
                return;
        }
 
        /*
-        * We've read all data.  Prepare to read data padding (if any)
-        * and epilogue.
+        * We've read all data.  Prepare to read epilogue.
         */
        reset_in_kvecs(con);
-       if (con_secure(con)) {
-               if (need_padding(data_len(con->in_msg)))
-                       add_in_kvec(con, DATA_PAD(con->v2.in_buf),
-                                   padding_len(data_len(con->in_msg)));
-               add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+       add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+       con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+       struct ceph_msg *msg = con->in_msg;
+
+       if (!front_len(msg) && !middle_len(msg)) {
+               WARN_ON(!data_len(msg));
+               return prepare_read_data(con);
+       }
+
+       reset_in_kvecs(con);
+       if (front_len(msg)) {
+               add_in_kvec(con, msg->front.iov_base, front_len(msg));
+               WARN_ON(msg->front.iov_len != front_len(msg));
+       }
+       if (middle_len(msg)) {
+               add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+               WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+       }
+
+       if (data_len(msg)) {
+               con->v2.in_state = IN_S_PREPARE_READ_DATA;
        } else {
                add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+               con->v2.in_state = IN_S_HANDLE_EPILOGUE;
        }
+       return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+       struct bio_vec bv;
+
+       dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+            con->v2.in_enc_resid);
+       WARN_ON(!con->v2.in_enc_resid);
+
+       bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+       bv.bv_offset = 0;
+       bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+       set_in_bvec(con, &bv);
+       con->v2.in_enc_i++;
+       con->v2.in_enc_resid -= bv.bv_len;
+
+       if (con->v2.in_enc_resid) {
+               con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+               return;
+       }
+
+       /*
+        * We are set to read the last piece of ciphertext (ending
+        * with epilogue) + auth tag.
+        */
+       WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
        con->v2.in_state = IN_S_HANDLE_EPILOGUE;
 }
 
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+       struct page **enc_pages;
+       int enc_page_cnt;
+       int tail_len;
+
+       tail_len = tail_onwire_len(con->in_msg, true);
+       WARN_ON(!tail_len);
+
+       enc_page_cnt = calc_pages_for(0, tail_len);
+       enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+       if (IS_ERR(enc_pages))
+               return PTR_ERR(enc_pages);
+
+       WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+       con->v2.in_enc_pages = enc_pages;
+       con->v2.in_enc_page_cnt = enc_page_cnt;
+       con->v2.in_enc_resid = tail_len;
+       con->v2.in_enc_i = 0;
+
+       prepare_read_enc_page(con);
+       return 0;
+}
+
 static void __finish_skip(struct ceph_connection *con)
 {
        con->in_seq++;
@@ -2589,47 +2712,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
        }
 
        msg = con->in_msg;  /* set in process_message_header() */
-       if (!front_len(msg) && !middle_len(msg)) {
-               if (!data_len(msg))
-                       return process_message(con);
-
-               prepare_read_data(con);
-               return 0;
-       }
-
-       reset_in_kvecs(con);
        if (front_len(msg)) {
                WARN_ON(front_len(msg) > msg->front_alloc_len);
-               add_in_kvec(con, msg->front.iov_base, front_len(msg));
                msg->front.iov_len = front_len(msg);
-
-               if (con_secure(con) && need_padding(front_len(msg)))
-                       add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
-                                   padding_len(front_len(msg)));
        } else {
                msg->front.iov_len = 0;
        }
        if (middle_len(msg)) {
                WARN_ON(middle_len(msg) > msg->middle->alloc_len);
-               add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
                msg->middle->vec.iov_len = middle_len(msg);
-
-               if (con_secure(con) && need_padding(middle_len(msg)))
-                       add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
-                                   padding_len(middle_len(msg)));
        } else if (msg->middle) {
                msg->middle->vec.iov_len = 0;
        }
 
-       if (data_len(msg)) {
-               con->v2.in_state = IN_S_PREPARE_READ_DATA;
-       } else {
-               add_in_kvec(con, con->v2.in_buf,
-                           con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
-                                             CEPH_EPILOGUE_PLAIN_LEN);
-               con->v2.in_state = IN_S_HANDLE_EPILOGUE;
-       }
-       return 0;
+       if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+               return process_message(con);
+
+       if (con_secure(con))
+               return prepare_read_tail_secure(con);
+
+       return prepare_read_tail_plain(con);
 }
 
 static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2819,7 @@ static int handle_epilogue(struct ceph_connection *con)
        int ret;
 
        if (con_secure(con)) {
-               ret = decrypt_message(con);
+               ret = decrypt_tail(con);
                if (ret) {
                        if (ret == -EBADMSG)
                                con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2887,16 @@ static int populate_in_iter(struct ceph_connection *con)
                        ret = handle_control_remainder(con);
                        break;
                case IN_S_PREPARE_READ_DATA:
-                       prepare_read_data(con);
-                       ret = 0;
+                       ret = prepare_read_data(con);
                        break;
                case IN_S_PREPARE_READ_DATA_CONT:
                        prepare_read_data_cont(con);
                        ret = 0;
                        break;
+               case IN_S_PREPARE_READ_ENC_PAGE:
+                       prepare_read_enc_page(con);
+                       ret = 0;
+                       break;
                case IN_S_HANDLE_EPILOGUE:
                        ret = handle_epilogue(con);
                        break;
@@ -3326,20 +3431,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
 
 static void revoke_at_prepare_read_data(struct ceph_connection *con)
 {
-       int remaining;  /* data + [data padding] + epilogue */
+       int remaining;
        int resid;
 
+       WARN_ON(con_secure(con));
        WARN_ON(!data_len(con->in_msg));
        WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
        resid = iov_iter_count(&con->v2.in_iter);
        WARN_ON(!resid);
 
-       if (con_secure(con))
-               remaining = padded_len(data_len(con->in_msg)) +
-                           CEPH_EPILOGUE_SECURE_LEN;
-       else
-               remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+       remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
        dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
             remaining);
        con->v2.in_iter.count -= resid;
@@ -3350,8 +3451,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
 static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
 {
        int recved, resid;  /* current piece of data */
-       int remaining;  /* [data padding] + epilogue */
+       int remaining;
 
+       WARN_ON(con_secure(con));
        WARN_ON(!data_len(con->in_msg));
        WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
        resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3465,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
                ceph_msg_data_advance(&con->v2.in_cursor, recved);
        WARN_ON(resid > con->v2.in_cursor.total_resid);
 
-       if (con_secure(con))
-               remaining = padding_len(data_len(con->in_msg)) +
-                           CEPH_EPILOGUE_SECURE_LEN;
-       else
-               remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+       remaining = CEPH_EPILOGUE_PLAIN_LEN;
        dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
             con->v2.in_cursor.total_resid, remaining);
        con->v2.in_iter.count -= resid;
@@ -3376,11 +3473,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
        con->v2.in_state = IN_S_FINISH_SKIP;
 }
 
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+       int resid;  /* current enc page (not necessarily data) */
+
+       WARN_ON(!con_secure(con));
+       WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+       resid = iov_iter_count(&con->v2.in_iter);
+       WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+       dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+            con->v2.in_enc_resid);
+       con->v2.in_iter.count -= resid;
+       set_in_skip(con, resid + con->v2.in_enc_resid);
+       con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
 static void revoke_at_handle_epilogue(struct ceph_connection *con)
 {
        int resid;
 
-       WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
        resid = iov_iter_count(&con->v2.in_iter);
        WARN_ON(!resid);
 
@@ -3399,6 +3511,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
        case IN_S_PREPARE_READ_DATA_CONT:
                revoke_at_prepare_read_data_cont(con);
                break;
+       case IN_S_PREPARE_READ_ENC_PAGE:
+               revoke_at_prepare_read_enc_page(con);
+               break;
        case IN_S_HANDLE_EPILOGUE:
                revoke_at_handle_epilogue(con);
                break;
@@ -3432,6 +3547,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
        clear_out_sign_kvecs(con);
        free_conn_bufs(con);
 
+       if (con->v2.in_enc_pages) {
+               WARN_ON(!con->v2.in_enc_page_cnt);
+               ceph_release_page_vector(con->v2.in_enc_pages,
+                                        con->v2.in_enc_page_cnt);
+               con->v2.in_enc_pages = NULL;
+               con->v2.in_enc_page_cnt = 0;
+       }
        if (con->v2.out_enc_pages) {
                WARN_ON(!con->v2.out_enc_page_cnt);
                ceph_release_page_vector(con->v2.out_enc_pages,
index 0118f0a..9d0388b 100644 (file)
@@ -681,7 +681,7 @@ exit:
         * while trying to recycle fragments on __skb_frag_unref() we need
         * to make one SKB responsible for triggering the recycle path.
         * So disable the recycling bit if an SKB is cloned and we have
-        * additional references to to the fragmented part of the SKB.
+        * additional references to the fragmented part of the SKB.
         * Eventually the last SKB will have the recycling bit set and it's
         * dataref set to 0, which will trigger the recycling
         */
index 909b045..e498c92 100644 (file)
@@ -1784,7 +1784,6 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch);
 void dsa_switch_shutdown(struct dsa_switch *ds)
 {
        struct net_device *master, *slave_dev;
-       LIST_HEAD(unregister_list);
        struct dsa_port *dp;
 
        mutex_lock(&dsa2_mutex);
@@ -1795,25 +1794,13 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
                slave_dev = dp->slave;
 
                netdev_upper_dev_unlink(master, slave_dev);
-               /* Just unlinking ourselves as uppers of the master is not
-                * sufficient. When the master net device unregisters, that will
-                * also call dev_close, which we will catch as NETDEV_GOING_DOWN
-                * and trigger a dev_close on our own devices (dsa_slave_close).
-                * In turn, that will call dev_mc_unsync on the master's net
-                * device. If the master is also a DSA switch port, this will
-                * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
-                * its own master. Lockdep will complain about the fact that
-                * all cascaded masters have the same dsa_master_addr_list_lock_key,
-                * which it normally would not do if the cascaded masters would
-                * be in a proper upper/lower relationship, which we've just
-                * destroyed.
-                * To suppress the lockdep warnings, let's actually unregister
-                * the DSA slave interfaces too, to avoid the nonsensical
-                * multicast address list synchronization on shutdown.
-                */
-               unregister_netdevice_queue(slave_dev, &unregister_list);
        }
-       unregister_netdevice_many(&unregister_list);
+
+       /* Disconnect from further netdevice notifiers on the master,
+        * since netdev_uses_dsa() will now return false.
+        */
+       dsa_switch_for_each_cpu_port(dp, ds)
+               dp->master->dsa_ptr = NULL;
 
        rtnl_unlock();
        mutex_unlock(&dsa2_mutex);
index 4a55a62..c860519 100644 (file)
@@ -256,7 +256,9 @@ static int __net_init ipmr_rules_init(struct net *net)
        return 0;
 
 err2:
+       rtnl_lock();
        ipmr_free_table(mrt);
+       rtnl_unlock();
 err1:
        fib_rules_unregister(ops);
        return err;
index a03a6bf..760e822 100644 (file)
@@ -936,6 +936,22 @@ void tcp_remove_empty_skb(struct sock *sk)
        }
 }
 
+/* skb changing from pure zc to mixed, must charge zc */
+static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
+{
+       if (unlikely(skb_zcopy_pure(skb))) {
+               u32 extra = skb->truesize -
+                           SKB_TRUESIZE(skb_end_offset(skb));
+
+               if (!sk_wmem_schedule(sk, extra))
+                       return -ENOMEM;
+
+               sk_mem_charge(sk, extra);
+               skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+       }
+       return 0;
+}
+
 static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
                                      struct page *page, int offset, size_t *size)
 {
@@ -971,7 +987,7 @@ new_segment:
                tcp_mark_push(tp, skb);
                goto new_segment;
        }
-       if (!sk_wmem_schedule(sk, copy))
+       if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
                return NULL;
 
        if (can_coalesce) {
@@ -1319,19 +1335,8 @@ new_segment:
 
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-                       /* skb changing from pure zc to mixed, must charge zc */
-                       if (unlikely(skb_zcopy_pure(skb))) {
-                               u32 extra = skb->truesize -
-                                           SKB_TRUESIZE(skb_end_offset(skb));
-
-                               if (!sk_wmem_schedule(sk, extra))
-                                       goto wait_for_space;
-
-                               sk_mem_charge(sk, extra);
-                               skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
-                       }
-
-                       if (!sk_wmem_schedule(sk, copy))
+                       if (tcp_downgrade_zcopy_pure(sk, skb) ||
+                           !sk_wmem_schedule(sk, copy))
                                goto wait_for_space;
 
                        err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
index 881fe6b..0ebaaec 100644 (file)
@@ -243,7 +243,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
        return 0;
 
 err2:
+       rtnl_lock();
        ip6mr_free_table(mrt);
+       rtnl_unlock();
 err1:
        fib_rules_unregister(ops);
        return err;
index 48f75a5..d6fdc57 100644 (file)
@@ -1607,6 +1607,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct mpls_dev *mdev;
        unsigned int flags;
+       int err;
 
        if (event == NETDEV_REGISTER) {
                mdev = mpls_add_dev(dev);
@@ -1621,7 +1622,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
                return NOTIFY_OK;
 
        switch (event) {
-               int err;
 
        case NETDEV_DOWN:
                err = mpls_ifdown(dev, event);
index 5464c2d..e4fd54f 100644 (file)
@@ -925,6 +925,7 @@ out:
 static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
                                            struct mptcp_pm_addr_entry *entry)
 {
+       int addrlen = sizeof(struct sockaddr_in);
        struct sockaddr_storage addr;
        struct mptcp_sock *msk;
        struct socket *ssock;
@@ -949,8 +950,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
        }
 
        mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
-       err = kernel_bind(ssock, (struct sockaddr *)&addr,
-                         sizeof(struct sockaddr_in));
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+       if (entry->addr.family == AF_INET6)
+               addrlen = sizeof(struct sockaddr_in6);
+#endif
+       err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
        if (err) {
                pr_warn("kernel_bind error, err=%d", err);
                goto out;
index 3d9f9ee..1ea2ad7 100644 (file)
@@ -2321,7 +2321,8 @@ ctnetlink_create_conntrack(struct net *net,
                        if (helper->from_nlattr)
                                helper->from_nlattr(helpinfo, ct);
 
-                       /* not in hash table yet so not strictly necessary */
+                       /* disable helper auto-assignment for this entry */
+                       ct->status |= IPS_HELPER;
                        RCU_INIT_POINTER(help->helper, helper);
                }
        } else {
index 2394238..5a93633 100644 (file)
@@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
                        pr_debug("Setting vtag %x for dir %d\n",
                                 ih->init_tag, !dir);
                        ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+                       /* don't renew timeout on init retransmit so
+                        * port reuse by client or NAT middlebox cannot
+                        * keep entry alive indefinitely (incl. nat info).
+                        */
+                       if (new_state == SCTP_CONNTRACK_CLOSED &&
+                           old_state == SCTP_CONNTRACK_CLOSED &&
+                           nf_ct_is_confirmed(ct))
+                               ignore = true;
                }
 
                ct->proto.sctp.state = new_state;
index af5115e..d1582b8 100644 (file)
@@ -446,6 +446,32 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
        }
 }
 
+static void tcp_init_sender(struct ip_ct_tcp_state *sender,
+                           struct ip_ct_tcp_state *receiver,
+                           const struct sk_buff *skb,
+                           unsigned int dataoff,
+                           const struct tcphdr *tcph,
+                           u32 end, u32 win)
+{
+       /* SYN-ACK in reply to a SYN
+        * or SYN from reply direction in simultaneous open.
+        */
+       sender->td_end =
+       sender->td_maxend = end;
+       sender->td_maxwin = (win == 0 ? 1 : win);
+
+       tcp_options(skb, dataoff, tcph, sender);
+       /* RFC 1323:
+        * Both sides must send the Window Scale option
+        * to enable window scaling in either direction.
+        */
+       if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+             receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
+               sender->td_scale = 0;
+               receiver->td_scale = 0;
+       }
+}
+
 static bool tcp_in_window(struct nf_conn *ct,
                          enum ip_conntrack_dir dir,
                          unsigned int index,
@@ -499,24 +525,9 @@ static bool tcp_in_window(struct nf_conn *ct,
                 * Initialize sender data.
                 */
                if (tcph->syn) {
-                       /*
-                        * SYN-ACK in reply to a SYN
-                        * or SYN from reply direction in simultaneous open.
-                        */
-                       sender->td_end =
-                       sender->td_maxend = end;
-                       sender->td_maxwin = (win == 0 ? 1 : win);
-
-                       tcp_options(skb, dataoff, tcph, sender);
-                       /*
-                        * RFC 1323:
-                        * Both sides must send the Window Scale option
-                        * to enable window scaling in either direction.
-                        */
-                       if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
-                             && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
-                               sender->td_scale =
-                               receiver->td_scale = 0;
+                       tcp_init_sender(sender, receiver,
+                                       skb, dataoff, tcph,
+                                       end, win);
                        if (!tcph->ack)
                                /* Simultaneous open */
                                return true;
@@ -560,6 +571,18 @@ static bool tcp_in_window(struct nf_conn *ct,
                sender->td_maxwin = (win == 0 ? 1 : win);
 
                tcp_options(skb, dataoff, tcph, sender);
+       } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
+                  state->state == TCP_CONNTRACK_SYN_SENT) {
+               /* Retransmitted syn-ack, or syn (simultaneous open).
+                *
+                * Re-init state for this direction, just like for the first
+                * syn(-ack) reply, it might differ in seq, ack or tcp options.
+                */
+               tcp_init_sender(sender, receiver,
+                               skb, dataoff, tcph,
+                               end, win);
+               if (!tcph->ack)
+                       return true;
        }
 
        if (!(tcph->ack)) {
index dfc06cd..d2b9378 100644 (file)
@@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
 {
        struct tcphdr *tcph;
 
-       if (pkt->tprot != IPPROTO_TCP)
+       if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
                return NULL;
 
        tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
index 940fed9..5cc06ae 100644 (file)
@@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt)
 {
        unsigned int thoff = nft_thoff(pkt);
 
-       if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+       if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
                return -1;
 
        switch (pkt->tprot) {
@@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr,
                offset = skb_network_offset(skb);
                break;
        case NFT_PAYLOAD_TRANSPORT_HEADER:
-               if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+               if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
                        goto err;
                offset = nft_thoff(pkt);
                break;
@@ -688,7 +688,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
                offset = skb_network_offset(skb);
                break;
        case NFT_PAYLOAD_TRANSPORT_HEADER:
-               if (!(pkt->flags & NFT_PKTINFO_L4PROTO))
+               if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff)
                        goto err;
                offset = nft_thoff(pkt);
                break;
@@ -728,7 +728,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
        if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
            pkt->tprot == IPPROTO_SCTP &&
            skb->ip_summed != CHECKSUM_PARTIAL) {
-               if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
+               if (pkt->fragoff == 0 &&
+                   nft_payload_csum_sctp(skb, nft_thoff(pkt)))
                        goto err;
        }
 
index 291f148..0599246 100644 (file)
@@ -368,9 +368,6 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
        new_pe->type = SMC_PNET_ETH;
        memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
        strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
-       new_pe->ndev = ndev;
-       if (ndev)
-               netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
        rc = -EEXIST;
        new_netdev = true;
        write_lock(&pnettable->lock);
@@ -382,6 +379,11 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
                }
        }
        if (new_netdev) {
+               if (ndev) {
+                       new_pe->ndev = ndev;
+                       netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
+                                            GFP_ATOMIC);
+               }
                list_add_tail(&new_pe->list, &pnettable->pnetlist);
                write_unlock(&pnettable->lock);
        } else {
index b64a028..05c758d 100644 (file)
@@ -115,11 +115,14 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
        }
 
        sock = container_of(xprt, struct sock_xprt, xprt);
-       if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+       mutex_lock(&sock->recv_mutex);
+       if (sock->sock == NULL ||
+           kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
                goto out;
 
        ret = sprintf(buf, "%pISc\n", &saddr);
 out:
+       mutex_unlock(&sock->recv_mutex);
        xprt_put(xprt);
        return ret + 1;
 }
index f172d12..7b5fce2 100644 (file)
@@ -413,6 +413,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
                                              IB_POLL_WORKQUEUE);
        if (IS_ERR(ep->re_attr.send_cq)) {
                rc = PTR_ERR(ep->re_attr.send_cq);
+               ep->re_attr.send_cq = NULL;
                goto out_destroy;
        }
 
@@ -421,6 +422,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
                                              IB_POLL_WORKQUEUE);
        if (IS_ERR(ep->re_attr.recv_cq)) {
                rc = PTR_ERR(ep->re_attr.recv_cq);
+               ep->re_attr.recv_cq = NULL;
                goto out_destroy;
        }
        ep->re_receive_count = 0;
@@ -459,6 +461,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
        ep->re_pd = ib_alloc_pd(device, 0);
        if (IS_ERR(ep->re_pd)) {
                rc = PTR_ERR(ep->re_pd);
+               ep->re_pd = NULL;
                goto out_destroy;
        }
 
index 69b6ee5..0f39e08 100644 (file)
@@ -1641,7 +1641,12 @@ static int xs_get_srcport(struct sock_xprt *transport)
 unsigned short get_srcport(struct rpc_xprt *xprt)
 {
        struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
-       return xs_sock_getport(sock->sock);
+       unsigned short ret = 0;
+       mutex_lock(&sock->recv_mutex);
+       if (sock->sock)
+               ret = xs_sock_getport(sock->sock);
+       mutex_unlock(&sock->recv_mutex);
+       return ret;
 }
 EXPORT_SYMBOL(get_srcport);
 
index 8d9e09f..1e14d7f 100644 (file)
@@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
        struct tipc_msg *hdr = buf_msg(skb);
        struct tipc_gap_ack_blks *ga = NULL;
        bool reply = msg_probe(hdr), retransmitted = false;
-       u16 dlen = msg_data_sz(hdr), glen = 0;
+       u32 dlen = msg_data_sz(hdr), glen = 0;
        u16 peers_snd_nxt =  msg_next_sent(hdr);
        u16 peers_tol = msg_link_tolerance(hdr);
        u16 peers_prio = msg_linkprio(hdr);
@@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
        void *data;
 
        trace_tipc_proto_rcv(skb, false, l->name);
+
+       if (dlen > U16_MAX)
+               goto exit;
+
        if (tipc_link_is_blocked(l) || !xmitq)
                goto exit;
 
@@ -2309,7 +2313,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 
                /* Receive Gap ACK blocks from peer if any */
                glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
-
+               if(glen > dlen)
+                       break;
                tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
                             &l->mon_state, l->bearer_id);
 
index 4076196..2f4d232 100644 (file)
@@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
        state->probing = false;
 
        /* Sanity check received domain record */
+       if (new_member_cnt > MAX_MON_DOMAIN)
+               return;
        if (dlen < dom_rec_len(arrv_dom, 0))
                return;
        if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
index bda902c..8267b75 100644 (file)
@@ -313,7 +313,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
                pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
                                    ua.sr.type, ua.sr.lower, node);
        } else {
-               pr_warn("Unrecognized name table message received\n");
+               pr_warn_ratelimited("Unknown name table message received\n");
        }
        return false;
 }
index 23240d7..895f4b9 100644 (file)
@@ -109,22 +109,25 @@ int asymmetric_verify(struct key *keyring, const char *sig,
 
        pk = asymmetric_key_public_key(key);
        pks.pkey_algo = pk->pkey_algo;
-       if (!strcmp(pk->pkey_algo, "rsa"))
+       if (!strcmp(pk->pkey_algo, "rsa")) {
                pks.encoding = "pkcs1";
-       else if (!strncmp(pk->pkey_algo, "ecdsa-", 6))
+       } else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) {
                /* edcsa-nist-p192 etc. */
                pks.encoding = "x962";
-       else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
-                  !strcmp(pk->pkey_algo, "sm2"))
+       } else if (!strcmp(pk->pkey_algo, "ecrdsa") ||
+                  !strcmp(pk->pkey_algo, "sm2")) {
                pks.encoding = "raw";
-       else
-               return -ENOPKG;
+       } else {
+               ret = -ENOPKG;
+               goto out;
+       }
 
        pks.digest = (u8 *)data;
        pks.digest_size = datalen;
        pks.s = hdr->sig;
        pks.s_size = siglen;
        ret = verify_signature(key, &pks);
+out:
        key_put(key);
        pr_debug("%s() = %d\n", __func__, ret);
        return ret;
index 3d8e9d5..3ad8f77 100644 (file)
@@ -496,12 +496,12 @@ int __init ima_fs_init(void)
 
        return 0;
 out:
+       securityfs_remove(ima_policy);
        securityfs_remove(violations);
        securityfs_remove(runtime_measurements_count);
        securityfs_remove(ascii_runtime_measurements);
        securityfs_remove(binary_runtime_measurements);
        securityfs_remove(ima_symlink);
        securityfs_remove(ima_dir);
-       securityfs_remove(ima_policy);
        return -1;
 }
index 320ca80..2a1f641 100644 (file)
@@ -1967,6 +1967,14 @@ int ima_policy_show(struct seq_file *m, void *v)
 
        rcu_read_lock();
 
+       /* Do not print rules with inactive LSM labels */
+       for (i = 0; i < MAX_LSM_RULES; i++) {
+               if (entry->lsm[i].args_p && !entry->lsm[i].rule) {
+                       rcu_read_unlock();
+                       return 0;
+               }
+       }
+
        if (entry->action & MEASURE)
                seq_puts(m, pt(Opt_measure));
        if (entry->action & DONT_MEASURE)
index 6945603..db1ad6d 100644 (file)
@@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = {
 
 static LIST_HEAD(defined_templates);
 static DEFINE_SPINLOCK(template_list);
+static int template_setup_done;
 
 static const struct ima_template_field supported_fields[] = {
        {.field_id = "d", .field_init = ima_eventdigest_init,
@@ -101,10 +102,11 @@ static int __init ima_template_setup(char *str)
        struct ima_template_desc *template_desc;
        int template_len = strlen(str);
 
-       if (ima_template)
+       if (template_setup_done)
                return 1;
 
-       ima_init_template_list();
+       if (!ima_template)
+               ima_init_template_list();
 
        /*
         * Verify that a template with the supplied name exists.
@@ -128,6 +130,7 @@ static int __init ima_template_setup(char *str)
        }
 
        ima_template = template_desc;
+       template_setup_done = 1;
        return 1;
 }
 __setup("ima_template=", ima_template_setup);
@@ -136,7 +139,7 @@ static int __init ima_template_fmt_setup(char *str)
 {
        int num_templates = ARRAY_SIZE(builtin_templates);
 
-       if (ima_template)
+       if (template_setup_done)
                return 1;
 
        if (template_desc_init_fields(str, NULL, NULL) < 0) {
@@ -147,6 +150,7 @@ static int __init ima_template_fmt_setup(char *str)
 
        builtin_templates[num_templates - 1].fmt = str;
        ima_template = builtin_templates + num_templates - 1;
+       template_setup_done = 1;
 
        return 1;
 }
index 2922005..0ec5e4c 100644 (file)
@@ -45,6 +45,8 @@ void integrity_audit_message(int audit_msgno, struct inode *inode,
                return;
 
        ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno);
+       if (!ab)
+               return;
        audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
                         task_pid_nr(current),
                         from_kuid(&init_user_ns, current_uid()),
index 621883e..a056b3e 100644 (file)
@@ -172,6 +172,19 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
 }
 EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave);
 
+unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream)
+{
+       unsigned long flags = 0;
+       if (substream->pcm->nonatomic)
+               mutex_lock_nested(&substream->self_group.mutex,
+                                 SINGLE_DEPTH_NESTING);
+       else
+               spin_lock_irqsave_nested(&substream->self_group.lock, flags,
+                                        SINGLE_DEPTH_NESTING);
+       return flags;
+}
+EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested);
+
 /**
  * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream
  * @substream: PCM substream
index b7758db..5cb92f7 100644 (file)
@@ -50,11 +50,11 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
 static int
 sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
 {
-       struct acpi_device *adev;
+       struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle);
        int ret, i;
        u8 count;
 
-       if (acpi_bus_get_device(info->handle, &adev))
+       if (!adev)
                return -EINVAL;
 
        /* Found controller, find links supported */
@@ -119,7 +119,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
                                     void *cdata, void **return_value)
 {
        struct sdw_intel_acpi_info *info = cdata;
-       struct acpi_device *adev;
        acpi_status status;
        u64 adr;
 
@@ -127,7 +126,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
        if (ACPI_FAILURE(status))
                return AE_OK; /* keep going */
 
-       if (acpi_bus_get_device(handle, &adev)) {
+       if (!acpi_fetch_acpi_dev(handle)) {
                pr_err("%s: Couldn't find ACPI handle\n", __func__);
                return AE_NOT_FOUND;
        }
index 82c492b..cd1db94 100644 (file)
@@ -981,7 +981,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec,
        int id = HDA_FIXUP_ID_NOT_SET;
        const char *name = NULL;
        const char *type = NULL;
-       int vendor, device;
+       unsigned int vendor, device;
 
        if (codec->fixup_id != HDA_FIXUP_ID_NOT_SET)
                return;
index 7016b48..f552785 100644 (file)
@@ -3000,6 +3000,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec)
 {
        struct hda_pcm *cpcm;
 
+       /* Skip the shutdown if codec is not registered */
+       if (!codec->registered)
+               return;
+
        list_for_each_entry(cpcm, &codec->pcm_list_head, list)
                snd_pcm_suspend_all(cpcm->pcm);
 
index 3bf5e34..fc114e5 100644 (file)
@@ -91,6 +91,12 @@ static void snd_hda_gen_spec_free(struct hda_gen_spec *spec)
        free_kctls(spec);
        snd_array_free(&spec->paths);
        snd_array_free(&spec->loopback_list);
+#ifdef CONFIG_SND_HDA_GENERIC_LEDS
+       if (spec->led_cdevs[LED_AUDIO_MUTE])
+               led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]);
+       if (spec->led_cdevs[LED_AUDIO_MICMUTE])
+               led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]);
+#endif
 }
 
 /*
@@ -3922,7 +3928,10 @@ static int create_mute_led_cdev(struct hda_codec *codec,
                                                enum led_brightness),
                                bool micmute)
 {
+       struct hda_gen_spec *spec = codec->spec;
        struct led_classdev *cdev;
+       int idx = micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE;
+       int err;
 
        cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL);
        if (!cdev)
@@ -3932,10 +3941,14 @@ static int create_mute_led_cdev(struct hda_codec *codec,
        cdev->max_brightness = 1;
        cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute";
        cdev->brightness_set_blocking = callback;
-       cdev->brightness = ledtrig_audio_get(micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE);
+       cdev->brightness = ledtrig_audio_get(idx);
        cdev->flags = LED_CORE_SUSPENDRESUME;
 
-       return devm_led_classdev_register(&codec->core.dev, cdev);
+       err = led_classdev_register(&codec->core.dev, cdev);
+       if (err < 0)
+               return err;
+       spec->led_cdevs[idx] = cdev;
+       return 0;
 }
 
 /**
index 8e1bc8e..34eba40 100644 (file)
@@ -294,6 +294,9 @@ struct hda_gen_spec {
                                   struct hda_jack_callback *cb);
        void (*mic_autoswitch_hook)(struct hda_codec *codec,
                                    struct hda_jack_callback *cb);
+
+       /* leds */
+       struct led_classdev *led_cdevs[NUM_AUDIO_LEDS];
 };
 
 /* values for add_stereo_mix_input flag */
index 668274e..8315bf7 100644 (file)
@@ -98,6 +98,7 @@ struct alc_spec {
        unsigned int gpio_mic_led_mask;
        struct alc_coef_led mute_led_coef;
        struct alc_coef_led mic_led_coef;
+       struct mutex coef_mutex;
 
        hda_nid_t headset_mic_pin;
        hda_nid_t headphone_mic_pin;
@@ -137,8 +138,8 @@ struct alc_spec {
  * COEF access helper functions
  */
 
-static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
-                              unsigned int coef_idx)
+static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+                                unsigned int coef_idx)
 {
        unsigned int val;
 
@@ -147,28 +148,61 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
        return val;
 }
 
+static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+                              unsigned int coef_idx)
+{
+       struct alc_spec *spec = codec->spec;
+       unsigned int val;
+
+       mutex_lock(&spec->coef_mutex);
+       val = __alc_read_coefex_idx(codec, nid, coef_idx);
+       mutex_unlock(&spec->coef_mutex);
+       return val;
+}
+
 #define alc_read_coef_idx(codec, coef_idx) \
        alc_read_coefex_idx(codec, 0x20, coef_idx)
 
-static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
-                                unsigned int coef_idx, unsigned int coef_val)
+static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+                                  unsigned int coef_idx, unsigned int coef_val)
 {
        snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx);
        snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val);
 }
 
+static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+                                unsigned int coef_idx, unsigned int coef_val)
+{
+       struct alc_spec *spec = codec->spec;
+
+       mutex_lock(&spec->coef_mutex);
+       __alc_write_coefex_idx(codec, nid, coef_idx, coef_val);
+       mutex_unlock(&spec->coef_mutex);
+}
+
 #define alc_write_coef_idx(codec, coef_idx, coef_val) \
        alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val)
 
+static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+                                   unsigned int coef_idx, unsigned int mask,
+                                   unsigned int bits_set)
+{
+       unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx);
+
+       if (val != -1)
+               __alc_write_coefex_idx(codec, nid, coef_idx,
+                                      (val & ~mask) | bits_set);
+}
+
 static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
                                  unsigned int coef_idx, unsigned int mask,
                                  unsigned int bits_set)
 {
-       unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx);
+       struct alc_spec *spec = codec->spec;
 
-       if (val != -1)
-               alc_write_coefex_idx(codec, nid, coef_idx,
-                                    (val & ~mask) | bits_set);
+       mutex_lock(&spec->coef_mutex);
+       __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set);
+       mutex_unlock(&spec->coef_mutex);
 }
 
 #define alc_update_coef_idx(codec, coef_idx, mask, bits_set)   \
@@ -201,13 +235,17 @@ struct coef_fw {
 static void alc_process_coef_fw(struct hda_codec *codec,
                                const struct coef_fw *fw)
 {
+       struct alc_spec *spec = codec->spec;
+
+       mutex_lock(&spec->coef_mutex);
        for (; fw->nid; fw++) {
                if (fw->mask == (unsigned short)-1)
-                       alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
+                       __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
                else
-                       alc_update_coefex_idx(codec, fw->nid, fw->idx,
-                                             fw->mask, fw->val);
+                       __alc_update_coefex_idx(codec, fw->nid, fw->idx,
+                                               fw->mask, fw->val);
        }
+       mutex_unlock(&spec->coef_mutex);
 }
 
 /*
@@ -1153,6 +1191,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid)
        codec->spdif_status_reset = 1;
        codec->forced_resume = 1;
        codec->patch_ops = alc_patch_ops;
+       mutex_init(&spec->coef_mutex);
 
        err = alc_codec_rename_from_preset(codec);
        if (err < 0) {
@@ -2125,6 +2164,7 @@ static void alc1220_fixup_gb_x570(struct hda_codec *codec,
 {
        static const hda_nid_t conn1[] = { 0x0c };
        static const struct coef_fw gb_x570_coefs[] = {
+               WRITE_COEF(0x07, 0x03c0),
                WRITE_COEF(0x1a, 0x01c1),
                WRITE_COEF(0x1b, 0x0202),
                WRITE_COEF(0x43, 0x3005),
@@ -2551,7 +2591,8 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
        SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
        SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570),
-       SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570),
+       SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570),
        SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950),
@@ -2626,6 +2667,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
        {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
        {.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"},
        {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
+       {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"},
        {.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"},
        {}
 };
@@ -8969,6 +9011,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
        SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
+       SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
        SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
index c9caade..cd05ee2 100644 (file)
@@ -303,11 +303,11 @@ static const struct snd_soc_dapm_route rt1019_map_lr[] = {
 
 static struct snd_soc_codec_conf rt1019_conf[] = {
        {
-                .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"),
+                .dlc = COMP_CODEC_CONF("i2c-10EC1019:01"),
                 .name_prefix = "Left",
        },
        {
-                .dlc = COMP_CODEC_CONF("i2c-10EC1019:01"),
+                .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"),
                 .name_prefix = "Right",
        },
 };
index 598e090..ffdf8b6 100644 (file)
@@ -1667,6 +1667,8 @@ static int cpcap_codec_probe(struct platform_device *pdev)
 {
        struct device_node *codec_node =
                of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec");
+       if (!codec_node)
+               return -ENODEV;
 
        pdev->dev.of_node = codec_node;
 
index b61f980..b07607a 100644 (file)
@@ -277,7 +277,7 @@ struct hdmi_codec_priv {
        bool busy;
        struct snd_soc_jack *jack;
        unsigned int jack_status;
-       u8 iec_status[5];
+       u8 iec_status[AES_IEC958_STATUS_SIZE];
 };
 
 static const struct snd_soc_dapm_widget hdmi_widgets[] = {
index aec5127..6ffe883 100644 (file)
@@ -2688,8 +2688,8 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
        int reg, b2_reg;
 
        /* Address does not automatically update if reading */
-       reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
-       b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
+       reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
+       b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
 
        snd_soc_component_write(component, reg,
                                ((band_idx * BAND_MAX + coeff_idx) *
@@ -2718,7 +2718,7 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component,
 static void set_iir_band_coeff(struct snd_soc_component *component,
                               int iir_idx, int band_idx, uint32_t value)
 {
-       int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx;
+       int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx;
 
        snd_soc_component_write(component, reg, (value & 0xFF));
        snd_soc_component_write(component, reg, (value >> 8) & 0xFF);
@@ -2739,7 +2739,7 @@ static int rx_macro_put_iir_band_audio_mixer(
        int iir_idx = ctl->iir_idx;
        int band_idx = ctl->band_idx;
        u32 coeff[BAND_MAX];
-       int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx;
+       int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx;
 
        memcpy(&coeff[0], ucontrol->value.bytes.data, params->max);
 
index d75fd61..bc57d76 100644 (file)
@@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol,
        struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol);
        struct max9759 *priv = snd_soc_component_get_drvdata(c);
 
-       if (ucontrol->value.integer.value[0] > 3)
+       if (ucontrol->value.integer.value[0] < 0 ||
+           ucontrol->value.integer.value[0] > 3)
                return -EINVAL;
 
        priv->gain = ucontrol->value.integer.value[0];
index 20e0f90..20fc0f3 100644 (file)
@@ -59,18 +59,12 @@ static void rt5682_jd_check_handler(struct work_struct *work)
        struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv,
                jd_check_work.work);
 
-       if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
-               & RT5682_JDH_RS_MASK) {
+       if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK)
                /* jack out */
-               rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0);
-
-               snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
-                       SND_JACK_HEADSET |
-                       SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-                       SND_JACK_BTN_2 | SND_JACK_BTN_3);
-       } else {
+               mod_delayed_work(system_power_efficient_wq,
+                                &rt5682->jack_detect_work, 0);
+       else
                schedule_delayed_work(&rt5682->jd_check_work, 500);
-       }
 }
 
 static irqreturn_t rt5682_irq(int irq, void *data)
@@ -198,7 +192,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c,
        }
 
        mutex_init(&rt5682->calibrate_mutex);
-       mutex_init(&rt5682->jdet_mutex);
        rt5682_calibrate(rt5682);
 
        rt5682_apply_patch_list(rt5682, &i2c->dev);
index 415ec56..0a0ec4a 100644 (file)
@@ -922,15 +922,13 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component,
  *
  * Returns detect status.
  */
-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
+static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
 {
        struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
        struct snd_soc_dapm_context *dapm = &component->dapm;
        unsigned int val, count;
 
        if (jack_insert) {
-               snd_soc_dapm_mutex_lock(dapm);
-
                snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1,
                        RT5682_PWR_VREF2 | RT5682_PWR_MB,
                        RT5682_PWR_VREF2 | RT5682_PWR_MB);
@@ -981,8 +979,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
                snd_soc_component_update_bits(component, RT5682_MICBIAS_2,
                        RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK,
                        RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU);
-
-               snd_soc_dapm_mutex_unlock(dapm);
        } else {
                rt5682_enable_push_button_irq(component, false);
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
@@ -1011,7 +1007,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
        dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type);
        return rt5682->jack_type;
 }
-EXPORT_SYMBOL_GPL(rt5682_headset_detect);
 
 static int rt5682_set_jack_detect(struct snd_soc_component *component,
                struct snd_soc_jack *hs_jack, void *data)
@@ -1094,6 +1089,7 @@ void rt5682_jack_detect_handler(struct work_struct *work)
 {
        struct rt5682_priv *rt5682 =
                container_of(work, struct rt5682_priv, jack_detect_work.work);
+       struct snd_soc_dapm_context *dapm;
        int val, btn_type;
 
        while (!rt5682->component)
@@ -1102,7 +1098,9 @@ void rt5682_jack_detect_handler(struct work_struct *work)
        while (!rt5682->component->card->instantiated)
                usleep_range(10000, 15000);
 
-       mutex_lock(&rt5682->jdet_mutex);
+       dapm = snd_soc_component_get_dapm(rt5682->component);
+
+       snd_soc_dapm_mutex_lock(dapm);
        mutex_lock(&rt5682->calibrate_mutex);
 
        val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
@@ -1162,6 +1160,9 @@ void rt5682_jack_detect_handler(struct work_struct *work)
                rt5682->irq_work_delay_time = 50;
        }
 
+       mutex_unlock(&rt5682->calibrate_mutex);
+       snd_soc_dapm_mutex_unlock(dapm);
+
        snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
                SND_JACK_HEADSET |
                SND_JACK_BTN_0 | SND_JACK_BTN_1 |
@@ -1174,9 +1175,6 @@ void rt5682_jack_detect_handler(struct work_struct *work)
                else
                        cancel_delayed_work_sync(&rt5682->jd_check_work);
        }
-
-       mutex_unlock(&rt5682->calibrate_mutex);
-       mutex_unlock(&rt5682->jdet_mutex);
 }
 EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler);
 
@@ -1526,7 +1524,6 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
 {
        struct snd_soc_component *component =
                snd_soc_dapm_to_component(w->dapm);
-       struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
 
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
@@ -1538,17 +1535,12 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
                        RT5682_DEPOP_1, 0x60, 0x60);
                snd_soc_component_update_bits(component,
                        RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080);
-
-               mutex_lock(&rt5682->jdet_mutex);
-
                snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
                        RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN,
                        RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN);
                usleep_range(5000, 10000);
                snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
                        RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L);
-
-               mutex_unlock(&rt5682->jdet_mutex);
                break;
 
        case SND_SOC_DAPM_POST_PMD:
index c917c76..52ff0d9 100644 (file)
@@ -1463,7 +1463,6 @@ struct rt5682_priv {
 
        int jack_type;
        int irq_work_delay_time;
-       struct mutex jdet_mutex;
 };
 
 extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES];
@@ -1473,7 +1472,6 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component,
 
 void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev);
 
-int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert);
 void rt5682_jack_detect_handler(struct work_struct *work);
 
 bool rt5682_volatile_register(struct device *dev, unsigned int reg);
index eff200a..36cbc66 100644 (file)
@@ -1432,14 +1432,10 @@ static int wcd938x_sdw_connect_port(struct wcd938x_sdw_ch_info *ch_info,
        return 0;
 }
 
-static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 ch_id, u8 enable)
+static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 port_num, u8 ch_id, u8 enable)
 {
-       u8 port_num;
-
-       port_num = wcd->ch_info[ch_id].port_num;
-
        return wcd938x_sdw_connect_port(&wcd->ch_info[ch_id],
-                                       &wcd->port_config[port_num],
+                                       &wcd->port_config[port_num - 1],
                                        enable);
 }
 
@@ -2563,7 +2559,7 @@ static int wcd938x_ear_pa_put_gain(struct snd_kcontrol *kcontrol,
                                      WCD938X_EAR_GAIN_MASK,
                                      ucontrol->value.integer.value[0]);
 
-       return 0;
+       return 1;
 }
 
 static int wcd938x_get_compander(struct snd_kcontrol *kcontrol,
@@ -2593,6 +2589,7 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
        struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
        struct wcd938x_sdw_priv *wcd;
        int value = ucontrol->value.integer.value[0];
+       int portidx;
        struct soc_mixer_control *mc;
        bool hphr;
 
@@ -2606,12 +2603,14 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol,
        else
                wcd938x->comp1_enable = value;
 
+       portidx = wcd->ch_info[mc->reg].port_num;
+
        if (value)
-               wcd938x_connect_port(wcd, mc->reg, true);
+               wcd938x_connect_port(wcd, portidx, mc->reg, true);
        else
-               wcd938x_connect_port(wcd, mc->reg, false);
+               wcd938x_connect_port(wcd, portidx, mc->reg, false);
 
-       return 0;
+       return 1;
 }
 
 static int wcd938x_ldoh_get(struct snd_kcontrol *kcontrol,
@@ -2882,9 +2881,11 @@ static int wcd938x_get_swr_port(struct snd_kcontrol *kcontrol,
        struct wcd938x_sdw_priv *wcd;
        struct soc_mixer_control *mixer = (struct soc_mixer_control *)kcontrol->private_value;
        int dai_id = mixer->shift;
-       int portidx = mixer->reg;
+       int portidx, ch_idx = mixer->reg;
+
 
        wcd = wcd938x->sdw_priv[dai_id];
+       portidx = wcd->ch_info[ch_idx].port_num;
 
        ucontrol->value.integer.value[0] = wcd->port_enable[portidx];
 
@@ -2899,12 +2900,14 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
        struct wcd938x_sdw_priv *wcd;
        struct soc_mixer_control *mixer =
                (struct soc_mixer_control *)kcontrol->private_value;
-       int portidx = mixer->reg;
+       int ch_idx = mixer->reg;
+       int portidx;
        int dai_id = mixer->shift;
        bool enable;
 
        wcd = wcd938x->sdw_priv[dai_id];
 
+       portidx = wcd->ch_info[ch_idx].port_num;
        if (ucontrol->value.integer.value[0])
                enable = true;
        else
@@ -2912,9 +2915,9 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol,
 
        wcd->port_enable[portidx] = enable;
 
-       wcd938x_connect_port(wcd, portidx, enable);
+       wcd938x_connect_port(wcd, portidx, ch_idx, enable);
 
-       return 0;
+       return 1;
 
 }
 
index af3c3b9..83b4a22 100644 (file)
@@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op)
                dev_err(&op->dev, "platform_device_alloc() failed\n");
 
        ret = platform_device_add(pdata->codec_device);
-       if (ret)
+       if (ret) {
                dev_err(&op->dev, "platform_device_add() failed: %d\n", ret);
+               platform_device_put(pdata->codec_device);
+       }
 
        ret = snd_soc_register_card(card);
-       if (ret)
+       if (ret) {
                dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret);
+               platform_device_del(pdata->codec_device);
+               platform_device_put(pdata->codec_device);
+       }
 
        platform_set_drvdata(op, pdata);
-
        return ret;
+
 }
 
 static int pcm030_fabric_remove(struct platform_device *op)
index a89d1cf..78419e1 100644 (file)
@@ -28,6 +28,30 @@ static const struct snd_soc_ops simple_ops = {
        .hw_params      = asoc_simple_hw_params,
 };
 
+static int asoc_simple_parse_platform(struct device_node *node,
+                                     struct snd_soc_dai_link_component *dlc)
+{
+       struct of_phandle_args args;
+       int ret;
+
+       if (!node)
+               return 0;
+
+       /*
+        * Get node via "sound-dai = <&phandle port>"
+        * it will be used as xxx_of_node on soc_bind_dai_link()
+        */
+       ret = of_parse_phandle_with_args(node, DAI, CELL, 0, &args);
+       if (ret)
+               return ret;
+
+       /* dai_name is not required and may not exist for plat component */
+
+       dlc->of_node = args.np;
+
+       return 0;
+}
+
 static int asoc_simple_parse_dai(struct device_node *node,
                                 struct snd_soc_dai_link_component *dlc,
                                 int *is_single_link)
@@ -289,7 +313,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
        if (ret < 0)
                goto dai_link_of_err;
 
-       ret = asoc_simple_parse_dai(plat, platforms, NULL);
+       ret = asoc_simple_parse_platform(plat, platforms);
        if (ret < 0)
                goto dai_link_of_err;
 
index 9306b7c..0d15435 100644 (file)
@@ -216,7 +216,7 @@ config SND_SOC_MT8195_MT6359_RT1019_RT5682
 
 config SND_SOC_MT8195_MT6359_RT1011_RT5682
        tristate "ASoC Audio driver for MT8195 with MT6359 RT1011 RT5682 codec"
-       depends on I2C
+       depends on I2C && GPIOLIB
        depends on SND_SOC_MT8195 && MTK_PMIC_WRAP
        select SND_SOC_MT6359
        select SND_SOC_RT1011
index eb1c3ae..19c4a90 100644 (file)
@@ -308,8 +308,11 @@ static int q6apm_dai_close(struct snd_soc_component *component,
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct q6apm_dai_rtd *prtd = runtime->private_data;
 
-       q6apm_graph_stop(prtd->graph);
-       q6apm_unmap_memory_regions(prtd->graph, substream->stream);
+       if (prtd->state) { /* only stop graph that is started */
+               q6apm_graph_stop(prtd->graph);
+               q6apm_unmap_memory_regions(prtd->graph, substream->stream);
+       }
+
        q6apm_graph_close(prtd->graph);
        prtd->graph = NULL;
        kfree(prtd);
index cbd7ea4..142476f 100644 (file)
@@ -55,16 +55,13 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_find_machine);
 static acpi_status snd_soc_acpi_find_package(acpi_handle handle, u32 level,
                                             void *context, void **ret)
 {
-       struct acpi_device *adev;
+       struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
        acpi_status status;
        struct snd_soc_acpi_package_context *pkg_ctx = context;
 
        pkg_ctx->data_valid = false;
 
-       if (acpi_bus_get_device(handle, &adev))
-               return AE_OK;
-
-       if (adev->status.present && adev->status.functional) {
+       if (adev && adev->status.present && adev->status.functional) {
                struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
                union acpi_object  *myobj = NULL;
 
index 08eaa9d..9833611 100644 (file)
@@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol,
        if (sign_bit)
                mask = BIT(sign_bit + 1) - 1;
 
-       val = ((ucontrol->value.integer.value[0] + min) & mask);
+       if (ucontrol->value.integer.value[0] < 0)
+               return -EINVAL;
+       val = ucontrol->value.integer.value[0];
+       if (mc->platform_max && val > mc->platform_max)
+               return -EINVAL;
+       if (val > max - min)
+               return -EINVAL;
+       val = (val + min) & mask;
        if (invert)
                val = max - val;
        val_mask = mask << shift;
        val = val << shift;
        if (snd_soc_volsw_is_stereo(mc)) {
-               val2 = ((ucontrol->value.integer.value[1] + min) & mask);
+               if (ucontrol->value.integer.value[1] < 0)
+                       return -EINVAL;
+               val2 = ucontrol->value.integer.value[1];
+               if (mc->platform_max && val2 > mc->platform_max)
+                       return -EINVAL;
+               if (val2 > max - min)
+                       return -EINVAL;
+               val2 = (val2 + min) & mask;
                if (invert)
                        val2 = max - val2;
                if (reg == reg2) {
@@ -409,8 +423,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
        int err = 0;
        unsigned int val, val_mask;
 
+       if (ucontrol->value.integer.value[0] < 0)
+               return -EINVAL;
+       val = ucontrol->value.integer.value[0];
+       if (mc->platform_max && val > mc->platform_max)
+               return -EINVAL;
+       if (val > max - min)
+               return -EINVAL;
        val_mask = mask << shift;
-       val = (ucontrol->value.integer.value[0] + min) & mask;
+       val = (val + min) & mask;
        val = val << shift;
 
        err = snd_soc_component_update_bits(component, reg, val_mask, val);
@@ -858,6 +879,8 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol,
        long val = ucontrol->value.integer.value[0];
        unsigned int i;
 
+       if (val < mc->min || val > mc->max)
+               return -EINVAL;
        if (invert)
                val = max - val;
        val &= mask;
index 7abfc48..9a95468 100644 (file)
@@ -46,8 +46,8 @@ static inline void snd_soc_dpcm_stream_lock_irq(struct snd_soc_pcm_runtime *rtd,
        snd_pcm_stream_lock_irq(snd_soc_dpcm_get_substream(rtd, stream));
 }
 
-#define snd_soc_dpcm_stream_lock_irqsave(rtd, stream, flags) \
-       snd_pcm_stream_lock_irqsave(snd_soc_dpcm_get_substream(rtd, stream), flags)
+#define snd_soc_dpcm_stream_lock_irqsave_nested(rtd, stream, flags) \
+       snd_pcm_stream_lock_irqsave_nested(snd_soc_dpcm_get_substream(rtd, stream), flags)
 
 static inline void snd_soc_dpcm_stream_unlock_irq(struct snd_soc_pcm_runtime *rtd,
                                                  int stream)
@@ -1268,6 +1268,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe,
 void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
 {
        struct snd_soc_dpcm *dpcm, *d;
+       LIST_HEAD(deleted_dpcms);
 
        snd_soc_dpcm_mutex_assert_held(fe);
 
@@ -1287,13 +1288,18 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
                /* BEs still alive need new FE */
                dpcm_be_reparent(fe, dpcm->be, stream);
 
-               dpcm_remove_debugfs_state(dpcm);
-
                list_del(&dpcm->list_be);
+               list_move(&dpcm->list_fe, &deleted_dpcms);
+       }
+       snd_soc_dpcm_stream_unlock_irq(fe, stream);
+
+       while (!list_empty(&deleted_dpcms)) {
+               dpcm = list_first_entry(&deleted_dpcms, struct snd_soc_dpcm,
+                                       list_fe);
                list_del(&dpcm->list_fe);
+               dpcm_remove_debugfs_state(dpcm);
                kfree(dpcm);
        }
-       snd_soc_dpcm_stream_unlock_irq(fe, stream);
 }
 
 /* get BE for DAI widget and stream */
@@ -2094,7 +2100,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream,
                be = dpcm->be;
                be_substream = snd_soc_dpcm_get_substream(be, stream);
 
-               snd_soc_dpcm_stream_lock_irqsave(be, stream, flags);
+               snd_soc_dpcm_stream_lock_irqsave_nested(be, stream, flags);
 
                /* is this op for this BE ? */
                if (!snd_soc_dpcm_be_can_update(fe, be, stream))
index 91afea9..ce19a60 100644 (file)
@@ -37,6 +37,7 @@
 #define XLNX_AUD_XFER_COUNT    0x28
 #define XLNX_AUD_CH_STS_START  0x2C
 #define XLNX_BYTES_PER_CH      0x44
+#define XLNX_AUD_ALIGN_BYTES   64
 
 #define AUD_STS_IOC_IRQ_MASK   BIT(31)
 #define AUD_STS_CH_STS_MASK    BIT(29)
@@ -368,12 +369,32 @@ static int xlnx_formatter_pcm_open(struct snd_soc_component *component,
        snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware);
        runtime->private_data = stream_data;
 
-       /* Resize the period size divisible by 64 */
+       /* Resize the period bytes as divisible by 64 */
        err = snd_pcm_hw_constraint_step(runtime, 0,
-                                        SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64);
+                                        SNDRV_PCM_HW_PARAM_PERIOD_BYTES,
+                                        XLNX_AUD_ALIGN_BYTES);
        if (err) {
                dev_err(component->dev,
-                       "unable to set constraint on period bytes\n");
+                       "Unable to set constraint on period bytes\n");
+               return err;
+       }
+
+       /* Resize the buffer bytes as divisible by 64 */
+       err = snd_pcm_hw_constraint_step(runtime, 0,
+                                        SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+                                        XLNX_AUD_ALIGN_BYTES);
+       if (err) {
+               dev_err(component->dev,
+                       "Unable to set constraint on buffer bytes\n");
+               return err;
+       }
+
+       /* Set periods as integer multiple */
+       err = snd_pcm_hw_constraint_integer(runtime,
+                                           SNDRV_PCM_HW_PARAM_PERIODS);
+       if (err < 0) {
+               dev_err(component->dev,
+                       "Unable to set constraint on periods to be integer\n");
                return err;
        }
 
index e8f3f8d..630766b 100644 (file)
@@ -1527,6 +1527,10 @@ error:
                usb_audio_err(chip,
                        "cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
                        UAC_GET_CUR, validx, idx, cval->val_type);
+
+               if (val)
+                       *val = 0;
+
                return filter_error(cval, ret);
        }
 
index b1522e4..0ea3956 100644 (file)
@@ -84,7 +84,7 @@
  * combination.
  */
 {
-       USB_DEVICE(0x041e, 0x4095),
+       USB_AUDIO_DEVICE(0x041e, 0x4095),
        .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
                .ifnum = QUIRK_ANY_INTERFACE,
                .type = QUIRK_COMPOSITE,
index 18de5f7..6db4e29 100644 (file)
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16                (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_BF16           (18*32+22) /* AMX bf16 Support */
 #define X86_FEATURE_AMX_TILE           (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8           (18*32+25) /* AMX int8 Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO             (13*32+ 0) /* CLZERO instruction */
index b46bcdb..5191b57 100644 (file)
@@ -1624,9 +1624,6 @@ struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET  _IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET   _IO(KVMIO,   0xc4)
 
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
-
 struct kvm_s390_pv_sec_parm {
        __u64 origin;
        __u64 length;
@@ -2048,4 +2045,7 @@ struct kvm_stats_desc {
 
 #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
 
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2           _IOR(KVMIO,  0xcf, struct kvm_xsave)
+
 #endif /* __LINUX_KVM_H */
index 4cd39aa..1b65042 100644 (file)
@@ -1332,9 +1332,9 @@ union perf_mem_data_src {
 
 /* hop level */
 #define PERF_MEM_HOPS_0                0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1         0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2         0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3         0x04 /* remote board */
+#define PERF_MEM_HOPS_1                0x02 /* remote node, same socket */
+#define PERF_MEM_HOPS_2                0x03 /* remote socket, same board */
+#define PERF_MEM_HOPS_3                0x04 /* remote board */
 /* 5-7 available */
 #define PERF_MEM_HOPS_SHIFT    43
 
index bb73e9a..e998764 100644 (file)
@@ -272,4 +272,7 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SCOPE_THREAD_GROUP      1
 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP     2
 
+#define PR_SET_VMA             0x53564d41
+# define PR_SET_VMA_ANON_NAME          0
+
 #endif /* _LINUX_PRCTL_H */
index 5fbb79e..2d3e5df 100644 (file)
  *                                                                          *
  ****************************************************************************/
 
+#define AES_IEC958_STATUS_SIZE         24
+
 struct snd_aes_iec958 {
-       unsigned char status[24];       /* AES/IEC958 channel status bits */
+       unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
        unsigned char subcode[147];     /* AES/IEC958 subcode bits */
        unsigned char pad;              /* nothing */
        unsigned char dig_subframe[4];  /* AES/IEC958 subframe bits */
@@ -202,6 +204,11 @@ typedef int __bitwise snd_pcm_format_t;
 #define        SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */
 #define        SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */
 #define        SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */
+/*
+ * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
+ * available bit count in most significant bit. It's for the case of so-called 'left-justified' or
+ * `right-padding` sample which has less width than 32 bit.
+ */
 #define        SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10)
 #define        SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11)
 #define        SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12)
@@ -300,7 +307,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME    0x04000000  /* report estimated link audio time */
 #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000  /* report synchronized audio/system time */
 #define SNDRV_PCM_INFO_EXPLICIT_SYNC   0x10000000      /* needs explicit sync of pointers and data */
-
+#define SNDRV_PCM_INFO_NO_REWINDS      0x20000000      /* hardware can only support monotonic changes of appl_ptr */
 #define SNDRV_PCM_INFO_DRAIN_TRIGGER   0x40000000              /* internal kernel flag - trigger in drain */
 #define SNDRV_PCM_INFO_FIFO_IN_FRAMES  0x80000000      /* internal kernel flag - FIFO size is in frames */
 
index f7ee07c..0d1634c 100644 (file)
@@ -13,6 +13,7 @@
 #include <internal/lib.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
+#include <linux/stringify.h>
 #include "internal.h"
 
 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -294,6 +295,103 @@ static u64 read_timestamp(void)
 
        return low | ((u64)high) << 32;
 }
+#elif defined(__aarch64__)
+#define read_sysreg(r) ({                                              \
+       u64 __val;                                                      \
+       asm volatile("mrs %0, " __stringify(r) : "=r" (__val));         \
+       __val;                                                          \
+})
+
+static u64 read_pmccntr(void)
+{
+       return read_sysreg(pmccntr_el0);
+}
+
+#define PMEVCNTR_READ(idx)                                     \
+       static u64 read_pmevcntr_##idx(void) {                  \
+               return read_sysreg(pmevcntr##idx##_el0);        \
+       }
+
+PMEVCNTR_READ(0);
+PMEVCNTR_READ(1);
+PMEVCNTR_READ(2);
+PMEVCNTR_READ(3);
+PMEVCNTR_READ(4);
+PMEVCNTR_READ(5);
+PMEVCNTR_READ(6);
+PMEVCNTR_READ(7);
+PMEVCNTR_READ(8);
+PMEVCNTR_READ(9);
+PMEVCNTR_READ(10);
+PMEVCNTR_READ(11);
+PMEVCNTR_READ(12);
+PMEVCNTR_READ(13);
+PMEVCNTR_READ(14);
+PMEVCNTR_READ(15);
+PMEVCNTR_READ(16);
+PMEVCNTR_READ(17);
+PMEVCNTR_READ(18);
+PMEVCNTR_READ(19);
+PMEVCNTR_READ(20);
+PMEVCNTR_READ(21);
+PMEVCNTR_READ(22);
+PMEVCNTR_READ(23);
+PMEVCNTR_READ(24);
+PMEVCNTR_READ(25);
+PMEVCNTR_READ(26);
+PMEVCNTR_READ(27);
+PMEVCNTR_READ(28);
+PMEVCNTR_READ(29);
+PMEVCNTR_READ(30);
+
+/*
+ * Read PMEVCNTR<counter>_EL0 (0-30) or PMCCNTR_EL0 (31); 0 if out of range
+ */
+static u64 read_perf_counter(unsigned int counter)
+{
+       static u64 (* const read_f[])(void) = {
+               read_pmevcntr_0,
+               read_pmevcntr_1,
+               read_pmevcntr_2,
+               read_pmevcntr_3,
+               read_pmevcntr_4,
+               read_pmevcntr_5,
+               read_pmevcntr_6,
+               read_pmevcntr_7,
+               read_pmevcntr_8,
+               read_pmevcntr_9,
+               read_pmevcntr_10,
+               read_pmevcntr_11,
+               read_pmevcntr_12,
+               read_pmevcntr_13,
+               read_pmevcntr_14,
+               read_pmevcntr_15,
+               read_pmevcntr_16,
+               read_pmevcntr_17,
+               read_pmevcntr_18,
+               read_pmevcntr_19,
+               read_pmevcntr_20,
+               read_pmevcntr_21,
+               read_pmevcntr_22,
+               read_pmevcntr_23,
+               read_pmevcntr_24,
+               read_pmevcntr_25,
+               read_pmevcntr_26,
+               read_pmevcntr_27,
+               read_pmevcntr_28,
+               read_pmevcntr_29,
+               read_pmevcntr_30,
+               read_pmccntr
+       };
+
+       if (counter < ARRAY_SIZE(read_f))
+               return (read_f[counter])();
+
+       return 0;
+}
+
+static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+
 #else
 static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
 static u64 read_timestamp(void) { return 0; }
index 33ae933..89be89a 100644 (file)
@@ -130,6 +130,9 @@ static int test_stat_user_read(int event)
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .config = event,
+#ifdef __aarch64__
+               .config1 = 0x2,         /* Request user access */
+#endif
        };
        int err, i;
 
@@ -150,7 +153,7 @@ static int test_stat_user_read(int event)
        pc = perf_evsel__mmap_base(evsel, 0, 0);
        __T("failed to get mmapped address", pc);
 
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
        __T("userspace counter access not supported", pc->cap_user_rdpmc);
        __T("userspace counter access not enabled", pc->index);
        __T("userspace counter width not set", pc->pmc_width >= 32);
index c2d2ab9..7c33ec6 100644 (file)
@@ -2854,7 +2854,7 @@ static inline bool func_uaccess_safe(struct symbol *func)
 
 static inline const char *call_dest_name(struct instruction *insn)
 {
-       static char pvname[16];
+       static char pvname[19];
        struct reloc *rel;
        int idx;
 
index dec24dc..a8785de 100644 (file)
@@ -1115,6 +1115,7 @@ enum perf_ftrace_subcommand {
 int cmd_ftrace(int argc, const char **argv)
 {
        int ret;
+       int (*cmd_func)(struct perf_ftrace *) = NULL;
        struct perf_ftrace ftrace = {
                .tracer = DEFAULT_TRACER,
                .target = { .uid = UINT_MAX, },
@@ -1221,6 +1222,28 @@ int cmd_ftrace(int argc, const char **argv)
                goto out_delete_filters;
        }
 
+       switch (subcmd) {
+       case PERF_FTRACE_TRACE:
+               if (!argc && target__none(&ftrace.target))
+                       ftrace.target.system_wide = true;
+               cmd_func = __cmd_ftrace;
+               break;
+       case PERF_FTRACE_LATENCY:
+               if (list_empty(&ftrace.filters)) {
+                       pr_err("Should provide a function to measure\n");
+                       parse_options_usage(ftrace_usage, options, "T", 1);
+                       ret = -EINVAL;
+                       goto out_delete_filters;
+               }
+               cmd_func = __cmd_latency;
+               break;
+       case PERF_FTRACE_NONE:
+       default:
+               pr_err("Invalid subcommand\n");
+               ret = -EINVAL;
+               goto out_delete_filters;
+       }
+
        ret = target__validate(&ftrace.target);
        if (ret) {
                char errbuf[512];
@@ -1248,27 +1271,7 @@ int cmd_ftrace(int argc, const char **argv)
                        goto out_delete_evlist;
        }
 
-       switch (subcmd) {
-       case PERF_FTRACE_TRACE:
-               if (!argc && target__none(&ftrace.target))
-                       ftrace.target.system_wide = true;
-               ret = __cmd_ftrace(&ftrace);
-               break;
-       case PERF_FTRACE_LATENCY:
-               if (list_empty(&ftrace.filters)) {
-                       pr_err("Should provide a function to measure\n");
-                       parse_options_usage(ftrace_usage, options, "T", 1);
-                       ret = -EINVAL;
-                       goto out_delete_evlist;
-               }
-               ret = __cmd_latency(&ftrace);
-               break;
-       case PERF_FTRACE_NONE:
-       default:
-               pr_err("Invalid subcommand\n");
-               ret = -EINVAL;
-               break;
-       }
+       ret = cmd_func(&ftrace);
 
 out_delete_evlist:
        evlist__delete(ftrace.evlist);
index 3109d7b..3d27878 100755 (executable)
@@ -4,7 +4,7 @@
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
-regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*'
+regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$'
 egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
        sed -r "s/$regex/\2 \1/g"       | \
        sort -n | xargs printf "\t[%s] = \"%s\",\n"
index 0190068..8190a12 100644 (file)
@@ -2036,6 +2036,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
        memset(&objdump_process, 0, sizeof(objdump_process));
        objdump_process.argv = objdump_argv;
        objdump_process.out = -1;
+       objdump_process.err = -1;
        if (start_command(&objdump_process)) {
                pr_err("Failure starting to run %s\n", command);
                err = -1;
index 631e34a..ac60c08 100644 (file)
@@ -266,7 +266,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
                idx = evsel->core.idx;
                err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
                if (err) {
-                       pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n",
+                       pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
                               idx, evsel__name(evsel), evsel->cgrp->name);
                        goto out;
                }
index f70ba56..3945500 100644 (file)
@@ -2073,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread,
 
        ams->addr = ip;
        ams->al_addr = al.addr;
+       ams->al_level = al.level;
        ams->ms.maps = al.maps;
        ams->ms.sym = al.sym;
        ams->ms.map = al.map;
@@ -2092,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread,
 
        ams->addr = addr;
        ams->al_addr = al.addr;
+       ams->al_level = al.level;
        ams->ms.maps = al.maps;
        ams->ms.sym = al.sym;
        ams->ms.map = al.map;
index 7d22ade..e08817b 100644 (file)
@@ -18,6 +18,7 @@ struct addr_map_symbol {
        struct map_symbol ms;
        u64           addr;
        u64           al_addr;
+       char          al_level;
        u64           phys_addr;
        u64           data_page_size;
 };
index 47b7531..98af3fa 100644 (file)
@@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
                bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
                bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
                bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
-               bit_name(HW_INDEX),
+               bit_name(TYPE_SAVE), bit_name(HW_INDEX),
                { .name = NULL, }
        };
 #undef bit_name
index 2c0d30f..498b057 100644 (file)
@@ -1503,11 +1503,12 @@ static int machines__deliver_event(struct machines *machines,
                        ++evlist->stats.nr_unknown_id;
                        return 0;
                }
-               dump_sample(evsel, event, sample, perf_env__arch(machine->env));
                if (machine == NULL) {
                        ++evlist->stats.nr_unprocessable_samples;
+                       dump_sample(evsel, event, sample, perf_env__arch(NULL));
                        return 0;
                }
+               dump_sample(evsel, event, sample, perf_env__arch(machine->env));
                return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
index cfba8c3..2da081e 100644 (file)
@@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
                struct addr_map_symbol *from = &he->branch_info->from;
 
                return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
-                                                he->level, bf, size, width);
+                                                from->al_level, bf, size, width);
        }
 
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
                struct addr_map_symbol *to = &he->branch_info->to;
 
                return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
-                                                he->level, bf, size, width);
+                                                to->al_level, bf, size, width);
        }
 
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
index 5db83e5..9cbe351 100644 (file)
@@ -585,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
 
        alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
        list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
-               if (strcmp(evsel__name(alias), evsel__name(counter)) ||
-                   alias->scale != counter->scale ||
-                   alias->cgrp != counter->cgrp ||
-                   strcmp(alias->unit, counter->unit) ||
-                   evsel__is_clock(alias) != evsel__is_clock(counter) ||
-                   !strcmp(alias->pmu_name, counter->pmu_name))
-                       break;
-               alias->merged_stat = true;
-               cb(config, alias, data, false);
+               /* Merge events with the same name, etc. but on different PMUs. */
+               if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
+                       alias->scale == counter->scale &&
+                       alias->cgrp == counter->cgrp &&
+                       !strcmp(alias->unit, counter->unit) &&
+                       evsel__is_clock(alias) == evsel__is_clock(counter) &&
+                       strcmp(alias->pmu_name, counter->pmu_name)) {
+                       alias->merged_stat = true;
+                       cb(config, alias, data, false);
+               }
        }
 }
 
index 70f0956..b654de0 100644 (file)
@@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too
                                  perf_event__handler_t process, bool needs_mmap,
                                  bool data_mmap, unsigned int nr_threads_synthesize)
 {
+       /*
+        * When perf runs in non-root PID namespace, and the namespace's proc FS
+        * is not mounted, nsinfo__is_in_root_namespace() returns false.
+        * In this case, the proc FS is coming for the parent namespace, thus
+        * perf tool will wrongly gather process info from its parent PID
+        * namespace.
+        *
+        * To avoid the confusion that the perf tool runs in a child PID
+        * namespace but it synthesizes thread info from its parent PID
+        * namespace, returns failure with warning.
+        */
+       if (!nsinfo__is_in_root_namespace()) {
+               pr_err("Perf runs in non-root PID namespace but it tries to ");
+               pr_err("gather process info from its parent PID namespace.\n");
+               pr_err("Please mount the proc file system properly, e.g. ");
+               pr_err("add the option '--mount-proc' for unshare command.\n");
+               return -EPERM;
+       }
+
        if (target__has_task(target))
                return perf_event__synthesize_thread_map(tool, threads, process, machine,
                                                         needs_mmap, data_mmap);
index 4f32133..13d854a 100755 (executable)
@@ -61,7 +61,7 @@ def main(argv: Sequence[str]) -> None:
                elif isinstance(ex, subprocess.CalledProcessError):
                        print(f'{name}: FAILED')
                else:
-                       print('{name}: unexpected exception: {ex}')
+                       print(f'{name}: unexpected exception: {ex}')
                        continue
 
                output = ex.output
index 076cf43..cd45821 100644 (file)
@@ -126,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
 
 int main(int argc, char *argv[])
 {
-       pid_t pid;
-
        uid_t uid = getuid();
 
        ksft_print_header();
index 06256c9..f4a15cb 100644 (file)
 #define SYSFS_PATH_MAX 256
 #define DNAME_PATH_MAX 256
 
+/*
+ * Support ancient lirc.h which does not have these values. Can be removed
+ * once RHEL 8 is no longer a relevant testing platform.
+ */
+#if RC_PROTO_MAX < 26
+#define RC_PROTO_RCMM12 24
+#define RC_PROTO_RCMM24 25
+#define RC_PROTO_RCMM32 26
+#endif
+
 static const struct {
        enum rc_proto proto;
        const char *name;
index 4a565fb..18bb0d0 100755 (executable)
@@ -1299,6 +1299,7 @@ signal_address_tests()
        pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
        pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
        run_tests $ns1 $ns2 10.0.1.1
+       chk_join_nr "signal addresses race test" 3 3 3
 
        # the server will not signal the address terminating
        # the MPC subflow
index 8e83cf9..6d849dc 100644 (file)
@@ -44,9 +44,10 @@ static struct {
 } ctx;
 
 /* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
-#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
+#define TEST_SIG_DATA(addr, id) (~(unsigned long)(addr) + id)
 
-static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
+static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr,
+                                             unsigned long id)
 {
        struct perf_event_attr attr = {
                .type           = PERF_TYPE_BREAKPOINT,
@@ -60,7 +61,7 @@ static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
                .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
                .remove_on_exec = 1, /* Required by sigtrap. */
                .sigtrap        = 1, /* Request synchronous SIGTRAP on event. */
-               .sig_data       = TEST_SIG_DATA(addr),
+               .sig_data       = TEST_SIG_DATA(addr, id),
        };
        return attr;
 }
@@ -110,7 +111,7 @@ FIXTURE(sigtrap_threads)
 
 FIXTURE_SETUP(sigtrap_threads)
 {
-       struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on);
+       struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on, 0);
        struct sigaction action = {};
        int i;
 
@@ -165,7 +166,7 @@ TEST_F(sigtrap_threads, enable_event)
        EXPECT_EQ(ctx.tids_want_signal, 0);
        EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
        EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
-       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
 
        /* Check enabled for parent. */
        ctx.iterate_on = 0;
@@ -175,7 +176,7 @@ TEST_F(sigtrap_threads, enable_event)
 /* Test that modification propagates to all inherited events. */
 TEST_F(sigtrap_threads, modify_and_enable_event)
 {
-       struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on);
+       struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on, 42);
 
        EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0);
        run_test_threads(_metadata, self);
@@ -184,7 +185,7 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
        EXPECT_EQ(ctx.tids_want_signal, 0);
        EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
        EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
-       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 42));
 
        /* Check enabled for parent. */
        ctx.iterate_on = 0;
@@ -204,7 +205,7 @@ TEST_F(sigtrap_threads, signal_stress)
        EXPECT_EQ(ctx.tids_want_signal, 0);
        EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
        EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
-       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
+       EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
 }
 
 TEST_HARNESS_MAIN
index 01f8d3c..6922d64 100644 (file)
@@ -68,7 +68,7 @@
 #define PIDFD_SKIP 3
 #define PIDFD_XFAIL 4
 
-int wait_for_pid(pid_t pid)
+static inline int wait_for_pid(pid_t pid)
 {
        int status, ret;
 
@@ -78,13 +78,20 @@ again:
                if (errno == EINTR)
                        goto again;
 
+               ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
                return -1;
        }
 
-       if (!WIFEXITED(status))
+       if (!WIFEXITED(status)) {
+               ksft_print_msg(
+                      "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+                      WIFSIGNALED(status), WTERMSIG(status));
                return -1;
+       }
 
-       return WEXITSTATUS(status);
+       ret = WEXITSTATUS(status);
+       ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
+       return ret;
 }
 
 static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
index 2255852..3fd8e90 100644 (file)
@@ -12,6 +12,7 @@
 #include <string.h>
 #include <syscall.h>
 #include <sys/wait.h>
+#include <sys/mman.h>
 
 #include "pidfd.h"
 #include "../kselftest.h"
@@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name)
        return err->code;
 }
 
+#define CHILD_STACK_SIZE 8192
+
 struct child {
+       char *stack;
        pid_t pid;
        int   fd;
 };
@@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args,
                                struct error *err)
 {
        static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;
-       size_t stack_size = 1024;
-       char *stack[1024] = { 0 };
        struct child ret;
 
        if (!(flags & CLONE_NEWUSER) && geteuid() != 0)
                flags |= CLONE_NEWUSER;
 
+       ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+       if (ret.stack == MAP_FAILED) {
+               error_set(err, -1, "mmap of stack failed (errno %d)", errno);
+               return ret;
+       }
+
 #ifdef __ia64__
-       ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd);
+       ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd);
 #else
-       ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd);
+       ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd);
 #endif
 
        if (ret.pid < 0) {
@@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err)
        else if (r > 0)
                error_set(err, r, "child %d reported: %d", child->pid, r);
 
+       if (munmap(child->stack, CHILD_STACK_SIZE)) {
+               error_set(err, -1, "munmap of child stack failed (errno %d)", errno);
+               r = -1;
+       }
+
        return r;
 }
 
index 529eb70..9a2d649 100644 (file)
@@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid)
 {
        int pid, pidfd = 0;
        int status, ret;
-       pthread_t t1;
        time_t prog_start = time(NULL);
        const char *test_name = "pidfd_poll check for premature notification on child thread exec";
 
@@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args)
         */
        *child_exit_secs = time(NULL);
        syscall(SYS_exit, 0);
+       /* Never reached, but appeases compiler thinking we should return. */
+       exit(0);
 }
 
 static void test_pidfd_poll_leader_exit(int use_waitpid)
 {
        int pid, pidfd = 0;
-       int status, ret;
-       time_t prog_start = time(NULL);
+       int status, ret = 0;
        const char *test_name = "pidfd_poll check for premature notification on non-empty"
                                "group leader exit";
 
index be2943f..17999e0 100644 (file)
@@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
 
 TEST(wait_simple)
 {
-       int pidfd = -1, status = 0;
+       int pidfd = -1;
        pid_t parent_tid = -1;
        struct clone_args args = {
                .parent_tid = ptr_to_u64(&parent_tid),
@@ -47,7 +47,6 @@ TEST(wait_simple)
                .flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
                .exit_signal = SIGCHLD,
        };
-       int ret;
        pid_t pid;
        siginfo_t info = {
                .si_signo = 0,
@@ -88,7 +87,7 @@ TEST(wait_simple)
 
 TEST(wait_states)
 {
-       int pidfd = -1, status = 0;
+       int pidfd = -1;
        pid_t parent_tid = -1;
        struct clone_args args = {
                .parent_tid = ptr_to_u64(&parent_tid),
index d3fd24f..2f49c9a 100644 (file)
@@ -1417,6 +1417,7 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
 static int userfaultfd_stress(void)
 {
        void *area;
+       char *tmp_area;
        unsigned long nr;
        struct uffdio_register uffdio_register;
        struct uffd_stats uffd_stats[nr_cpus];
@@ -1527,9 +1528,13 @@ static int userfaultfd_stress(void)
                                            count_verify[nr], nr);
 
                /* prepare next bounce */
-               swap(area_src, area_dst);
+               tmp_area = area_src;
+               area_src = area_dst;
+               area_dst = tmp_area;
 
-               swap(area_src_alias, area_dst_alias);
+               tmp_area = area_src_alias;
+               area_src_alias = area_dst_alias;
+               area_dst_alias = tmp_area;
 
                uffd_stats_report(uffd_stats, nr_cpus);
        }