Merge tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel...
author: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
Pull more RISC-V updates from Palmer Dabbelt:

 - Support for sv48 paging

 - Hart ID mappings are now sparse, which enables more CPUs to come up
   on systems with sparse hart IDs

 - A handful of cleanups and fixes

* tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (27 commits)
  RISC-V: nommu_virt: Drop unused SLAB_MERGE_DEFAULT
  RISC-V: Remove redundant err variable
  riscv: dts: sifive unmatched: Add gpio poweroff
  riscv: canaan: remove useless select of non-existing config SYSCON
  RISC-V: Do not use cpumask data structure for hartid bitmap
  RISC-V: Move spinwait booting method to its own config
  RISC-V: Move the entire hart selection via lottery to SMP
  RISC-V: Use __cpu_up_stack/task_pointer only for spinwait method
  RISC-V: Do not print the SBI version during HSM extension boot print
  RISC-V: Avoid using per cpu array for ordered booting
  riscv: default to CONFIG_RISCV_SBI_V01=n
  riscv: fix boolconv.cocci warnings
  riscv: Explicit comment about user virtual address space size
  riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo
  riscv: Implement sv48 support
  asm-generic: Prepare for riscv use of pud_alloc_one and pud_free
  riscv: Allow to dynamically define VA_BITS
  riscv: Introduce functions to switch pt_ops
  riscv: Split early kasan mapping to prepare sv48 introduction
  riscv: Move KASAN mapping next to the kernel mapping
  ...

326 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/accounting/delay-accounting.rst
Documentation/devicetree/bindings/net/fsl-fman.txt
Documentation/devicetree/bindings/net/oxnas-dwmac.txt
Documentation/devicetree/bindings/pwm/pwm.yaml
Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml [new file with mode: 0644]
Documentation/driver-api/firewire.rst
Documentation/filesystems/ceph.rst
Documentation/filesystems/f2fs.rst
Documentation/staging/tee.rst
Documentation/trace/ftrace.rst
MAINTAINERS
arch/Kconfig
arch/alpha/kernel/rtc.c
arch/arm64/Kconfig
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/cmpxchg.h
arch/arm64/mm/init.c
arch/ia64/Kconfig
arch/mips/Kconfig
arch/mips/mm/init.c
arch/powerpc/Kconfig
arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
arch/powerpc/boot/dts/wii.dts
arch/powerpc/configs/gamecube_defconfig
arch/powerpc/configs/wii_defconfig
arch/powerpc/kernel/setup_64.c
arch/riscv/Kconfig
arch/s390/include/asm/cpu_mf.h
arch/s390/include/asm/uaccess.h
arch/s390/kernel/perf_cpum_cf_common.c
arch/s390/kernel/perf_cpum_cf_events.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/lib/uaccess.c
arch/sparc/Kconfig
arch/sparc/kernel/led.c
arch/sparc/kernel/smp_64.c
arch/x86/Kconfig
arch/x86/kernel/early-quirks.c
arch/x86/kernel/hpet.c
arch/x86/kernel/setup_percpu.c
block/bio.c
block/blk-mq-tag.c
block/blk-mq.c
block/blk-sysfs.c
block/mq-deadline.c
drivers/atm/iphase.c
drivers/base/arch_numa.c
drivers/base/power/trace.c
drivers/block/aoe/aoecmd.c
drivers/block/brd.c
drivers/block/loop.c
drivers/block/rbd.c
drivers/clk/clk-si5341.c
drivers/clk/mediatek/clk-mt7986-apmixed.c
drivers/clk/mediatek/clk-mt7986-infracfg.c
drivers/clk/mediatek/clk-mt7986-topckgen.c
drivers/clk/visconti/pll.c
drivers/gpio/gpio-idt3243x.c
drivers/gpio/gpio-mpc8xxx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
drivers/gpu/drm/amd/display/dc/irq/irq_service.c
drivers/gpu/drm/amd/display/dc/irq/irq_service.h
drivers/gpu/drm/i915/display/intel_ddi.c
drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/radeon/radeon_kms.c
drivers/hid/hid-ids.h
drivers/hid/hid-input.c
drivers/hid/hid-vivaldi.c
drivers/hid/uhid.c
drivers/hid/wacom_wac.c
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/net/bonding/bond_main.c
drivers/net/ethernet/allwinner/sun4i-emac.c
drivers/net/ethernet/allwinner/sun4i-emac.h
drivers/net/ethernet/apple/bmac.c
drivers/net/ethernet/apple/mace.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
drivers/net/ethernet/freescale/xgmac_mdio.c
drivers/net/ethernet/i825xx/sni_82596.c
drivers/net/ethernet/marvell/prestera/prestera.h
drivers/net/ethernet/marvell/prestera/prestera_hw.c
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/prestera/prestera_router.c
drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
drivers/net/ethernet/marvell/prestera/prestera_router_hw.h
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_flower.c
drivers/net/ethernet/mscc/ocelot_net.c
drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/ti/cpsw_priv.c
drivers/net/ethernet/vertexcom/Kconfig
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_endpoint.h
drivers/net/phy/at803x.c
drivers/net/phy/marvell.c
drivers/net/phy/micrel.c
drivers/net/phy/sfp.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/smsc95xx.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
drivers/net/wwan/mhi_wwan_mbim.c
drivers/nfc/pn544/i2c.c
drivers/nfc/st21nfca/se.c
drivers/pwm/core.c
drivers/pwm/pwm-img.c
drivers/pwm/pwm-twl.c
drivers/pwm/pwm-vt8500.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/dev.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-da9063.c
drivers/rtc/rtc-ftrtc010.c
drivers/rtc/rtc-gamecube.c [new file with mode: 0644]
drivers/rtc/rtc-mc146818-lib.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pxa.c
drivers/rtc/rtc-rs5c372.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-sunplus.c [new file with mode: 0644]
drivers/vfio/pci/vfio_pci_igd.c
drivers/vfio/vfio_iommu_type1.c
fs/adfs/inode.c
fs/binfmt_elf.c
fs/btrfs/Kconfig
fs/ceph/caps.c
fs/ceph/file.c
fs/ceph/metric.c
fs/ceph/quota.c
fs/ceph/super.c
fs/ceph/super.h
fs/exec.c
fs/f2fs/Kconfig
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/iostat.c
fs/f2fs/node.c
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/xattr.c
fs/fat/file.c
fs/hfsplus/hfsplus_raw.h
fs/hfsplus/xattr.c
fs/io-wq.c
fs/io_uring.c
fs/ksmbd/asn1.c
fs/ksmbd/auth.c
fs/ksmbd/auth.h
fs/ksmbd/connection.c
fs/ksmbd/connection.h
fs/ksmbd/ksmbd_netlink.h
fs/ksmbd/mgmt/user_config.c
fs/ksmbd/mgmt/user_config.h
fs/ksmbd/mgmt/user_session.h
fs/ksmbd/smb2misc.c
fs/ksmbd/smb2ops.c
fs/ksmbd/smb2pdu.c
fs/ksmbd/smb2pdu.h
fs/ksmbd/smb_common.h
fs/ksmbd/transport_ipc.c
fs/ksmbd/transport_rdma.c
fs/ksmbd/transport_rdma.h
fs/ksmbd/transport_tcp.c
fs/ksmbd/vfs_cache.h
fs/nilfs2/page.c
fs/proc/array.c
fs/proc/base.c
fs/proc/proc_sysctl.c
fs/proc/vmcore.c
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.h
fs/xfs/xfs_file.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl.h
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_ioctl32.h
include/asm-generic/barrier.h
include/kunit/assert.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/ceph/libceph.h
include/linux/ceph/messenger.h
include/linux/delayacct.h
include/linux/elfcore-compat.h
include/linux/elfcore.h
include/linux/hash.h
include/linux/kernel.h
include/linux/kthread.h
include/linux/list.h
include/linux/mc146818rtc.h
include/linux/percpu.h
include/linux/proc_fs.h
include/linux/sbitmap.h
include/linux/sched.h
include/linux/unaligned/packed_struct.h
include/net/inet_frag.h
include/net/ipv6_frag.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/trace/events/error_report.h
include/trace/events/f2fs.h
include/uapi/linux/magic.h
include/uapi/linux/taskstats.h
include/uapi/linux/uuid.h
kernel/bpf/btf.c
kernel/bpf/inode.c
kernel/bpf/verifier.c
kernel/configs/debug.config [new file with mode: 0644]
kernel/delayacct.c
kernel/kthread.c
kernel/panic.c
kernel/sys.c
lib/Kconfig.debug
lib/Kconfig.ubsan
lib/Makefile
lib/kstrtox.c
lib/list_debug.c
lib/lz4/lz4defs.h
lib/ref_tracker.c
lib/sbitmap.c
lib/test_hash.c
lib/test_meminit.c
lib/test_ubsan.c
mm/Kconfig
mm/memory.c
mm/page_alloc.c
mm/page_io.c
mm/percpu.c
net/bridge/br_if.c
net/ceph/ceph_common.c
net/ceph/messenger.c
net/core/dev.c
net/core/net_namespace.c
net/core/of_net.c
net/core/sock.c
net/ipv4/fib_semantics.c
net/ipv4/inet_fragment.c
net/ipv4/ip_fragment.c
net/ipv4/ip_gre.c
net/ipv6/sit.c
net/mctp/test/route-test.c
net/netfilter/nft_connlimit.c
net/netfilter/nft_last.c
net/netfilter/nft_limit.c
net/netfilter/nft_quota.c
net/nfc/llcp_sock.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_cdc.c
net/smc/smc_clc.c
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_diag.c
net/smc/smc_pnet.c
net/smc/smc_wr.h
net/tls/tls_sw.c
net/unix/garbage.c
net/unix/scm.c
net/xfrm/xfrm_policy.c
samples/bpf/offwaketime_kern.c
samples/bpf/test_overhead_kprobe_kern.c
samples/bpf/test_overhead_tp_kern.c
scripts/Makefile.ubsan
scripts/checkpatch.pl
scripts/const_structs.checkpatch
scripts/get_maintainer.pl
scripts/sorttable.h
sound/core/init.c
sound/core/misc.c
sound/pci/hda/cs35l41_hda.c
sound/pci/hda/cs35l41_hda.h
sound/pci/hda/cs35l41_hda_i2c.c
sound/pci/hda/cs35l41_hda_spi.c
sound/pci/hda/patch_cs8409-tables.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer_maps.c
tools/accounting/getdelays.c
tools/bpf/bpftool/skeleton/pid_iter.bpf.c
tools/include/linux/hash.h
tools/testing/selftests/bpf/prog_tests/d_path.c
tools/testing/selftests/bpf/prog_tests/xdp_link.c
tools/testing/selftests/bpf/progs/test_d_path_check_types.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_stacktrace_map.c
tools/testing/selftests/bpf/progs/test_tracepoint.c
tools/testing/selftests/bpf/progs/test_xdp_link.c
tools/testing/selftests/bpf/verifier/ringbuf.c [new file with mode: 0644]
tools/testing/selftests/bpf/verifier/spill_fill.c
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/settings

index b268e3e..2416b03 100644 (file)
@@ -112,6 +112,11 @@ Contact:   "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:   Set timeout to issue discard commands during umount.
                Default: 5 secs
 
+What:          /sys/fs/f2fs/<disk>/pending_discard
+Date:          November 2021
+Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:   Shows the number of pending discard commands in the queue.
+
 What:          /sys/fs/f2fs/<disk>/max_victim_search
 Date:          January 2014
 Contact:       "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -528,3 +533,10 @@ Description:       With "mode=fragment:block" mount options, we can scatter block allo
                f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
                in the length of 1..<max_fragment_hole> by turns. This value can be set
                between 1..512 and the default value is 4.
+
+What:          /sys/fs/f2fs/<disk>/gc_urgent_high_remaining
+Date:          December 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   You can set the trial count limit for GC urgent high mode with this value.
+               If GC thread gets to the limit, the mode will turn back to GC normal mode.
+               By default, the value is zero, which means there is no limit like before.
index 1b8b46d..197fe31 100644 (file)
@@ -13,6 +13,8 @@ a) waiting for a CPU (while being runnable)
 b) completion of synchronous block I/O initiated by the task
 c) swapping in pages
 d) memory reclaim
+e) thrashing page cache
+f) direct compact
 
 and makes these statistics available to userspace through
 the taskstats interface.
@@ -41,11 +43,12 @@ generic data structure to userspace corresponding to per-pid and per-tgid
 statistics. The delay accounting functionality populates specific fields of
 this structure. See
 
-     include/linux/taskstats.h
+     include/uapi/linux/taskstats.h
 
 for a description of the fields pertaining to delay accounting.
 It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
+cache, direct compact etc.
 
 Taking the difference of two successive readings of a given
 counter (say cpu_delay_total) for a task will give the delay
@@ -88,41 +91,37 @@ seen.
 
 General format of the getdelays command::
 
-       getdelays [-t tgid] [-p pid] [-c cmd...]
-
+       getdelays [-dilv] [-t tgid] [-p pid]
 
 Get delays, since system boot, for pid 10::
 
-       # ./getdelays -p 10
+       # ./getdelays -d -p 10
        (output similar to next case)
 
 Get sum of delays, since system boot, for all pids with tgid 5::
 
-       # ./getdelays -t 5
-
-
-       CPU     count   real total      virtual total   delay total
-               7876    92005750        100000000       24001500
-       IO      count   delay total
-               0       0
-       SWAP    count   delay total
-               0       0
-       RECLAIM count   delay total
-               0       0
+       # ./getdelays -d -t 5
+       print delayacct stats ON
+       TGID    5
 
-Get delays seen in executing a given simple command::
 
-  # ./getdelays -c ls /
+       CPU             count     real total  virtual total    delay total  delay average
+                           8        7000000        6872122        3382277          0.423ms
+       IO              count    delay total  delay average
+                           0              0              0ms
+       SWAP            count    delay total  delay average
+                           0              0              0ms
+       RECLAIM         count    delay total  delay average
+                           0              0              0ms
+       THRASHING       count    delay total  delay average
+                           0              0              0ms
+       COMPACT         count    delay total  delay average
+                           0              0              0ms
 
-  bin   data1  data3  data5  dev  home  media  opt   root  srv        sys  usr
-  boot  data2  data4  data6  etc  lib   mnt    proc  sbin  subdomain  tmp  var
+Get IO accounting for pid 1, it works only with -p::
 
+       # ./getdelays -i -p 1
+       printing IO accounting
+       linuxrc: read=65536, write=0, cancelled_write=0
 
-  CPU  count   real total      virtual total   delay total
-       6       4000250         4000000         0
-  IO   count   delay total
-       0       0
-  SWAP count   delay total
-       0       0
-  RECLAIM      count   delay total
-       0       0
+The above command can be used with -v to get more debug information.
index c00fb0d..020337f 100644 (file)
@@ -410,6 +410,15 @@ PROPERTIES
                The settings and programming routines for internal/external
                MDIO are different. Must be included for internal MDIO.
 
+- fsl,erratum-a009885
+               Usage: optional
+               Value type: <boolean>
+               Definition: Indicates the presence of the A009885
+               erratum describing that the contents of MDIO_DATA may
+               become corrupt unless it is read within 16 MDC cycles
+               of MDIO_CFG[BSY] being cleared, when performing an
+               MDIO read operation.
+
 - fsl,erratum-a011043
                Usage: optional
                Value type: <boolean>
index d7117a2..27db496 100644 (file)
@@ -9,6 +9,9 @@ Required properties on all platforms:
 - compatible:  For the OX820 SoC, it should be :
                - "oxsemi,ox820-dwmac" to select glue
                - "snps,dwmac-3.512" to select IP version.
+               For the OX810SE SoC, it should be :
+               - "oxsemi,ox810se-dwmac" to select glue
+               - "snps,dwmac-3.512" to select IP version.
 
 - clocks: Should contain phandles to the following clocks
 - clock-names: Should contain the following:
index 2effe6c..3c01f85 100644 (file)
@@ -9,6 +9,8 @@ title: PWM controllers (providers)
 maintainers:
   - Thierry Reding <thierry.reding@gmail.com>
 
+select: false
+
 properties:
   $nodename:
     pattern: "^pwm(@.*|-[0-9a-f])*$"
index 29fe39b..d12855e 100644 (file)
@@ -15,6 +15,7 @@ allOf:
 properties:
   compatible:
     enum:
+      - epson,rx8804
       - epson,rx8900
       - microcrystal,rv8803
 
index 4fba6db..6fa7d9f 100644 (file)
@@ -19,7 +19,14 @@ properties:
       - qcom,pmk8350-rtc
 
   reg:
-    maxItems: 1
+    minItems: 1
+    maxItems: 2
+
+  reg-names:
+    minItems: 1
+    items:
+      - const: rtc
+      - const: alarm
 
   interrupts:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml b/Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
new file mode 100644 (file)
index 0000000..fd1b3e7
--- /dev/null
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) Sunplus Co., Ltd. 2021
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/sunplus,sp7021-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sunplus SP7021 Real Time Clock controller
+
+maintainers:
+  - Vincent Shih <vincent.sunplus@gmail.com>
+
+properties:
+  compatible:
+    const: sunplus,sp7021-rtc
+
+  reg:
+    maxItems: 1
+
+  reg-names:
+    items:
+      - const: rtc
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - resets
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    rtc: serial@9c003a00 {
+        compatible = "sunplus,sp7021-rtc";
+        reg = <0x9c003a00 0x80>;
+        reg-names = "rtc";
+        clocks = <&clkc 0x12>;
+        resets = <&rstc 0x02>;
+        interrupt-parent = <&intc>;
+        interrupts = <163 IRQ_TYPE_EDGE_RISING>;
+    };
+...
index 94a2d7f..d3cfa73 100644 (file)
@@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module
 Firewire char device data structures
 ====================================
 
-.. include:: /ABI/stable/firewire-cdev
+.. include:: ../ABI/stable/firewire-cdev
     :literal:
 
 .. kernel-doc:: include/uapi/linux/firewire-cdev.h
@@ -28,7 +28,7 @@ Firewire char device data structures
 Firewire device probing and sysfs interfaces
 ============================================
 
-.. include:: /ABI/stable/sysfs-bus-firewire
+.. include:: ../ABI/stable/sysfs-bus-firewire
     :literal:
 
 .. kernel-doc:: drivers/firewire/core-device.c
index 7d2ef4e..4942e01 100644 (file)
@@ -82,7 +82,7 @@ Mount Syntax
 
 The basic mount syntax is::
 
- # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+ # mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]]
 
 You only need to specify a single monitor, as the client will get the
 full list when it connects.  (However, if the monitor you specify
@@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.)  The port can be left
 off if the monitor is using the default.  So if the monitor is at
 1.2.3.4::
 
- # mount -t ceph 1.2.3.4:/ /mnt/ceph
+ # mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4
 
 is sufficient.  If /sbin/mount.ceph is installed, a hostname can be
-used instead of an IP address.
+used instead of an IP address and the cluster FSID can be left out
+(as the mount helper will fill it in by reading the ceph configuration
+file)::
 
+  # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr
 
+Multiple monitor addresses can be passed by separating each address with a slash (`/`)::
+
+  # mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101
+
+When using the mount helper, monitor address can be read from ceph
+configuration file if available. Note that, the cluster FSID (passed as part
+of the device string) is validated by checking it with the FSID reported by
+the monitor.
 
 Mount Options
 =============
 
+  mon_addr=ip_address[:port][/ip_address[:port]]
+       Monitor address to the cluster. This is used to bootstrap the
+        connection to the cluster. Once connection is established, the
+        monitor addresses in the monitor map are followed.
+
+  fsid=cluster-id
+       FSID of the cluster (from `ceph fsid` command).
+
   ip=A.B.C.D[:N]
        Specify the IP and/or port the client should bind to locally.
        There is normally not much reason to do this.  If the IP is not
index d7b8469..4a2426f 100644 (file)
@@ -198,6 +198,7 @@ fault_type=%d                Support configuring fault injection type, should be
                         FAULT_WRITE_IO           0x000004000
                         FAULT_SLAB_ALLOC         0x000008000
                         FAULT_DQUOT_INIT         0x000010000
+                        FAULT_LOCK_OP            0x000020000
                         ===================      ===========
 mode=%s                         Control block allocation mode which supports "adaptive"
                         and "lfs". In "lfs" mode, there should be no random
index 3c63d8d..498343c 100644 (file)
@@ -255,7 +255,7 @@ The following picture shows a high level overview of AMD-TEE::
  +--------------------------+      +---------+--------------------+
 
 At the lowest level (in x86), the AMD Secure Processor (ASP) driver uses the
-CPU to PSP mailbox regsister to submit commands to the PSP. The format of the
+CPU to PSP mailbox register to submit commands to the PSP. The format of the
 command buffer is opaque to the ASP driver. It's role is to submit commands to
 the secure processor and return results to AMD-TEE driver. The interface
 between AMD-TEE driver and AMD Secure Processor driver can be found in [6].
@@ -290,7 +290,7 @@ cancel_req driver callback is not supported by AMD-TEE.
 
 The GlobalPlatform TEE Client API [5] can be used by the user space (client) to
 talk to AMD's TEE. AMD's TEE provides a secure environment for loading, opening
-a session, invoking commands and clossing session with TA.
+a session, invoking commands and closing session with TA.
 
 References
 ==========
index b3166c4..45b8c56 100644 (file)
@@ -3370,7 +3370,7 @@ one of the latency tracers, you will get the following results.
 
 Instances
 ---------
-In the tracefs tracing directory is a directory called "instances".
+In the tracefs tracing directory, there is a directory called "instances".
 This directory can have new directories created inside of it using
 mkdir, and removing directories with rmdir. The directory created
 with mkdir in this directory will already contain files and other
index b84e2d5..6c08cbe 100644 (file)
@@ -16807,6 +16807,7 @@ M:      Heiko Carstens <hca@linux.ibm.com>
 M:     Vasily Gorbik <gor@linux.ibm.com>
 M:     Christian Borntraeger <borntraeger@linux.ibm.com>
 R:     Alexander Gordeev <agordeev@linux.ibm.com>
+R:     Sven Schnelle <svens@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -18490,6 +18491,13 @@ L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/dlink/sundance.c
 
+SUNPLUS RTC DRIVER
+M:     Vincent Shih <vincent.sunplus@gmail.com>
+L:     linux-rtc@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/rtc/sunplus,sp7021-rtc.yaml
+F:     drivers/rtc/rtc-sunplus.c
+
 SUPERH
 M:     Yoshinori Sato <ysato@users.sourceforge.jp>
 M:     Rich Felker <dalias@libc.org>
index 874c65d..678a807 100644 (file)
@@ -997,6 +997,10 @@ config PAGE_SIZE_LESS_THAN_64KB
        depends on !PAGE_SIZE_64KB
        depends on !PARISC_PAGE_SIZE_64KB
        depends on !PPC_64K_PAGES
+       depends on PAGE_SIZE_LESS_THAN_256KB
+
+config PAGE_SIZE_LESS_THAN_256KB
+       def_bool y
        depends on !PPC_256K_PAGES
        depends on !PAGE_SIZE_256KB
 
index ce30779..fb30253 100644 (file)
@@ -80,7 +80,12 @@ init_rtc_epoch(void)
 static int
 alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       mc146818_get_time(tm);
+       int ret = mc146818_get_time(tm);
+
+       if (ret < 0) {
+               dev_err_ratelimited(dev, "unable to read current time\n");
+               return ret;
+       }
 
        /* Adjust for non-default epochs.  It's easier to depend on the
           generic __get_rtc_time and adjust the epoch here than create
index f6e333b..dc10d26 100644 (file)
@@ -1136,6 +1136,10 @@ config NUMA
        select GENERIC_ARCH_NUMA
        select ACPI_NUMA if ACPI
        select OF_NUMA
+       select HAVE_SETUP_PER_CPU_AREA
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK
+       select NEED_PER_CPU_PAGE_FIRST_CHUNK
+       select USE_PERCPU_NUMA_NODE_ID
        help
          Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -1152,22 +1156,6 @@ config NODES_SHIFT
          Specify the maximum number of NUMA Nodes available on the target
          system.  Increases memory reserved to accommodate various tables.
 
-config USE_PERCPU_NUMA_NODE_ID
-       def_bool y
-       depends on NUMA
-
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool y
-       depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y
-       depends on NUMA
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-       def_bool y
-       depends on NUMA
-
 source "kernel/Kconfig.hz"
 
 config ARCH_SPARSEMEM_ENABLE
index d955ade..5d460f6 100644 (file)
@@ -249,7 +249,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr,                  \
        "       mov     %" #w "[tmp], %" #w "[old]\n"                   \
        "       cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n"    \
        "       mov     %" #w "[ret], %" #w "[tmp]"                     \
-       : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr),            \
+       : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr),                    \
          [tmp] "=&r" (tmp)                                             \
        : [old] "r" (x1), [new] "r" (x2)                                \
        : cl);                                                          \
index f9bef42..497acf1 100644 (file)
@@ -243,7 +243,7 @@ static inline void __cmpwait_case_##sz(volatile void *ptr,          \
        "       cbnz    %" #w "[tmp], 1f\n"                             \
        "       wfe\n"                                                  \
        "1:"                                                            \
-       : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr)           \
+       : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr)                   \
        : [val] "r" (val));                                             \
 }
 
index a883443..db63cc8 100644 (file)
@@ -172,7 +172,7 @@ int pfn_is_map_memory(unsigned long pfn)
 }
 EXPORT_SYMBOL(pfn_is_map_memory);
 
-static phys_addr_t memory_limit = PHYS_ADDR_MAX;
+static phys_addr_t memory_limit __ro_after_init = PHYS_ADDR_MAX;
 
 /*
  * Limit the memory size that was specified via FDT.
index 1e33666..7039528 100644 (file)
@@ -32,6 +32,7 @@ config IA64
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
        select HAVE_FUNCTION_TRACER
+       select HAVE_SETUP_PER_CPU_AREA
        select TTY
        select HAVE_ARCH_TRACEHOOK
        select HAVE_VIRT_CPU_ACCOUNTING
@@ -88,9 +89,6 @@ config GENERIC_CALIBRATE_DELAY
        bool
        default y
 
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool y
-
 config DMI
        bool
        default y
@@ -292,6 +290,7 @@ config NUMA
        bool "NUMA support"
        depends on !FLATMEM
        select SMP
+       select USE_PERCPU_NUMA_NODE_ID
        help
          Say Y to compile the kernel to support NUMA (Non-Uniform Memory
          Access).  This option is for configuring high-end multiprocessor
@@ -311,10 +310,6 @@ config HAVE_ARCH_NODEDATA_EXTENSION
        def_bool y
        depends on NUMA
 
-config USE_PERCPU_NUMA_NODE_ID
-       def_bool y
-       depends on NUMA
-
 config HAVE_MEMORYLESS_NODES
        def_bool NUMA
 
index 3dd8c46..edf6c15 100644 (file)
@@ -2674,6 +2674,8 @@ config NUMA
        bool "NUMA Support"
        depends on SYS_SUPPORTS_NUMA
        select SMP
+       select HAVE_SETUP_PER_CPU_AREA
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK
        help
          Say Y to compile the kernel to support NUMA (Non-Uniform Memory
          Access).  This option improves performance on systems with more
@@ -2684,14 +2686,6 @@ config NUMA
 config SYS_SUPPORTS_NUMA
        bool
 
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool y
-       depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y
-       depends on NUMA
-
 config RELOCATABLE
        bool "Relocatable kernel"
        depends on SYS_SUPPORTS_RELOCATABLE
index 325e155..5a80028 100644 (file)
@@ -519,17 +519,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
        return node_distance(cpu_to_node(from), cpu_to_node(to));
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
-                                      size_t align)
+static int __init pcpu_cpu_to_node(int cpu)
 {
-       return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
-                                     MEMBLOCK_ALLOC_ACCESSIBLE,
-                                     cpu_to_node(cpu));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
+       return cpu_to_node(cpu);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -545,7 +537,7 @@ void __init setup_per_cpu_areas(void)
        rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
                                    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
                                    pcpu_cpu_distance,
-                                   pcpu_fc_alloc, pcpu_fc_free);
+                                   pcpu_cpu_to_node);
        if (rc < 0)
                panic("Failed to initialize percpu areas.");
 
index 0631c92..b779603 100644 (file)
@@ -55,15 +55,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
        default 9 if PPC_16K_PAGES      #  9 = 23 (8MB) - 14 (16K)
        default 11                      # 11 = 23 (8MB) - 12 (4K)
 
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y if PPC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-       def_bool y if PPC64
-
 config NR_IRQS
        int "Number of virtual interrupt numbers"
        range 32 1048576
@@ -241,6 +232,7 @@ config PPC
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE
        select HAVE_RSEQ
+       select HAVE_SETUP_PER_CPU_AREA          if PPC64
        select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
        select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
@@ -255,6 +247,8 @@ config PPC
        select MMU_GATHER_RCU_TABLE_FREE
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE               if PPC64 || NOT_COHERENT_CACHE
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK   if PPC64
+       select NEED_PER_CPU_PAGE_FIRST_CHUNK    if PPC64
        select NEED_SG_DMA_LENGTH
        select OF
        select OF_DMA_DEFAULT_COHERENT          if !NOT_COHERENT_CACHE
@@ -660,6 +654,7 @@ config NUMA
        bool "NUMA Memory Allocation and Scheduler Support"
        depends on PPC64 && SMP
        default y if PPC_PSERIES || PPC_POWERNV
+       select USE_PERCPU_NUMA_NODE_ID
        help
          Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -673,10 +668,6 @@ config NODES_SHIFT
        default "4"
        depends on NUMA
 
-config USE_PERCPU_NUMA_NODE_ID
-       def_bool y
-       depends on NUMA
-
 config HAVE_MEMORYLESS_NODES
        def_bool y
        depends on NUMA
index c90702b..48e5cd6 100644 (file)
@@ -79,6 +79,7 @@ fman0: fman@400000 {
                #size-cells = <0>;
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfc000 0x1000>;
+               fsl,erratum-a009885;
        };
 
        xmdio0: mdio@fd000 {
@@ -86,6 +87,7 @@ fman0: fman@400000 {
                #size-cells = <0>;
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfd000 0x1000>;
+               fsl,erratum-a009885;
        };
 };
 
index e9c945b..e46143c 100644 (file)
                        interrupts = <14>;
                };
 
+               srnprot@d800060 {
+                       compatible = "nintendo,hollywood-srnprot";
+                       reg = <0x0d800060 0x4>;
+               };
+
                GPIO: gpio@d8000c0 {
                        #gpio-cells = <2>;
                        compatible = "nintendo,hollywood-gpio";
index 24c0e0e..91a1b99 100644 (file)
@@ -68,7 +68,7 @@ CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_ISO9660_FS=y
index a0c45bf..0ab78c5 100644 (file)
@@ -98,7 +98,7 @@ CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_PANIC=y
 CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
 CONFIG_NVMEM_NINTENDO_OTP=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
index d87f7c1..be8577a 100644 (file)
@@ -771,50 +771,6 @@ void __init emergency_stack_init(void)
 }
 
 #ifdef CONFIG_SMP
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
-                                       size_t align)
-{
-       const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-       int node = early_cpu_to_node(cpu);
-       void *ptr;
-
-       if (!node_online(node) || !NODE_DATA(node)) {
-               ptr = memblock_alloc_from(size, align, goal);
-               pr_info("cpu %d has no node %d or node-local memory\n",
-                       cpu, node);
-               pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-                        cpu, size, __pa(ptr));
-       } else {
-               ptr = memblock_alloc_try_nid(size, align, goal,
-                                            MEMBLOCK_ALLOC_ACCESSIBLE, node);
-               pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
-                        "%016lx\n", cpu, size, node, __pa(ptr));
-       }
-       return ptr;
-#else
-       return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
-}
-
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
        if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
                return REMOTE_DISTANCE;
 }
 
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init pcpu_populate_pte(unsigned long addr)
+static __init int pcpu_cpu_to_node(int cpu)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       p4d = p4d_offset(pgd, addr);
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               p4d_populate(&init_mm, p4d, new);
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pud_populate(&init_mm, pud, new);
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
-
-       return;
-
-err_alloc:
-       panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-             __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+       return early_cpu_to_node(cpu);
 }
 
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
 
 void __init setup_per_cpu_areas(void)
 {
@@ -900,7 +816,7 @@ void __init setup_per_cpu_areas(void)
 
        if (pcpu_chosen_fc != PCPU_FC_PAGE) {
                rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
-                                           pcpu_alloc_bootmem, pcpu_free_bootmem);
+                                           pcpu_cpu_to_node);
                if (rc)
                        pr_warn("PERCPU: %s allocator failed (%d), "
                                "falling back to page size\n",
@@ -908,8 +824,7 @@ void __init setup_per_cpu_areas(void)
        }
 
        if (rc < 0)
-               rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
-                                          pcpu_populate_pte);
+               rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
 
index fd60ab9..5adcbd9 100644 (file)
@@ -306,6 +306,8 @@ config NUMA
        select GENERIC_ARCH_NUMA
        select OF_NUMA
        select ARCH_SUPPORTS_NUMA_BALANCING
+       select USE_PERCPU_NUMA_NODE_ID
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK
        help
          Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -321,14 +323,6 @@ config NODES_SHIFT
          Specify the maximum number of NUMA Nodes available on the target
          system.  Increases memory reserved to accommodate various tables.
 
-config USE_PERCPU_NUMA_NODE_ID
-       def_bool y
-       depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y
-       depends on NUMA
-
 config RISCV_ISA_C
        bool "Emit compressed instructions when building Linux"
        default y
index 0d90cbe..e3f12db 100644 (file)
@@ -109,7 +109,9 @@ struct hws_basic_entry {
        unsigned int AS:2;          /* 29-30 PSW address-space control   */
        unsigned int I:1;           /* 31 entry valid or invalid         */
        unsigned int CL:2;          /* 32-33 Configuration Level         */
-       unsigned int:14;
+       unsigned int H:1;           /* 34 Host Indicator                 */
+       unsigned int LS:1;          /* 35 Limited Sampling               */
+       unsigned int:12;
        unsigned int prim_asn:16;   /* primary ASN                       */
        unsigned long long ia;      /* Instruction Address               */
        unsigned long long gpp;     /* Guest Program Parameter           */
index ce550d0..147cb35 100644 (file)
@@ -49,51 +49,85 @@ int __get_user_bad(void) __attribute__((noreturn));
 
 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
 
-#define __put_get_user_asm(to, from, size, insn)               \
-({                                                             \
-       int __rc;                                               \
-                                                               \
-       asm volatile(                                           \
-               insn "          0,%[spec]\n"                    \
-               "0:     mvcos   %[_to],%[_from],%[_size]\n"     \
-               "1:     xr      %[rc],%[rc]\n"                  \
-               "2:\n"                                          \
-               ".pushsection .fixup, \"ax\"\n"                 \
-               "3:     lhi     %[rc],%[retval]\n"              \
-               "       jg      2b\n"                           \
-               ".popsection\n"                                 \
-               EX_TABLE(0b,3b) EX_TABLE(1b,3b)                 \
-               : [rc] "=&d" (__rc), [_to] "+Q" (*(to))         \
-               : [_size] "d" (size), [_from] "Q" (*(from)),    \
-                 [retval] "K" (-EFAULT), [spec] "K" (0x81UL)   \
-               : "cc", "0");                                   \
-       __rc;                                                   \
+union oac {
+       unsigned int val;
+       struct {
+               struct {
+                       unsigned short key : 4;
+                       unsigned short     : 4;
+                       unsigned short as  : 2;
+                       unsigned short     : 4;
+                       unsigned short k   : 1;
+                       unsigned short a   : 1;
+               } oac1;
+               struct {
+                       unsigned short key : 4;
+                       unsigned short     : 4;
+                       unsigned short as  : 2;
+                       unsigned short     : 4;
+                       unsigned short k   : 1;
+                       unsigned short a   : 1;
+               } oac2;
+       };
+};
+
+#define __put_get_user_asm(to, from, size, oac_spec)                   \
+({                                                                     \
+       int __rc;                                                       \
+                                                                       \
+       asm volatile(                                                   \
+               "       lr      0,%[spec]\n"                            \
+               "0:     mvcos   %[_to],%[_from],%[_size]\n"             \
+               "1:     xr      %[rc],%[rc]\n"                          \
+               "2:\n"                                                  \
+               ".pushsection .fixup, \"ax\"\n"                         \
+               "3:     lhi     %[rc],%[retval]\n"                      \
+               "       jg      2b\n"                                   \
+               ".popsection\n"                                         \
+               EX_TABLE(0b,3b) EX_TABLE(1b,3b)                         \
+               : [rc] "=&d" (__rc), [_to] "+Q" (*(to))                 \
+               : [_size] "d" (size), [_from] "Q" (*(from)),            \
+                 [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val)     \
+               : "cc", "0");                                           \
+       __rc;                                                           \
 })
 
+#define __put_user_asm(to, from, size)                         \
+       __put_get_user_asm(to, from, size, ((union oac) {       \
+               .oac1.as = PSW_BITS_AS_SECONDARY,               \
+               .oac1.a = 1                                     \
+       }))
+
+#define __get_user_asm(to, from, size)                         \
+       __put_get_user_asm(to, from, size, ((union oac) {       \
+               .oac2.as = PSW_BITS_AS_SECONDARY,               \
+               .oac2.a = 1                                     \
+       }))                                                     \
+
 static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
        int rc;
 
        switch (size) {
        case 1:
-               rc = __put_get_user_asm((unsigned char __user *)ptr,
-                                       (unsigned char *)x,
-                                       size, "llilh");
+               rc = __put_user_asm((unsigned char __user *)ptr,
+                                   (unsigned char *)x,
+                                   size);
                break;
        case 2:
-               rc = __put_get_user_asm((unsigned short __user *)ptr,
-                                       (unsigned short *)x,
-                                       size, "llilh");
+               rc = __put_user_asm((unsigned short __user *)ptr,
+                                   (unsigned short *)x,
+                                   size);
                break;
        case 4:
-               rc = __put_get_user_asm((unsigned int __user *)ptr,
-                                       (unsigned int *)x,
-                                       size, "llilh");
+               rc = __put_user_asm((unsigned int __user *)ptr,
+                                   (unsigned int *)x,
+                                   size);
                break;
        case 8:
-               rc = __put_get_user_asm((unsigned long __user *)ptr,
-                                       (unsigned long *)x,
-                                       size, "llilh");
+               rc = __put_user_asm((unsigned long __user *)ptr,
+                                   (unsigned long *)x,
+                                   size);
                break;
        default:
                __put_user_bad();
@@ -108,24 +142,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
 
        switch (size) {
        case 1:
-               rc = __put_get_user_asm((unsigned char *)x,
-                                       (unsigned char __user *)ptr,
-                                       size, "lghi");
+               rc = __get_user_asm((unsigned char *)x,
+                                   (unsigned char __user *)ptr,
+                                   size);
                break;
        case 2:
-               rc = __put_get_user_asm((unsigned short *)x,
-                                       (unsigned short __user *)ptr,
-                                       size, "lghi");
+               rc = __get_user_asm((unsigned short *)x,
+                                   (unsigned short __user *)ptr,
+                                   size);
                break;
        case 4:
-               rc = __put_get_user_asm((unsigned int *)x,
-                                       (unsigned int __user *)ptr,
-                                       size, "lghi");
+               rc = __get_user_asm((unsigned int *)x,
+                                   (unsigned int __user *)ptr,
+                                   size);
                break;
        case 8:
-               rc = __put_get_user_asm((unsigned long *)x,
-                                       (unsigned long __user *)ptr,
-                                       size, "lghi");
+               rc = __get_user_asm((unsigned long *)x,
+                                   (unsigned long __user *)ptr,
+                                   size);
                break;
        default:
                __get_user_bad();
index 30f0242..8ee4867 100644 (file)
@@ -178,7 +178,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
        case CPUMF_CTR_SET_CRYPTO:
                if (info->csvn >= 1 && info->csvn <= 5)
                        ctrset_size = 16;
-               else if (info->csvn == 6)
+               else if (info->csvn == 6 || info->csvn == 7)
                        ctrset_size = 20;
                break;
        case CPUMF_CTR_SET_EXT:
@@ -188,7 +188,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
                        ctrset_size = 48;
                else if (info->csvn >= 3 && info->csvn <= 5)
                        ctrset_size = 128;
-               else if (info->csvn == 6)
+               else if (info->csvn == 6 || info->csvn == 7)
                        ctrset_size = 160;
                break;
        case CPUMF_CTR_SET_MT_DIAG:
index 37265f5..52c1fe2 100644 (file)
@@ -344,7 +344,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
        NULL,
 };
 
-static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
        CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
        CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
        CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -715,8 +715,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
        case 1 ... 5:
                csvn = cpumcf_svn_12345_pmu_event_attr;
                break;
-       case 6:
-               csvn = cpumcf_svn_6_pmu_event_attr;
+       case 6 ... 7:
+               csvn = cpumcf_svn_67_pmu_event_attr;
                break;
        default:
                csvn = none;
index db62def..332a499 100644 (file)
@@ -1179,7 +1179,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
        sample = (struct hws_basic_entry *) *sdbt;
        while ((unsigned long *) sample < (unsigned long *) te) {
                /* Check for an empty sample */
-               if (!sample->def)
+               if (!sample->def || sample->LS)
                        break;
 
                /* Update perf event period */
index a596e69..8a5d214 100644 (file)
@@ -62,10 +62,14 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
                                                 unsigned long size)
 {
        unsigned long tmp1, tmp2;
+       union oac spec = {
+               .oac2.as = PSW_BITS_AS_SECONDARY,
+               .oac2.a = 1,
+       };
 
        tmp1 = -4096UL;
        asm volatile(
-               "   lghi  0,%[spec]\n"
+               "   lr    0,%[spec]\n"
                "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
                "6: jz    4f\n"
                "1: algr  %0,%3\n"
@@ -84,7 +88,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
                "5:\n"
                EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
                : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
-               : [spec] "K" (0x81UL)
+               : [spec] "d" (spec.val)
                : "cc", "memory", "0");
        return size;
 }
@@ -135,10 +139,14 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
                                               unsigned long size)
 {
        unsigned long tmp1, tmp2;
+       union oac spec = {
+               .oac1.as = PSW_BITS_AS_SECONDARY,
+               .oac1.a = 1,
+       };
 
        tmp1 = -4096UL;
        asm volatile(
-               "   llilh 0,%[spec]\n"
+               "   lr    0,%[spec]\n"
                "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
                "6: jz    4f\n"
                "1: algr  %0,%3\n"
@@ -157,7 +165,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
                "5:\n"
                EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
                : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
-               : [spec] "K" (0x81UL)
+               : [spec] "d" (spec.val)
                : "cc", "memory", "0");
        return size;
 }
@@ -207,10 +215,14 @@ EXPORT_SYMBOL(raw_copy_to_user);
 static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
 {
        unsigned long tmp1, tmp2;
+       union oac spec = {
+               .oac1.as = PSW_BITS_AS_SECONDARY,
+               .oac1.a = 1,
+       };
 
        tmp1 = -4096UL;
        asm volatile(
-               "   llilh 0,%[spec]\n"
+               "   lr    0,%[spec]\n"
                "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
                "   jz    4f\n"
                "1: algr  %0,%2\n"
@@ -228,7 +240,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
                "5:\n"
                EX_TABLE(0b,2b) EX_TABLE(3b,5b)
                : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
-               : "a" (empty_zero_page), [spec] "K" (0x81UL)
+               : "a" (empty_zero_page), [spec] "d" (spec.val)
                : "cc", "memory", "0");
        return size;
 }
index 66fc086..1cab1b2 100644 (file)
@@ -97,6 +97,9 @@ config SPARC64
        select PCI_DOMAINS if PCI
        select ARCH_HAS_GIGANTIC_PAGE
        select HAVE_SOFTIRQ_ON_OWN_STACK
+       select HAVE_SETUP_PER_CPU_AREA
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK
+       select NEED_PER_CPU_PAGE_FIRST_CHUNK
 
 config ARCH_PROC_KCORE_TEXT
        def_bool y
@@ -123,15 +126,6 @@ config AUDIT_ARCH
        bool
        default y
 
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool y if SPARC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y if SPARC64
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-       def_bool y if SPARC64
-
 config MMU
        bool
        default y
index 3a66e62..ab657b3 100644 (file)
@@ -114,18 +114,16 @@ static const struct proc_ops led_proc_ops = {
 };
 #endif
 
-static struct proc_dir_entry *led;
-
 #define LED_VERSION    "0.1"
 
 static int __init led_init(void)
 {
        timer_setup(&led_blink_timer, led_blink, 0);
 
-       led = proc_create("led", 0, NULL, &led_proc_ops);
-       if (!led)
+#ifdef CONFIG_PROC_FS
+       if (!proc_create("led", 0, NULL, &led_proc_ops))
                return -ENOMEM;
-
+#endif
        printk(KERN_INFO
               "led: version %s, Lars Kotthoff <metalhead@metalhead.ws>\n",
               LED_VERSION);
index b98a7bb..a1f78e9 100644 (file)
@@ -1526,50 +1526,6 @@ void smp_send_stop(void)
                smp_call_function(stop_this_cpu, NULL, 0);
 }
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
-                                       size_t align)
-{
-       const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-       int node = cpu_to_node(cpu);
-       void *ptr;
-
-       if (!node_online(node) || !NODE_DATA(node)) {
-               ptr = memblock_alloc_from(size, align, goal);
-               pr_info("cpu %d has no node %d or node-local memory\n",
-                       cpu, node);
-               pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-                        cpu, size, __pa(ptr));
-       } else {
-               ptr = memblock_alloc_try_nid(size, align, goal,
-                                            MEMBLOCK_ALLOC_ACCESSIBLE, node);
-               pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
-                        "%016lx\n", cpu, size, node, __pa(ptr));
-       }
-       return ptr;
-#else
-       return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
-}
-
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
        if (cpu_to_node(from) == cpu_to_node(to))
@@ -1578,57 +1534,9 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
                return REMOTE_DISTANCE;
 }
 
-static void __init pcpu_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       if (pgd_none(*pgd)) {
-               pud_t *new;
-
-               new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pgd_populate(&init_mm, pgd, new);
-       }
-
-       p4d = p4d_offset(pgd, addr);
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               p4d_populate(&init_mm, p4d, new);
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pud_populate(&init_mm, pud, new);
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
-
-       return;
-
-err_alloc:
-       panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-             __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+       return cpu_to_node(cpu);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -1641,8 +1549,7 @@ void __init setup_per_cpu_areas(void)
                rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
                                            PERCPU_DYNAMIC_RESERVE, 4 << 20,
                                            pcpu_cpu_distance,
-                                           pcpu_alloc_bootmem,
-                                           pcpu_free_bootmem);
+                                           pcpu_cpu_to_node);
                if (rc)
                        pr_warn("PERCPU: %s allocator failed (%d), "
                                "falling back to page size\n",
@@ -1650,9 +1557,7 @@ void __init setup_per_cpu_areas(void)
        }
        if (rc < 0)
                rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-                                          pcpu_alloc_bootmem,
-                                          pcpu_free_bootmem,
-                                          pcpu_populate_pte);
+                                          pcpu_cpu_to_node);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
 
index 407533c..6fddb63 100644 (file)
@@ -78,7 +78,7 @@ config X86
        select ARCH_HAS_FILTER_PGPROT
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
-       select ARCH_HAS_KCOV                    if X86_64 && STACK_VALIDATION
+       select ARCH_HAS_KCOV                    if X86_64
        select ARCH_HAS_MEM_ENCRYPT
        select ARCH_HAS_MEMBARRIER_SYNC_CORE
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
@@ -240,6 +240,7 @@ config X86
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE         if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
        select HAVE_FUNCTION_ARG_ACCESS_API
+       select HAVE_SETUP_PER_CPU_AREA
        select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_STACKPROTECTOR              if CC_HAS_SANE_STACKPROTECTOR
        select HAVE_STACK_VALIDATION            if X86_64
@@ -253,6 +254,8 @@ config X86
        select HAVE_GENERIC_VDSO
        select HOTPLUG_SMT                      if SMP
        select IRQ_FORCED_THREADING
+       select NEED_PER_CPU_EMBED_FIRST_CHUNK
+       select NEED_PER_CPU_PAGE_FIRST_CHUNK
        select NEED_SG_DMA_LENGTH
        select PCI_DOMAINS                      if PCI
        select PCI_LOCKLESS_CONFIG              if PCI
@@ -333,15 +336,6 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_FILTER_PGPROT
        def_bool y
 
-config HAVE_SETUP_PER_CPU_AREA
-       def_bool y
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
-       def_bool y
-
-config NEED_PER_CPU_PAGE_FIRST_CHUNK
-       def_bool y
-
 config ARCH_HIBERNATION_POSSIBLE
        def_bool y
 
@@ -1575,6 +1569,7 @@ config NUMA
        depends on SMP
        depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
        default y if X86_BIGSMP
+       select USE_PERCPU_NUMA_NODE_ID
        help
          Enable NUMA (Non-Uniform Memory Access) support.
 
@@ -2450,10 +2445,6 @@ config ARCH_HAS_ADD_PAGES
 config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
        def_bool y
 
-config USE_PERCPU_NUMA_NODE_ID
-       def_bool y
-       depends on NUMA
-
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
index fd2d3ab..dc7da08 100644 (file)
@@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = {
        .stolen_size = gen9_stolen_size,
 };
 
+/* Intel integrated GPUs for which we need to reserve "stolen memory" */
 static const struct pci_device_id intel_early_ids[] __initconst = {
        INTEL_I830_IDS(&i830_early_ops),
        INTEL_I845G_IDS(&i845_early_ops),
@@ -592,6 +593,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func)
        u16 device;
        int i;
 
+       /*
+        * Reserve "stolen memory" for an integrated GPU.  If we've already
+        * found one, there's nothing to do for other (discrete) GPUs.
+        */
+       if (resource_size(&intel_graphics_stolen_res))
+               return;
+
        device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
 
        for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
@@ -704,7 +712,7 @@ static struct chipset early_qrk[] __initdata = {
        { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
          PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
        { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
-         QFLAG_APPLY_ONCE, intel_graphics_quirks },
+         0, intel_graphics_quirks },
        /*
         * HPET on the current version of the Baytrail platform has accuracy
         * problems: it will halt in deep idle state - so we disable it.
index 882213d..71f3364 100644 (file)
@@ -1435,8 +1435,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
        hpet_rtc_timer_reinit();
        memset(&curr_time, 0, sizeof(struct rtc_time));
 
-       if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-               mc146818_get_time(&curr_time);
+       if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
+               if (unlikely(mc146818_get_time(&curr_time) < 0)) {
+                       pr_err_ratelimited("unable to read current time from RTC\n");
+                       return IRQ_HANDLED;
+               }
+       }
 
        if (hpet_rtc_flags & RTC_UIE &&
            curr_time.tm_sec != hpet_prev_update_sec) {
index 7b65275..49325ca 100644 (file)
@@ -84,60 +84,6 @@ static bool __init pcpu_need_numa(void)
 }
 #endif
 
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
-                                       unsigned long align)
-{
-       const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
-       int node = early_cpu_to_node(cpu);
-       void *ptr;
-
-       if (!node_online(node) || !NODE_DATA(node)) {
-               ptr = memblock_alloc_from(size, align, goal);
-               pr_info("cpu %d has no node %d or node-local memory\n",
-                       cpu, node);
-               pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
-                        cpu, size, __pa(ptr));
-       } else {
-               ptr = memblock_alloc_try_nid(size, align, goal,
-                                            MEMBLOCK_ALLOC_ACCESSIBLE,
-                                            node);
-
-               pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
-                        cpu, size, node, __pa(ptr));
-       }
-       return ptr;
-#else
-       return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
-       return pcpu_alloc_bootmem(cpu, size, align);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
-}
-
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
 #ifdef CONFIG_NUMA
@@ -150,7 +96,12 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 #endif
 }
 
-static void __init pcpup_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+       return early_cpu_to_node(cpu);
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
 {
        populate_extra_pte(addr);
 }
@@ -205,15 +156,14 @@ void __init setup_per_cpu_areas(void)
                rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
                                            dyn_size, atom_size,
                                            pcpu_cpu_distance,
-                                           pcpu_fc_alloc, pcpu_fc_free);
+                                           pcpu_cpu_to_node);
                if (rc < 0)
                        pr_warn("%s allocator failed (%d), falling back to page size\n",
                                pcpu_fc_names[pcpu_chosen_fc], rc);
        }
        if (rc < 0)
                rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
-                                          pcpu_fc_alloc, pcpu_fc_free,
-                                          pcpup_populate_pte);
+                                          pcpu_cpu_to_node);
        if (rc < 0)
                panic("cannot initialize percpu area (err=%d)", rc);
 
index 0d400ba..4312a80 100644 (file)
@@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size)
                                offset = new_size - done;
                        else
                                offset = 0;
-                       zero_user(bv.bv_page, offset, bv.bv_len - offset);
+                       zero_user(bv.bv_page, bv.bv_offset + offset,
+                                 bv.bv_len - offset);
                        truncated = true;
                }
                done += bv.bv_len;
index e55a683..845f74e 100644 (file)
 #include "blk-mq-tag.h"
 
 /*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+               unsigned int users)
+{
+       if (!users)
+               return;
+
+       sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+                       users);
+       sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+                       users);
+}
+
+/*
  * If a previously inactive queue goes active, bump the active user count.
  * We need to do this before try to allocate driver tag, then even if fail
  * to get tag when first time, the other shared-tag users could reserve
  */
 bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
+       unsigned int users;
+
        if (blk_mq_is_shared_tags(hctx->flags)) {
                struct request_queue *q = hctx->queue;
 
-               if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
-                   !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-                       atomic_inc(&hctx->tags->active_queues);
+               if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+                   test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+                       return true;
+               }
        } else {
-               if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
-                   !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-                       atomic_inc(&hctx->tags->active_queues);
+               if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+                   test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+                       return true;
+               }
        }
 
+       users = atomic_inc_return(&hctx->tags->active_queues);
+
+       blk_mq_update_wake_batch(hctx->tags, users);
+
        return true;
 }
 
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
        struct blk_mq_tags *tags = hctx->tags;
+       unsigned int users;
 
        if (blk_mq_is_shared_tags(hctx->flags)) {
                struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
                        return;
        }
 
-       atomic_dec(&tags->active_queues);
+       users = atomic_dec_return(&tags->active_queues);
+
+       blk_mq_update_wake_batch(tags, users);
 
        blk_mq_tag_wakeup_all(tags, false);
 }
index a6d4780..b5e35e6 100644 (file)
@@ -2976,6 +2976,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                bio = bio_clone_fast(bio_src, gfp_mask, bs);
                if (!bio)
                        goto free_and_out;
+               bio->bi_bdev = rq->q->disk->part0;
 
                if (bio_ctr && bio_ctr(bio, bio_src, data))
                        goto free_and_out;
index e20eadf..9f32882 100644 (file)
@@ -811,6 +811,9 @@ static void blk_release_queue(struct kobject *kobj)
 
        bioset_exit(&q->bio_split);
 
+       if (blk_queue_has_srcu(q))
+               cleanup_srcu_struct(q->srcu);
+
        ida_simple_remove(&blk_queue_ida, q->id);
        call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
@@ -887,7 +890,6 @@ int blk_register_queue(struct gendisk *disk)
                kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
        mutex_unlock(&q->sysfs_lock);
 
-       ret = 0;
 unlock:
        mutex_unlock(&q->sysfs_dir_lock);
 
index 85d919b..3ed5eaf 100644 (file)
@@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
 SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
 SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->front_merges);
+SHOW_INT(deadline_async_depth_show, dd->async_depth);
 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
 #undef SHOW_INT
 #undef SHOW_JIFFIES
@@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
 STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
+STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
 STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
 #undef STORE_FUNCTION
 #undef STORE_INT
index bc8e8d9..3e726ee 100644 (file)
@@ -178,7 +178,6 @@ static void ia_hack_tcq(IADEV *dev) {
 
 static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
   u_short              desc_num, i;
-  struct sk_buff        *skb;
   struct ia_vcc         *iavcc_r = NULL; 
   unsigned long delta;
   static unsigned long timer = 0;
@@ -202,8 +201,7 @@ static u16 get_desc (IADEV *dev, struct ia_vcc *iavcc) {
            else 
               dev->ffL.tcq_rd -= 2;
            *(u_short *)(dev->seg_ram + dev->ffL.tcq_rd) = i+1;
-           if (!(skb = dev->desc_tbl[i].txskb) || 
-                          !(iavcc_r = dev->desc_tbl[i].iavcc))
+           if (!dev->desc_tbl[i].txskb || !(iavcc_r = dev->desc_tbl[i].iavcc))
               printk("Fatal err, desc table vcc or skb is NULL\n");
            else 
               iavcc_r->vc_desc_cnt--;
index bc18769..eaa31e5 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/of.h>
 
 #include <asm/sections.h>
-#include <asm/pgalloc.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
@@ -155,66 +154,6 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
        return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
-                                      size_t align)
-{
-       int nid = early_cpu_to_node(cpu);
-
-       return  memblock_alloc_try_nid(size, align,
-                       __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
-}
-
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-static void __init pcpu_populate_pte(unsigned long addr)
-{
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-
-       p4d = p4d_offset(pgd, addr);
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               p4d_populate(&init_mm, p4d, new);
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pud_populate(&init_mm, pud, new);
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               if (!new)
-                       goto err_alloc;
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
-
-       return;
-
-err_alloc:
-       panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
-             __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
-}
-#endif
-
 void __init setup_per_cpu_areas(void)
 {
        unsigned long delta;
@@ -229,7 +168,7 @@ void __init setup_per_cpu_areas(void)
                rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
                                            PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
                                            pcpu_cpu_distance,
-                                           pcpu_fc_alloc, pcpu_fc_free);
+                                           early_cpu_to_node);
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
                if (rc < 0)
                        pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
@@ -239,10 +178,7 @@ void __init setup_per_cpu_areas(void)
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
        if (rc < 0)
-               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
-                                          pcpu_fc_alloc,
-                                          pcpu_fc_free,
-                                          pcpu_populate_pte);
+               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
 #endif
        if (rc < 0)
                panic("Failed to initialize percpu areas (err=%d).", rc);
index 9466503..72b7a92 100644 (file)
@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void)
        struct rtc_time time;
        unsigned int val;
 
-       mc146818_get_time(&time);
+       if (mc146818_get_time(&time) < 0) {
+               pr_err("Unable to read current time from RTC\n");
+               return 0;
+       }
+
        pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time);
        val = time.tm_year;                             /* 100 years */
        if (val > 100)
index 588889b..6af111f 100644 (file)
@@ -122,7 +122,7 @@ newtag(struct aoedev *d)
        register ulong n;
 
        n = jiffies & 0xffff;
-       return n |= (++d->lasttag & 0x7fff) << 16;
+       return n | (++d->lasttag & 0x7fff) << 16;
 }
 
 static u32
index 8fe2e42..6e3f2f0 100644 (file)
@@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size);
  * (should share code eventually).
  */
 static LIST_HEAD(brd_devices);
-static DEFINE_MUTEX(brd_devices_mutex);
 static struct dentry *brd_debugfs_dir;
 
 static int brd_alloc(int i)
@@ -372,21 +371,14 @@ static int brd_alloc(int i)
        char buf[DISK_NAME_LEN];
        int err = -ENOMEM;
 
-       mutex_lock(&brd_devices_mutex);
-       list_for_each_entry(brd, &brd_devices, brd_list) {
-               if (brd->brd_number == i) {
-                       mutex_unlock(&brd_devices_mutex);
+       list_for_each_entry(brd, &brd_devices, brd_list)
+               if (brd->brd_number == i)
                        return -EEXIST;
-               }
-       }
        brd = kzalloc(sizeof(*brd), GFP_KERNEL);
-       if (!brd) {
-               mutex_unlock(&brd_devices_mutex);
+       if (!brd)
                return -ENOMEM;
-       }
        brd->brd_number         = i;
        list_add_tail(&brd->brd_list, &brd_devices);
-       mutex_unlock(&brd_devices_mutex);
 
        spin_lock_init(&brd->brd_lock);
        INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
@@ -429,9 +421,7 @@ static int brd_alloc(int i)
 out_cleanup_disk:
        blk_cleanup_disk(disk);
 out_free_dev:
-       mutex_lock(&brd_devices_mutex);
        list_del(&brd->brd_list);
-       mutex_unlock(&brd_devices_mutex);
        kfree(brd);
        return err;
 }
@@ -441,15 +431,19 @@ static void brd_probe(dev_t dev)
        brd_alloc(MINOR(dev) / max_part);
 }
 
-static void brd_del_one(struct brd_device *brd)
+static void brd_cleanup(void)
 {
-       del_gendisk(brd->brd_disk);
-       blk_cleanup_disk(brd->brd_disk);
-       brd_free_pages(brd);
-       mutex_lock(&brd_devices_mutex);
-       list_del(&brd->brd_list);
-       mutex_unlock(&brd_devices_mutex);
-       kfree(brd);
+       struct brd_device *brd, *next;
+
+       debugfs_remove_recursive(brd_debugfs_dir);
+
+       list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+               del_gendisk(brd->brd_disk);
+               blk_cleanup_disk(brd->brd_disk);
+               brd_free_pages(brd);
+               list_del(&brd->brd_list);
+               kfree(brd);
+       }
 }
 
 static inline void brd_check_and_reset_par(void)
@@ -473,9 +467,18 @@ static inline void brd_check_and_reset_par(void)
 
 static int __init brd_init(void)
 {
-       struct brd_device *brd, *next;
        int err, i;
 
+       brd_check_and_reset_par();
+
+       brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
+       for (i = 0; i < rd_nr; i++) {
+               err = brd_alloc(i);
+               if (err)
+                       goto out_free;
+       }
+
        /*
         * brd module now has a feature to instantiate underlying device
         * structure on-demand, provided that there is an access dev node.
@@ -491,28 +494,16 @@ static int __init brd_init(void)
         *      dynamically.
         */
 
-       if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
-               return -EIO;
-
-       brd_check_and_reset_par();
-
-       brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
-       for (i = 0; i < rd_nr; i++) {
-               err = brd_alloc(i);
-               if (err)
-                       goto out_free;
+       if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
+               err = -EIO;
+               goto out_free;
        }
 
        pr_info("brd: module loaded\n");
        return 0;
 
 out_free:
-       unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
-       debugfs_remove_recursive(brd_debugfs_dir);
-
-       list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
-               brd_del_one(brd);
+       brd_cleanup();
 
        pr_info("brd: module NOT loaded !!!\n");
        return err;
@@ -520,13 +511,9 @@ out_free:
 
 static void __exit brd_exit(void)
 {
-       struct brd_device *brd, *next;
 
        unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
-       debugfs_remove_recursive(brd_debugfs_dir);
-
-       list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
-               brd_del_one(brd);
+       brd_cleanup();
 
        pr_info("brd: module unloaded\n");
 }
index b1b05c4..01cbbfc 100644 (file)
@@ -820,7 +820,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
 
 static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
 {
-       struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+       struct rb_node **node, *parent = NULL;
        struct loop_worker *cur_worker, *worker = NULL;
        struct work_struct *work;
        struct list_head *cmd_list;
index 8f140da..4203cda 100644 (file)
@@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf)
         * These are the characters that produce nonzero for
         * isspace() in the "C" and "POSIX" locales.
         */
-        const char *spaces = " \f\n\r\t\v";
+       static const char spaces[] = " \f\n\r\t\v";
 
         *buf += strspn(*buf, spaces);  /* Find start of token */
 
@@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf,
        pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
        pctx.opts->trim = RBD_TRIM_DEFAULT;
 
-       ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
+       ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
+                                ',');
        if (ret)
                goto out_err;
 
index 57ae183..f7b4136 100644 (file)
@@ -1740,7 +1740,7 @@ static int si5341_probe(struct i2c_client *client,
                        clk_prepare(data->clk[i].hw.clk);
        }
 
-       err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get,
+       err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get,
                        data);
        if (err) {
                dev_err(&client->dev, "unable to add clk provider\n");
index 76c8ebd..98ec388 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2021 MediaTek Inc.
  * Author: Sam Shih <sam.shih@mediatek.com>
index 3be168c..f209c55 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2021 MediaTek Inc.
  * Author: Sam Shih <sam.shih@mediatek.com>
index 8550e2b..8f6f79b 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-1.0
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2021 MediaTek Inc.
  * Author: Sam Shih <sam.shih@mediatek.com>
index a2398bc..a484cb9 100644 (file)
@@ -246,7 +246,6 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
 {
        struct clk_init_data init;
        struct visconti_pll *pll;
-       struct clk *pll_clk;
        struct clk_hw *pll_hw_clk;
        size_t len;
        int ret;
@@ -277,7 +276,7 @@ static struct clk_hw *visconti_register_pll(struct visconti_pll_provider *ctx,
        pll_hw_clk = &pll->hw;
        ret = clk_hw_register(NULL, &pll->hw);
        if (ret) {
-               pr_err("failed to register pll clock %s : %ld\n", name, PTR_ERR(pll_clk));
+               pr_err("failed to register pll clock %s : %d\n", name, ret);
                kfree(pll);
                pll_hw_clk = ERR_PTR(ret);
        }
index 50003ad..52b8b72 100644 (file)
@@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct gpio_irq_chip *girq;
        struct idt_gpio_ctrl *ctrl;
-       unsigned int parent_irq;
+       int parent_irq;
        int ngpios;
        int ret;
 
@@ -164,8 +164,8 @@ static int idt_gpio_probe(struct platform_device *pdev)
                        return PTR_ERR(ctrl->pic);
 
                parent_irq = platform_get_irq(pdev, 0);
-               if (!parent_irq)
-                       return -EINVAL;
+               if (parent_irq < 0)
+                       return parent_irq;
 
                girq = &ctrl->gc.irq;
                girq->chip = &idt_gpio_irqchip;
index 70d6ae2..a964e25 100644 (file)
@@ -47,7 +47,7 @@ struct mpc8xxx_gpio_chip {
                                unsigned offset, int value);
 
        struct irq_domain *irq;
-       unsigned int irqn;
+       int irqn;
 };
 
 /*
@@ -388,8 +388,8 @@ static int mpc8xxx_probe(struct platform_device *pdev)
        }
 
        mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
-       if (!mpc8xxx_gc->irqn)
-               return 0;
+       if (mpc8xxx_gc->irqn < 0)
+               return mpc8xxx_gc->irqn;
 
        mpc8xxx_gc->irq = irq_domain_create_linear(fwnode,
                                                   MPC8XXX_GPIO_PINS,
index cf7fad8..ed077de 100644 (file)
@@ -2354,7 +2354,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
        }
 
        if (amdgpu_sriov_vf(adev))
-               amdgpu_virt_exchange_data(adev);
+               amdgpu_virt_init_data_exchange(adev);
 
        r = amdgpu_ib_pool_init(adev);
        if (r) {
@@ -4450,33 +4450,24 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 
        if (amdgpu_gpu_recovery == -1) {
                switch (adev->asic_type) {
-               case CHIP_BONAIRE:
-               case CHIP_HAWAII:
-               case CHIP_TOPAZ:
-               case CHIP_TONGA:
-               case CHIP_FIJI:
-               case CHIP_POLARIS10:
-               case CHIP_POLARIS11:
-               case CHIP_POLARIS12:
-               case CHIP_VEGAM:
-               case CHIP_VEGA20:
-               case CHIP_VEGA10:
-               case CHIP_VEGA12:
-               case CHIP_RAVEN:
-               case CHIP_ARCTURUS:
-               case CHIP_RENOIR:
-               case CHIP_NAVI10:
-               case CHIP_NAVI14:
-               case CHIP_NAVI12:
-               case CHIP_SIENNA_CICHLID:
-               case CHIP_NAVY_FLOUNDER:
-               case CHIP_DIMGREY_CAVEFISH:
-               case CHIP_BEIGE_GOBY:
-               case CHIP_VANGOGH:
-               case CHIP_ALDEBARAN:
-                       break;
-               default:
+#ifdef CONFIG_DRM_AMDGPU_SI
+               case CHIP_VERDE:
+               case CHIP_TAHITI:
+               case CHIP_PITCAIRN:
+               case CHIP_OLAND:
+               case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+               case CHIP_KAVERI:
+               case CHIP_KABINI:
+               case CHIP_MULLINS:
+#endif
+               case CHIP_CARRIZO:
+               case CHIP_STONEY:
+               case CHIP_CYAN_SKILLFISH:
                        goto disabled;
+               default:
+                       break;
                }
        }
 
index be45650..81bfee9 100644 (file)
@@ -243,6 +243,30 @@ static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
        return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
 }
 
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+       /*
+        * So far, apply this quirk only on those Navy Flounder boards which
+        * have a bad harvest table of VCN config.
+        */
+       if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
+               (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) {
+               switch (adev->pdev->revision) {
+               case 0xC1:
+               case 0xC2:
+               case 0xC3:
+               case 0xC5:
+               case 0xC7:
+               case 0xCF:
+               case 0xDF:
+                       adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
 static int amdgpu_discovery_init(struct amdgpu_device *adev)
 {
        struct table_info *info;
@@ -548,11 +572,9 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
                        break;
                }
        }
-       /* some IP discovery tables on Navy Flounder don't have this set correctly */
-       if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
-           (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2)) &&
-           (adev->pdev->revision != 0xFF))
-               adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+
+       amdgpu_discovery_harvest_config_quirk(adev);
+
        if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
                adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
                adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
index c610e27..b21bcdc 100644 (file)
@@ -1930,11 +1930,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
                        return -ENODEV;
        }
 
-       if (flags == 0) {
-               DRM_INFO("Unsupported asic.  Remove me when IP discovery init is in place.\n");
-               return -ENODEV;
-       }
-
        if (amdgpu_virtual_display ||
            amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
                supports_atomic = true;
index 894444a..07bc0f5 100644 (file)
@@ -625,20 +625,20 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
        adev->virt.fw_reserve.p_vf2pf = NULL;
        adev->virt.vf2pf_update_interval_ms = 0;
 
-       if (adev->bios != NULL) {
-               adev->virt.vf2pf_update_interval_ms = 2000;
+       if (adev->mman.fw_vram_usage_va != NULL) {
+               /* go through this logic in ip_init and reset to init workqueue*/
+               amdgpu_virt_exchange_data(adev);
 
+               INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+               schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+       } else if (adev->bios != NULL) {
+               /* go through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
                adev->virt.fw_reserve.p_pf2vf =
                        (struct amd_sriov_msg_pf2vf_info_header *)
                        (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
 
                amdgpu_virt_read_pf2vf_data(adev);
        }
-
-       if (adev->virt.vf2pf_update_interval_ms != 0) {
-               INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
-               schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
-       }
 }
 
 
@@ -674,12 +674,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
                                if (adev->virt.ras_init_done)
                                        amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
                        }
-       } else if (adev->bios != NULL) {
-               adev->virt.fw_reserve.p_pf2vf =
-                       (struct amd_sriov_msg_pf2vf_info_header *)
-                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
-               amdgpu_virt_read_pf2vf_data(adev);
        }
 }
 
index 54f28c0..f10ce74 100644 (file)
@@ -1428,6 +1428,10 @@ static int cik_asic_reset(struct amdgpu_device *adev)
 {
        int r;
 
+       /* APUs don't have full asic reset */
+       if (adev->flags & AMD_IS_APU)
+               return 0;
+
        if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
                dev_info(adev->dev, "BACO reset\n");
                r = amdgpu_dpm_baco_reset(adev);
index fe9a7cc..6645ebb 100644 (file)
@@ -956,6 +956,10 @@ static int vi_asic_reset(struct amdgpu_device *adev)
 {
        int r;
 
+       /* APUs don't have full asic reset */
+       if (adev->flags & AMD_IS_APU)
+               return 0;
+
        if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
                dev_info(adev->dev, "BACO reset\n");
                r = amdgpu_dpm_baco_reset(adev);
index 9f35f2e..cac80ba 100644 (file)
@@ -38,7 +38,6 @@
 #include "clk/clk_11_0_0_offset.h"
 #include "clk/clk_11_0_0_sh_mask.h"
 
-#include "irq/dcn20/irq_service_dcn20.h"
 
 #undef FN
 #define FN(reg_name, field_name) \
@@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
        bool force_reset = false;
        bool p_state_change_support;
        int total_plane_count;
-       int irq_src;
-       uint32_t hpd_state;
 
        if (dc->work_arounds.skip_clock_update)
                return;
@@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
        if (dc->res_pool->pp_smu)
                pp_smu = &dc->res_pool->pp_smu->nv_funcs;
 
-       for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD6; irq_src++) {
-               hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, irq_src);
-               if (hpd_state)
-                       break;
-       }
-
-       if (display_count == 0 && !hpd_state)
+       if (display_count == 0)
                enter_display_off = true;
 
        if (enter_display_off == safe_to_lower) {
index fbda423..f4dee0e 100644 (file)
@@ -42,7 +42,6 @@
 #include "clk/clk_10_0_2_sh_mask.h"
 #include "renoir_ip_offset.h"
 
-#include "irq/dcn21/irq_service_dcn21.h"
 
 /* Constants */
 
@@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
        struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
        struct dc *dc = clk_mgr_base->ctx->dc;
        int display_count;
-       int irq_src;
        bool update_dppclk = false;
        bool update_dispclk = false;
        bool dpp_clock_lowered = false;
-       uint32_t hpd_state;
 
        struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
 
@@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 
                        display_count = rn_get_active_display_cnt_wa(dc, context);
 
-                       for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD5; irq_src++) {
-                               hpd_state = dc_get_hpd_state_dcn21(dc->res_pool->irqs, irq_src);
-                               if (hpd_state)
-                                       break;
-                       }
-
                        /* if we can go lower, go lower */
-                       if (display_count == 0 && !hpd_state) {
+                       if (display_count == 0) {
                                rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
                                /* update power state */
                                clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
index 9ccafe0..c4b067d 100644 (file)
@@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
        }
 }
 
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source)
-{
-       const struct irq_source_info *info;
-       uint32_t addr;
-       uint32_t value;
-       uint32_t current_status;
-
-       info = find_irq_source_info(irq_service, source);
-       if (!info)
-               return 0;
-
-       addr = info->status_reg;
-       if (!addr)
-               return 0;
-
-       value = dm_read_reg(irq_service->ctx, addr);
-       current_status =
-               get_reg_field_value(
-                       value,
-                       HPD0_DC_HPD_INT_STATUS,
-                       DC_HPD_SENSE);
-
-       return current_status;
-}
-
 static bool hpd_ack(
        struct irq_service *irq_service,
        const struct irq_source_info *info)
index 4d69ab2..aee4b37 100644 (file)
@@ -31,6 +31,4 @@
 struct irq_service *dal_irq_service_dcn20_create(
        struct irq_service_init_data *init_data);
 
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source);
-
 #endif
index 2352945..0f15bca 100644 (file)
@@ -134,31 +134,6 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic
        return DC_IRQ_SOURCE_INVALID;
 }
 
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source)
-{
-       const struct irq_source_info *info;
-       uint32_t addr;
-       uint32_t value;
-       uint32_t current_status;
-
-       info = find_irq_source_info(irq_service, source);
-       if (!info)
-               return 0;
-
-       addr = info->status_reg;
-       if (!addr)
-               return 0;
-
-       value = dm_read_reg(irq_service->ctx, addr);
-       current_status =
-               get_reg_field_value(
-                       value,
-                       HPD0_DC_HPD_INT_STATUS,
-                       DC_HPD_SENSE);
-
-       return current_status;
-}
-
 static bool hpd_ack(
        struct irq_service *irq_service,
        const struct irq_source_info *info)
index 616470e..da2bd0e 100644 (file)
@@ -31,6 +31,4 @@
 struct irq_service *dal_irq_service_dcn21_create(
        struct irq_service_init_data *init_data);
 
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source);
-
 #endif
index 4db1133..a2a4fbe 100644 (file)
@@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service **irq_service)
        *irq_service = NULL;
 }
 
-const struct irq_source_info *find_irq_source_info(
+static const struct irq_source_info *find_irq_source_info(
        struct irq_service *irq_service,
        enum dc_irq_source source)
 {
index e60b824..dbfcb09 100644 (file)
@@ -69,10 +69,6 @@ struct irq_service {
        const struct irq_service_funcs *funcs;
 };
 
-const struct irq_source_info *find_irq_source_info(
-       struct irq_service *irq_service,
-       enum dc_irq_source source);
-
 void dal_irq_service_construct(
        struct irq_service *irq_service,
        struct irq_service_init_data *init_data);
index 9c9d574..cab5052 100644 (file)
@@ -1298,6 +1298,28 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder,
 
                intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
                             DKL_TX_DP20BITMODE, 0);
+
+               if (IS_ALDERLAKE_P(dev_priv)) {
+                       u32 val;
+
+                       if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
+                               if (ln == 0) {
+                                       val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+                                       val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(2);
+                               } else {
+                                       val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(3);
+                                       val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(3);
+                               }
+                       } else {
+                               val = DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(0);
+                               val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(0);
+                       }
+
+                       intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port),
+                                    DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK |
+                                    DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK,
+                                    val);
+               }
        }
 }
 
index 1e689d5..e2dfb93 100644 (file)
@@ -477,14 +477,14 @@ static const struct intel_ddi_buf_trans icl_combo_phy_trans_hdmi = {
 static const union intel_ddi_buf_trans_entry _ehl_combo_phy_trans_dp[] = {
                                                        /* NT mV Trans mV db    */
        { .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } },     /* 350   350      0.0   */
-       { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } },     /* 350   500      3.1   */
-       { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } },     /* 350   700      6.0   */
-       { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } },     /* 350   900      8.2   */
+       { .icl = { 0xA, 0x47, 0x38, 0x00, 0x07 } },     /* 350   500      3.1   */
+       { .icl = { 0xC, 0x64, 0x33, 0x00, 0x0C } },     /* 350   700      6.0   */
+       { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } },     /* 350   900      8.2   */
        { .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } },     /* 500   500      0.0   */
-       { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } },     /* 500   700      2.9   */
+       { .icl = { 0xC, 0x64, 0x37, 0x00, 0x08 } },     /* 500   700      2.9   */
        { .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } },     /* 500   900      5.1   */
        { .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } },     /* 650   700      0.6   */
-       { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } },     /* 600   900      3.5   */
+       { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } },     /* 600   900      3.5   */
        { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } },     /* 900   900      0.0   */
 };
 
index 4c28dad..971d601 100644 (file)
@@ -11166,8 +11166,12 @@ enum skl_power_gate {
                                                     _DKL_PHY2_BASE) + \
                                                     _DKL_TX_DPCNTL1)
 
-#define _DKL_TX_DPCNTL2                                0x2C8
-#define  DKL_TX_DP20BITMODE                            (1 << 2)
+#define _DKL_TX_DPCNTL2                                        0x2C8
+#define  DKL_TX_DP20BITMODE                            REG_BIT(2)
+#define  DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK     REG_GENMASK(4, 3)
+#define  DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(val)     REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK, (val))
+#define  DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK     REG_GENMASK(6, 5)
+#define  DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(val)     REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, (val))
 #define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \
                                                     _DKL_PHY1_BASE, \
                                                     _DKL_PHY2_BASE) + \
index e248855..11ad210 100644 (file)
@@ -666,18 +666,18 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
                fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
                if (unlikely(!fpriv)) {
                        r = -ENOMEM;
-                       goto out_suspend;
+                       goto err_suspend;
                }
 
                if (rdev->accel_working) {
                        vm = &fpriv->vm;
                        r = radeon_vm_init(rdev, vm);
                        if (r)
-                               goto out_fpriv;
+                               goto err_fpriv;
 
                        r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
                        if (r)
-                               goto out_vm_fini;
+                               goto err_vm_fini;
 
                        /* map the ib pool buffer read only into
                         * virtual address space */
@@ -685,7 +685,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
                                                        rdev->ring_tmp_bo.bo);
                        if (!vm->ib_bo_va) {
                                r = -ENOMEM;
-                               goto out_vm_fini;
+                               goto err_vm_fini;
                        }
 
                        r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va,
@@ -693,19 +693,21 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
                                                  RADEON_VM_PAGE_READABLE |
                                                  RADEON_VM_PAGE_SNOOPED);
                        if (r)
-                               goto out_vm_fini;
+                               goto err_vm_fini;
                }
                file_priv->driver_priv = fpriv;
        }
 
-       if (!r)
-               goto out_suspend;
+       pm_runtime_mark_last_busy(dev->dev);
+       pm_runtime_put_autosuspend(dev->dev);
+       return 0;
 
-out_vm_fini:
+err_vm_fini:
        radeon_vm_fini(rdev, vm);
-out_fpriv:
+err_fpriv:
        kfree(fpriv);
-out_suspend:
+
+err_suspend:
        pm_runtime_mark_last_busy(dev->dev);
        pm_runtime_put_autosuspend(dev->dev);
        return r;
index 26cee45..8597503 100644 (file)
 #define USB_DEVICE_ID_HP_X2            0x074d
 #define USB_DEVICE_ID_HP_X2_10_COVER   0x0755
 #define I2C_DEVICE_ID_HP_ENVY_X360_15  0x2d05
+#define I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100   0x29CF
 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15       0x2817
 #define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN   0x2706
index 1ce75e8..112901d 100644 (file)
@@ -330,6 +330,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
          HID_BATTERY_QUIRK_IGNORE },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15),
          HID_BATTERY_QUIRK_IGNORE },
+       { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15T_DR100),
+         HID_BATTERY_QUIRK_IGNORE },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
          HID_BATTERY_QUIRK_IGNORE },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
index 72957a9..efa6140 100644 (file)
@@ -6,16 +6,17 @@
  * Author: Sean O'Brien <seobrien@chromium.org>
  */
 
+#include <linux/device.h>
 #include <linux/hid.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sysfs.h>
 
 #define MIN_FN_ROW_KEY 1
 #define MAX_FN_ROW_KEY 24
 #define HID_VD_FN_ROW_PHYSMAP 0x00000001
 #define HID_USAGE_FN_ROW_PHYSMAP (HID_UP_GOOGLEVENDOR | HID_VD_FN_ROW_PHYSMAP)
 
-static struct hid_driver hid_vivaldi;
-
 struct vivaldi_data {
        u32 function_row_physmap[MAX_FN_ROW_KEY - MIN_FN_ROW_KEY + 1];
        int max_function_row_key;
@@ -40,7 +41,7 @@ static ssize_t function_row_physmap_show(struct device *dev,
        return size;
 }
 
-DEVICE_ATTR_RO(function_row_physmap);
+static DEVICE_ATTR_RO(function_row_physmap);
 static struct attribute *sysfs_attrs[] = {
        &dev_attr_function_row_physmap.attr,
        NULL
@@ -74,10 +75,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
                                    struct hid_usage *usage)
 {
        struct vivaldi_data *drvdata = hid_get_drvdata(hdev);
+       struct hid_report *report = field->report;
        int fn_key;
        int ret;
        u32 report_len;
-       u8 *buf;
+       u8 *report_data, *buf;
 
        if (field->logical != HID_USAGE_FN_ROW_PHYSMAP ||
            (usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL)
@@ -89,12 +91,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
        if (fn_key > drvdata->max_function_row_key)
                drvdata->max_function_row_key = fn_key;
 
-       buf = hid_alloc_report_buf(field->report, GFP_KERNEL);
-       if (!buf)
+       report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL);
+       if (!report_data)
                return;
 
-       report_len = hid_report_len(field->report);
-       ret = hid_hw_raw_request(hdev, field->report->id, buf,
+       report_len = hid_report_len(report);
+       if (!report->id) {
+               /*
+                * hid_hw_raw_request() will stuff report ID (which will be 0)
+                * into the first byte of the buffer even for unnumbered
+                * reports, so we need to account for this to avoid getting
+                * -EOVERFLOW in return.
+                * Note that hid_alloc_report_buf() adds 7 bytes to the size
+                * so we can safely say that we have space for an extra byte.
+                */
+               report_len++;
+       }
+
+       ret = hid_hw_raw_request(hdev, report->id, report_data,
                                 report_len, HID_FEATURE_REPORT,
                                 HID_REQ_GET_REPORT);
        if (ret < 0) {
@@ -103,7 +117,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
                goto out;
        }
 
-       ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
+       if (!report->id) {
+               /*
+                * Undo the damage from hid_hw_raw_request() for unnumbered
+                * reports.
+                */
+               report_data++;
+               report_len--;
+       }
+
+       ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
                                   report_len, 0);
        if (ret) {
                dev_warn(&hdev->dev, "failed to report feature %d\n",
index 8fe3efc..614adb5 100644 (file)
 
 struct uhid_device {
        struct mutex devlock;
+
+       /* This flag tracks whether the HID device is usable for commands from
+        * userspace. The flag is already set before hid_add_device(), which
+        * runs in workqueue context, to allow hid_add_device() to communicate
+        * with userspace.
+        * However, if hid_add_device() fails, the flag is cleared without
+        * holding devlock.
+        * We guarantee that if @running changes from true to false while you're
+        * holding @devlock, it's still fine to access @hid.
+        */
        bool running;
 
        __u8 *rd_data;
        uint rd_size;
 
+       /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */
        struct hid_device *hid;
        struct uhid_event input_buf;
 
@@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work)
        if (ret) {
                hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret);
 
-               hid_destroy_device(uhid->hid);
-               uhid->hid = NULL;
-               uhid->running = false;
+               /* We used to call hid_destroy_device() here, but that's really
+                * messy to get right because we have to coordinate with
+                * concurrent writes from userspace that might be in the middle
+                * of using uhid->hid.
+                * Just leave uhid->hid as-is for now, and clean it up when
+                * userspace tries to close or reinitialize the uhid instance.
+                *
+                * However, we do have to clear the ->running flag and do a
+                * wakeup to make sure userspace knows that the device is gone.
+                */
+               WRITE_ONCE(uhid->running, false);
+               wake_up_interruptible(&uhid->report_wait);
        }
 }
 
@@ -174,9 +194,9 @@ static int __uhid_report_queue_and_wait(struct uhid_device *uhid,
        spin_unlock_irqrestore(&uhid->qlock, flags);
 
        ret = wait_event_interruptible_timeout(uhid->report_wait,
-                               !uhid->report_running || !uhid->running,
+                               !uhid->report_running || !READ_ONCE(uhid->running),
                                5 * HZ);
-       if (!ret || !uhid->running || uhid->report_running)
+       if (!ret || !READ_ONCE(uhid->running) || uhid->report_running)
                ret = -EIO;
        else if (ret < 0)
                ret = -ERESTARTSYS;
@@ -217,7 +237,7 @@ static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum,
        struct uhid_event *ev;
        int ret;
 
-       if (!uhid->running)
+       if (!READ_ONCE(uhid->running))
                return -EIO;
 
        ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -259,7 +279,7 @@ static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum,
        struct uhid_event *ev;
        int ret;
 
-       if (!uhid->running || count > UHID_DATA_MAX)
+       if (!READ_ONCE(uhid->running) || count > UHID_DATA_MAX)
                return -EIO;
 
        ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -474,7 +494,7 @@ static int uhid_dev_create2(struct uhid_device *uhid,
        void *rd_data;
        int ret;
 
-       if (uhid->running)
+       if (uhid->hid)
                return -EALREADY;
 
        rd_size = ev->u.create2.rd_size;
@@ -556,15 +576,16 @@ static int uhid_dev_create(struct uhid_device *uhid,
 
 static int uhid_dev_destroy(struct uhid_device *uhid)
 {
-       if (!uhid->running)
+       if (!uhid->hid)
                return -EINVAL;
 
-       uhid->running = false;
+       WRITE_ONCE(uhid->running, false);
        wake_up_interruptible(&uhid->report_wait);
 
        cancel_work_sync(&uhid->worker);
 
        hid_destroy_device(uhid->hid);
+       uhid->hid = NULL;
        kfree(uhid->rd_data);
 
        return 0;
@@ -572,7 +593,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid)
 
 static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
 {
-       if (!uhid->running)
+       if (!READ_ONCE(uhid->running))
                return -EINVAL;
 
        hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input.data,
@@ -583,7 +604,7 @@ static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
 
 static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
 {
-       if (!uhid->running)
+       if (!READ_ONCE(uhid->running))
                return -EINVAL;
 
        hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data,
@@ -595,7 +616,7 @@ static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
 static int uhid_dev_get_report_reply(struct uhid_device *uhid,
                                     struct uhid_event *ev)
 {
-       if (!uhid->running)
+       if (!READ_ONCE(uhid->running))
                return -EINVAL;
 
        uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev);
@@ -605,7 +626,7 @@ static int uhid_dev_get_report_reply(struct uhid_device *uhid,
 static int uhid_dev_set_report_reply(struct uhid_device *uhid,
                                     struct uhid_event *ev)
 {
-       if (!uhid->running)
+       if (!READ_ONCE(uhid->running))
                return -EINVAL;
 
        uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev);
index 2a4cc39..a7176fc 100644 (file)
@@ -2588,6 +2588,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
        }
 }
 
+static bool wacom_wac_slot_is_active(struct input_dev *dev, int key)
+{
+       struct input_mt *mt = dev->mt;
+       struct input_mt_slot *s;
+
+       if (!mt)
+               return false;
+
+       for (s = mt->slots; s != mt->slots + mt->num_slots; s++) {
+               if (s->key == key &&
+                       input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) {
+                       return true;
+               }
+       }
+
+       return false;
+}
+
 static void wacom_wac_finger_event(struct hid_device *hdev,
                struct hid_field *field, struct hid_usage *usage, __s32 value)
 {
@@ -2638,9 +2656,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
        }
 
        if (usage->usage_index + 1 == field->report_count) {
-               if (equivalent_usage == wacom_wac->hid_data.last_slot_field &&
-                   wacom_wac->hid_data.confidence)
-                       wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+               if (equivalent_usage == wacom_wac->hid_data.last_slot_field) {
+                       bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input,
+                               wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch;
+
+                       if (wacom_wac->hid_data.confidence || touch_removed) {
+                               wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
+                       }
+               }
        }
 }
 
@@ -2659,6 +2682,10 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
 
        hid_data->confidence = true;
 
+       hid_data->cc_report = 0;
+       hid_data->cc_index = -1;
+       hid_data->cc_value_index = -1;
+
        for (i = 0; i < report->maxfield; i++) {
                struct hid_field *field = report->field[i];
                int j;
@@ -2692,11 +2719,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
            hid_data->cc_index >= 0) {
                struct hid_field *field = report->field[hid_data->cc_index];
                int value = field->value[hid_data->cc_value_index];
-               if (value)
+               if (value) {
                        hid_data->num_expected = value;
+                       hid_data->num_received = 0;
+               }
        }
        else {
                hid_data->num_expected = wacom_wac->features.touch_max;
+               hid_data->num_received = 0;
        }
 }
 
@@ -2724,6 +2754,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev,
 
        input_sync(input);
        wacom_wac->hid_data.num_received = 0;
+       wacom_wac->hid_data.num_expected = 0;
 
        /* keep touch state for pen event */
        wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac);
index 9363bcc..a8e1c30 100644 (file)
@@ -196,7 +196,7 @@ struct qib_ctxtdata {
        pid_t pid;
        pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
        /* same size as task_struct .comm[], command that opened context */
-       char comm[16];
+       char comm[TASK_COMM_LEN];
        /* pkeys set by this use of this ctxt */
        u16 pkeys[4];
        /* so file ops can get at unit */
index 63854f4..aa29092 100644 (file)
@@ -1321,7 +1321,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
        rcd->tid_pg_list = ptmp;
        rcd->pid = current->pid;
        init_waitqueue_head(&dd->rcd[ctxt]->wait);
-       strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
+       get_task_comm(rcd->comm, current);
        ctxt_fp(fp) = rcd;
        qib_stats.sps_ctxts++;
        dd->freectxts--;
index afe11f4..5018b93 100644 (file)
@@ -217,8 +217,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
         * the port number must be in the Dynamic Ports range
         * (0xc000 - 0xffff).
         */
-       qp->src_port = RXE_ROCE_V2_SPORT +
-               (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+       qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
        qp->sq.max_wr           = init->cap.max_send_wr;
 
        /* These caps are limited by rxe_qp_chk_cap() done by the caller */
index 07fc603..ec498ce 100644 (file)
@@ -3874,8 +3874,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
            skb->l4_hash)
                return skb->hash;
 
-       return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
-                               skb->mac_header, skb->network_header,
+       return __bond_xmit_hash(bond, skb, skb->data, skb->protocol,
+                               skb_mac_offset(skb), skb_network_offset(skb),
                                skb_headlen(skb));
 }
 
@@ -4884,25 +4884,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave = NULL;
        struct list_head *iter;
+       bool xmit_suc = false;
+       bool skb_used = false;
 
        bond_for_each_slave_rcu(bond, slave, iter) {
-               if (bond_is_last_slave(bond, slave))
-                       break;
-               if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
-                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+               struct sk_buff *skb2;
+
+               if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
+                       continue;
 
+               if (bond_is_last_slave(bond, slave)) {
+                       skb2 = skb;
+                       skb_used = true;
+               } else {
+                       skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2) {
                                net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
                                                    bond_dev->name, __func__);
                                continue;
                        }
-                       bond_dev_queue_xmit(bond, skb2, slave->dev);
                }
+
+               if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK)
+                       xmit_suc = true;
        }
-       if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
-               return bond_dev_queue_xmit(bond, skb, slave->dev);
 
-       return bond_tx_drop(bond_dev, skb);
+       if (!skb_used)
+               dev_kfree_skb_any(skb);
+
+       if (xmit_suc)
+               return NETDEV_TX_OK;
+
+       atomic_long_inc(&bond_dev->tx_dropped);
+       return NET_XMIT_DROP;
 }
 
 /*------------------------- Device initialization ---------------------------*/
index 849de45..621ce74 100644 (file)
@@ -106,9 +106,9 @@ static void emac_update_speed(struct net_device *dev)
 
        /* set EMAC SPEED, depend on PHY  */
        reg_val = readl(db->membase + EMAC_MAC_SUPP_REG);
-       reg_val &= ~(0x1 << 8);
+       reg_val &= ~EMAC_MAC_SUPP_100M;
        if (db->speed == SPEED_100)
-               reg_val |= 1 << 8;
+               reg_val |= EMAC_MAC_SUPP_100M;
        writel(reg_val, db->membase + EMAC_MAC_SUPP_REG);
 }
 
@@ -264,7 +264,7 @@ static void emac_dma_done_callback(void *arg)
 
        /* re enable interrupt */
        reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-       reg_val |= (0x01 << 8);
+       reg_val |= EMAC_INT_CTL_RX_EN;
        writel(reg_val, db->membase + EMAC_INT_CTL_REG);
 
        db->emacrx_completed_flag = 1;
@@ -429,7 +429,7 @@ static unsigned int emac_powerup(struct net_device *ndev)
        /* initial EMAC */
        /* flush RX FIFO */
        reg_val = readl(db->membase + EMAC_RX_CTL_REG);
-       reg_val |= 0x8;
+       reg_val |= EMAC_RX_CTL_FLUSH_FIFO;
        writel(reg_val, db->membase + EMAC_RX_CTL_REG);
        udelay(1);
 
@@ -441,8 +441,8 @@ static unsigned int emac_powerup(struct net_device *ndev)
 
        /* set MII clock */
        reg_val = readl(db->membase + EMAC_MAC_MCFG_REG);
-       reg_val &= (~(0xf << 2));
-       reg_val |= (0xD << 2);
+       reg_val &= ~EMAC_MAC_MCFG_MII_CLKD_MASK;
+       reg_val |= EMAC_MAC_MCFG_MII_CLKD_72;
        writel(reg_val, db->membase + EMAC_MAC_MCFG_REG);
 
        /* clear RX counter */
@@ -506,7 +506,7 @@ static void emac_init_device(struct net_device *dev)
 
        /* enable RX/TX0/RX Hlevel interrup */
        reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-       reg_val |= (0xf << 0) | (0x01 << 8);
+       reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
        writel(reg_val, db->membase + EMAC_INT_CTL_REG);
 
        spin_unlock_irqrestore(&db->lock, flags);
@@ -637,7 +637,9 @@ static void emac_rx(struct net_device *dev)
                if (!rxcount) {
                        db->emacrx_completed_flag = 1;
                        reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-                       reg_val |= (0xf << 0) | (0x01 << 8);
+                       reg_val |= (EMAC_INT_CTL_TX_EN |
+                                       EMAC_INT_CTL_TX_ABRT_EN |
+                                       EMAC_INT_CTL_RX_EN);
                        writel(reg_val, db->membase + EMAC_INT_CTL_REG);
 
                        /* had one stuck? */
@@ -669,7 +671,9 @@ static void emac_rx(struct net_device *dev)
                        writel(reg_val | EMAC_CTL_RX_EN,
                               db->membase + EMAC_CTL_REG);
                        reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-                       reg_val |= (0xf << 0) | (0x01 << 8);
+                       reg_val |= (EMAC_INT_CTL_TX_EN |
+                                       EMAC_INT_CTL_TX_ABRT_EN |
+                                       EMAC_INT_CTL_RX_EN);
                        writel(reg_val, db->membase + EMAC_INT_CTL_REG);
 
                        db->emacrx_completed_flag = 1;
@@ -783,20 +787,20 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id)
        }
 
        /* Transmit Interrupt check */
-       if (int_status & (0x01 | 0x02))
+       if (int_status & EMAC_INT_STA_TX_COMPLETE)
                emac_tx_done(dev, db, int_status);
 
-       if (int_status & (0x04 | 0x08))
+       if (int_status & EMAC_INT_STA_TX_ABRT)
                netdev_info(dev, " ab : %x\n", int_status);
 
        /* Re-enable interrupt mask */
        if (db->emacrx_completed_flag == 1) {
                reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-               reg_val |= (0xf << 0) | (0x01 << 8);
+               reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN | EMAC_INT_CTL_RX_EN);
                writel(reg_val, db->membase + EMAC_INT_CTL_REG);
        } else {
                reg_val = readl(db->membase + EMAC_INT_CTL_REG);
-               reg_val |= (0xf << 0);
+               reg_val |= (EMAC_INT_CTL_TX_EN | EMAC_INT_CTL_TX_ABRT_EN);
                writel(reg_val, db->membase + EMAC_INT_CTL_REG);
        }
 
@@ -1068,6 +1072,7 @@ out_clk_disable_unprepare:
        clk_disable_unprepare(db->clk);
 out_dispose_mapping:
        irq_dispose_mapping(ndev->irq);
+       dma_release_channel(db->rx_chan);
 out_iounmap:
        iounmap(db->membase);
 out:
index 38c72d9..90bd9ad 100644 (file)
@@ -38,6 +38,7 @@
 #define EMAC_RX_CTL_REG                (0x3c)
 #define EMAC_RX_CTL_AUTO_DRQ_EN                (1 << 1)
 #define EMAC_RX_CTL_DMA_EN             (1 << 2)
+#define EMAC_RX_CTL_FLUSH_FIFO         (1 << 3)
 #define EMAC_RX_CTL_PASS_ALL_EN                (1 << 4)
 #define EMAC_RX_CTL_PASS_CTL_EN                (1 << 5)
 #define EMAC_RX_CTL_PASS_CRC_ERR_EN    (1 << 6)
 #define EMAC_RX_IO_DATA_STATUS_OK      (1 << 7)
 #define EMAC_RX_FBC_REG                (0x50)
 #define EMAC_INT_CTL_REG       (0x54)
+#define EMAC_INT_CTL_RX_EN     (1 << 8)
+#define EMAC_INT_CTL_TX0_EN    (1)
+#define EMAC_INT_CTL_TX1_EN    (1 << 1)
+#define EMAC_INT_CTL_TX_EN     (EMAC_INT_CTL_TX0_EN | EMAC_INT_CTL_TX1_EN)
+#define EMAC_INT_CTL_TX0_ABRT_EN       (0x1 << 2)
+#define EMAC_INT_CTL_TX1_ABRT_EN       (0x1 << 3)
+#define EMAC_INT_CTL_TX_ABRT_EN        (EMAC_INT_CTL_TX0_ABRT_EN | EMAC_INT_CTL_TX1_ABRT_EN)
 #define EMAC_INT_STA_REG       (0x58)
+#define EMAC_INT_STA_TX0_COMPLETE      (0x1)
+#define EMAC_INT_STA_TX1_COMPLETE      (0x1 << 1)
+#define EMAC_INT_STA_TX_COMPLETE       (EMAC_INT_STA_TX0_COMPLETE | EMAC_INT_STA_TX1_COMPLETE)
+#define EMAC_INT_STA_TX0_ABRT  (0x1 << 2)
+#define EMAC_INT_STA_TX1_ABRT  (0x1 << 3)
+#define EMAC_INT_STA_TX_ABRT   (EMAC_INT_STA_TX0_ABRT | EMAC_INT_STA_TX1_ABRT)
+#define EMAC_INT_STA_RX_COMPLETE       (0x1 << 8)
 #define EMAC_MAC_CTL0_REG      (0x5c)
 #define EMAC_MAC_CTL0_RX_FLOW_CTL_EN   (1 << 2)
 #define EMAC_MAC_CTL0_TX_FLOW_CTL_EN   (1 << 3)
 #define EMAC_MAC_CLRT_RM               (0x0f)
 #define EMAC_MAC_MAXF_REG      (0x70)
 #define EMAC_MAC_SUPP_REG      (0x74)
+#define EMAC_MAC_SUPP_100M     (0x1 << 8)
 #define EMAC_MAC_TEST_REG      (0x78)
 #define EMAC_MAC_MCFG_REG      (0x7c)
+#define EMAC_MAC_MCFG_MII_CLKD_MASK    (0xff << 2)
+#define EMAC_MAC_MCFG_MII_CLKD_72      (0x0d << 2)
 #define EMAC_MAC_A0_REG                (0x98)
 #define EMAC_MAC_A1_REG                (0x9c)
 #define EMAC_MAC_A2_REG                (0xa0)
index 9a650d1..4d2ba30 100644 (file)
@@ -1237,6 +1237,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
        struct bmac_data *bp;
        const unsigned char *prop_addr;
        unsigned char addr[6];
+       u8 macaddr[6];
        struct net_device *dev;
        int is_bmac_plus = ((int)match->data) != 0;
 
@@ -1284,7 +1285,9 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
 
        rev = addr[0] == 0 && addr[1] == 0xA0;
        for (j = 0; j < 6; ++j)
-               dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+               macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
+
+       eth_hw_addr_set(dev, macaddr);
 
        /* Enable chip without interrupts for now */
        bmac_enable_and_reset_chip(dev);
index 4b80e3a..6f8c91e 100644 (file)
@@ -90,7 +90,7 @@ static void mace_set_timeout(struct net_device *dev);
 static void mace_tx_timeout(struct timer_list *t);
 static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma);
 static inline void mace_clean_rings(struct mace_data *mp);
-static void __mace_set_address(struct net_device *dev, void *addr);
+static void __mace_set_address(struct net_device *dev, const void *addr);
 
 /*
  * If we can't get a skbuff when we need it, we use this area for DMA.
@@ -112,6 +112,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
        struct net_device *dev;
        struct mace_data *mp;
        const unsigned char *addr;
+       u8 macaddr[ETH_ALEN];
        int j, rev, rc = -EBUSY;
 
        if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
@@ -167,8 +168,9 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
 
        rev = addr[0] == 0 && addr[1] == 0xA0;
        for (j = 0; j < 6; ++j) {
-               dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
+               macaddr[j] = rev ? bitrev8(addr[j]): addr[j];
        }
+       eth_hw_addr_set(dev, macaddr);
        mp->chipid = (in_8(&mp->mace->chipid_hi) << 8) |
                        in_8(&mp->mace->chipid_lo);
 
@@ -369,11 +371,12 @@ static void mace_reset(struct net_device *dev)
        out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO);
 }
 
-static void __mace_set_address(struct net_device *dev, void *addr)
+static void __mace_set_address(struct net_device *dev, const void *addr)
 {
     struct mace_data *mp = netdev_priv(dev);
     volatile struct mace __iomem *mb = mp->mace;
-    unsigned char *p = addr;
+    const unsigned char *p = addr;
+    u8 macaddr[ETH_ALEN];
     int i;
 
     /* load up the hardware address */
@@ -385,7 +388,10 @@ static void __mace_set_address(struct net_device *dev, void *addr)
            ;
     }
     for (i = 0; i < 6; ++i)
-       out_8(&mb->padr, dev->dev_addr[i] = p[i]);
+        out_8(&mb->padr, macaddr[i] = p[i]);
+
+    eth_hw_addr_set(dev, macaddr);
+
     if (mp->chipid != BROKEN_ADDRCHG_REV)
         out_8(&mb->iac, 0);
 }
index 226f440..87f1056 100644 (file)
@@ -4020,10 +4020,12 @@ static int bcmgenet_probe(struct platform_device *pdev)
 
        /* Request the WOL interrupt and advertise suspend if available */
        priv->wol_irq_disabled = true;
-       err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0,
-                              dev->name, priv);
-       if (!err)
-               device_set_wakeup_capable(&pdev->dev, 1);
+       if (priv->wol_irq > 0) {
+               err = devm_request_irq(&pdev->dev, priv->wol_irq,
+                                      bcmgenet_wol_isr, 0, dev->name, priv);
+               if (!err)
+                       device_set_wakeup_capable(&pdev->dev, 1);
+       }
 
        /* Set the needed headroom to account for any possible
         * features enabling/disabling at runtime
index d04a6c1..da8d104 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/tcp.h>
 #include <linux/ipv6.h>
+#include <net/inet_ecn.h>
 #include <net/route.h>
 #include <net/ip6_route.h>
 
@@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi,
 
        rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
                                   peer_port, local_port, IPPROTO_TCP,
-                                  tos, 0);
+                                  tos & ~INET_ECN_MASK, 0);
        if (IS_ERR(rt))
                return NULL;
        n = dst_neigh_lookup(&rt->dst, &peer_ip);
index 5b8b9bc..266e562 100644 (file)
@@ -51,6 +51,7 @@ struct tgec_mdio_controller {
 struct mdio_fsl_priv {
        struct  tgec_mdio_controller __iomem *mdio_base;
        bool    is_little_endian;
+       bool    has_a009885;
        bool    has_a011043;
 };
 
@@ -186,10 +187,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
        struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
        struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+       unsigned long flags;
        uint16_t dev_addr;
        uint32_t mdio_stat;
        uint32_t mdio_ctl;
-       uint16_t value;
        int ret;
        bool endian = priv->is_little_endian;
 
@@ -221,12 +222,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
                        return ret;
        }
 
+       if (priv->has_a009885)
+               /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we
+                * must read back the data register within 16 MDC cycles.
+                */
+               local_irq_save(flags);
+
        /* Initiate the read */
        xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
 
        ret = xgmac_wait_until_done(&bus->dev, regs, endian);
        if (ret)
-               return ret;
+               goto irq_restore;
 
        /* Return all Fs if nothing was there */
        if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
@@ -234,13 +241,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
                dev_dbg(&bus->dev,
                        "Error while reading PHY%d reg at %d.%hhu\n",
                        phy_id, dev_addr, regnum);
-               return 0xffff;
+               ret = 0xffff;
+       } else {
+               ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
+               dev_dbg(&bus->dev, "read %04x\n", ret);
        }
 
-       value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
-       dev_dbg(&bus->dev, "read %04x\n", value);
+irq_restore:
+       if (priv->has_a009885)
+               local_irq_restore(flags);
 
-       return value;
+       return ret;
 }
 
 static int xgmac_mdio_probe(struct platform_device *pdev)
@@ -287,6 +298,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
        priv->is_little_endian = device_property_read_bool(&pdev->dev,
                                                           "little-endian");
 
+       priv->has_a009885 = device_property_read_bool(&pdev->dev,
+                                                     "fsl,erratum-a009885");
        priv->has_a011043 = device_property_read_bool(&pdev->dev,
                                                      "fsl,erratum-a011043");
 
@@ -318,9 +331,10 @@ err_ioremap:
 static int xgmac_mdio_remove(struct platform_device *pdev)
 {
        struct mii_bus *bus = platform_get_drvdata(pdev);
+       struct mdio_fsl_priv *priv = bus->priv;
 
        mdiobus_unregister(bus);
-       iounmap(bus->priv);
+       iounmap(priv->mdio_base);
        mdiobus_free(bus);
 
        return 0;
index 27937c5..daec9ce 100644 (file)
@@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev)
        netdevice->dev_addr[5] = readb(eth_addr + 0x06);
        iounmap(eth_addr);
 
-       if (!netdevice->irq) {
+       if (netdevice->irq < 0) {
                printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n",
                        __FILE__, netdevice->base_addr);
+               retval = netdevice->irq;
                goto probe_failed;
        }
 
index a0a5a8e..2fd9ef2 100644 (file)
@@ -283,7 +283,6 @@ struct prestera_router {
        struct list_head rif_entry_list;
        struct notifier_block inetaddr_nb;
        struct notifier_block inetaddr_valid_nb;
-       bool aborted;
 };
 
 struct prestera_rxtx_params {
index 51fc841..e6bfadc 100644 (file)
@@ -1831,8 +1831,8 @@ static int prestera_iface_to_msg(struct prestera_iface *iface,
 int prestera_hw_rif_create(struct prestera_switch *sw,
                           struct prestera_iface *iif, u8 *mac, u16 *rif_id)
 {
-       struct prestera_msg_rif_req req;
        struct prestera_msg_rif_resp resp;
+       struct prestera_msg_rif_req req;
        int err;
 
        memcpy(req.mac, mac, ETH_ALEN);
@@ -1868,9 +1868,9 @@ int prestera_hw_rif_delete(struct prestera_switch *sw, u16 rif_id,
 
 int prestera_hw_vr_create(struct prestera_switch *sw, u16 *vr_id)
 {
-       int err;
        struct prestera_msg_vr_resp resp;
        struct prestera_msg_vr_req req;
+       int err;
 
        err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ROUTER_VR_CREATE,
                               &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
index 08fdd1e..cad93f7 100644 (file)
@@ -982,6 +982,7 @@ static void prestera_switch_fini(struct prestera_switch *sw)
        prestera_event_handlers_unregister(sw);
        prestera_rxtx_switch_fini(sw);
        prestera_switchdev_fini(sw);
+       prestera_router_fini(sw);
        prestera_netdev_event_handler_unregister(sw);
        prestera_hw_switch_fini(sw);
 }
index 8a3b7b6..6ef4d32 100644 (file)
@@ -25,10 +25,10 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
                                          struct netlink_ext_ack *extack)
 {
        struct prestera_port *port = netdev_priv(port_dev);
-       int err;
-       struct prestera_rif_entry *re;
        struct prestera_rif_entry_key re_key = {};
+       struct prestera_rif_entry *re;
        u32 kern_tb_id;
+       int err;
 
        err = prestera_is_valid_mac_addr(port, port_dev->dev_addr);
        if (err) {
@@ -45,21 +45,21 @@ static int __prestera_inetaddr_port_event(struct net_device *port_dev,
        switch (event) {
        case NETDEV_UP:
                if (re) {
-                       NL_SET_ERR_MSG_MOD(extack, "rif_entry already exist");
+                       NL_SET_ERR_MSG_MOD(extack, "RIF already exist");
                        return -EEXIST;
                }
                re = prestera_rif_entry_create(port->sw, &re_key,
                                               prestera_fix_tb_id(kern_tb_id),
                                               port_dev->dev_addr);
                if (!re) {
-                       NL_SET_ERR_MSG_MOD(extack, "Can't create rif_entry");
+                       NL_SET_ERR_MSG_MOD(extack, "Can't create RIF");
                        return -EINVAL;
                }
                dev_hold(port_dev);
                break;
        case NETDEV_DOWN:
                if (!re) {
-                       NL_SET_ERR_MSG_MOD(extack, "rif_entry not exist");
+                       NL_SET_ERR_MSG_MOD(extack, "Can't find RIF");
                        return -EEXIST;
                }
                prestera_rif_entry_destroy(port->sw, re);
@@ -75,11 +75,11 @@ static int __prestera_inetaddr_event(struct prestera_switch *sw,
                                     unsigned long event,
                                     struct netlink_ext_ack *extack)
 {
-       if (prestera_netdev_check(dev) && !netif_is_bridge_port(dev) &&
-           !netif_is_lag_port(dev) && !netif_is_ovs_port(dev))
-               return __prestera_inetaddr_port_event(dev, event, extack);
+       if (!prestera_netdev_check(dev) || netif_is_bridge_port(dev) ||
+           netif_is_lag_port(dev) || netif_is_ovs_port(dev))
+               return 0;
 
-       return 0;
+       return __prestera_inetaddr_port_event(dev, event, extack);
 }
 
 static int __prestera_inetaddr_cb(struct notifier_block *nb,
@@ -126,6 +126,8 @@ static int __prestera_inetaddr_valid_cb(struct notifier_block *nb,
                goto out;
 
        if (ipv4_is_multicast(ivi->ivi_addr)) {
+               NL_SET_ERR_MSG_MOD(ivi->extack,
+                                  "Multicast addr on RIF is not supported");
                err = -EINVAL;
                goto out;
        }
@@ -166,7 +168,7 @@ int prestera_router_init(struct prestera_switch *sw)
 err_register_inetaddr_notifier:
        unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
 err_register_inetaddr_validator_notifier:
-       /* prestera_router_hw_fini */
+       prestera_router_hw_fini(sw);
 err_router_lib_init:
        kfree(sw->router);
        return err;
@@ -176,7 +178,7 @@ void prestera_router_fini(struct prestera_switch *sw)
 {
        unregister_inetaddr_notifier(&sw->router->inetaddr_nb);
        unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb);
-       /* router_hw_fini */
+       prestera_router_hw_fini(sw);
        kfree(sw->router);
        sw->router = NULL;
 }
index 5866a4b..e5592b6 100644 (file)
@@ -29,6 +29,12 @@ int prestera_router_hw_init(struct prestera_switch *sw)
        return 0;
 }
 
+void prestera_router_hw_fini(struct prestera_switch *sw)
+{
+       WARN_ON(!list_empty(&sw->router->vr_list));
+       WARN_ON(!list_empty(&sw->router->rif_entry_list));
+}
+
 static struct prestera_vr *__prestera_vr_find(struct prestera_switch *sw,
                                              u32 tb_id)
 {
@@ -47,13 +53,8 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
                                                struct netlink_ext_ack *extack)
 {
        struct prestera_vr *vr;
-       u16 hw_vr_id;
        int err;
 
-       err = prestera_hw_vr_create(sw, &hw_vr_id);
-       if (err)
-               return ERR_PTR(-ENOMEM);
-
        vr = kzalloc(sizeof(*vr), GFP_KERNEL);
        if (!vr) {
                err = -ENOMEM;
@@ -61,23 +62,26 @@ static struct prestera_vr *__prestera_vr_create(struct prestera_switch *sw,
        }
 
        vr->tb_id = tb_id;
-       vr->hw_vr_id = hw_vr_id;
+
+       err = prestera_hw_vr_create(sw, &vr->hw_vr_id);
+       if (err)
+               goto err_hw_create;
 
        list_add(&vr->router_node, &sw->router->vr_list);
 
        return vr;
 
-err_alloc_vr:
-       prestera_hw_vr_delete(sw, hw_vr_id);
+err_hw_create:
        kfree(vr);
+err_alloc_vr:
        return ERR_PTR(err);
 }
 
 static void __prestera_vr_destroy(struct prestera_switch *sw,
                                  struct prestera_vr *vr)
 {
-       prestera_hw_vr_delete(sw, vr->hw_vr_id);
        list_del(&vr->router_node);
+       prestera_hw_vr_delete(sw, vr->hw_vr_id);
        kfree(vr);
 }
 
@@ -87,17 +91,22 @@ static struct prestera_vr *prestera_vr_get(struct prestera_switch *sw, u32 tb_id
        struct prestera_vr *vr;
 
        vr = __prestera_vr_find(sw, tb_id);
-       if (!vr)
+       if (vr) {
+               refcount_inc(&vr->refcount);
+       } else {
                vr = __prestera_vr_create(sw, tb_id, extack);
-       if (IS_ERR(vr))
-               return ERR_CAST(vr);
+               if (IS_ERR(vr))
+                       return ERR_CAST(vr);
+
+               refcount_set(&vr->refcount, 1);
+       }
 
        return vr;
 }
 
 static void prestera_vr_put(struct prestera_switch *sw, struct prestera_vr *vr)
 {
-       if (!vr->ref_cnt)
+       if (refcount_dec_and_test(&vr->refcount))
                __prestera_vr_destroy(sw, vr);
 }
 
@@ -120,7 +129,7 @@ __prestera_rif_entry_key_copy(const struct prestera_rif_entry_key *in,
                out->iface.vlan_id = in->iface.vlan_id;
                break;
        default:
-               pr_err("Unsupported iface type");
+               WARN(1, "Unsupported iface type");
                return -EINVAL;
        }
 
@@ -158,7 +167,6 @@ void prestera_rif_entry_destroy(struct prestera_switch *sw,
        iface.vr_id = e->vr->hw_vr_id;
        prestera_hw_rif_delete(sw, e->hw_id, &iface);
 
-       e->vr->ref_cnt--;
        prestera_vr_put(sw, e->vr);
        kfree(e);
 }
@@ -183,7 +191,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
        if (IS_ERR(e->vr))
                goto err_vr_get;
 
-       e->vr->ref_cnt++;
        memcpy(&e->addr, addr, sizeof(e->addr));
 
        /* HW */
@@ -198,7 +205,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
        return e;
 
 err_hw_create:
-       e->vr->ref_cnt--;
        prestera_vr_put(sw, e->vr);
 err_vr_get:
 err_key_copy:
index fed5359..b6b0285 100644 (file)
@@ -6,7 +6,7 @@
 
 struct prestera_vr {
        struct list_head router_node;
-       unsigned int ref_cnt;
+       refcount_t refcount;
        u32 tb_id;                      /* key (kernel fib table id) */
        u16 hw_vr_id;                   /* virtual router ID */
        u8 __pad[2];
@@ -32,5 +32,6 @@ prestera_rif_entry_create(struct prestera_switch *sw,
                          struct prestera_rif_entry_key *k,
                          u32 tb_id, const unsigned char *addr);
 int prestera_router_hw_init(struct prestera_switch *sw);
+void prestera_router_hw_fini(struct prestera_switch *sw);
 
 #endif /* _PRESTERA_ROUTER_HW_H_ */
index b67b432..f02d07e 100644 (file)
@@ -267,7 +267,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
                                           phylink_config);
        struct mtk_eth *eth = mac->hw;
        u32 mcr_cur, mcr_new, sid, i;
-       int val, ge_mode, err;
+       int val, ge_mode, err = 0;
 
        /* MT76x8 has no hardware settings between for the MAC */
        if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) &&
index 3381524..378fc8e 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /* Copyright (c) 2018 Mellanox Technologies. */
 
+#include <net/inet_ecn.h>
 #include <net/vxlan.h>
 #include <net/gre.h>
 #include <net/geneve.h>
@@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        int err;
 
        /* add the IP fields */
-       attr.fl.fl4.flowi4_tos = tun_key->tos;
+       attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
        attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
        attr.fl.fl4.saddr = tun_key->u.ipv4.src;
        attr.ttl = tun_key->ttl;
@@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
        int err;
 
        /* add the IP fields */
-       attr.fl.fl4.flowi4_tos = tun_key->tos;
+       attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
        attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
        attr.fl.fl4.saddr = tun_key->u.ipv4.src;
        attr.ttl = tun_key->ttl;
index b1311b6..455293a 100644 (file)
@@ -771,7 +771,10 @@ void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
 
        ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
 
-       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
+       /* Don't attempt to send PAUSE frames on the NPI port, it's broken */
+       if (port != ocelot->npi)
+               ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA,
+                                   tx_pause);
 
        /* Undo the effects of ocelot_phylink_mac_link_down:
         * enable MAC module
index beb9379..9498588 100644 (file)
@@ -559,13 +559,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
                        return -EOPNOTSUPP;
                }
 
-               if (filter->block_id == VCAP_IS1 &&
-                   !is_zero_ether_addr(match.mask->dst)) {
-                       NL_SET_ERR_MSG_MOD(extack,
-                                          "Key type S1_NORMAL cannot match on destination MAC");
-                       return -EOPNOTSUPP;
-               }
-
                /* The hw support mac matches only for MAC_ETYPE key,
                 * therefore if other matches(port, tcp flags, etc) are added
                 * then just bail out
@@ -580,6 +573,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
                        return -EOPNOTSUPP;
 
                flow_rule_match_eth_addrs(rule, &match);
+
+               if (filter->block_id == VCAP_IS1 &&
+                   !is_zero_ether_addr(match.mask->dst)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Key type S1_NORMAL cannot match on destination MAC");
+                       return -EOPNOTSUPP;
+               }
+
                filter->key_type = OCELOT_VCAP_KEY_ETYPE;
                ether_addr_copy(filter->key.etype.dmac.value,
                                match.key->dst);
@@ -805,13 +806,34 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
        struct netlink_ext_ack *extack = f->common.extack;
        struct ocelot_vcap_filter *filter;
        int chain = f->common.chain_index;
-       int ret;
+       int block_id, ret;
 
        if (chain && !ocelot_find_vcap_filter_that_points_at(ocelot, chain)) {
                NL_SET_ERR_MSG_MOD(extack, "No default GOTO action points to this chain");
                return -EOPNOTSUPP;
        }
 
+       block_id = ocelot_chain_to_block(chain, ingress);
+       if (block_id < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot offload to this chain");
+               return -EOPNOTSUPP;
+       }
+
+       filter = ocelot_vcap_block_find_filter_by_id(&ocelot->block[block_id],
+                                                    f->cookie, true);
+       if (filter) {
+               /* Filter already exists on other ports */
+               if (!ingress) {
+                       NL_SET_ERR_MSG_MOD(extack, "VCAP ES0 does not support shared filters");
+                       return -EOPNOTSUPP;
+               }
+
+               filter->ingress_port_mask |= BIT(port);
+
+               return ocelot_vcap_filter_replace(ocelot, filter);
+       }
+
+       /* Filter didn't exist, create it now */
        filter = ocelot_vcap_filter_create(ocelot, port, ingress, f);
        if (!filter)
                return -ENOMEM;
@@ -874,6 +896,12 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port,
        if (filter->type == OCELOT_VCAP_FILTER_DUMMY)
                return ocelot_vcap_dummy_filter_del(ocelot, filter);
 
+       if (ingress) {
+               filter->ingress_port_mask &= ~BIT(port);
+               if (filter->ingress_port_mask)
+                       return ocelot_vcap_filter_replace(ocelot, filter);
+       }
+
        return ocelot_vcap_filter_del(ocelot, filter);
 }
 EXPORT_SYMBOL_GPL(ocelot_cls_flower_destroy);
index 8115c3d..e271b62 100644 (file)
@@ -1187,7 +1187,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
        ocelot_port_bridge_join(ocelot, port, bridge);
 
        err = switchdev_bridge_port_offload(brport_dev, dev, priv,
-                                           &ocelot_netdevice_nb,
+                                           &ocelot_switchdev_nb,
                                            &ocelot_switchdev_blocking_nb,
                                            false, extack);
        if (err)
@@ -1201,7 +1201,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
 
 err_switchdev_sync:
        switchdev_bridge_port_unoffload(brport_dev, priv,
-                                       &ocelot_netdevice_nb,
+                                       &ocelot_switchdev_nb,
                                        &ocelot_switchdev_blocking_nb);
 err_switchdev_offload:
        ocelot_port_bridge_leave(ocelot, port, bridge);
@@ -1214,7 +1214,7 @@ static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
        struct ocelot_port_private *priv = netdev_priv(dev);
 
        switchdev_bridge_port_unoffload(brport_dev, priv,
-                                       &ocelot_netdevice_nb,
+                                       &ocelot_switchdev_nb,
                                        &ocelot_switchdev_blocking_nb);
 }
 
index adfeb8d..62a69a9 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 #define DWMAC_RX_VARDELAY(d)           ((d) << DWMAC_RX_VARDELAY_SHIFT)
 #define DWMAC_RXN_VARDELAY(d)          ((d) << DWMAC_RXN_VARDELAY_SHIFT)
 
+struct oxnas_dwmac;
+
+struct oxnas_dwmac_data {
+       int (*setup)(struct oxnas_dwmac *dwmac);
+};
+
 struct oxnas_dwmac {
        struct device   *dev;
        struct clk      *clk;
        struct regmap   *regmap;
+       const struct oxnas_dwmac_data   *data;
 };
 
-static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+static int oxnas_dwmac_setup_ox810se(struct oxnas_dwmac *dwmac)
 {
-       struct oxnas_dwmac *dwmac = priv;
        unsigned int value;
        int ret;
 
-       /* Reset HW here before changing the glue configuration */
-       ret = device_reset(dwmac->dev);
-       if (ret)
+       ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
+       if (ret < 0)
                return ret;
 
-       ret = clk_prepare_enable(dwmac->clk);
-       if (ret)
-               return ret;
+       /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
+       value |= BIT(DWMAC_CKEN_GTX)            |
+                /* Use simple mux for 25/125 Mhz clock switching */
+                BIT(DWMAC_SIMPLE_MUX);
+
+       regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
+
+       return 0;
+}
+
+static int oxnas_dwmac_setup_ox820(struct oxnas_dwmac *dwmac)
+{
+       unsigned int value;
+       int ret;
 
        ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
-       if (ret < 0) {
-               clk_disable_unprepare(dwmac->clk);
+       if (ret < 0)
                return ret;
-       }
 
        /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
        value |= BIT(DWMAC_CKEN_GTX)            |
                 /* Use simple mux for 25/125 Mhz clock switching */
-                BIT(DWMAC_SIMPLE_MUX)          |
-                /* set auto switch tx clock source */
-                BIT(DWMAC_AUTO_TX_SOURCE)      |
-                /* enable tx & rx vardelay */
-                BIT(DWMAC_CKEN_TX_OUT)         |
-                BIT(DWMAC_CKEN_TXN_OUT)        |
-                BIT(DWMAC_CKEN_TX_IN)          |
-                BIT(DWMAC_CKEN_RX_OUT)         |
-                BIT(DWMAC_CKEN_RXN_OUT)        |
-                BIT(DWMAC_CKEN_RX_IN);
+               BIT(DWMAC_SIMPLE_MUX)           |
+               /* set auto switch tx clock source */
+               BIT(DWMAC_AUTO_TX_SOURCE)       |
+               /* enable tx & rx vardelay */
+               BIT(DWMAC_CKEN_TX_OUT)          |
+               BIT(DWMAC_CKEN_TXN_OUT) |
+               BIT(DWMAC_CKEN_TX_IN)           |
+               BIT(DWMAC_CKEN_RX_OUT)          |
+               BIT(DWMAC_CKEN_RXN_OUT) |
+               BIT(DWMAC_CKEN_RX_IN);
        regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
 
        /* set tx & rx vardelay */
@@ -100,6 +115,27 @@ static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
        return 0;
 }
 
+static int oxnas_dwmac_init(struct platform_device *pdev, void *priv)
+{
+       struct oxnas_dwmac *dwmac = priv;
+       int ret;
+
+       /* Reset HW here before changing the glue configuration */
+       ret = device_reset(dwmac->dev);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(dwmac->clk);
+       if (ret)
+               return ret;
+
+       ret = dwmac->data->setup(dwmac);
+       if (ret)
+               clk_disable_unprepare(dwmac->clk);
+
+       return ret;
+}
+
 static void oxnas_dwmac_exit(struct platform_device *pdev, void *priv)
 {
        struct oxnas_dwmac *dwmac = priv;
@@ -128,6 +164,12 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
                goto err_remove_config_dt;
        }
 
+       dwmac->data = (const struct oxnas_dwmac_data *)of_device_get_match_data(&pdev->dev);
+       if (!dwmac->data) {
+               ret = -EINVAL;
+               goto err_remove_config_dt;
+       }
+
        dwmac->dev = &pdev->dev;
        plat_dat->bsp_priv = dwmac;
        plat_dat->init = oxnas_dwmac_init;
@@ -166,8 +208,23 @@ err_remove_config_dt:
        return ret;
 }
 
+static const struct oxnas_dwmac_data ox810se_dwmac_data = {
+       .setup = oxnas_dwmac_setup_ox810se,
+};
+
+static const struct oxnas_dwmac_data ox820_dwmac_data = {
+       .setup = oxnas_dwmac_setup_ox820,
+};
+
 static const struct of_device_id oxnas_dwmac_match[] = {
-       { .compatible = "oxsemi,ox820-dwmac" },
+       {
+               .compatible = "oxsemi,ox810se-dwmac",
+               .data = &ox810se_dwmac_data,
+       },
+       {
+               .compatible = "oxsemi,ox820-dwmac",
+               .data = &ox820_dwmac_data,
+       },
        { }
 };
 MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
index 63ff2da..6708ca2 100644 (file)
@@ -7159,7 +7159,8 @@ int stmmac_dvr_probe(struct device *device,
 
        pm_runtime_get_noresume(device);
        pm_runtime_set_active(device);
-       pm_runtime_enable(device);
+       if (!pm_runtime_enabled(device))
+               pm_runtime_enable(device);
 
        if (priv->hw->pcs != STMMAC_PCS_TBI &&
            priv->hw->pcs != STMMAC_PCS_RTBI) {
index 33142d5..03575c0 100644 (file)
@@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        struct cpsw_common      *cpsw = ndev_to_cpsw(xmeta->ndev);
        int                     pkt_size = cpsw->rx_packet_max;
        int                     ret = 0, port, ch = xmeta->ch;
-       int                     headroom = CPSW_HEADROOM;
+       int                     headroom = CPSW_HEADROOM_NA;
        struct net_device       *ndev = xmeta->ndev;
        struct cpsw_priv        *priv;
        struct page_pool        *pool;
@@ -392,7 +392,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        }
 
        if (priv->xdp_prog) {
-               int headroom = CPSW_HEADROOM, size = len;
+               int size = len;
 
                xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
                if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -442,7 +442,7 @@ requeue:
        xmeta->ndev = ndev;
        xmeta->ch = ch;
 
-       dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+       dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
        ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
                                       pkt_size, 0);
        if (ret < 0) {
index 279e261..bd4b152 100644 (file)
@@ -283,7 +283,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
 {
        struct page *new_page, *page = token;
        void *pa = page_address(page);
-       int headroom = CPSW_HEADROOM;
+       int headroom = CPSW_HEADROOM_NA;
        struct cpsw_meta_xdp *xmeta;
        struct cpsw_common *cpsw;
        struct net_device *ndev;
@@ -336,7 +336,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        }
 
        if (priv->xdp_prog) {
-               int headroom = CPSW_HEADROOM, size = len;
+               int size = len;
 
                xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]);
                if (status & CPDMA_RX_VLAN_ENCAP) {
@@ -386,7 +386,7 @@ requeue:
        xmeta->ndev = ndev;
        xmeta->ch = ch;
 
-       dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM;
+       dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM_NA;
        ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma,
                                       pkt_size, 0);
        if (ret < 0) {
index 3537502..ba22059 100644 (file)
@@ -1122,7 +1122,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv)
                        xmeta->ndev = priv->ndev;
                        xmeta->ch = ch;
 
-                       dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM;
+                       dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM_NA;
                        ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch,
                                                            page, dma,
                                                            cpsw->rx_packet_max,
index 6e2cf06..4184a63 100644 (file)
@@ -5,7 +5,7 @@
 
 config NET_VENDOR_VERTEXCOM
        bool "Vertexcom devices"
-       default n
+       default y
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
index 23ac353..377c94e 100644 (file)
@@ -41,8 +41,9 @@
 #include "xilinx_axienet.h"
 
 /* Descriptors defines for Tx and Rx DMA */
-#define TX_BD_NUM_DEFAULT              64
+#define TX_BD_NUM_DEFAULT              128
 #define RX_BD_NUM_DEFAULT              1024
+#define TX_BD_NUM_MIN                  (MAX_SKB_FRAGS + 1)
 #define TX_BD_NUM_MAX                  4096
 #define RX_BD_NUM_MAX                  4096
 
@@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options)
 
 static int __axienet_device_reset(struct axienet_local *lp)
 {
-       u32 timeout;
+       u32 value;
+       int ret;
 
        /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
         * process of Axi DMA takes a while to complete as all pending
@@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp)
         * they both reset the entire DMA core, so only one needs to be used.
         */
        axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
-       timeout = DELAY_OF_ONE_MILLISEC;
-       while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
-                               XAXIDMA_CR_RESET_MASK) {
-               udelay(1);
-               if (--timeout == 0) {
-                       netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
-                                  __func__);
-                       return -ETIMEDOUT;
-               }
+       ret = read_poll_timeout(axienet_dma_in32, value,
+                               !(value & XAXIDMA_CR_RESET_MASK),
+                               DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+                               XAXIDMA_TX_CR_OFFSET);
+       if (ret) {
+               dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__);
+               return ret;
+       }
+
+       /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */
+       ret = read_poll_timeout(axienet_ior, value,
+                               value & XAE_INT_PHYRSTCMPLT_MASK,
+                               DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+                               XAE_IS_OFFSET);
+       if (ret) {
+               dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__);
+               return ret;
        }
 
        return 0;
@@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
                if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK))
                        break;
 
+               /* Ensure we see complete descriptor update */
+               dma_rmb();
                phys = desc_get_phys_addr(lp, cur_p);
                dma_unmap_single(ndev->dev.parent, phys,
                                 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
@@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
                if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
                        dev_consume_skb_irq(cur_p->skb);
 
-               cur_p->cntrl = 0;
                cur_p->app0 = 0;
                cur_p->app1 = 0;
                cur_p->app2 = 0;
                cur_p->app4 = 0;
-               cur_p->status = 0;
                cur_p->skb = NULL;
+               /* ensure our transmit path and device don't prematurely see status cleared */
+               wmb();
+               cur_p->cntrl = 0;
+               cur_p->status = 0;
 
                if (sizep)
                        *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
@@ -647,6 +661,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 }
 
 /**
+ * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
+ * @lp:                Pointer to the axienet_local structure
+ * @num_frag:  The number of BDs to check for
+ *
+ * Return: 0, on success
+ *         NETDEV_TX_BUSY, if any of the descriptors are not free
+ *
+ * This function is invoked before BDs are allocated and transmission starts.
+ * This function returns 0 if a BD or group of BDs can be allocated for
+ * transmission. If the BD or any of the BDs are not free the function
+ * returns a busy status. This is invoked from axienet_start_xmit.
+ */
+static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
+                                           int num_frag)
+{
+       struct axidma_bd *cur_p;
+
+       /* Ensure we see all descriptor updates from device or TX IRQ path */
+       rmb();
+       cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
+       if (cur_p->cntrl)
+               return NETDEV_TX_BUSY;
+       return 0;
+}
+
+/**
  * axienet_start_xmit_done - Invoked once a transmit is completed by the
  * Axi DMA Tx channel.
  * @ndev:      Pointer to the net_device structure
@@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev)
        /* Matches barrier in axienet_start_xmit */
        smp_mb();
 
-       netif_wake_queue(ndev);
-}
-
-/**
- * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
- * @lp:                Pointer to the axienet_local structure
- * @num_frag:  The number of BDs to check for
- *
- * Return: 0, on success
- *         NETDEV_TX_BUSY, if any of the descriptors are not free
- *
- * This function is invoked before BDs are allocated and transmission starts.
- * This function returns 0 if a BD or group of BDs can be allocated for
- * transmission. If the BD or any of the BDs are not free the function
- * returns a busy status. This is invoked from axienet_start_xmit.
- */
-static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
-                                           int num_frag)
-{
-       struct axidma_bd *cur_p;
-       cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
-       if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
-               return NETDEV_TX_BUSY;
-       return 0;
+       if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+               netif_wake_queue(ndev);
 }
 
 /**
@@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        num_frag = skb_shinfo(skb)->nr_frags;
        cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 
-       if (axienet_check_tx_bd_space(lp, num_frag)) {
-               if (netif_queue_stopped(ndev))
-                       return NETDEV_TX_BUSY;
-
+       if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
+               /* Should not happen as last start_xmit call should have
+                * checked for sufficient space and queue should only be
+                * woken when sufficient space is available.
+                */
                netif_stop_queue(ndev);
-
-               /* Matches barrier in axienet_start_xmit_done */
-               smp_mb();
-
-               /* Space might have just been freed - check again */
-               if (axienet_check_tx_bd_space(lp, num_frag))
-                       return NETDEV_TX_BUSY;
-
-               netif_wake_queue(ndev);
+               if (net_ratelimit())
+                       netdev_warn(ndev, "TX ring unexpectedly full\n");
+               return NETDEV_TX_BUSY;
        }
 
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        if (++lp->tx_bd_tail >= lp->tx_bd_num)
                lp->tx_bd_tail = 0;
 
+       /* Stop queue if next transmit may not have space */
+       if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+               netif_stop_queue(ndev);
+
+               /* Matches barrier in axienet_start_xmit_done */
+               smp_mb();
+
+               /* Space might have just been freed - check again */
+               if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+                       netif_wake_queue(ndev);
+       }
+
        return NETDEV_TX_OK;
 }
 
@@ -834,6 +859,8 @@ static void axienet_recv(struct net_device *ndev)
 
                tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
 
+               /* Ensure we see complete descriptor update */
+               dma_rmb();
                phys = desc_get_phys_addr(lp, cur_p);
                dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
                                 DMA_FROM_DEVICE);
@@ -1352,7 +1379,8 @@ axienet_ethtools_set_ringparam(struct net_device *ndev,
        if (ering->rx_pending > RX_BD_NUM_MAX ||
            ering->rx_mini_pending ||
            ering->rx_jumbo_pending ||
-           ering->rx_pending > TX_BD_NUM_MAX)
+           ering->tx_pending < TX_BD_NUM_MIN ||
+           ering->tx_pending > TX_BD_NUM_MAX)
                return -EINVAL;
 
        if (netif_running(ndev))
@@ -2027,6 +2055,11 @@ static int axienet_probe(struct platform_device *pdev)
        lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
        lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
 
+       /* Reset core now that clocks are enabled, prior to accessing MDIO */
+       ret = __axienet_device_reset(lp);
+       if (ret)
+               goto cleanup_clk;
+
        lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
        if (lp->phy_node) {
                ret = axienet_mdio_setup(lp);
index 49d9a07..68291a3 100644 (file)
@@ -1080,27 +1080,38 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one)
 {
        struct gsi *gsi;
        u32 backlog;
+       int delta;
 
-       if (!endpoint->replenish_enabled) {
+       if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) {
                if (add_one)
                        atomic_inc(&endpoint->replenish_saved);
                return;
        }
 
+       /* If already active, just update the backlog */
+       if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) {
+               if (add_one)
+                       atomic_inc(&endpoint->replenish_backlog);
+               return;
+       }
+
        while (atomic_dec_not_zero(&endpoint->replenish_backlog))
                if (ipa_endpoint_replenish_one(endpoint))
                        goto try_again_later;
+
+       clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
+
        if (add_one)
                atomic_inc(&endpoint->replenish_backlog);
 
        return;
 
 try_again_later:
-       /* The last one didn't succeed, so fix the backlog */
-       backlog = atomic_inc_return(&endpoint->replenish_backlog);
+       clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
 
-       if (add_one)
-               atomic_inc(&endpoint->replenish_backlog);
+       /* The last one didn't succeed, so fix the backlog */
+       delta = add_one ? 2 : 1;
+       backlog = atomic_add_return(delta, &endpoint->replenish_backlog);
 
        /* Whenever a receive buffer transaction completes we'll try to
         * replenish again.  It's unlikely, but if we fail to supply even
@@ -1120,7 +1131,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint)
        u32 max_backlog;
        u32 saved;
 
-       endpoint->replenish_enabled = true;
+       set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
        while ((saved = atomic_xchg(&endpoint->replenish_saved, 0)))
                atomic_add(saved, &endpoint->replenish_backlog);
 
@@ -1134,7 +1145,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint)
 {
        u32 backlog;
 
-       endpoint->replenish_enabled = false;
+       clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
        while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0)))
                atomic_add(backlog, &endpoint->replenish_saved);
 }
@@ -1691,7 +1702,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint)
                /* RX transactions require a single TRE, so the maximum
                 * backlog is the same as the maximum outstanding TREs.
                 */
-               endpoint->replenish_enabled = false;
+               clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags);
+               clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags);
                atomic_set(&endpoint->replenish_saved,
                           gsi_channel_tre_max(gsi, endpoint->channel_id));
                atomic_set(&endpoint->replenish_backlog, 0);
index 0a859d1..0313cdc 100644 (file)
@@ -41,6 +41,19 @@ enum ipa_endpoint_name {
 #define IPA_ENDPOINT_MAX               32      /* Max supported by driver */
 
 /**
+ * enum ipa_replenish_flag:    RX buffer replenish flags
+ *
+ * @IPA_REPLENISH_ENABLED:     Whether receive buffer replenishing is enabled
+ * @IPA_REPLENISH_ACTIVE:      Whether replenishing is underway
+ * @IPA_REPLENISH_COUNT:       Number of defined replenish flags
+ */
+enum ipa_replenish_flag {
+       IPA_REPLENISH_ENABLED,
+       IPA_REPLENISH_ACTIVE,
+       IPA_REPLENISH_COUNT,    /* Number of flags (must be last) */
+};
+
+/**
  * struct ipa_endpoint - IPA endpoint information
  * @ipa:               IPA pointer
  * @ee_id:             Execution environmnent endpoint is associated with
@@ -51,7 +64,7 @@ enum ipa_endpoint_name {
  * @trans_tre_max:     Maximum number of TRE descriptors per transaction
  * @evt_ring_id:       GSI event ring used by the endpoint
  * @netdev:            Network device pointer, if endpoint uses one
- * @replenish_enabled: Whether receive buffer replenishing is enabled
+ * @replenish_flags:   Replenishing state flags
  * @replenish_ready:   Number of replenish transactions without doorbell
  * @replenish_saved:   Replenish requests held while disabled
  * @replenish_backlog: Number of buffers needed to fill hardware queue
@@ -72,7 +85,7 @@ struct ipa_endpoint {
        struct net_device *netdev;
 
        /* Receive buffer replenishing for RX endpoints */
-       bool replenish_enabled;
+       DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT);
        u32 replenish_ready;
        atomic_t replenish_saved;
        atomic_t replenish_backlog;
index dae95d9..5b6c0d1 100644 (file)
@@ -421,7 +421,7 @@ static int at803x_set_wol(struct phy_device *phydev,
        const u8 *mac;
        int ret, irq_enabled;
        unsigned int i;
-       const unsigned int offsets[] = {
+       static const unsigned int offsets[] = {
                AT803X_LOC_MAC_ADDR_32_47_OFFSET,
                AT803X_LOC_MAC_ADDR_16_31_OFFSET,
                AT803X_LOC_MAC_ADDR_0_15_OFFSET,
index 739859c..fa71fb7 100644 (file)
 #define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII    0x4
 #define MII_88E1510_GEN_CTRL_REG_1_RESET       0x8000  /* Soft reset */
 
+#define MII_88E1510_MSCR_2             0x15
+
 #define MII_VCT5_TX_RX_MDI0_COUPLING   0x10
 #define MII_VCT5_TX_RX_MDI1_COUPLING   0x11
 #define MII_VCT5_TX_RX_MDI2_COUPLING   0x12
@@ -1932,6 +1934,58 @@ static void marvell_get_stats(struct phy_device *phydev,
                data[i] = marvell_get_stat(phydev, i);
 }
 
+static int m88e1510_loopback(struct phy_device *phydev, bool enable)
+{
+       int err;
+
+       if (enable) {
+               u16 bmcr_ctl = 0, mscr2_ctl = 0;
+
+               if (phydev->speed == SPEED_1000)
+                       bmcr_ctl = BMCR_SPEED1000;
+               else if (phydev->speed == SPEED_100)
+                       bmcr_ctl = BMCR_SPEED100;
+
+               if (phydev->duplex == DUPLEX_FULL)
+                       bmcr_ctl |= BMCR_FULLDPLX;
+
+               err = phy_write(phydev, MII_BMCR, bmcr_ctl);
+               if (err < 0)
+                       return err;
+
+               if (phydev->speed == SPEED_1000)
+                       mscr2_ctl = BMCR_SPEED1000;
+               else if (phydev->speed == SPEED_100)
+                       mscr2_ctl = BMCR_SPEED100;
+
+               err = phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE,
+                                      MII_88E1510_MSCR_2, BMCR_SPEED1000 |
+                                      BMCR_SPEED100, mscr2_ctl);
+               if (err < 0)
+                       return err;
+
+               /* Need soft reset to have speed configuration takes effect */
+               err = genphy_soft_reset(phydev);
+               if (err < 0)
+                       return err;
+
+               /* FIXME: Based on trial and error test, it seem 1G need to have
+                * delay between soft reset and loopback enablement.
+                */
+               if (phydev->speed == SPEED_1000)
+                       msleep(1000);
+
+               return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
+                                 BMCR_LOOPBACK);
+       } else {
+               err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0);
+               if (err < 0)
+                       return err;
+
+               return phy_config_aneg(phydev);
+       }
+}
+
 static int marvell_vct5_wait_complete(struct phy_device *phydev)
 {
        int i;
@@ -3078,7 +3132,7 @@ static struct phy_driver marvell_drivers[] = {
                .get_sset_count = marvell_get_sset_count,
                .get_strings = marvell_get_strings,
                .get_stats = marvell_get_stats,
-               .set_loopback = genphy_loopback,
+               .set_loopback = m88e1510_loopback,
                .get_tunable = m88e1011_get_tunable,
                .set_tunable = m88e1011_set_tunable,
                .cable_test_start = marvell_vct7_cable_test_start,
index 4570cb9..a7ebcda 100644 (file)
@@ -1726,8 +1726,8 @@ static struct phy_driver ksphy_driver[] = {
        .config_init    = kszphy_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8021,
        .phy_id_mask    = 0x00ffffff,
@@ -1741,8 +1741,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8031,
        .phy_id_mask    = 0x00ffffff,
@@ -1756,8 +1756,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8041,
        .phy_id_mask    = MICREL_PHY_ID_MASK,
@@ -1788,8 +1788,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .name           = "Micrel KSZ8051",
        /* PHY_BASIC_FEATURES */
@@ -1802,8 +1802,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
        .match_phy_device = ksz8051_match_phy_device,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8001,
        .name           = "Micrel KSZ8001 or KS8721",
@@ -1817,8 +1817,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8081,
        .name           = "Micrel KSZ8081 or KSZ8091",
@@ -1848,8 +1848,8 @@ static struct phy_driver ksphy_driver[] = {
        .config_init    = ksz8061_config_init,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ9021,
        .phy_id_mask    = 0x000ffffe,
@@ -1864,8 +1864,8 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .suspend        = kszphy_suspend,
+       .resume         = kszphy_resume,
        .read_mmd       = genphy_read_mmd_unsupported,
        .write_mmd      = genphy_write_mmd_unsupported,
 }, {
@@ -1883,7 +1883,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
+       .suspend        = kszphy_suspend,
        .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_LAN8814,
@@ -1928,7 +1928,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_sset_count = kszphy_get_sset_count,
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
+       .suspend        = kszphy_suspend,
        .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8873MLL,
index ab77a9f..4720b24 100644 (file)
@@ -1641,17 +1641,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp)
 static int sfp_module_parse_power(struct sfp *sfp)
 {
        u32 power_mW = 1000;
+       bool supports_a2;
 
        if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
                power_mW = 1500;
        if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
                power_mW = 2000;
 
+       supports_a2 = sfp->id.ext.sff8472_compliance !=
+                               SFP_SFF8472_COMPLIANCE_NONE ||
+                     sfp->id.ext.diagmon & SFP_DIAGMON_DDM;
+
        if (power_mW > sfp->max_power_mW) {
                /* Module power specification exceeds the allowed maximum. */
-               if (sfp->id.ext.sff8472_compliance ==
-                       SFP_SFF8472_COMPLIANCE_NONE &&
-                   !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) {
+               if (!supports_a2) {
                        /* The module appears not to implement bus address
                         * 0xa2, so assume that the module powers up in the
                         * indicated mode.
@@ -1668,11 +1671,25 @@ static int sfp_module_parse_power(struct sfp *sfp)
                }
        }
 
+       if (power_mW <= 1000) {
+               /* Modules below 1W do not require a power change sequence */
+               sfp->module_power_mW = power_mW;
+               return 0;
+       }
+
+       if (!supports_a2) {
+               /* The module power level is below the host maximum and the
+                * module appears not to implement bus address 0xa2, so assume
+                * that the module powers up in the indicated mode.
+                */
+               return 0;
+       }
+
        /* If the module requires a higher power mode, but also requires
         * an address change sequence, warn the user that the module may
         * not be functional.
         */
-       if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) {
+       if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) {
                dev_warn(sfp->dev,
                         "Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n",
                         power_mW / 1000, (power_mW / 100) % 10);
index f510e82..37e5f34 100644 (file)
@@ -1316,6 +1316,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x19d2, 0x1426, 2)},    /* ZTE MF91 */
        {QMI_FIXED_INTF(0x19d2, 0x1428, 2)},    /* Telewell TW-LTE 4G v2 */
        {QMI_FIXED_INTF(0x19d2, 0x1432, 3)},    /* ZTE ME3620 */
+       {QMI_FIXED_INTF(0x19d2, 0x1485, 5)},    /* ZTE MF286D */
        {QMI_FIXED_INTF(0x19d2, 0x2002, 4)},    /* ZTE (Vodafone) K3765-Z */
        {QMI_FIXED_INTF(0x2001, 0x7e16, 3)},    /* D-Link DWM-221 */
        {QMI_FIXED_INTF(0x2001, 0x7e19, 4)},    /* D-Link DWM-221 B1 */
@@ -1401,6 +1402,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x413c, 0x81e0, 0)},    /* Dell Wireless 5821e with eSIM support*/
        {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},    /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
        {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},    /* HP lt4120 Snapdragon X5 LTE */
+       {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */
        {QMI_FIXED_INTF(0x22de, 0x9061, 3)},    /* WeTelecom WPD-600N */
        {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */
        {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
index abe0149..bc1e3dd 100644 (file)
@@ -1962,7 +1962,8 @@ static const struct driver_info smsc95xx_info = {
        .bind           = smsc95xx_bind,
        .unbind         = smsc95xx_unbind,
        .link_reset     = smsc95xx_link_reset,
-       .reset          = smsc95xx_start_phy,
+       .reset          = smsc95xx_reset,
+       .check_connect  = smsc95xx_start_phy,
        .stop           = smsc95xx_stop,
        .rx_fixup       = smsc95xx_rx_fixup,
        .tx_fixup       = smsc95xx_tx_fixup,
index 2f3c451..2f89080 100644 (file)
@@ -4,6 +4,8 @@
  */
 
 #include <asm/unaligned.h>
+
+#include <linux/math.h>
 #include <linux/string.h>
 #include <linux/bug.h>
 
index 71bf9b4..6872782 100644 (file)
@@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
        int err;
 
        while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
-               struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+               struct sk_buff *skb = alloc_skb(mbim->mru, GFP_KERNEL);
 
                if (unlikely(!skb))
                        break;
 
                err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
-                                   MHI_DEFAULT_MRU, MHI_EOT);
+                                   mbim->mru, MHI_EOT);
                if (unlikely(err)) {
                        kfree_skb(skb);
                        break;
index 37d26f0..62a0f1a 100644 (file)
@@ -188,7 +188,7 @@ do {                                                                \
 static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy)
 {
        int polarity, retry, ret;
-       char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
+       static const char rset_cmd[] = { 0x05, 0xF9, 0x04, 0x00, 0xC3, 0xE5 };
        int count = sizeof(rset_cmd);
 
        nfc_info(&phy->i2c_dev->dev, "Detecting nfc_en polarity\n");
index a43fc41..c922f10 100644 (file)
@@ -316,6 +316,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
                        return -ENOMEM;
 
                transaction->aid_len = skb->data[1];
+
+               /* Checking if the length of the AID is valid */
+               if (transaction->aid_len > sizeof(transaction->aid))
+                       return -EINVAL;
+
                memcpy(transaction->aid, &skb->data[2],
                       transaction->aid_len);
 
@@ -325,6 +330,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host,
                        return -EPROTO;
 
                transaction->params_len = skb->data[transaction->aid_len + 3];
+
+               /* Total size is allocated (skb->len - 2) minus fixed array members */
+               if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction)))
+                       return -EINVAL;
+
                memcpy(transaction->params, skb->data +
                       transaction->aid_len + 4, transaction->params_len);
 
index 93772ab..c7552df 100644 (file)
@@ -548,6 +548,73 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
        }
 }
 
+static int pwm_apply_legacy(struct pwm_chip *chip, struct pwm_device *pwm,
+                           const struct pwm_state *state)
+{
+       int err;
+       struct pwm_state initial_state = pwm->state;
+
+       if (state->polarity != pwm->state.polarity) {
+               if (!chip->ops->set_polarity)
+                       return -EINVAL;
+
+               /*
+                * Changing the polarity of a running PWM is only allowed when
+                * the PWM driver implements ->apply().
+                */
+               if (pwm->state.enabled) {
+                       chip->ops->disable(chip, pwm);
+
+                       /*
+                        * Update pwm->state already here in case
+                        * .set_polarity() or another callback depend on that.
+                        */
+                       pwm->state.enabled = false;
+               }
+
+               err = chip->ops->set_polarity(chip, pwm, state->polarity);
+               if (err)
+                       goto rollback;
+
+               pwm->state.polarity = state->polarity;
+       }
+
+       if (!state->enabled) {
+               if (pwm->state.enabled)
+                       chip->ops->disable(chip, pwm);
+
+               return 0;
+       }
+
+       /*
+        * We cannot skip calling ->config even if state->period ==
+        * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+        * because we might have exited early in the last call to
+        * pwm_apply_state because of !state->enabled and so the two values in
+        * pwm->state might not be configured in hardware.
+        */
+       err = chip->ops->config(pwm->chip, pwm,
+                               state->duty_cycle,
+                               state->period);
+       if (err)
+               goto rollback;
+
+       pwm->state.period = state->period;
+       pwm->state.duty_cycle = state->duty_cycle;
+
+       if (!pwm->state.enabled) {
+               err = chip->ops->enable(chip, pwm);
+               if (err)
+                       goto rollback;
+       }
+
+       return 0;
+
+rollback:
+       pwm->state = initial_state;
+       return err;
+}
+
 /**
  * pwm_apply_state() - atomically apply a new state to a PWM device
  * @pwm: PWM device
@@ -580,70 +647,22 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
            state->usage_power == pwm->state.usage_power)
                return 0;
 
-       if (chip->ops->apply) {
+       if (chip->ops->apply)
                err = chip->ops->apply(chip, pwm, state);
-               if (err)
-                       return err;
-
-               trace_pwm_apply(pwm, state);
-
-               pwm->state = *state;
-
-               /*
-                * only do this after pwm->state was applied as some
-                * implementations of .get_state depend on this
-                */
-               pwm_apply_state_debug(pwm, state);
-       } else {
-               /*
-                * FIXME: restore the initial state in case of error.
-                */
-               if (state->polarity != pwm->state.polarity) {
-                       if (!chip->ops->set_polarity)
-                               return -EINVAL;
+       else
+               err = pwm_apply_legacy(chip, pwm, state);
+       if (err)
+               return err;
 
-                       /*
-                        * Changing the polarity of a running PWM is
-                        * only allowed when the PWM driver implements
-                        * ->apply().
-                        */
-                       if (pwm->state.enabled) {
-                               chip->ops->disable(chip, pwm);
-                               pwm->state.enabled = false;
-                       }
+       trace_pwm_apply(pwm, state);
 
-                       err = chip->ops->set_polarity(chip, pwm,
-                                                     state->polarity);
-                       if (err)
-                               return err;
+       pwm->state = *state;
 
-                       pwm->state.polarity = state->polarity;
-               }
-
-               if (state->period != pwm->state.period ||
-                   state->duty_cycle != pwm->state.duty_cycle) {
-                       err = chip->ops->config(pwm->chip, pwm,
-                                               state->duty_cycle,
-                                               state->period);
-                       if (err)
-                               return err;
-
-                       pwm->state.duty_cycle = state->duty_cycle;
-                       pwm->state.period = state->period;
-               }
-
-               if (state->enabled != pwm->state.enabled) {
-                       if (state->enabled) {
-                               err = chip->ops->enable(chip, pwm);
-                               if (err)
-                                       return err;
-                       } else {
-                               chip->ops->disable(chip, pwm);
-                       }
-
-                       pwm->state.enabled = state->enabled;
-               }
-       }
+       /*
+        * only do this after pwm->state was applied as some
+        * implementations of .get_state depend on this
+        */
+       pwm_apply_state_debug(pwm, state);
 
        return 0;
 }
index f97f825..5996049 100644 (file)
@@ -128,11 +128,9 @@ static int img_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
        duty = DIV_ROUND_UP(timebase * duty_ns, period_ns);
 
-       ret = pm_runtime_get_sync(chip->dev);
-       if (ret < 0) {
-               pm_runtime_put_autosuspend(chip->dev);
+       ret = pm_runtime_resume_and_get(chip->dev);
+       if (ret < 0)
                return ret;
-       }
 
        val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG);
        val &= ~(PWM_CTRL_CFG_DIV_MASK << PWM_CTRL_CFG_DIV_SHIFT(pwm->hwpwm));
@@ -184,10 +182,33 @@ static void img_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        pm_runtime_put_autosuspend(chip->dev);
 }
 
+static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                        const struct pwm_state *state)
+{
+       int err;
+
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
+       if (!state->enabled) {
+               if (pwm->state.enabled)
+                       img_pwm_disable(chip, pwm);
+
+               return 0;
+       }
+
+       err = img_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+       if (err)
+               return err;
+
+       if (!pwm->state.enabled)
+               err = img_pwm_enable(chip, pwm);
+
+       return err;
+}
+
 static const struct pwm_ops img_pwm_ops = {
-       .config = img_pwm_config,
-       .enable = img_pwm_enable,
-       .disable = img_pwm_disable,
+       .apply = img_pwm_apply,
        .owner = THIS_MODULE,
 };
 
index 203194f..86567ad 100644 (file)
@@ -58,9 +58,9 @@ static inline struct twl_pwm_chip *to_twl(struct pwm_chip *chip)
 }
 
 static int twl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                             int duty_ns, int period_ns)
+                         u64 duty_ns, u64 period_ns)
 {
-       int duty_cycle = DIV_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
+       int duty_cycle = DIV64_U64_ROUND_UP(duty_ns * TWL_PWM_MAX, period_ns) + 1;
        u8 pwm_config[2] = { 1, 0 };
        int base, ret;
 
@@ -279,19 +279,65 @@ out:
        mutex_unlock(&twl->mutex);
 }
 
+static int twl4030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                            const struct pwm_state *state)
+{
+       int err;
+
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
+       if (!state->enabled) {
+               if (pwm->state.enabled)
+                       twl4030_pwm_disable(chip, pwm);
+
+               return 0;
+       }
+
+       err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+       if (err)
+               return err;
+
+       if (!pwm->state.enabled)
+               err = twl4030_pwm_enable(chip, pwm);
+
+       return err;
+}
+
+static int twl6030_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                            const struct pwm_state *state)
+{
+       int err;
+
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
+       if (!state->enabled) {
+               if (pwm->state.enabled)
+                       twl6030_pwm_disable(chip, pwm);
+
+               return 0;
+       }
+
+       err = twl_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+       if (err)
+               return err;
+
+       if (!pwm->state.enabled)
+               err = twl6030_pwm_enable(chip, pwm);
+
+       return err;
+}
+
 static const struct pwm_ops twl4030_pwm_ops = {
-       .config = twl_pwm_config,
-       .enable = twl4030_pwm_enable,
-       .disable = twl4030_pwm_disable,
+       .apply = twl4030_pwm_apply,
        .request = twl4030_pwm_request,
        .free = twl4030_pwm_free,
        .owner = THIS_MODULE,
 };
 
 static const struct pwm_ops twl6030_pwm_ops = {
-       .config = twl_pwm_config,
-       .enable = twl6030_pwm_enable,
-       .disable = twl6030_pwm_disable,
+       .apply = twl6030_pwm_apply,
        .owner = THIS_MODULE,
 };
 
index 480bfc2..7170a31 100644 (file)
@@ -70,7 +70,7 @@ static inline void vt8500_pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 b
 }
 
 static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-               int duty_ns, int period_ns)
+               u64 duty_ns, u64 period_ns)
 {
        struct vt8500_chip *vt8500 = to_vt8500_chip(chip);
        unsigned long long c;
@@ -102,8 +102,8 @@ static int vt8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        c = (unsigned long long)pv * duty_ns;
-       do_div(c, period_ns);
-       dc = c;
+
+       dc = div64_u64(c, period_ns);
 
        writel(prescale, vt8500->base + REG_SCALAR(pwm->hwpwm));
        vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE);
@@ -176,11 +176,54 @@ static int vt8500_pwm_set_polarity(struct pwm_chip *chip,
        return 0;
 }
 
+static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                           const struct pwm_state *state)
+{
+       int err;
+       bool enabled = pwm->state.enabled;
+
+       if (state->polarity != pwm->state.polarity) {
+               /*
+                * Changing the polarity of a running PWM is only allowed when
+                * the PWM driver implements ->apply().
+                */
+               if (enabled) {
+                       vt8500_pwm_disable(chip, pwm);
+
+                       enabled = false;
+               }
+
+               err = vt8500_pwm_set_polarity(chip, pwm, state->polarity);
+               if (err)
+                       return err;
+       }
+
+       if (!state->enabled) {
+               if (enabled)
+                       vt8500_pwm_disable(chip, pwm);
+
+               return 0;
+       }
+
+       /*
+        * We cannot skip calling ->config even if state->period ==
+        * pwm->state.period && state->duty_cycle == pwm->state.duty_cycle
+        * because we might have exited early in the last call to
+        * pwm_apply_state because of !state->enabled and so the two values in
+        * pwm->state might not be configured in hardware.
+        */
+       err = vt8500_pwm_config(pwm->chip, pwm, state->duty_cycle, state->period);
+       if (err)
+               return err;
+
+       if (!enabled)
+               err = vt8500_pwm_enable(chip, pwm);
+
+       return err;
+}
+
 static const struct pwm_ops vt8500_pwm_ops = {
-       .enable = vt8500_pwm_enable,
-       .disable = vt8500_pwm_disable,
-       .config = vt8500_pwm_config,
-       .set_polarity = vt8500_pwm_set_polarity,
+       .apply = vt8500_pwm_apply,
        .owner = THIS_MODULE,
 };
 
index 058e56a..d85a3c3 100644 (file)
@@ -1216,6 +1216,17 @@ config RTC_DRV_V3020
          This driver can also be built as a module. If so, the module
          will be called rtc-v3020.
 
+config RTC_DRV_GAMECUBE
+       tristate "Nintendo GameCube, Wii and Wii U RTC"
+       depends on GAMECUBE || WII || COMPILE_TEST
+       select REGMAP
+       help
+         If you say yes here you will get support for the RTC subsystem
+         of the Nintendo GameCube, Wii and Wii U.
+
+         This driver can also be built as a module. If so, the module
+         will be called "rtc-gamecube".
+
 config RTC_DRV_WM831X
        tristate "Wolfson Microelectronics WM831x RTC"
        depends on MFD_WM831X
@@ -1444,6 +1455,19 @@ config RTC_DRV_SH
          To compile this driver as a module, choose M here: the
          module will be called rtc-sh.
 
+config RTC_DRV_SUNPLUS
+       tristate "Sunplus SP7021 RTC"
+       depends on SOC_SP7021
+       help
+         Say 'yes' to get support for the real-time clock present in
+         Sunplus SP7021 - a SoC for industrial applications. It provides
+         RTC status check, timer/alarm functionalities, user data
+         reservation with the battery over 2.5V, RTC power status check
+         and battery charge.
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-sunplus.
+
 config RTC_DRV_VR41XX
        tristate "NEC VR41XX"
        depends on CPU_VR41XX || COMPILE_TEST
index 678a8ef..e92f3e9 100644 (file)
@@ -111,6 +111,7 @@ obj-$(CONFIG_RTC_DRV_MT7622)        += rtc-mt7622.o
 obj-$(CONFIG_RTC_DRV_MV)       += rtc-mv.o
 obj-$(CONFIG_RTC_DRV_MXC)      += rtc-mxc.o
 obj-$(CONFIG_RTC_DRV_MXC_V2)   += rtc-mxc_v2.o
+obj-$(CONFIG_RTC_DRV_GAMECUBE) += rtc-gamecube.o
 obj-$(CONFIG_RTC_DRV_NTXEC)    += rtc-ntxec.o
 obj-$(CONFIG_RTC_DRV_OMAP)     += rtc-omap.o
 obj-$(CONFIG_RTC_DRV_OPAL)     += rtc-opal.o
@@ -165,6 +166,7 @@ obj-$(CONFIG_RTC_DRV_STM32)         += rtc-stm32.o
 obj-$(CONFIG_RTC_DRV_STMP)     += rtc-stmp3xxx.o
 obj-$(CONFIG_RTC_DRV_SUN4V)    += rtc-sun4v.o
 obj-$(CONFIG_RTC_DRV_SUN6I)    += rtc-sun6i.o
+obj-$(CONFIG_RTC_DRV_SUNPLUS)  += rtc-sunplus.o
 obj-$(CONFIG_RTC_DRV_SUNXI)    += rtc-sunxi.o
 obj-$(CONFIG_RTC_DRV_TEGRA)    += rtc-tegra.o
 obj-$(CONFIG_RTC_DRV_TEST)     += rtc-test.o
index e104972..69325ae 100644 (file)
@@ -391,14 +391,14 @@ static long rtc_dev_ioctl(struct file *file,
                }
 
                switch(param.param) {
-                       long offset;
                case RTC_PARAM_FEATURES:
                        if (param.index != 0)
                                err = -EINVAL;
                        param.uvalue = rtc->features[0];
                        break;
 
-               case RTC_PARAM_CORRECTION:
+               case RTC_PARAM_CORRECTION: {
+                       long offset;
                        mutex_unlock(&rtc->ops_lock);
                        if (param.index != 0)
                                return -EINVAL;
@@ -407,7 +407,7 @@ static long rtc_dev_ioctl(struct file *file,
                        if (err == 0)
                                param.svalue = offset;
                        break;
-
+               }
                default:
                        if (rtc->ops->param_get)
                                err = rtc->ops->param_get(rtc->dev.parent, &param);
index 4eb5341..7c006c2 100644 (file)
@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
 
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
+       int ret;
+
        /*
         * If pm_trace abused the RTC for storage, set the timespec to 0,
         * which tells the caller that this RTC value is unusable.
@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t)
        if (!pm_trace_rtc_valid())
                return -EIO;
 
-       mc146818_get_time(t);
+       ret = mc146818_get_time(t);
+       if (ret < 0) {
+               dev_err_ratelimited(dev, "unable to read current time\n");
+               return ret;
+       }
+
        return 0;
 }
 
@@ -242,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t)
        return mc146818_set_time(t);
 }
 
+struct cmos_read_alarm_callback_param {
+       struct cmos_rtc *cmos;
+       struct rtc_time *time;
+       unsigned char   rtc_control;
+};
+
+static void cmos_read_alarm_callback(unsigned char __always_unused seconds,
+                                    void *param_in)
+{
+       struct cmos_read_alarm_callback_param *p =
+               (struct cmos_read_alarm_callback_param *)param_in;
+       struct rtc_time *time = p->time;
+
+       time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
+       time->tm_min = CMOS_READ(RTC_MINUTES_ALARM);
+       time->tm_hour = CMOS_READ(RTC_HOURS_ALARM);
+
+       if (p->cmos->day_alrm) {
+               /* ignore upper bits on readback per ACPI spec */
+               time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f;
+               if (!time->tm_mday)
+                       time->tm_mday = -1;
+
+               if (p->cmos->mon_alrm) {
+                       time->tm_mon = CMOS_READ(p->cmos->mon_alrm);
+                       if (!time->tm_mon)
+                               time->tm_mon = -1;
+               }
+       }
+
+       p->rtc_control = CMOS_READ(RTC_CONTROL);
+}
+
 static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
        struct cmos_rtc *cmos = dev_get_drvdata(dev);
-       unsigned char   rtc_control;
+       struct cmos_read_alarm_callback_param p = {
+               .cmos = cmos,
+               .time = &t->time,
+       };
 
        /* This not only a rtc_op, but also called directly */
        if (!is_valid_irq(cmos->irq))
@@ -256,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
         * the future.
         */
 
-       spin_lock_irq(&rtc_lock);
-       t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM);
-       t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM);
-       t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM);
-
-       if (cmos->day_alrm) {
-               /* ignore upper bits on readback per ACPI spec */
-               t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f;
-               if (!t->time.tm_mday)
-                       t->time.tm_mday = -1;
-
-               if (cmos->mon_alrm) {
-                       t->time.tm_mon = CMOS_READ(cmos->mon_alrm);
-                       if (!t->time.tm_mon)
-                               t->time.tm_mon = -1;
-               }
-       }
-
-       rtc_control = CMOS_READ(RTC_CONTROL);
-       spin_unlock_irq(&rtc_lock);
+       /* Some Intel chipsets disconnect the alarm registers when the clock
+        * update is in progress - during this time reads return bogus values
+        * and writes may fail silently. See for example "7th Generation Intel®
+        * Processor Family I/O for U/Y Platforms [...] Datasheet", section
+        * 27.7.1
+        *
+        * Use the mc146818_avoid_UIP() function to avoid this.
+        */
+       if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p))
+               return -EIO;
 
-       if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+       if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
                if (((unsigned)t->time.tm_sec) < 0x60)
                        t->time.tm_sec = bcd2bin(t->time.tm_sec);
                else
@@ -306,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
                }
        }
 
-       t->enabled = !!(rtc_control & RTC_AIE);
+       t->enabled = !!(p.rtc_control & RTC_AIE);
        t->pending = 0;
 
        return 0;
@@ -437,10 +470,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t)
        return 0;
 }
 
+struct cmos_set_alarm_callback_param {
+       struct cmos_rtc *cmos;
+       unsigned char mon, mday, hrs, min, sec;
+       struct rtc_wkalrm *t;
+};
+
+/* Note: this function may be executed by mc146818_avoid_UIP() more than
+ *      once
+ */
+static void cmos_set_alarm_callback(unsigned char __always_unused seconds,
+                                   void *param_in)
+{
+       struct cmos_set_alarm_callback_param *p =
+               (struct cmos_set_alarm_callback_param *)param_in;
+
+       /* next rtc irq must not be from previous alarm setting */
+       cmos_irq_disable(p->cmos, RTC_AIE);
+
+       /* update alarm */
+       CMOS_WRITE(p->hrs, RTC_HOURS_ALARM);
+       CMOS_WRITE(p->min, RTC_MINUTES_ALARM);
+       CMOS_WRITE(p->sec, RTC_SECONDS_ALARM);
+
+       /* the system may support an "enhanced" alarm */
+       if (p->cmos->day_alrm) {
+               CMOS_WRITE(p->mday, p->cmos->day_alrm);
+               if (p->cmos->mon_alrm)
+                       CMOS_WRITE(p->mon, p->cmos->mon_alrm);
+       }
+
+       if (use_hpet_alarm()) {
+               /*
+                * FIXME the HPET alarm glue currently ignores day_alrm
+                * and mon_alrm ...
+                */
+               hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min,
+                                   p->t->time.tm_sec);
+       }
+
+       if (p->t->enabled)
+               cmos_irq_enable(p->cmos, RTC_AIE);
+}
+
 static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
        struct cmos_rtc *cmos = dev_get_drvdata(dev);
-       unsigned char mon, mday, hrs, min, sec, rtc_control;
+       struct cmos_set_alarm_callback_param p = {
+               .cmos = cmos,
+               .t = t
+       };
+       unsigned char rtc_control;
        int ret;
 
        /* This not only a rtc_op, but also called directly */
@@ -451,52 +531,33 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
        if (ret < 0)
                return ret;
 
-       mon = t->time.tm_mon + 1;
-       mday = t->time.tm_mday;
-       hrs = t->time.tm_hour;
-       min = t->time.tm_min;
-       sec = t->time.tm_sec;
+       p.mon = t->time.tm_mon + 1;
+       p.mday = t->time.tm_mday;
+       p.hrs = t->time.tm_hour;
+       p.min = t->time.tm_min;
+       p.sec = t->time.tm_sec;
 
+       spin_lock_irq(&rtc_lock);
        rtc_control = CMOS_READ(RTC_CONTROL);
+       spin_unlock_irq(&rtc_lock);
+
        if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
                /* Writing 0xff means "don't care" or "match all".  */
-               mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
-               mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
-               hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
-               min = (min < 60) ? bin2bcd(min) : 0xff;
-               sec = (sec < 60) ? bin2bcd(sec) : 0xff;
-       }
-
-       spin_lock_irq(&rtc_lock);
-
-       /* next rtc irq must not be from previous alarm setting */
-       cmos_irq_disable(cmos, RTC_AIE);
-
-       /* update alarm */
-       CMOS_WRITE(hrs, RTC_HOURS_ALARM);
-       CMOS_WRITE(min, RTC_MINUTES_ALARM);
-       CMOS_WRITE(sec, RTC_SECONDS_ALARM);
-
-       /* the system may support an "enhanced" alarm */
-       if (cmos->day_alrm) {
-               CMOS_WRITE(mday, cmos->day_alrm);
-               if (cmos->mon_alrm)
-                       CMOS_WRITE(mon, cmos->mon_alrm);
+               p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff;
+               p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff;
+               p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff;
+               p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff;
+               p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff;
        }
 
-       if (use_hpet_alarm()) {
-               /*
-                * FIXME the HPET alarm glue currently ignores day_alrm
-                * and mon_alrm ...
-                */
-               hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min,
-                                   t->time.tm_sec);
-       }
-
-       if (t->enabled)
-               cmos_irq_enable(cmos, RTC_AIE);
-
-       spin_unlock_irq(&rtc_lock);
+       /*
+        * Some Intel chipsets disconnect the alarm registers when the clock
+        * update is in progress - during this time writes fail silently.
+        *
+        * Use mc146818_avoid_UIP() to avoid this.
+        */
+       if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p))
+               return -EIO;
 
        cmos->alarm_expires = rtc_tm_to_time64(&t->time);
 
@@ -790,16 +851,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
        rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
 
-       spin_lock_irq(&rtc_lock);
-
-       /* Ensure that the RTC is accessible. Bit 6 must be 0! */
-       if ((CMOS_READ(RTC_VALID) & 0x40) != 0) {
-               spin_unlock_irq(&rtc_lock);
-               dev_warn(dev, "not accessible\n");
+       if (!mc146818_does_rtc_work()) {
+               dev_warn(dev, "broken or not accessible\n");
                retval = -ENXIO;
                goto cleanup1;
        }
 
+       spin_lock_irq(&rtc_lock);
+
        if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
                /* force periodic irq to CMOS reset default of 1024Hz;
                 *
index d4b72a9..ee2efb4 100644 (file)
@@ -475,12 +475,14 @@ static int da9063_rtc_probe(struct platform_device *pdev)
        da9063_data_to_tm(data, &rtc->alarm_time, rtc);
        rtc->rtc_sync = false;
 
-       /*
-        * TODO: some models have alarms on a minute boundary but still support
-        * real hardware interrupts. Add this once the core supports it.
-        */
-       if (config->rtc_data_start != RTC_SEC)
-               rtc->rtc_dev->uie_unsupported = 1;
+       if (config->rtc_data_start != RTC_SEC) {
+               set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtc_dev->features);
+               /*
+                * TODO: some models have alarms on a minute boundary but still
+                * support real hardware interrupts.
+                */
+               clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtc_dev->features);
+       }
 
        irq_alarm = platform_get_irq_byname(pdev, "ALARM");
        if (irq_alarm < 0)
@@ -494,6 +496,8 @@ static int da9063_rtc_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
                        irq_alarm, ret);
 
+       device_init_wakeup(&pdev->dev, true);
+
        return devm_rtc_register_device(rtc->rtc_dev);
 }
 
index ad3add5..53bb08f 100644 (file)
@@ -141,11 +141,9 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
                }
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-       if (!res)
-               return -ENODEV;
-
-       rtc->rtc_irq = res->start;
+       rtc->rtc_irq = platform_get_irq(pdev, 0);
+       if (rtc->rtc_irq < 0)
+               return rtc->rtc_irq;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res)
diff --git a/drivers/rtc/rtc-gamecube.c b/drivers/rtc/rtc-gamecube.c
new file mode 100644 (file)
index 0000000..f717b36
--- /dev/null
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Nintendo GameCube, Wii and Wii U RTC driver
+ *
+ * This driver is for the MX23L4005, more specifically its real-time clock and
+ * SRAM storage.  The value returned by the RTC counter must be added with the
+ * offset stored in a bias register in SRAM (on the GameCube and Wii) or in
+ * /config/rtc.xml (on the Wii U).  The latter being very impractical to access
+ * from Linux, this driver assumes the bootloader has read it and stored it in
+ * SRAM like for the other two consoles.
+ *
+ * This device sits on a bus named EXI (which is similar to SPI), channel 0,
+ * device 1.  This driver assumes no other user of the EXI bus, which is
+ * currently the case but would have to be reworked to add support for other
+ * GameCube hardware exposed on this bus.
+ *
+ * References:
+ * - https://wiiubrew.org/wiki/Hardware/RTC
+ * - https://wiibrew.org/wiki/MX23L4005
+ *
+ * Copyright (C) 2018 rw-r-r-0644
+ * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+ *
+ * Based on rtc-gcn.c
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2005,2008,2009 Albert Herranz
+ * Based on gamecube_time.c from Torben Nielsen.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+/* EXI registers */
+#define EXICSR 0
+#define EXICR  12
+#define EXIDATA        16
+
+/* EXI register values */
+#define EXICSR_DEV             0x380
+       #define EXICSR_DEV1     0x100
+#define EXICSR_CLK             0x070
+       #define EXICSR_CLK_1MHZ 0x000
+       #define EXICSR_CLK_2MHZ 0x010
+       #define EXICSR_CLK_4MHZ 0x020
+       #define EXICSR_CLK_8MHZ 0x030
+       #define EXICSR_CLK_16MHZ 0x040
+       #define EXICSR_CLK_32MHZ 0x050
+#define EXICSR_INT             0x008
+       #define EXICSR_INTSET   0x008
+
+#define EXICR_TSTART           0x001
+#define EXICR_TRSMODE          0x002
+       #define EXICR_TRSMODE_IMM 0x000
+#define EXICR_TRSTYPE          0x00C
+       #define EXICR_TRSTYPE_R 0x000
+       #define EXICR_TRSTYPE_W 0x004
+#define EXICR_TLEN             0x030
+       #define EXICR_TLEN32    0x030
+
+/* EXI registers values to access the RTC */
+#define RTC_EXICSR     (EXICSR_DEV1 | EXICSR_CLK_8MHZ | EXICSR_INTSET)
+#define RTC_EXICR_W    (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_W | EXICR_TLEN32)
+#define RTC_EXICR_R    (EXICR_TSTART | EXICR_TRSMODE_IMM | EXICR_TRSTYPE_R | EXICR_TLEN32)
+#define RTC_EXIDATA_W  0x80000000
+
+/* RTC registers */
+#define RTC_COUNTER    0x200000
+#define RTC_SRAM       0x200001
+#define RTC_SRAM_BIAS  0x200004
+#define RTC_SNAPSHOT   0x204000
+#define RTC_ONTMR      0x210000
+#define RTC_OFFTMR     0x210001
+#define RTC_TEST0      0x210004
+#define RTC_TEST1      0x210005
+#define RTC_TEST2      0x210006
+#define RTC_TEST3      0x210007
+#define RTC_CONTROL0   0x21000c
+#define RTC_CONTROL1   0x21000d
+
+/* RTC flags */
+#define RTC_CONTROL0_UNSTABLE_POWER    0x00000800
+#define RTC_CONTROL0_LOW_BATTERY       0x00000200
+
+struct priv {
+       struct regmap *regmap;
+       void __iomem *iob;
+       u32 rtc_bias;
+};
+
+static int exi_read(void *context, u32 reg, u32 *data)
+{
+       struct priv *d = (struct priv *)context;
+       void __iomem *iob = d->iob;
+
+       /* The spin loops here loop about 15~16 times each, so there is no need
+        * to use a more expensive sleep method.
+        */
+
+       /* Write register offset */
+       iowrite32be(RTC_EXICSR, iob + EXICSR);
+       iowrite32be(reg << 8, iob + EXIDATA);
+       iowrite32be(RTC_EXICR_W, iob + EXICR);
+       while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+               cpu_relax();
+
+       /* Read data */
+       iowrite32be(RTC_EXICSR, iob + EXICSR);
+       iowrite32be(RTC_EXICR_R, iob + EXICR);
+       while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+               cpu_relax();
+       *data = ioread32be(iob + EXIDATA);
+
+       /* Clear channel parameters */
+       iowrite32be(0, iob + EXICSR);
+
+       return 0;
+}
+
+static int exi_write(void *context, u32 reg, u32 data)
+{
+       struct priv *d = (struct priv *)context;
+       void __iomem *iob = d->iob;
+
+       /* The spin loops here loop about 15~16 times each, so there is no need
+        * to use a more expensive sleep method.
+        */
+
+       /* Write register offset */
+       iowrite32be(RTC_EXICSR, iob + EXICSR);
+       iowrite32be(RTC_EXIDATA_W | (reg << 8), iob + EXIDATA);
+       iowrite32be(RTC_EXICR_W, iob + EXICR);
+       while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+               cpu_relax();
+
+       /* Write data */
+       iowrite32be(RTC_EXICSR, iob + EXICSR);
+       iowrite32be(data, iob + EXIDATA);
+       iowrite32be(RTC_EXICR_W, iob + EXICR);
+       while (!(ioread32be(iob + EXICSR) & EXICSR_INTSET))
+               cpu_relax();
+
+       /* Clear channel parameters */
+       iowrite32be(0, iob + EXICSR);
+
+       return 0;
+}
+
+static const struct regmap_bus exi_bus = {
+       /* TODO: is that true?  Not that it matters here, but still. */
+       .fast_io = true,
+       .reg_read = exi_read,
+       .reg_write = exi_write,
+};
+
+static int gamecube_rtc_read_time(struct device *dev, struct rtc_time *t)
+{
+       struct priv *d = dev_get_drvdata(dev);
+       int ret;
+       u32 counter;
+       time64_t timestamp;
+
+       ret = regmap_read(d->regmap, RTC_COUNTER, &counter);
+       if (ret)
+               return ret;
+
+       /* Add the counter and the bias to obtain the timestamp */
+       timestamp = (time64_t)d->rtc_bias + counter;
+       rtc_time64_to_tm(timestamp, t);
+
+       return 0;
+}
+
+static int gamecube_rtc_set_time(struct device *dev, struct rtc_time *t)
+{
+       struct priv *d = dev_get_drvdata(dev);
+       time64_t timestamp;
+
+       /* Subtract the timestamp and the bias to obtain the counter value */
+       timestamp = rtc_tm_to_time64(t);
+       return regmap_write(d->regmap, RTC_COUNTER, timestamp - d->rtc_bias);
+}
+
+static int gamecube_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct priv *d = dev_get_drvdata(dev);
+       int value;
+       int control0;
+       int ret;
+
+       switch (cmd) {
+       case RTC_VL_READ:
+               ret = regmap_read(d->regmap, RTC_CONTROL0, &control0);
+               if (ret)
+                       return ret;
+
+               value = 0;
+               if (control0 & RTC_CONTROL0_UNSTABLE_POWER)
+                       value |= RTC_VL_DATA_INVALID;
+               if (control0 & RTC_CONTROL0_LOW_BATTERY)
+                       value |= RTC_VL_BACKUP_LOW;
+               return put_user(value, (unsigned int __user *)arg);
+
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
+static const struct rtc_class_ops gamecube_rtc_ops = {
+       .read_time      = gamecube_rtc_read_time,
+       .set_time       = gamecube_rtc_set_time,
+       .ioctl          = gamecube_rtc_ioctl,
+};
+
+static int gamecube_rtc_read_offset_from_sram(struct priv *d)
+{
+       struct device_node *np;
+       int ret;
+       struct resource res;
+       void __iomem *hw_srnprot;
+       u32 old;
+
+       np = of_find_compatible_node(NULL, NULL, "nintendo,latte-srnprot");
+       if (!np)
+               np = of_find_compatible_node(NULL, NULL,
+                                            "nintendo,hollywood-srnprot");
+       if (!np) {
+               pr_info("HW_SRNPROT not found, assuming a GameCube\n");
+               return regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+       }
+
+       ret = of_address_to_resource(np, 0, &res);
+       if (ret) {
+               pr_err("no io memory range found\n");
+               return -1;
+       }
+
+       hw_srnprot = ioremap(res.start, resource_size(&res));
+       old = ioread32be(hw_srnprot);
+
+       /* TODO: figure out why we use this magic constant.  I obtained it by
+        * reading the leftover value after boot, after IOSU already ran.
+        *
+        * On my Wii U, setting this register to 1 prevents the console from
+        * rebooting properly, so wiiubrew.org must be missing something.
+        *
+        * See https://wiiubrew.org/wiki/Hardware/Latte_registers
+        */
+       if (old != 0x7bf)
+               iowrite32be(0x7bf, hw_srnprot);
+
+       /* Get the offset from RTC SRAM.
+        *
+        * Its default location on the GameCube and on the Wii is in the SRAM,
+        * while on the Wii U the bootloader needs to fill it with the contents
+        * of /config/rtc.xml on the SLC (the eMMC).  We don’t do that from
+        * Linux since it requires implementing a proprietary filesystem and
+        * doing file decryption; instead we require the bootloader to fill the
+        * same SRAM address as on previous consoles.
+        */
+       ret = regmap_read(d->regmap, RTC_SRAM_BIAS, &d->rtc_bias);
+       if (ret) {
+               pr_err("failed to get the RTC bias\n");
+               return -1;
+       }
+
+       /* Reset SRAM access to how it was before, our job here is done. */
+       if (old != 0x7bf)
+               iowrite32be(old, hw_srnprot);
+       iounmap(hw_srnprot);
+
+       return 0;
+}
+
+static const struct regmap_range rtc_rd_ranges[] = {
+       regmap_reg_range(0x200000, 0x200010),
+       regmap_reg_range(0x204000, 0x204000),
+       regmap_reg_range(0x210000, 0x210001),
+       regmap_reg_range(0x210004, 0x210007),
+       regmap_reg_range(0x21000c, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_rd_regs = {
+       .yes_ranges =   rtc_rd_ranges,
+       .n_yes_ranges = ARRAY_SIZE(rtc_rd_ranges),
+};
+
+static const struct regmap_range rtc_wr_ranges[] = {
+       regmap_reg_range(0x200000, 0x200010),
+       regmap_reg_range(0x204000, 0x204000),
+       regmap_reg_range(0x210000, 0x210001),
+       regmap_reg_range(0x21000d, 0x21000d),
+};
+
+static const struct regmap_access_table rtc_wr_regs = {
+       .yes_ranges =   rtc_wr_ranges,
+       .n_yes_ranges = ARRAY_SIZE(rtc_wr_ranges),
+};
+
+static const struct regmap_config gamecube_rtc_regmap_config = {
+       .reg_bits = 24,
+       .val_bits = 32,
+       .rd_table = &rtc_rd_regs,
+       .wr_table = &rtc_wr_regs,
+       .max_register = 0x21000d,
+       .name = "gamecube-rtc",
+};
+
+static int gamecube_rtc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct rtc_device *rtc;
+       struct priv *d;
+       int ret;
+
+       d = devm_kzalloc(dev, sizeof(struct priv), GFP_KERNEL);
+       if (!d)
+               return -ENOMEM;
+
+       d->iob = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(d->iob))
+               return PTR_ERR(d->iob);
+
+       d->regmap = devm_regmap_init(dev, &exi_bus, d,
+                                    &gamecube_rtc_regmap_config);
+       if (IS_ERR(d->regmap))
+               return PTR_ERR(d->regmap);
+
+       ret = gamecube_rtc_read_offset_from_sram(d);
+       if (ret)
+               return ret;
+       dev_dbg(dev, "SRAM bias: 0x%x", d->rtc_bias);
+
+       dev_set_drvdata(dev, d);
+
+       rtc = devm_rtc_allocate_device(dev);
+       if (IS_ERR(rtc))
+               return PTR_ERR(rtc);
+
+       /* We can represent further than that, but it depends on the stored
+        * bias and we can’t modify it persistently on all supported consoles,
+        * so here we pretend to be limited to 2106.
+        */
+       rtc->range_min = 0;
+       rtc->range_max = U32_MAX;
+       rtc->ops = &gamecube_rtc_ops;
+
+       devm_rtc_register_device(rtc);
+
+       return 0;
+}
+
+static const struct of_device_id gamecube_rtc_of_match[] = {
+       {.compatible = "nintendo,latte-exi" },
+       {.compatible = "nintendo,hollywood-exi" },
+       {.compatible = "nintendo,flipper-exi" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, gamecube_rtc_of_match);
+
+static struct platform_driver gamecube_rtc_driver = {
+       .probe          = gamecube_rtc_probe,
+       .driver         = {
+               .name   = "rtc-gamecube",
+               .of_match_table = gamecube_rtc_of_match,
+       },
+};
+module_platform_driver(gamecube_rtc_driver);
+
+MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
+MODULE_DESCRIPTION("Nintendo GameCube, Wii and Wii U RTC driver");
+MODULE_LICENSE("GPL");
index dcfaf09..ae9f131 100644 (file)
 #include <linux/acpi.h>
 #endif
 
-unsigned int mc146818_get_time(struct rtc_time *time)
+/*
+ * Execute a function while the UIP (Update-in-progress) bit of the RTC is
+ * unset.
+ *
+ * Warning: callback may be executed more than once.
+ */
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+                       void *param)
 {
-       unsigned char ctrl;
+       int i;
        unsigned long flags;
-       unsigned char century = 0;
-       bool retry;
+       unsigned char seconds;
 
-#ifdef CONFIG_MACH_DECSTATION
-       unsigned int real_year;
-#endif
+       for (i = 0; i < 10; i++) {
+               spin_lock_irqsave(&rtc_lock, flags);
 
-again:
-       spin_lock_irqsave(&rtc_lock, flags);
-       /* Ensure that the RTC is accessible. Bit 6 must be 0! */
-       if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) {
-               spin_unlock_irqrestore(&rtc_lock, flags);
-               memset(time, 0xff, sizeof(*time));
-               return 0;
-       }
+               /*
+                * Check whether there is an update in progress during which the
+                * readout is unspecified. The maximum update time is ~2ms. Poll
+                * every msec for completion.
+                *
+                * Store the second value before checking UIP so a long lasting
+                * NMI which happens to hit after the UIP check cannot make
+                * an update cycle invisible.
+                */
+               seconds = CMOS_READ(RTC_SECONDS);
 
-       /*
-        * Check whether there is an update in progress during which the
-        * readout is unspecified. The maximum update time is ~2ms. Poll
-        * every msec for completion.
-        *
-        * Store the second value before checking UIP so a long lasting NMI
-        * which happens to hit after the UIP check cannot make an update
-        * cycle invisible.
-        */
-       time->tm_sec = CMOS_READ(RTC_SECONDS);
+               if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+                       spin_unlock_irqrestore(&rtc_lock, flags);
+                       mdelay(1);
+                       continue;
+               }
 
-       if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
-               spin_unlock_irqrestore(&rtc_lock, flags);
-               mdelay(1);
-               goto again;
-       }
+               /* Revalidate the above readout */
+               if (seconds != CMOS_READ(RTC_SECONDS)) {
+                       spin_unlock_irqrestore(&rtc_lock, flags);
+                       continue;
+               }
 
-       /* Revalidate the above readout */
-       if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
+               if (callback)
+                       callback(seconds, param);
+
+               /*
+                * Check for the UIP bit again. If it is set now then
+                * the above values may contain garbage.
+                */
+               if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+                       spin_unlock_irqrestore(&rtc_lock, flags);
+                       mdelay(1);
+                       continue;
+               }
+
+               /*
+                * A NMI might have interrupted the above sequence so check
+                * whether the seconds value has changed which indicates that
+                * the NMI took longer than the UIP bit was set. Unlikely, but
+                * possible and there is also virt...
+                */
+               if (seconds != CMOS_READ(RTC_SECONDS)) {
+                       spin_unlock_irqrestore(&rtc_lock, flags);
+                       continue;
+               }
                spin_unlock_irqrestore(&rtc_lock, flags);
-               goto again;
+
+               return true;
        }
+       return false;
+}
+EXPORT_SYMBOL_GPL(mc146818_avoid_UIP);
+
+/*
+ * If the UIP (Update-in-progress) bit of the RTC is set for more than
+ * 10ms, the RTC is apparently broken or not present.
+ */
+bool mc146818_does_rtc_work(void)
+{
+       return mc146818_avoid_UIP(NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(mc146818_does_rtc_work);
+
+struct mc146818_get_time_callback_param {
+       struct rtc_time *time;
+       unsigned char ctrl;
+#ifdef CONFIG_ACPI
+       unsigned char century;
+#endif
+#ifdef CONFIG_MACH_DECSTATION
+       unsigned int real_year;
+#endif
+};
+
+static void mc146818_get_time_callback(unsigned char seconds, void *param_in)
+{
+       struct mc146818_get_time_callback_param *p = param_in;
 
        /*
         * Only the values that we read from the RTC are set. We leave
@@ -57,39 +109,39 @@ again:
         * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
         * by the RTC when initially set to a non-zero value.
         */
-       time->tm_min = CMOS_READ(RTC_MINUTES);
-       time->tm_hour = CMOS_READ(RTC_HOURS);
-       time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
-       time->tm_mon = CMOS_READ(RTC_MONTH);
-       time->tm_year = CMOS_READ(RTC_YEAR);
+       p->time->tm_sec = seconds;
+       p->time->tm_min = CMOS_READ(RTC_MINUTES);
+       p->time->tm_hour = CMOS_READ(RTC_HOURS);
+       p->time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
+       p->time->tm_mon = CMOS_READ(RTC_MONTH);
+       p->time->tm_year = CMOS_READ(RTC_YEAR);
 #ifdef CONFIG_MACH_DECSTATION
-       real_year = CMOS_READ(RTC_DEC_YEAR);
+       p->real_year = CMOS_READ(RTC_DEC_YEAR);
 #endif
 #ifdef CONFIG_ACPI
        if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
-           acpi_gbl_FADT.century)
-               century = CMOS_READ(acpi_gbl_FADT.century);
+           acpi_gbl_FADT.century) {
+               p->century = CMOS_READ(acpi_gbl_FADT.century);
+       } else {
+               p->century = 0;
+       }
 #endif
-       ctrl = CMOS_READ(RTC_CONTROL);
-       /*
-        * Check for the UIP bit again. If it is set now then
-        * the above values may contain garbage.
-        */
-       retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
-       /*
-        * A NMI might have interrupted the above sequence so check whether
-        * the seconds value has changed which indicates that the NMI took
-        * longer than the UIP bit was set. Unlikely, but possible and
-        * there is also virt...
-        */
-       retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
 
-       spin_unlock_irqrestore(&rtc_lock, flags);
+       p->ctrl = CMOS_READ(RTC_CONTROL);
+}
 
-       if (retry)
-               goto again;
+int mc146818_get_time(struct rtc_time *time)
+{
+       struct mc146818_get_time_callback_param p = {
+               .time = time
+       };
+
+       if (!mc146818_avoid_UIP(mc146818_get_time_callback, &p)) {
+               memset(time, 0, sizeof(*time));
+               return -EIO;
+       }
 
-       if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+       if (!(p.ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
        {
                time->tm_sec = bcd2bin(time->tm_sec);
                time->tm_min = bcd2bin(time->tm_min);
@@ -97,15 +149,19 @@ again:
                time->tm_mday = bcd2bin(time->tm_mday);
                time->tm_mon = bcd2bin(time->tm_mon);
                time->tm_year = bcd2bin(time->tm_year);
-               century = bcd2bin(century);
+#ifdef CONFIG_ACPI
+               p.century = bcd2bin(p.century);
+#endif
        }
 
 #ifdef CONFIG_MACH_DECSTATION
-       time->tm_year += real_year - 72;
+       time->tm_year += p.real_year - 72;
 #endif
 
-       if (century > 20)
-               time->tm_year += (century - 19) * 100;
+#ifdef CONFIG_ACPI
+       if (p.century > 19)
+               time->tm_year += (p.century - 19) * 100;
+#endif
 
        /*
         * Account for differences between how the RTC uses the values
@@ -116,7 +172,7 @@ again:
 
        time->tm_mon--;
 
-       return RTC_24H;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(mc146818_get_time);
 
index 56c58b0..81a5b1f 100644 (file)
@@ -748,7 +748,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
        /*
         * Enable timestamp function and store timestamp of first trigger
-        * event until TSF1 and TFS2 interrupt flags are cleared.
+        * event until TSF1 and TSF2 interrupt flags are cleared.
         */
        ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
                                 PCF2127_BIT_TS_CTRL_TSOFF |
index 15e50bb..df2b072 100644 (file)
@@ -514,21 +514,56 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
 }
 #endif
 
-static const struct pcf85063_config pcf85063tp_config = {
-       .regmap = {
-               .reg_bits = 8,
-               .val_bits = 8,
-               .max_register = 0x0a,
+enum pcf85063_type {
+       PCF85063,
+       PCF85063TP,
+       PCF85063A,
+       RV8263,
+       PCF85063_LAST_ID
+};
+
+static struct pcf85063_config pcf85063_cfg[] = {
+       [PCF85063] = {
+               .regmap = {
+                       .reg_bits = 8,
+                       .val_bits = 8,
+                       .max_register = 0x0a,
+               },
+       },
+       [PCF85063TP] = {
+               .regmap = {
+                       .reg_bits = 8,
+                       .val_bits = 8,
+                       .max_register = 0x0a,
+               },
+       },
+       [PCF85063A] = {
+               .regmap = {
+                       .reg_bits = 8,
+                       .val_bits = 8,
+                       .max_register = 0x11,
+               },
+               .has_alarms = 1,
+       },
+       [RV8263] = {
+               .regmap = {
+                       .reg_bits = 8,
+                       .val_bits = 8,
+                       .max_register = 0x11,
+               },
+               .has_alarms = 1,
+               .force_cap_7000 = 1,
        },
 };
 
+static const struct i2c_device_id pcf85063_ids[];
+
 static int pcf85063_probe(struct i2c_client *client)
 {
        struct pcf85063 *pcf85063;
        unsigned int tmp;
        int err;
-       const struct pcf85063_config *config = &pcf85063tp_config;
-       const void *data = of_device_get_match_data(&client->dev);
+       const struct pcf85063_config *config;
        struct nvmem_config nvmem_cfg = {
                .name = "pcf85063_nvram",
                .reg_read = pcf85063_nvmem_read,
@@ -544,8 +579,17 @@ static int pcf85063_probe(struct i2c_client *client)
        if (!pcf85063)
                return -ENOMEM;
 
-       if (data)
-               config = data;
+       if (client->dev.of_node) {
+               config = of_device_get_match_data(&client->dev);
+               if (!config)
+                       return -ENODEV;
+       } else {
+               enum pcf85063_type type =
+                       i2c_match_id(pcf85063_ids, client)->driver_data;
+               if (type >= PCF85063_LAST_ID)
+                       return -ENODEV;
+               config = &pcf85063_cfg[type];
+       }
 
        pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
        if (IS_ERR(pcf85063->regmap))
@@ -604,31 +648,21 @@ static int pcf85063_probe(struct i2c_client *client)
        return devm_rtc_register_device(pcf85063->rtc);
 }
 
-#ifdef CONFIG_OF
-static const struct pcf85063_config pcf85063a_config = {
-       .regmap = {
-               .reg_bits = 8,
-               .val_bits = 8,
-               .max_register = 0x11,
-       },
-       .has_alarms = 1,
-};
-
-static const struct pcf85063_config rv8263_config = {
-       .regmap = {
-               .reg_bits = 8,
-               .val_bits = 8,
-               .max_register = 0x11,
-       },
-       .has_alarms = 1,
-       .force_cap_7000 = 1,
+static const struct i2c_device_id pcf85063_ids[] = {
+       { "pcf85063", PCF85063 },
+       { "pcf85063tp", PCF85063TP },
+       { "pcf85063a", PCF85063A },
+       { "rv8263", RV8263 },
+       {}
 };
+MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
 
+#ifdef CONFIG_OF
 static const struct of_device_id pcf85063_of_match[] = {
-       { .compatible = "nxp,pcf85063", .data = &pcf85063tp_config },
-       { .compatible = "nxp,pcf85063tp", .data = &pcf85063tp_config },
-       { .compatible = "nxp,pcf85063a", .data = &pcf85063a_config },
-       { .compatible = "microcrystal,rv8263", .data = &rv8263_config },
+       { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
+       { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
+       { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
+       { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf85063_of_match);
@@ -640,6 +674,7 @@ static struct i2c_driver pcf85063_driver = {
                .of_match_table = of_match_ptr(pcf85063_of_match),
        },
        .probe_new      = pcf85063_probe,
+       .id_table       = pcf85063_ids,
 };
 
 module_i2c_driver(pcf85063_driver);
index d2f1d8f..cf8119b 100644 (file)
@@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
        if (sa1100_rtc->irq_alarm < 0)
                return -ENXIO;
 
+       sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+       if (IS_ERR(sa1100_rtc->rtc))
+               return PTR_ERR(sa1100_rtc->rtc);
+
        pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
                                resource_size(pxa_rtc->ress));
        if (!pxa_rtc->base) {
index 8098041..cb15983 100644 (file)
 #define RS5C372_REG_MONTH      5
 #define RS5C372_REG_YEAR       6
 #define RS5C372_REG_TRIM       7
-#      define RS5C372_TRIM_XSL         0x80
+#      define RS5C372_TRIM_XSL         0x80            /* only if RS5C372[a|b] */
 #      define RS5C372_TRIM_MASK        0x7F
+#      define R2221TL_TRIM_DEV         (1 << 7)        /* only if R2221TL */
+#      define RS5C372_TRIM_DECR        (1 << 6)
 
 #define RS5C_REG_ALARM_A_MIN   8                       /* or ALARM_W */
 #define RS5C_REG_ALARM_A_HOURS 9
@@ -324,8 +326,12 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
        struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
        u8 tmp = rs5c372->regs[RS5C372_REG_TRIM];
 
-       if (osc)
-               *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+       if (osc) {
+               if (rs5c372->type == rtc_rs5c372a || rs5c372->type == rtc_rs5c372b)
+                       *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
+               else
+                       *osc = 32768;
+       }
 
        if (trim) {
                dev_dbg(&client->dev, "%s: raw trim=%x\n", __func__, tmp);
@@ -485,6 +491,176 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
 #define        rs5c372_rtc_proc        NULL
 #endif
 
+#ifdef CONFIG_RTC_INTF_DEV
+static int rs5c372_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct rs5c372  *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+       unsigned char   ctrl2;
+       int             addr;
+       unsigned int    flags;
+
+       dev_dbg(dev, "%s: cmd=%x\n", __func__, cmd);
+
+       addr = RS5C_ADDR(RS5C_REG_CTRL2);
+       ctrl2 = i2c_smbus_read_byte_data(rs5c->client, addr);
+
+       switch (cmd) {
+       case RTC_VL_READ:
+               flags = 0;
+
+               switch (rs5c->type) {
+               case rtc_r2025sd:
+               case rtc_r2221tl:
+                       if ((rs5c->type == rtc_r2025sd && !(ctrl2 & R2x2x_CTRL2_XSTP)) ||
+                               (rs5c->type == rtc_r2221tl &&  (ctrl2 & R2x2x_CTRL2_XSTP))) {
+                               flags |= RTC_VL_DATA_INVALID;
+                       }
+                       if (ctrl2 & R2x2x_CTRL2_VDET)
+                               flags |= RTC_VL_BACKUP_LOW;
+                       break;
+               default:
+                       if (ctrl2 & RS5C_CTRL2_XSTP)
+                               flags |= RTC_VL_DATA_INVALID;
+                       break;
+               }
+
+               return put_user(flags, (unsigned int __user *)arg);
+       case RTC_VL_CLR:
+               /* clear VDET bit */
+               if (rs5c->type == rtc_r2025sd || rs5c->type == rtc_r2221tl) {
+                       ctrl2 &= ~R2x2x_CTRL2_VDET;
+                       if (i2c_smbus_write_byte_data(rs5c->client, addr, ctrl2) < 0) {
+                               dev_dbg(&rs5c->client->dev, "%s: write error in line %i\n",
+                                               __func__, __LINE__);
+                               return -EIO;
+                       }
+               }
+               return 0;
+       default:
+               return -ENOIOCTLCMD;
+       }
+       return 0;
+}
+#else
+#define rs5c372_ioctl  NULL
+#endif
+
+static int rs5c372_read_offset(struct device *dev, long *offset)
+{
+       struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+       u8 val = rs5c->regs[RS5C372_REG_TRIM];
+       long ppb_per_step = 0;
+       bool decr = val & RS5C372_TRIM_DECR;
+
+       switch (rs5c->type) {
+       case rtc_r2221tl:
+               ppb_per_step = val & R2221TL_TRIM_DEV ? 1017 : 3051;
+               break;
+       case rtc_rs5c372a:
+       case rtc_rs5c372b:
+               ppb_per_step = val & RS5C372_TRIM_XSL ? 3125 : 3051;
+               break;
+       default:
+               ppb_per_step = 3051;
+               break;
+       }
+
+       /* Only bits[0:5] represent the time counts */
+       val &= 0x3F;
+
+       /* If bits[1:5] are all 0, it means no increment or decrement */
+       if (!(val & 0x3E)) {
+               *offset = 0;
+       } else {
+               if (decr)
+                       *offset = -(((~val) & 0x3F) + 1) * ppb_per_step;
+               else
+                       *offset = (val - 1) * ppb_per_step;
+       }
+
+       return 0;
+}
+
+static int rs5c372_set_offset(struct device *dev, long offset)
+{
+       struct rs5c372 *rs5c = i2c_get_clientdata(to_i2c_client(dev));
+       int addr = RS5C_ADDR(RS5C372_REG_TRIM);
+       u8 val = 0;
+       u8 tmp = 0;
+       long ppb_per_step = 3051;
+       long steps = LONG_MIN;
+
+       switch (rs5c->type) {
+       case rtc_rs5c372a:
+       case rtc_rs5c372b:
+               tmp = rs5c->regs[RS5C372_REG_TRIM];
+               if (tmp & RS5C372_TRIM_XSL) {
+                       ppb_per_step = 3125;
+                       val |= RS5C372_TRIM_XSL;
+               }
+               break;
+       case rtc_r2221tl:
+               /*
+                * Check if it is possible to use high resolution mode (DEV=1).
+                * In this mode, the minimum resolution is 2 / (32768 * 20 * 3),
+                * which is about 1017 ppb.
+                */
+               steps = DIV_ROUND_CLOSEST(offset, 1017);
+               if (steps >= -0x3E && steps <= 0x3E) {
+                       ppb_per_step = 1017;
+                       val |= R2221TL_TRIM_DEV;
+               } else {
+                       /*
+                        * offset is out of the range of high resolution mode.
+                        * Try to use low resolution mode (DEV=0). In this mode,
+                        * the minimum resolution is 2 / (32768 * 20), which is
+                        * about 3051 ppb.
+                        */
+                       steps = LONG_MIN;
+               }
+               break;
+       default:
+               break;
+       }
+
+       if (steps == LONG_MIN) {
+               steps = DIV_ROUND_CLOSEST(offset, ppb_per_step);
+               if (steps > 0x3E || steps < -0x3E)
+                       return -ERANGE;
+       }
+
+       if (steps > 0) {
+               val |= steps + 1;
+       } else {
+               val |= RS5C372_TRIM_DECR;
+               val |= (~(-steps - 1)) & 0x3F;
+       }
+
+       if (!steps || !(val & 0x3E)) {
+               /*
+                * if offset is too small, set oscillation adjustment register
+                * or time trimming register with its default value which means
+                * no increment or decrement. But for rs5c372[a|b], the XSL bit
+                * should be kept unchanged.
+                */
+               if (rs5c->type == rtc_rs5c372a || rs5c->type == rtc_rs5c372b)
+                       val &= RS5C372_TRIM_XSL;
+               else
+                       val = 0;
+       }
+
+       dev_dbg(&rs5c->client->dev, "write 0x%x for offset %ld\n", val, offset);
+
+       if (i2c_smbus_write_byte_data(rs5c->client, addr, val) < 0) {
+               dev_err(&rs5c->client->dev, "failed to write 0x%x to reg %d\n", val, addr);
+               return -EIO;
+       }
+
+       rs5c->regs[RS5C372_REG_TRIM] = val;
+
+       return 0;
+}
+
 static const struct rtc_class_ops rs5c372_rtc_ops = {
        .proc           = rs5c372_rtc_proc,
        .read_time      = rs5c372_rtc_read_time,
@@ -492,6 +668,9 @@ static const struct rtc_class_ops rs5c372_rtc_ops = {
        .read_alarm     = rs5c_read_alarm,
        .set_alarm      = rs5c_set_alarm,
        .alarm_irq_enable = rs5c_rtc_alarm_irq_enable,
+       .ioctl          = rs5c372_ioctl,
+       .read_offset    = rs5c372_read_offset,
+       .set_offset     = rs5c372_set_offset,
 };
 
 #if IS_ENABLED(CONFIG_RTC_INTF_SYSFS)
index 0d5ed38..f69e0b1 100644 (file)
@@ -55,6 +55,7 @@
 
 enum rv8803_type {
        rv_8803,
+       rx_8804,
        rx_8900
 };
 
@@ -601,6 +602,7 @@ static int rv8803_probe(struct i2c_client *client,
 
 static const struct i2c_device_id rv8803_id[] = {
        { "rv8803", rv_8803 },
+       { "rv8804", rx_8804 },
        { "rx8803", rv_8803 },
        { "rx8900", rx_8900 },
        { }
@@ -617,6 +619,10 @@ static const __maybe_unused struct of_device_id rv8803_of_match[] = {
                .data = (void *)rv_8803
        },
        {
+               .compatible = "epson,rx8804",
+               .data = (void *)rx_8804
+       },
+       {
                .compatible = "epson,rx8900",
                .data = (void *)rx_8900
        },
diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c
new file mode 100644 (file)
index 0000000..e8e2ab1
--- /dev/null
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * The RTC driver for Sunplus SP7021
+ *
+ * Copyright (C) 2019 Sunplus Technology Inc., All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/rtc.h>
+
+#define RTC_REG_NAME                   "rtc"
+
+#define RTC_CTRL                       0x40
+#define TIMER_FREEZE_MASK_BIT          BIT(5 + 16)
+#define TIMER_FREEZE                   BIT(5)
+#define DIS_SYS_RST_RTC_MASK_BIT       BIT(4 + 16)
+#define DIS_SYS_RST_RTC                        BIT(4)
+#define RTC32K_MODE_RESET_MASK_BIT     BIT(3 + 16)
+#define RTC32K_MODE_RESET              BIT(3)
+#define ALARM_EN_OVERDUE_MASK_BIT      BIT(2 + 16)
+#define ALARM_EN_OVERDUE               BIT(2)
+#define ALARM_EN_PMC_MASK_BIT          BIT(1 + 16)
+#define ALARM_EN_PMC                   BIT(1)
+#define ALARM_EN_MASK_BIT              BIT(0 + 16)
+#define ALARM_EN                       BIT(0)
+#define RTC_TIMER_OUT                  0x44
+#define RTC_DIVIDER                    0x48
+#define RTC_TIMER_SET                  0x4c
+#define RTC_ALARM_SET                  0x50
+#define RTC_USER_DATA                  0x54
+#define RTC_RESET_RECORD               0x58
+#define RTC_BATT_CHARGE_CTRL           0x5c
+#define BAT_CHARGE_RSEL_MASK_BIT       GENMASK(3 + 16, 2 + 16)
+#define BAT_CHARGE_RSEL_MASK           GENMASK(3, 2)
+#define BAT_CHARGE_RSEL_2K_OHM         FIELD_PREP(BAT_CHARGE_RSEL_MASK, 0)
+#define BAT_CHARGE_RSEL_250_OHM                FIELD_PREP(BAT_CHARGE_RSEL_MASK, 1)
+#define BAT_CHARGE_RSEL_50_OHM         FIELD_PREP(BAT_CHARGE_RSEL_MASK, 2)
+#define BAT_CHARGE_RSEL_0_OHM          FIELD_PREP(BAT_CHARGE_RSEL_MASK, 3)
+#define BAT_CHARGE_DSEL_MASK_BIT       BIT(1 + 16)
+#define BAT_CHARGE_DSEL_MASK           GENMASK(1, 1)
+#define BAT_CHARGE_DSEL_ON             FIELD_PREP(BAT_CHARGE_DSEL_MASK, 0)
+#define BAT_CHARGE_DSEL_OFF            FIELD_PREP(BAT_CHARGE_DSEL_MASK, 1)
+#define BAT_CHARGE_EN_MASK_BIT         BIT(0 + 16)
+#define BAT_CHARGE_EN                  BIT(0)
+#define RTC_TRIM_CTRL                  0x60
+
+struct sunplus_rtc {
+       struct rtc_device *rtc;
+       struct resource *res;
+       struct clk *rtcclk;
+       struct reset_control *rstc;
+       void __iomem *reg_base;
+       int irq;
+};
+
+static void sp_get_seconds(struct device *dev, unsigned long *secs)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+       *secs = (unsigned long)readl(sp_rtc->reg_base + RTC_TIMER_OUT);
+}
+
+static void sp_set_seconds(struct device *dev, unsigned long secs)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+       writel((u32)secs, sp_rtc->reg_base + RTC_TIMER_SET);
+}
+
+static int sp_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       unsigned long secs;
+
+       sp_get_seconds(dev, &secs);
+       rtc_time64_to_tm(secs, tm);
+
+       return 0;
+}
+
+static int sp_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       unsigned long secs;
+
+       secs = rtc_tm_to_time64(tm);
+       dev_dbg(dev, "%s, secs = %lu\n", __func__, secs);
+       sp_set_seconds(dev, secs);
+
+       return 0;
+}
+
+static int sp_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+       unsigned long alarm_time;
+
+       alarm_time = rtc_tm_to_time64(&alrm->time);
+       dev_dbg(dev, "%s, alarm_time: %u\n", __func__, (u32)(alarm_time));
+       writel((u32)alarm_time, sp_rtc->reg_base + RTC_ALARM_SET);
+
+       return 0;
+}
+
+static int sp_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+       unsigned int alarm_time;
+
+       alarm_time = readl(sp_rtc->reg_base + RTC_ALARM_SET);
+       dev_dbg(dev, "%s, alarm_time: %u\n", __func__, alarm_time);
+
+       if (alarm_time == 0)
+               alrm->enabled = 0;
+       else
+               alrm->enabled = 1;
+
+       rtc_time64_to_tm((unsigned long)(alarm_time), &alrm->time);
+
+       return 0;
+}
+
+static int sp_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+       if (enabled)
+               writel((TIMER_FREEZE_MASK_BIT | DIS_SYS_RST_RTC_MASK_BIT |
+                       RTC32K_MODE_RESET_MASK_BIT | ALARM_EN_OVERDUE_MASK_BIT |
+                       ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+                       (DIS_SYS_RST_RTC | ALARM_EN_OVERDUE | ALARM_EN_PMC | ALARM_EN),
+                       sp_rtc->reg_base + RTC_CTRL);
+       else
+               writel((ALARM_EN_OVERDUE_MASK_BIT | ALARM_EN_PMC_MASK_BIT | ALARM_EN_MASK_BIT) |
+                       0x0, sp_rtc->reg_base + RTC_CTRL);
+
+       return 0;
+}
+
+static const struct rtc_class_ops sp_rtc_ops = {
+       .read_time =            sp_rtc_read_time,
+       .set_time =             sp_rtc_set_time,
+       .set_alarm =            sp_rtc_set_alarm,
+       .read_alarm =           sp_rtc_read_alarm,
+       .alarm_irq_enable =     sp_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t sp_rtc_irq_handler(int irq, void *dev_id)
+{
+       struct platform_device *plat_dev = dev_id;
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev);
+
+       rtc_update_irq(sp_rtc->rtc, 1, RTC_IRQF | RTC_AF);
+       dev_dbg(&plat_dev->dev, "[RTC] ALARM INT\n");
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * -------------------------------------------------------------------------------------
+ * bat_charge_rsel   bat_charge_dsel   bat_charge_en     Remarks
+ *         x              x                 0            Disable
+ *         0              0                 1            0.86mA (2K Ohm with diode)
+ *         1              0                 1            1.81mA (250 Ohm with diode)
+ *         2              0                 1            2.07mA (50 Ohm with diode)
+ *         3              0                 1            16.0mA (0 Ohm with diode)
+ *         0              1                 1            1.36mA (2K Ohm without diode)
+ *         1              1                 1            3.99mA (250 Ohm without diode)
+ *         2              1                 1            4.41mA (50 Ohm without diode)
+ *         3              1                 1            16.0mA (0 Ohm without diode)
+ * -------------------------------------------------------------------------------------
+ */
+static void sp_rtc_set_trickle_charger(struct device *dev)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+       u32 ohms, rsel;
+       u32 chargeable;
+
+       if (of_property_read_u32(dev->of_node, "trickle-resistor-ohms", &ohms) ||
+           of_property_read_u32(dev->of_node, "aux-voltage-chargeable", &chargeable)) {
+               dev_warn(dev, "battery charger disabled\n");
+               return;
+       }
+
+       switch (ohms) {
+       case 2000:
+               rsel = BAT_CHARGE_RSEL_2K_OHM;
+               break;
+       case 250:
+               rsel = BAT_CHARGE_RSEL_250_OHM;
+               break;
+       case 50:
+               rsel = BAT_CHARGE_RSEL_50_OHM;
+               break;
+       case 0:
+               rsel = BAT_CHARGE_RSEL_0_OHM;
+               break;
+       default:
+               dev_err(dev, "invalid charger resistor value (%d)\n", ohms);
+               return;
+       }
+
+       writel(BAT_CHARGE_RSEL_MASK_BIT | rsel, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+
+       switch (chargeable) {
+       case 0:
+               writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_OFF,
+                      sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+               break;
+       case 1:
+               writel(BAT_CHARGE_DSEL_MASK_BIT | BAT_CHARGE_DSEL_ON,
+                      sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+               break;
+       default:
+               dev_err(dev, "invalid aux-voltage-chargeable value (%d)\n", chargeable);
+               return;
+       }
+
+       writel(BAT_CHARGE_EN_MASK_BIT | BAT_CHARGE_EN, sp_rtc->reg_base + RTC_BATT_CHARGE_CTRL);
+}
+
+static int sp_rtc_probe(struct platform_device *plat_dev)
+{
+       struct sunplus_rtc *sp_rtc;
+       int ret;
+
+       sp_rtc = devm_kzalloc(&plat_dev->dev, sizeof(*sp_rtc), GFP_KERNEL);
+       if (!sp_rtc)
+               return -ENOMEM;
+
+       sp_rtc->res = platform_get_resource_byname(plat_dev, IORESOURCE_MEM, RTC_REG_NAME);
+       sp_rtc->reg_base = devm_ioremap_resource(&plat_dev->dev, sp_rtc->res);
+       if (IS_ERR(sp_rtc->reg_base))
+               return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->reg_base),
+                                           "%s devm_ioremap_resource fail\n", RTC_REG_NAME);
+       dev_dbg(&plat_dev->dev, "res = 0x%x, reg_base = 0x%lx\n",
+               sp_rtc->res->start, (unsigned long)sp_rtc->reg_base);
+
+       sp_rtc->irq = platform_get_irq(plat_dev, 0);
+       if (sp_rtc->irq < 0)
+               return dev_err_probe(&plat_dev->dev, sp_rtc->irq, "platform_get_irq failed\n");
+
+       ret = devm_request_irq(&plat_dev->dev, sp_rtc->irq, sp_rtc_irq_handler,
+                              IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
+       if (ret)
+               return dev_err_probe(&plat_dev->dev, ret, "devm_request_irq failed:\n");
+
+       sp_rtc->rtcclk = devm_clk_get(&plat_dev->dev, NULL);
+       if (IS_ERR(sp_rtc->rtcclk))
+               return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->rtcclk),
+                                           "devm_clk_get fail\n");
+
+       sp_rtc->rstc = devm_reset_control_get_exclusive(&plat_dev->dev, NULL);
+       if (IS_ERR(sp_rtc->rstc))
+               return dev_err_probe(&plat_dev->dev, PTR_ERR(sp_rtc->rstc),
+                                           "failed to retrieve reset controller\n");
+
+       ret = clk_prepare_enable(sp_rtc->rtcclk);
+       if (ret)
+               goto free_clk;
+
+       ret = reset_control_deassert(sp_rtc->rstc);
+       if (ret)
+               goto free_reset_assert;
+
+       device_init_wakeup(&plat_dev->dev, 1);
+       dev_set_drvdata(&plat_dev->dev, sp_rtc);
+
+       sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev);
+       if (IS_ERR(sp_rtc->rtc)) {
+               ret = PTR_ERR(sp_rtc->rtc);
+               goto free_reset_assert;
+       }
+
+       sp_rtc->rtc->range_max = U32_MAX;
+       sp_rtc->rtc->range_min = 0;
+       sp_rtc->rtc->ops = &sp_rtc_ops;
+
+       ret = devm_rtc_register_device(sp_rtc->rtc);
+       if (ret)
+               goto free_reset_assert;
+
+       /* Setup trickle charger */
+       if (plat_dev->dev.of_node)
+               sp_rtc_set_trickle_charger(&plat_dev->dev);
+
+       /* Keep RTC from system reset */
+       writel(DIS_SYS_RST_RTC_MASK_BIT | DIS_SYS_RST_RTC, sp_rtc->reg_base + RTC_CTRL);
+
+       return 0;
+
+free_reset_assert:
+       reset_control_assert(sp_rtc->rstc);
+free_clk:
+       clk_disable_unprepare(sp_rtc->rtcclk);
+
+       return ret;
+}
+
+static int sp_rtc_remove(struct platform_device *plat_dev)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev);
+
+       device_init_wakeup(&plat_dev->dev, 0);
+       reset_control_assert(sp_rtc->rstc);
+       clk_disable_unprepare(sp_rtc->rtcclk);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sp_rtc_suspend(struct device *dev)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+       if (device_may_wakeup(dev))
+               enable_irq_wake(sp_rtc->irq);
+
+       return 0;
+}
+
+static int sp_rtc_resume(struct device *dev)
+{
+       struct sunplus_rtc *sp_rtc = dev_get_drvdata(dev);
+
+       if (device_may_wakeup(dev))
+               disable_irq_wake(sp_rtc->irq);
+
+       return 0;
+}
+#endif
+
+static const struct of_device_id sp_rtc_of_match[] = {
+       { .compatible = "sunplus,sp7021-rtc" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sp_rtc_of_match);
+
+static SIMPLE_DEV_PM_OPS(sp_rtc_pm_ops, sp_rtc_suspend, sp_rtc_resume);
+
+static struct platform_driver sp_rtc_driver = {
+       .probe   = sp_rtc_probe,
+       .remove  = sp_rtc_remove,
+       .driver  = {
+               .name   = "sp7021-rtc",
+               .of_match_table = sp_rtc_of_match,
+               .pm     = &sp_rtc_pm_ops,
+       },
+};
+module_platform_driver(sp_rtc_driver);
+
+MODULE_AUTHOR("Vincent Shih <vincent.sunplus@gmail.com>");
+MODULE_DESCRIPTION("Sunplus RTC driver");
+MODULE_LICENSE("GPL v2");
+
index 362f91e..352c725 100644 (file)
@@ -309,13 +309,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
 
        if ((pos & 3) && size > 2) {
                u16 val;
+               __le16 lval;
 
                ret = pci_user_read_config_word(pdev, pos, &val);
                if (ret)
                        return ret;
 
-               val = cpu_to_le16(val);
-               if (copy_to_user(buf + count - size, &val, 2))
+               lval = cpu_to_le16(val);
+               if (copy_to_user(buf + count - size, &lval, 2))
                        return -EFAULT;
 
                pos += 2;
@@ -324,13 +325,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
 
        while (size > 3) {
                u32 val;
+               __le32 lval;
 
                ret = pci_user_read_config_dword(pdev, pos, &val);
                if (ret)
                        return ret;
 
-               val = cpu_to_le32(val);
-               if (copy_to_user(buf + count - size, &val, 4))
+               lval = cpu_to_le32(val);
+               if (copy_to_user(buf + count - size, &lval, 4))
                        return -EFAULT;
 
                pos += 4;
@@ -339,13 +341,14 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
 
        while (size >= 2) {
                u16 val;
+               __le16 lval;
 
                ret = pci_user_read_config_word(pdev, pos, &val);
                if (ret)
                        return ret;
 
-               val = cpu_to_le16(val);
-               if (copy_to_user(buf + count - size, &val, 2))
+               lval = cpu_to_le16(val);
+               if (copy_to_user(buf + count - size, &lval, 2))
                        return -EFAULT;
 
                pos += 2;
index f17490a..9394aa9 100644 (file)
@@ -256,7 +256,7 @@ static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
 
 static void vfio_dma_bitmap_free(struct vfio_dma *dma)
 {
-       kfree(dma->bitmap);
+       kvfree(dma->bitmap);
        dma->bitmap = NULL;
 }
 
index adbb3a1..5156821 100644 (file)
@@ -355,7 +355,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        struct super_block *sb = inode->i_sb;
        struct object_info obj;
-       int ret;
 
        obj.indaddr     = ADFS_I(inode)->indaddr;
        obj.name_len    = 0;
@@ -365,6 +364,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        obj.attr        = ADFS_I(inode)->attr;
        obj.size        = inode->i_size;
 
-       ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
-       return ret;
+       return adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
 }
index f8c7f26..605017e 100644 (file)
@@ -1116,11 +1116,11 @@ out_free_interp:
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
                         */
-                       if (interpreter) {
+                       alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+                       if (alignment > ELF_MIN_ALIGN) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
-                               alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
                                if (alignment)
                                        load_bias &= ~(alignment - 1);
                                elf_flags |= MAP_FIXED_NOREPLACE;
@@ -1585,7 +1585,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
-       strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+       get_task_comm(psinfo->pr_fname, p);
 
        return 0;
 }
index 520a0f6..183e5c4 100644 (file)
@@ -18,8 +18,7 @@ config BTRFS_FS
        select RAID6_PQ
        select XOR_BLOCKS
        select SRCU
-       depends on !PPC_256K_PAGES      # powerpc
-       depends on !PAGE_SIZE_256KB     # hexagon
+       depends on PAGE_SIZE_LESS_THAN_256KB
 
        help
          Btrfs is a general purpose copy-on-write filesystem with extents,
index 7d2c33c..7d305b9 100644 (file)
@@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode,
        if ((newcaps & CEPH_CAP_LINK_SHARED) &&
            (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
                set_nlink(inode, le32_to_cpu(grant->nlink));
-               if (inode->i_nlink == 0 &&
-                   (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+               if (inode->i_nlink == 0)
                        deleted_inode = true;
        }
 
index 9d9304e..5b9104b 100644 (file)
@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
                                        int fmode, bool isdir)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_mount_options *opt =
+               ceph_inode_to_client(&ci->vfs_inode)->mount_options;
        struct ceph_file_info *fi;
 
        dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
                if (!fi)
                        return -ENOMEM;
 
+               if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+                       fi->flags |= CEPH_F_SYNC;
+
                file->private_data = fi;
        }
 
@@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
        struct ceph_inode_info *ci = ceph_inode(inode);
        bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
        ssize_t ret;
-       int want, got = 0;
+       int want = 0, got = 0;
        int retry_op = 0, read = 0;
 
 again:
@@ -1556,13 +1561,14 @@ again:
        else
                ceph_start_io_read(inode);
 
+       if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+               want |= CEPH_CAP_FILE_CACHE;
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
-               want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
-       else
-               want = CEPH_CAP_FILE_CACHE;
+               want |= CEPH_CAP_FILE_LAZYIO;
+
        ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
        if (ret < 0) {
-               if (iocb->ki_flags & IOCB_DIRECT)
+               if (direct_lock)
                        ceph_end_io_direct(inode);
                else
                        ceph_end_io_read(inode);
@@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_cap_flush *prealloc_cf;
        ssize_t count, written = 0;
-       int err, want, got;
+       int err, want = 0, got;
        bool direct_lock = false;
        u32 map_flags;
        u64 pool_flags;
@@ -1771,10 +1777,10 @@ retry_snap:
 
        dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
             inode, ceph_vinop(inode), pos, count, i_size_read(inode));
+       if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+               want |= CEPH_CAP_FILE_BUFFER;
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
-               want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
-       else
-               want = CEPH_CAP_FILE_BUFFER;
+               want |= CEPH_CAP_FILE_LAZYIO;
        got = 0;
        err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
        if (err < 0)
index c57699d..0fcba68 100644 (file)
@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        msg->hdr.version = cpu_to_le16(1);
        msg->hdr.compat_version = cpu_to_le16(1);
        msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
-       dout("client%llu send metrics to mds%d\n",
-            ceph_client_gid(mdsc->fsc->client), s->s_mds);
        ceph_con_send(&s->s_con, msg);
 
        return true;
index 620c691..a338a3e 100644 (file)
@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
        /* if root is the real CephFS root, we don't have quota realms */
        if (root && ceph_ino(root) == CEPH_INO_ROOT)
                return false;
+       /* MDS stray dirs have no quota realms */
+       if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
+               return false;
        /* otherwise, we can't know for sure */
        return true;
 }
@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
                if (ci->i_max_bytes) {
                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+                       /* For quota size less than 4MB, use 4KB block size */
+                       if (!total) {
+                               total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
+                               used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
+                               buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+                       }
                        /* It is possible for a quota to be exceeded.
                         * Report 'zero' in that case
                         */
                        free = total > used ? total - used : 0;
+                       /* For quota size less than 4KB, report the
+                        * total=used=4KB,free=0 when quota is full
+                        * and total=free=4KB, used=0 otherwise */
+                       if (!total) {
+                               total = 1;
+                               free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
+                               buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+                       }
                }
                spin_unlock(&ci->i_ceph_lock);
                if (total) {
index bea89bd..bf79f36 100644 (file)
@@ -27,6 +27,8 @@
 #include <linux/ceph/auth.h>
 #include <linux/ceph/debugfs.h>
 
+#include <uapi/linux/magic.h>
+
 static DEFINE_SPINLOCK(ceph_fsc_lock);
 static LIST_HEAD(ceph_fsc_list);
 
@@ -146,6 +148,7 @@ enum {
        Opt_mds_namespace,
        Opt_recover_session,
        Opt_source,
+       Opt_mon_addr,
        /* string args above */
        Opt_dirstat,
        Opt_rbytes,
@@ -159,6 +162,7 @@ enum {
        Opt_quotadf,
        Opt_copyfrom,
        Opt_wsync,
+       Opt_pagecache,
 };
 
 enum ceph_recover_session_mode {
@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
        fsparam_u32     ("rsize",                       Opt_rsize),
        fsparam_string  ("snapdirname",                 Opt_snapdirname),
        fsparam_string  ("source",                      Opt_source),
+       fsparam_string  ("mon_addr",                    Opt_mon_addr),
        fsparam_u32     ("wsize",                       Opt_wsize),
        fsparam_flag_no ("wsync",                       Opt_wsync),
+       fsparam_flag_no ("pagecache",                   Opt_pagecache),
        {}
 };
 
@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
 }
 
 /*
- * Parse the source parameter.  Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+                           const char *namespace, size_t len)
+{
+       return !(fsopt->mds_namespace &&
+                (strlen(fsopt->mds_namespace) != len ||
+                 strncmp(fsopt->mds_namespace, namespace, len)));
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+                                struct fs_context *fc)
+{
+       int r;
+       struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+       struct ceph_mount_options *fsopt = pctx->opts;
+
+       if (*dev_name_end != ':')
+               return invalfc(fc, "separator ':' missing in source");
+
+       r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+                              pctx->copts, fc->log.log, ',');
+       if (r)
+               return r;
+
+       fsopt->new_dev_syntax = false;
+       return 0;
+}
+
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+                                struct fs_context *fc)
+{
+       size_t len;
+       struct ceph_fsid fsid;
+       struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+       struct ceph_mount_options *fsopt = pctx->opts;
+       char *fsid_start, *fs_name_start;
+
+       if (*dev_name_end != '=') {
+               dout("separator '=' missing in source");
+               return -EINVAL;
+       }
+
+       fsid_start = strchr(dev_name, '@');
+       if (!fsid_start)
+               return invalfc(fc, "missing cluster fsid");
+       ++fsid_start; /* start of cluster fsid */
+
+       fs_name_start = strchr(fsid_start, '.');
+       if (!fs_name_start)
+               return invalfc(fc, "missing file system name");
+
+       if (ceph_parse_fsid(fsid_start, &fsid))
+               return invalfc(fc, "Invalid FSID");
+
+       ++fs_name_start; /* start of file system name */
+       len = dev_name_end - fs_name_start;
+
+       if (!namespace_equals(fsopt, fs_name_start, len))
+               return invalfc(fc, "Mismatching mds_namespace");
+       kfree(fsopt->mds_namespace);
+       fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+       if (!fsopt->mds_namespace)
+               return -ENOMEM;
+       dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+       fsopt->new_dev_syntax = true;
+       return 0;
+}
+
+/*
+ * Parse the source parameter for new device format. Distinguish the device
+ * spec from the path. Try parsing new device format and fallback to old
+ * format if needed.
+ *
+ * New device syntax will look like:
+ *     <device_spec>=/<path>
+ * where
+ *     <device_spec> is name@fsid.fsname
+ *     <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
  *
- * The source will look like:
+ * Old device syntax is:
  *     <server_spec>[,<server_spec>...]:[<path>]
  * where
  *     <server_spec> is <ip>[:<port>]
@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
                dev_name_end = dev_name + strlen(dev_name);
        }
 
-       dev_name_end--;         /* back up to ':' separator */
-       if (dev_name_end < dev_name || *dev_name_end != ':')
-               return invalfc(fc, "No path or : separator in source");
+       dev_name_end--;         /* back up to separator */
+       if (dev_name_end < dev_name)
+               return invalfc(fc, "Path missing in source");
 
        dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
        if (fsopt->server_path)
                dout("server path '%s'\n", fsopt->server_path);
 
-       ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
-                                pctx->copts, fc->log.log);
-       if (ret)
-               return ret;
+       dout("trying new device syntax");
+       ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+       if (ret) {
+               if (ret != -EINVAL)
+                       return ret;
+               dout("trying old device syntax");
+               ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+               if (ret)
+                       return ret;
+       }
 
        fc->source = param->string;
        param->string = NULL;
        return 0;
 }
 
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+                              struct fs_context *fc)
+{
+       struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+       struct ceph_mount_options *fsopt = pctx->opts;
+
+       kfree(fsopt->mon_addr);
+       fsopt->mon_addr = param->string;
+       param->string = NULL;
+
+       return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
+                                 pctx->copts, fc->log.log, '/');
+}
+
 static int ceph_parse_mount_param(struct fs_context *fc,
                                  struct fs_parameter *param)
 {
@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
                param->string = NULL;
                break;
        case Opt_mds_namespace:
+               if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+                       return invalfc(fc, "Mismatching mds_namespace");
                kfree(fsopt->mds_namespace);
                fsopt->mds_namespace = param->string;
                param->string = NULL;
@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
                if (fc->source)
                        return invalfc(fc, "Multiple sources specified");
                return ceph_parse_source(param, fc);
+       case Opt_mon_addr:
+               return ceph_parse_mon_addr(param, fc);
        case Opt_wsize:
                if (result.uint_32 < PAGE_SIZE ||
                    result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
                break;
+       case Opt_pagecache:
+               if (result.negated)
+                       fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+               else
+                       fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+               break;
        default:
                BUG();
        }
@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
        kfree(args->mds_namespace);
        kfree(args->server_path);
        kfree(args->fscache_uniq);
+       kfree(args->mon_addr);
        kfree(args);
 }
 
@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
        if (ret)
                return ret;
 
+       ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+       if (ret)
+               return ret;
+
        return ceph_compare_options(new_opt, fsc->client);
 }
 
@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
        if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
                seq_puts(m, ",copyfrom");
 
-       if (fsopt->mds_namespace)
+       /* dump mds_namespace when old device syntax is in use */
+       if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
                seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
 
+       if (fsopt->mon_addr)
+               seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
        if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
                seq_show_option(m, "recover_session", "clean");
 
        if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
                seq_puts(m, ",wsync");
 
+       if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+               seq_puts(m, ",nopagecache");
+
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%u", fsopt->wsize);
        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
 static int ceph_get_tree(struct fs_context *fc)
 {
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+       struct ceph_mount_options *fsopt = pctx->opts;
        struct super_block *sb;
        struct ceph_fs_client *fsc;
        struct dentry *res;
@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
 
        if (!fc->source)
                return invalfc(fc, "No source");
+       if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+               return invalfc(fc, "No monitor address");
 
        /* create client (which we may/may not use) */
        fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
        else
                ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
 
+       if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+               kfree(fsc->mount_options->mon_addr);
+               fsc->mount_options->mon_addr = fsopt->mon_addr;
+               fsopt->mon_addr = NULL;
+               pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+       }
+
        sync_filesystem(fc->root->d_sb);
        return 0;
 }
@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
 module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
 
+/* for both v1 and v2 syntax */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+       .get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
 module_init(init_ceph);
 module_exit(exit_ceph);
 
index d0142cc..67f145e 100644 (file)
 #include <linux/fscache.h>
 #endif
 
-/* f_type in struct statfs */
-#define CEPH_SUPER_MAGIC 0x00c36400
-
 /* large granularity for statfs utilization stats to facilitate
  * large volume sizes on 32-bit machines. */
 #define CEPH_BLOCK_SHIFT   22  /* 4 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
+#define CEPH_4K_BLOCK_SHIFT 12  /* 4 KB */
 
 #define CEPH_MOUNT_OPT_CLEANRECOVER    (1<<1) /* auto reonnect (clean mode) after blocklisted */
 #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
@@ -44,6 +42,7 @@
 #define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
 #define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
 #define CEPH_MOUNT_OPT_ASYNC_DIROPS    (1<<15) /* allow async directory ops */
+#define CEPH_MOUNT_OPT_NOPAGECACHE     (1<<16) /* bypass pagecache altogether */
 
 #define CEPH_MOUNT_OPT_DEFAULT                 \
        (CEPH_MOUNT_OPT_DCACHE |                \
@@ -88,6 +87,8 @@ struct ceph_mount_options {
        unsigned int max_readdir;       /* max readdir result (entries) */
        unsigned int max_readdir_bytes; /* max readdir result (bytes) */
 
+       bool new_dev_syntax;
+
        /*
         * everything above this point can be memcmp'd; everything below
         * is handled in compare_mount_options()
@@ -97,6 +98,7 @@ struct ceph_mount_options {
        char *mds_namespace;  /* default NULL */
        char *server_path;    /* default NULL (means "/") */
        char *fscache_uniq;   /* default NULL */
+       char *mon_addr;
 };
 
 struct ceph_fs_client {
@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
  *
  * These come from src/mds/mdstypes.h in the ceph sources.
  */
-#define CEPH_MAX_MDS           0x100
-#define CEPH_NUM_STRAY         10
+#define CEPH_MAX_MDS                   0x100
+#define CEPH_NUM_STRAY                 10
 #define CEPH_MDS_INO_MDSDIR_OFFSET     (1 * CEPH_MAX_MDS)
+#define CEPH_MDS_INO_LOG_OFFSET                (2 * CEPH_MAX_MDS)
 #define CEPH_INO_SYSTEM_BASE           ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
 
 static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
 {
-       if (vino.ino < CEPH_INO_SYSTEM_BASE &&
-           vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
-               WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
-               return true;
-       }
-       return false;
+       if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
+           vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
+               return false;
+
+       /* Don't warn on mdsdirs */
+       WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
+                       "Attempt to access reserved inode number 0x%llx",
+                       vino.ino);
+       return true;
 }
 
 static inline struct inode *ceph_find_inode(struct super_block *sb,
index 82db656..3c3c366 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1207,7 +1207,8 @@ static int unshare_sighand(struct task_struct *me)
 char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
        task_lock(tsk);
-       strncpy(buf, tsk->comm, buf_size);
+       /* Always NUL terminated and zero-padded */
+       strscpy_pad(buf, tsk->comm, buf_size);
        task_unlock(tsk);
        return buf;
 }
@@ -1222,7 +1223,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
        task_lock(tsk);
        trace_task_rename(tsk, buf);
-       strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+       strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
        task_unlock(tsk);
        perf_event_comm(tsk, exec);
 }
index 7eea3cf..f46a733 100644 (file)
@@ -7,6 +7,7 @@ config F2FS_FS
        select CRYPTO_CRC32
        select F2FS_FS_XATTR if FS_ENCRYPTION
        select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+       select FS_IOMAP
        select LZ4_COMPRESS if F2FS_FS_LZ4
        select LZ4_DECOMPRESS if F2FS_FS_LZ4
        select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
index f1693d4..982f017 100644 (file)
@@ -664,7 +664,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
        /* truncate all the data during iput */
        iput(inode);
 
-       err = f2fs_get_node_info(sbi, ino, &ni);
+       err = f2fs_get_node_info(sbi, ino, &ni, false);
        if (err)
                goto err_out;
 
@@ -1302,8 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        unsigned long flags;
 
        if (cpc->reason & CP_UMOUNT) {
-               if (le32_to_cpu(ckpt->cp_pack_total_block_count) >
-                       sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
+               if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
+                       NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
                        clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
                        f2fs_notice(sbi, "Disable nat_bits due to no space");
                } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
index 49121a2..d0c3aeb 100644 (file)
@@ -154,6 +154,7 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
        cc->rpages = NULL;
        cc->nr_rpages = 0;
        cc->nr_cpages = 0;
+       cc->valid_nr_cpages = 0;
        if (!reuse)
                cc->cluster_idx = NULL_CLUSTER;
 }
@@ -620,7 +621,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
        const struct f2fs_compress_ops *cops =
                                f2fs_cops[fi->i_compress_algorithm];
        unsigned int max_len, new_nr_cpages;
-       struct page **new_cpages;
        u32 chksum = 0;
        int i, ret;
 
@@ -635,6 +635,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
 
        max_len = COMPRESS_HEADER_SIZE + cc->clen;
        cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
+       cc->valid_nr_cpages = cc->nr_cpages;
 
        cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
        if (!cc->cpages) {
@@ -685,13 +686,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
 
        new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
 
-       /* Now we're going to cut unnecessary tail pages */
-       new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
-       if (!new_cpages) {
-               ret = -ENOMEM;
-               goto out_vunmap_cbuf;
-       }
-
        /* zero out any unused part of the last page */
        memset(&cc->cbuf->cdata[cc->clen], 0,
                        (new_nr_cpages * PAGE_SIZE) -
@@ -701,10 +695,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
        vm_unmap_ram(cc->rbuf, cc->cluster_size);
 
        for (i = 0; i < cc->nr_cpages; i++) {
-               if (i < new_nr_cpages) {
-                       new_cpages[i] = cc->cpages[i];
+               if (i < new_nr_cpages)
                        continue;
-               }
                f2fs_compress_free_page(cc->cpages[i]);
                cc->cpages[i] = NULL;
        }
@@ -712,9 +704,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
        if (cops->destroy_compress_ctx)
                cops->destroy_compress_ctx(cc);
 
-       page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
-       cc->cpages = new_cpages;
-       cc->nr_cpages = new_nr_cpages;
+       cc->valid_nr_cpages = new_nr_cpages;
 
        trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
                                                        cc->clen, ret);
@@ -1296,7 +1286,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
 
        psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT;
 
-       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
        if (err)
                goto out_put_dnode;
 
@@ -1308,14 +1298,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
 
        cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
        cic->inode = inode;
-       atomic_set(&cic->pending_pages, cc->nr_cpages);
+       atomic_set(&cic->pending_pages, cc->valid_nr_cpages);
        cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
        if (!cic->rpages)
                goto out_put_cic;
 
        cic->nr_rpages = cc->cluster_size;
 
-       for (i = 0; i < cc->nr_cpages; i++) {
+       for (i = 0; i < cc->valid_nr_cpages; i++) {
                f2fs_set_compressed_page(cc->cpages[i], inode,
                                        cc->rpages[i + 1]->index, cic);
                fio.compressed_page = cc->cpages[i];
@@ -1360,7 +1350,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
                if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr))
                        fio.compr_blocks++;
 
-               if (i > cc->nr_cpages) {
+               if (i > cc->valid_nr_cpages) {
                        if (__is_valid_data_blkaddr(blkaddr)) {
                                f2fs_invalidate_blocks(sbi, blkaddr);
                                f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -1385,8 +1375,8 @@ unlock_continue:
 
        if (fio.compr_blocks)
                f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
-       f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
-       add_compr_block_stat(inode, cc->nr_cpages);
+       f2fs_i_compr_blocks_update(inode, cc->valid_nr_cpages, true);
+       add_compr_block_stat(inode, cc->valid_nr_cpages);
 
        set_inode_flag(cc->inode, FI_APPEND_WRITE);
        if (cc->cluster_idx == 0)
@@ -1424,9 +1414,7 @@ out_unlock_op:
        else
                f2fs_unlock_op(sbi);
 out_free:
-       for (i = 0; i < cc->nr_cpages; i++) {
-               if (!cc->cpages[i])
-                       continue;
+       for (i = 0; i < cc->valid_nr_cpages; i++) {
                f2fs_compress_free_page(cc->cpages[i]);
                cc->cpages[i] = NULL;
        }
@@ -1468,25 +1456,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
                                        enum iostat_type io_type)
 {
        struct address_space *mapping = cc->inode->i_mapping;
-       int _submitted, compr_blocks, ret;
-       int i = -1, err = 0;
+       int _submitted, compr_blocks, ret, i;
 
        compr_blocks = f2fs_compressed_blocks(cc);
-       if (compr_blocks < 0) {
-               err = compr_blocks;
-               goto out_err;
+
+       for (i = 0; i < cc->cluster_size; i++) {
+               if (!cc->rpages[i])
+                       continue;
+
+               redirty_page_for_writepage(wbc, cc->rpages[i]);
+               unlock_page(cc->rpages[i]);
        }
 
+       if (compr_blocks < 0)
+               return compr_blocks;
+
        for (i = 0; i < cc->cluster_size; i++) {
                if (!cc->rpages[i])
                        continue;
 retry_write:
+               lock_page(cc->rpages[i]);
+
                if (cc->rpages[i]->mapping != mapping) {
+continue_unlock:
                        unlock_page(cc->rpages[i]);
                        continue;
                }
 
-               BUG_ON(!PageLocked(cc->rpages[i]));
+               if (!PageDirty(cc->rpages[i]))
+                       goto continue_unlock;
+
+               if (!clear_page_dirty_for_io(cc->rpages[i]))
+                       goto continue_unlock;
 
                ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
                                                NULL, NULL, wbc, io_type,
@@ -1501,26 +1502,15 @@ retry_write:
                                 * avoid deadlock caused by cluster update race
                                 * from foreground operation.
                                 */
-                               if (IS_NOQUOTA(cc->inode)) {
-                                       err = 0;
-                                       goto out_err;
-                               }
+                               if (IS_NOQUOTA(cc->inode))
+                                       return 0;
                                ret = 0;
                                cond_resched();
                                congestion_wait(BLK_RW_ASYNC,
                                                DEFAULT_IO_TIMEOUT);
-                               lock_page(cc->rpages[i]);
-
-                               if (!PageDirty(cc->rpages[i])) {
-                                       unlock_page(cc->rpages[i]);
-                                       continue;
-                               }
-
-                               clear_page_dirty_for_io(cc->rpages[i]);
                                goto retry_write;
                        }
-                       err = ret;
-                       goto out_err;
+                       return ret;
                }
 
                *submitted += _submitted;
@@ -1529,14 +1519,6 @@ retry_write:
        f2fs_balance_fs(F2FS_M_SB(mapping), true);
 
        return 0;
-out_err:
-       for (++i; i < cc->cluster_size; i++) {
-               if (!cc->rpages[i])
-                       continue;
-               redirty_page_for_writepage(wbc, cc->rpages[i]);
-               unlock_page(cc->rpages[i]);
-       }
-       return err;
 }
 
 int f2fs_write_multi_pages(struct compress_ctx *cc,
index aacf5e4..0a1d236 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/cleancache.h>
 #include <linux/sched/signal.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -1354,7 +1355,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
        if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
                return -EPERM;
 
-       err = f2fs_get_node_info(sbi, dn->nid, &ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
        if (err)
                return err;
 
@@ -1376,61 +1377,9 @@ alloc:
                f2fs_invalidate_compress_page(sbi, old_blkaddr);
        }
        f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
-       /*
-        * i_size will be updated by direct_IO. Otherwise, we'll get stale
-        * data from unwritten block via dio_read.
-        */
        return 0;
 }
 
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct inode *inode = file_inode(iocb->ki_filp);
-       struct f2fs_map_blocks map;
-       int flag;
-       int err = 0;
-       bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
-       map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
-       map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
-       if (map.m_len > map.m_lblk)
-               map.m_len -= map.m_lblk;
-       else
-               map.m_len = 0;
-
-       map.m_next_pgofs = NULL;
-       map.m_next_extent = NULL;
-       map.m_seg_type = NO_CHECK_TYPE;
-       map.m_may_create = true;
-
-       if (direct_io) {
-               map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
-               flag = f2fs_force_buffered_io(inode, iocb, from) ?
-                                       F2FS_GET_BLOCK_PRE_AIO :
-                                       F2FS_GET_BLOCK_PRE_DIO;
-               goto map_blocks;
-       }
-       if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
-               err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
-       if (f2fs_has_inline_data(inode))
-               return err;
-
-       flag = F2FS_GET_BLOCK_PRE_AIO;
-
-map_blocks:
-       err = f2fs_map_blocks(inode, &map, 1, flag);
-       if (map.m_len > 0 && err == -ENOSPC) {
-               if (!direct_io)
-                       set_inode_flag(inode, FI_NO_PREALLOC);
-               err = 0;
-       }
-       return err;
-}
-
 void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -1590,8 +1539,11 @@ next_block:
                                        flag != F2FS_GET_BLOCK_DIO);
                                err = __allocate_data_block(&dn,
                                                        map->m_seg_type);
-                               if (!err)
+                               if (!err) {
+                                       if (flag == F2FS_GET_BLOCK_PRE_DIO)
+                                               file_need_truncate(inode);
                                        set_inode_flag(inode, FI_APPEND_WRITE);
+                               }
                        }
                        if (err)
                                goto sync_out;
@@ -1786,50 +1738,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
        return (blks << inode->i_blkbits);
 }
 
-static int __get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh, int create, int flag,
-                       pgoff_t *next_pgofs, int seg_type, bool may_write)
-{
-       struct f2fs_map_blocks map;
-       int err;
-
-       map.m_lblk = iblock;
-       map.m_len = bytes_to_blks(inode, bh->b_size);
-       map.m_next_pgofs = next_pgofs;
-       map.m_next_extent = NULL;
-       map.m_seg_type = seg_type;
-       map.m_may_create = may_write;
-
-       err = f2fs_map_blocks(inode, &map, create, flag);
-       if (!err) {
-               map_bh(bh, inode->i_sb, map.m_pblk);
-               bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-               bh->b_size = blks_to_bytes(inode, map.m_len);
-
-               if (map.m_multidev_dio)
-                       bh->b_bdev = map.m_bdev;
-       }
-       return err;
-}
-
-static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
-{
-       return __get_data_block(inode, iblock, bh_result, create,
-                               F2FS_GET_BLOCK_DIO, NULL,
-                               f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               true);
-}
-
-static int get_data_block_dio(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create)
-{
-       return __get_data_block(inode, iblock, bh_result, create,
-                               F2FS_GET_BLOCK_DIO, NULL,
-                               f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-                               false);
-}
-
 static int f2fs_xattr_fiemap(struct inode *inode,
                                struct fiemap_extent_info *fieinfo)
 {
@@ -1849,7 +1757,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
                if (!page)
                        return -ENOMEM;
 
-               err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+               err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
                if (err) {
                        f2fs_put_page(page, 1);
                        return err;
@@ -1881,7 +1789,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
                if (!page)
                        return -ENOMEM;
 
-               err = f2fs_get_node_info(sbi, xnid, &ni);
+               err = f2fs_get_node_info(sbi, xnid, &ni, false);
                if (err) {
                        f2fs_put_page(page, 1);
                        return err;
@@ -2617,6 +2525,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
+       /* The below cases were checked when setting it. */
+       if (f2fs_is_pinned_file(inode))
+               return false;
+       if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+               return true;
        if (f2fs_lfs_mode(sbi))
                return true;
        if (S_ISDIR(inode->i_mode))
@@ -2625,8 +2538,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
                return true;
        if (f2fs_is_atomic_file(inode))
                return true;
-       if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
-               return true;
 
        /* swap file is migrating in aligned write mode */
        if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2738,7 +2649,7 @@ got_it:
                fio->need_lock = LOCK_REQ;
        }
 
-       err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
        if (err)
                goto out_writepage;
 
@@ -2987,6 +2898,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
                .rpages = NULL,
                .nr_rpages = 0,
                .cpages = NULL,
+               .valid_nr_cpages = 0,
                .rbuf = NULL,
                .cbuf = NULL,
                .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
@@ -3305,7 +3217,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
                        FS_CP_DATA_IO : FS_DATA_IO);
 }
 
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
 {
        loff_t i_size = i_size_read(inode);
 
@@ -3339,12 +3251,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
        int flag;
 
        /*
-        * we already allocated all the blocks, so we don't need to get
-        * the block addresses when there is no need to fill the page.
+        * If a whole page is being written and we already preallocated all the
+        * blocks, then there is no need to get a block address now.
         */
-       if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
-           !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
-           !f2fs_verity_in_progress(inode))
+       if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
                return 0;
 
        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -3595,158 +3505,6 @@ unlock_out:
        return copied;
 }
 
-static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
-                          loff_t offset)
-{
-       unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
-       unsigned blkbits = i_blkbits;
-       unsigned blocksize_mask = (1 << blkbits) - 1;
-       unsigned long align = offset | iov_iter_alignment(iter);
-       struct block_device *bdev = inode->i_sb->s_bdev;
-
-       if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
-               return 1;
-
-       if (align & blocksize_mask) {
-               if (bdev)
-                       blkbits = blksize_bits(bdev_logical_block_size(bdev));
-               blocksize_mask = (1 << blkbits) - 1;
-               if (align & blocksize_mask)
-                       return -EINVAL;
-               return 1;
-       }
-       return 0;
-}
-
-static void f2fs_dio_end_io(struct bio *bio)
-{
-       struct f2fs_private_dio *dio = bio->bi_private;
-
-       dec_page_count(F2FS_I_SB(dio->inode),
-                       dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-       bio->bi_private = dio->orig_private;
-       bio->bi_end_io = dio->orig_end_io;
-
-       kfree(dio);
-
-       bio_endio(bio);
-}
-
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
-                                                       loff_t file_offset)
-{
-       struct f2fs_private_dio *dio;
-       bool write = (bio_op(bio) == REQ_OP_WRITE);
-
-       dio = f2fs_kzalloc(F2FS_I_SB(inode),
-                       sizeof(struct f2fs_private_dio), GFP_NOFS);
-       if (!dio)
-               goto out;
-
-       dio->inode = inode;
-       dio->orig_end_io = bio->bi_end_io;
-       dio->orig_private = bio->bi_private;
-       dio->write = write;
-
-       bio->bi_end_io = f2fs_dio_end_io;
-       bio->bi_private = dio;
-
-       inc_page_count(F2FS_I_SB(inode),
-                       write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
-       submit_bio(bio);
-       return;
-out:
-       bio->bi_status = BLK_STS_IOERR;
-       bio_endio(bio);
-}
-
-static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-       struct address_space *mapping = iocb->ki_filp->f_mapping;
-       struct inode *inode = mapping->host;
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct f2fs_inode_info *fi = F2FS_I(inode);
-       size_t count = iov_iter_count(iter);
-       loff_t offset = iocb->ki_pos;
-       int rw = iov_iter_rw(iter);
-       int err;
-       enum rw_hint hint = iocb->ki_hint;
-       int whint_mode = F2FS_OPTION(sbi).whint_mode;
-       bool do_opu;
-
-       err = check_direct_IO(inode, iter, offset);
-       if (err)
-               return err < 0 ? err : 0;
-
-       if (f2fs_force_buffered_io(inode, iocb, iter))
-               return 0;
-
-       do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
-       trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
-       if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
-               iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
-       if (iocb->ki_flags & IOCB_NOWAIT) {
-               if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
-                       iocb->ki_hint = hint;
-                       err = -EAGAIN;
-                       goto out;
-               }
-               if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
-                       up_read(&fi->i_gc_rwsem[rw]);
-                       iocb->ki_hint = hint;
-                       err = -EAGAIN;
-                       goto out;
-               }
-       } else {
-               down_read(&fi->i_gc_rwsem[rw]);
-               if (do_opu)
-                       down_read(&fi->i_gc_rwsem[READ]);
-       }
-
-       err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-                       iter, rw == WRITE ? get_data_block_dio_write :
-                       get_data_block_dio, NULL, f2fs_dio_submit_bio,
-                       rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
-                       DIO_SKIP_HOLES);
-
-       if (do_opu)
-               up_read(&fi->i_gc_rwsem[READ]);
-
-       up_read(&fi->i_gc_rwsem[rw]);
-
-       if (rw == WRITE) {
-               if (whint_mode == WHINT_MODE_OFF)
-                       iocb->ki_hint = hint;
-               if (err > 0) {
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-                                                                       err);
-                       if (!do_opu)
-                               set_inode_flag(inode, FI_UPDATE_WRITE);
-               } else if (err == -EIOCBQUEUED) {
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
-                                               count - iov_iter_count(iter));
-               } else if (err < 0) {
-                       f2fs_write_failed(inode, offset + count);
-               }
-       } else {
-               if (err > 0)
-                       f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
-               else if (err == -EIOCBQUEUED)
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
-                                               count - iov_iter_count(iter));
-       }
-
-out:
-       trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-
-       return err;
-}
-
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
                                                        unsigned int length)
 {
@@ -3770,12 +3528,9 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
 
        clear_page_private_gcing(page);
 
-       if (test_opt(sbi, COMPRESS_CACHE)) {
-               if (f2fs_compressed_file(inode))
-                       f2fs_invalidate_compress_pages(sbi, inode->i_ino);
-               if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
-                       clear_page_private_data(page);
-       }
+       if (test_opt(sbi, COMPRESS_CACHE) &&
+                       inode->i_ino == F2FS_COMPRESS_INO(sbi))
+               clear_page_private_data(page);
 
        if (page_private_atomic(page))
                return f2fs_drop_inmem_page(inode, page);
@@ -3795,12 +3550,9 @@ int f2fs_release_page(struct page *page, gfp_t wait)
                return 0;
 
        if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
-               struct f2fs_sb_info *sbi = F2FS_P_SB(page);
                struct inode *inode = page->mapping->host;
 
-               if (f2fs_compressed_file(inode))
-                       f2fs_invalidate_compress_pages(sbi, inode->i_ino);
-               if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
+               if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode)))
                        clear_page_private_data(page);
        }
 
@@ -4202,7 +3954,7 @@ const struct address_space_operations f2fs_dblock_aops = {
        .set_page_dirty = f2fs_set_data_page_dirty,
        .invalidatepage = f2fs_invalidate_page,
        .releasepage    = f2fs_release_page,
-       .direct_IO      = f2fs_direct_IO,
+       .direct_IO      = noop_direct_IO,
        .bmap           = f2fs_bmap,
        .swap_activate  = f2fs_swap_activate,
        .swap_deactivate = f2fs_swap_deactivate,
@@ -4282,3 +4034,58 @@ void f2fs_destroy_bio_entry_cache(void)
 {
        kmem_cache_destroy(bio_entry_slab);
 }
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+                           unsigned int flags, struct iomap *iomap,
+                           struct iomap *srcmap)
+{
+       struct f2fs_map_blocks map = {};
+       pgoff_t next_pgofs = 0;
+       int err;
+
+       map.m_lblk = bytes_to_blks(inode, offset);
+       map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+       map.m_next_pgofs = &next_pgofs;
+       map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+       if (flags & IOMAP_WRITE)
+               map.m_may_create = true;
+
+       err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+                             F2FS_GET_BLOCK_DIO);
+       if (err)
+               return err;
+
+       iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+       if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+               iomap->length = blks_to_bytes(inode, map.m_len);
+               if (map.m_flags & F2FS_MAP_MAPPED) {
+                       iomap->type = IOMAP_MAPPED;
+                       iomap->flags |= IOMAP_F_MERGED;
+               } else {
+                       iomap->type = IOMAP_UNWRITTEN;
+               }
+               if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+                       return -EINVAL;
+
+               iomap->bdev = map.m_bdev;
+               iomap->addr = blks_to_bytes(inode, map.m_pblk);
+       } else {
+               iomap->length = blks_to_bytes(inode, next_pgofs) -
+                               iomap->offset;
+               iomap->type = IOMAP_HOLE;
+               iomap->addr = IOMAP_NULL_ADDR;
+       }
+
+       if (map.m_flags & F2FS_MAP_NEW)
+               iomap->flags |= IOMAP_F_NEW;
+       if ((inode->i_state & I_DIRTY_DATASYNC) ||
+           offset + length > i_size_read(inode))
+               iomap->flags |= IOMAP_F_DIRTY;
+
+       return 0;
+}
+
+const struct iomap_ops f2fs_iomap_ops = {
+       .iomap_begin    = f2fs_iomap_begin,
+};
index d0d6031..eb22fa9 100644 (file)
@@ -58,6 +58,7 @@ enum {
        FAULT_WRITE_IO,
        FAULT_SLAB_ALLOC,
        FAULT_DQUOT_INIT,
+       FAULT_LOCK_OP,
        FAULT_MAX,
 };
 
@@ -656,6 +657,7 @@ enum {
 #define FADVISE_KEEP_SIZE_BIT  0x10
 #define FADVISE_HOT_BIT                0x20
 #define FADVISE_VERITY_BIT     0x40
+#define FADVISE_TRUNC_BIT      0x80
 
 #define FADVISE_MODIFIABLE_BITS        (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
 
@@ -683,6 +685,10 @@ enum {
 #define file_is_verity(inode)  is_file(inode, FADVISE_VERITY_BIT)
 #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
 
+#define file_should_truncate(inode)    is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode)      set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode)      clear_file(inode, FADVISE_TRUNC_BIT)
+
 #define DEF_DIR_LEVEL          0
 
 enum {
@@ -717,7 +723,7 @@ enum {
        FI_INLINE_DOTS,         /* indicate inline dot dentries */
        FI_DO_DEFRAG,           /* indicate defragment is running */
        FI_DIRTY_FILE,          /* indicate regular/symlink has dirty pages */
-       FI_NO_PREALLOC,         /* indicate skipped preallocated blocks */
+       FI_PREALLOCATED_ALL,    /* all blocks for write were preallocated */
        FI_HOT_DATA,            /* indicate file is hot */
        FI_EXTRA_ATTR,          /* indicate file has extra attribute */
        FI_PROJ_INHERIT,        /* indicate file inherits projectid */
@@ -1020,6 +1026,7 @@ struct f2fs_sm_info {
        unsigned int segment_count;     /* total # of segments */
        unsigned int main_segments;     /* # of segments in main area */
        unsigned int reserved_segments; /* # of reserved segments */
+       unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
        unsigned int ovp_segments;      /* # of overprovision segments */
 
        /* a threshold to reclaim prefree segments */
@@ -1488,6 +1495,7 @@ struct compress_ctx {
        unsigned int nr_rpages;         /* total page number in rpages */
        struct page **cpages;           /* pages store compressed data in cluster */
        unsigned int nr_cpages;         /* total page number in cpages */
+       unsigned int valid_nr_cpages;   /* valid page number in cpages */
        void *rbuf;                     /* virtual mapped address on rpages */
        struct compress_data *cbuf;     /* virtual mapped address on cpages */
        size_t rlen;                    /* valid data length in rbuf */
@@ -1679,6 +1687,9 @@ struct f2fs_sb_info {
        unsigned int cur_victim_sec;            /* current victim section num */
        unsigned int gc_mode;                   /* current GC state */
        unsigned int next_victim_seg[2];        /* next segment in victim section */
+       spinlock_t gc_urgent_high_lock;
+       bool gc_urgent_high_limited;            /* indicates having limited trial count */
+       unsigned int gc_urgent_high_remaining;  /* remaining trial count for GC_URGENT_HIGH */
 
        /* for skip statistic */
        unsigned int atomic_files;              /* # of opened atomic file */
@@ -1803,13 +1814,6 @@ struct f2fs_sb_info {
 #endif
 };
 
-struct f2fs_private_dio {
-       struct inode *inode;
-       void *orig_private;
-       bio_end_io_t *orig_end_io;
-       bool write;
-};
-
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 #define f2fs_show_injection_info(sbi, type)                                    \
        printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@ -2095,6 +2099,10 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 
 static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
 {
+       if (time_to_inject(sbi, FAULT_LOCK_OP)) {
+               f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+               return 0;
+       }
        return down_read_trylock(&sbi->cp_rwsem);
 }
 
@@ -2200,6 +2208,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
        if (!__allow_reserved_blocks(sbi, inode, true))
                avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+       if (F2FS_IO_ALIGNED(sbi))
+               avail_user_block_count -= sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments;
+
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
                if (avail_user_block_count > sbi->unusable_block_count)
                        avail_user_block_count -= sbi->unusable_block_count;
@@ -2446,6 +2459,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
        if (!__allow_reserved_blocks(sbi, inode, false))
                valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+       if (F2FS_IO_ALIGNED(sbi))
+               valid_block_count += sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments;
+
        user_block_count = sbi->user_block_count;
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                user_block_count -= sbi->unusable_block_count;
@@ -3118,12 +3136,16 @@ static inline int is_file(struct inode *inode, int type)
 
 static inline void set_file(struct inode *inode, int type)
 {
+       if (is_file(inode, type))
+               return;
        F2FS_I(inode)->i_advise |= type;
        f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void clear_file(struct inode *inode, int type)
 {
+       if (!is_file(inode, type))
+               return;
        F2FS_I(inode)->i_advise &= ~type;
        f2fs_mark_inode_dirty_sync(inode, true);
 }
@@ -3408,7 +3430,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
 bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
 bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
 int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
-                                               struct node_info *ni);
+                               struct node_info *ni, bool checkpoint_context);
 pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
 int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
 int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
@@ -3616,7 +3638,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
 int f2fs_reserve_new_block(struct dnode_of_data *dn);
 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                        int op_flags, bool for_write);
@@ -3639,6 +3660,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
                                struct writeback_control *wbc,
                                enum iostat_type io_type,
                                int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
                        unsigned int length);
 int f2fs_release_page(struct page *page, gfp_t wait);
@@ -3652,6 +3674,7 @@ int f2fs_init_post_read_processing(void);
 void f2fs_destroy_post_read_processing(void);
 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+extern const struct iomap_ops f2fs_iomap_ops;
 
 /*
  * gc.c
index 92ec269..3c98ef6 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sched/signal.h>
 #include <linux/fileattr.h>
 #include <linux/fadvise.h>
+#include <linux/iomap.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -1232,7 +1233,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
                        if (ret)
                                return ret;
 
-                       ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+                       ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
                        if (ret) {
                                f2fs_put_dnode(&dn);
                                return ret;
@@ -1687,6 +1688,7 @@ next_alloc:
 
                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
                err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+               file_dont_truncate(inode);
 
                up_write(&sbi->pin_sem);
 
@@ -1748,7 +1750,11 @@ static long f2fs_fallocate(struct file *file, int mode,
                (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
                return -EOPNOTSUPP;
 
-       if (f2fs_compressed_file(inode) &&
+       /*
+        * Pinned file should not support partial trucation since the block
+        * can be used by applications.
+        */
+       if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
                (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
                        FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
                return -EOPNOTSUPP;
@@ -3143,17 +3149,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
 
        inode_lock(inode);
 
-       if (f2fs_should_update_outplace(inode, NULL)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
        if (!pin) {
                clear_inode_flag(inode, FI_PIN_FILE);
                f2fs_i_gc_failures_write(inode, 0);
                goto done;
        }
 
+       if (f2fs_should_update_outplace(inode, NULL)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (f2fs_pin_file_control(inode, false)) {
                ret = -EAGAIN;
                goto out;
@@ -4218,27 +4224,385 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        return __f2fs_ioctl(filp, cmd, arg);
 }
 
-static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Return %true if the given read or write request should use direct I/O, or
+ * %false if it should use buffered I/O.
+ */
+static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
+                               struct iov_iter *iter)
+{
+       unsigned int align;
+
+       if (!(iocb->ki_flags & IOCB_DIRECT))
+               return false;
+
+       if (f2fs_force_buffered_io(inode, iocb, iter))
+               return false;
+
+       /*
+        * Direct I/O not aligned to the disk's logical_block_size will be
+        * attempted, but will fail with -EINVAL.
+        *
+        * f2fs additionally requires that direct I/O be aligned to the
+        * filesystem block size, which is often a stricter requirement.
+        * However, f2fs traditionally falls back to buffered I/O on requests
+        * that are logical_block_size-aligned but not fs-block aligned.
+        *
+        * The below logic implements this behavior.
+        */
+       align = iocb->ki_pos | iov_iter_alignment(iter);
+       if (!IS_ALIGNED(align, i_blocksize(inode)) &&
+           IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
+               return false;
+
+       return true;
+}
+
+static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
+                               unsigned int flags)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+       dec_page_count(sbi, F2FS_DIO_READ);
+       if (error)
+               return error;
+       f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+       return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
+       .end_io = f2fs_dio_read_end_io,
+};
+
+static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
-       int ret;
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       const loff_t pos = iocb->ki_pos;
+       const size_t count = iov_iter_count(to);
+       struct iomap_dio *dio;
+       ssize_t ret;
+
+       if (count == 0)
+               return 0; /* skip atime update */
+
+       trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
+
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       } else {
+               down_read(&fi->i_gc_rwsem[READ]);
+       }
+
+       /*
+        * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+        * the higher-level function iomap_dio_rw() in order to ensure that the
+        * F2FS_DIO_READ counter will be decremented correctly in all cases.
+        */
+       inc_page_count(sbi, F2FS_DIO_READ);
+       dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
+                            &f2fs_iomap_dio_read_ops, 0, 0);
+       if (IS_ERR_OR_NULL(dio)) {
+               ret = PTR_ERR_OR_ZERO(dio);
+               if (ret != -EIOCBQUEUED)
+                       dec_page_count(sbi, F2FS_DIO_READ);
+       } else {
+               ret = iomap_dio_complete(dio);
+       }
+
+       up_read(&fi->i_gc_rwsem[READ]);
+
+       file_accessed(file);
+out:
+       trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
+       return ret;
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       ssize_t ret;
 
        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;
 
-       ret = generic_file_read_iter(iocb, iter);
+       if (f2fs_should_use_dio(inode, iocb, to))
+               return f2fs_dio_read_iter(iocb, to);
 
+       ret = filemap_read(iocb, to, 0);
        if (ret > 0)
-               f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+               f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+       return ret;
+}
+
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       ssize_t count;
+       int err;
+
+       if (IS_IMMUTABLE(inode))
+               return -EPERM;
+
+       if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+               return -EPERM;
+
+       count = generic_write_checks(iocb, from);
+       if (count <= 0)
+               return count;
+
+       err = file_modified(file);
+       if (err)
+               return err;
+       return count;
+}
+
+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so.  Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+                                  bool dio)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       const loff_t pos = iocb->ki_pos;
+       const size_t count = iov_iter_count(iter);
+       struct f2fs_map_blocks map = {};
+       int flag;
+       int ret;
+
+       /* If it will be an out-of-place direct write, don't bother. */
+       if (dio && f2fs_lfs_mode(sbi))
+               return 0;
+       /*
+        * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+        * buffered IO, if DIO meets any holes.
+        */
+       if (dio && i_size_read(inode) &&
+               (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+               return 0;
+
+       /* No-wait I/O can't allocate blocks. */
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return 0;
+
+       /* If it will be a short write, don't bother. */
+       if (fault_in_iov_iter_readable(iter, count))
+               return 0;
+
+       if (f2fs_has_inline_data(inode)) {
+               /* If the data will fit inline, don't bother. */
+               if (pos + count <= MAX_INLINE_DATA(inode))
+                       return 0;
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+       }
+
+       /* Do not preallocate blocks that will be written partially in 4KB. */
+       map.m_lblk = F2FS_BLK_ALIGN(pos);
+       map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+       if (map.m_len > map.m_lblk)
+               map.m_len -= map.m_lblk;
+       else
+               map.m_len = 0;
+       map.m_may_create = true;
+       if (dio) {
+               map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+               flag = F2FS_GET_BLOCK_PRE_DIO;
+       } else {
+               map.m_seg_type = NO_CHECK_TYPE;
+               flag = F2FS_GET_BLOCK_PRE_AIO;
+       }
+
+       ret = f2fs_map_blocks(inode, &map, 1, flag);
+       /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+       if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
+               return ret;
+       if (ret == 0)
+               set_inode_flag(inode, FI_PREALLOCATED_ALL);
+       return map.m_len;
+}
+
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+                                       struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       ssize_t ret;
 
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return -EOPNOTSUPP;
+
+       current->backing_dev_info = inode_to_bdi(inode);
+       ret = generic_perform_write(file, from, iocb->ki_pos);
+       current->backing_dev_info = NULL;
+
+       if (ret > 0) {
+               iocb->ki_pos += ret;
+               f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+       }
        return ret;
 }
 
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
+                                unsigned int flags)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+       dec_page_count(sbi, F2FS_DIO_WRITE);
+       if (error)
+               return error;
+       f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+       return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
+       .end_io = f2fs_dio_write_end_io,
+};
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                                  bool *may_need_sync)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       const bool do_opu = f2fs_lfs_mode(sbi);
+       const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+       const loff_t pos = iocb->ki_pos;
+       const ssize_t count = iov_iter_count(from);
+       const enum rw_hint hint = iocb->ki_hint;
+       unsigned int dio_flags;
+       struct iomap_dio *dio;
+       ssize_t ret;
+
+       trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
+
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               /* f2fs_convert_inline_inode() and block allocation can block */
+               if (f2fs_has_inline_data(inode) ||
+                   !f2fs_overwrite_io(inode, pos, count)) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+
+               if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+               if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+                       up_read(&fi->i_gc_rwsem[WRITE]);
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       } else {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       goto out;
+
+               down_read(&fi->i_gc_rwsem[WRITE]);
+               if (do_opu)
+                       down_read(&fi->i_gc_rwsem[READ]);
+       }
+       if (whint_mode == WHINT_MODE_OFF)
+               iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+       /*
+        * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+        * the higher-level function iomap_dio_rw() in order to ensure that the
+        * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
+        */
+       inc_page_count(sbi, F2FS_DIO_WRITE);
+       dio_flags = 0;
+       if (pos + count > inode->i_size)
+               dio_flags |= IOMAP_DIO_FORCE_WAIT;
+       dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+                            &f2fs_iomap_dio_write_ops, dio_flags, 0);
+       if (IS_ERR_OR_NULL(dio)) {
+               ret = PTR_ERR_OR_ZERO(dio);
+               if (ret == -ENOTBLK)
+                       ret = 0;
+               if (ret != -EIOCBQUEUED)
+                       dec_page_count(sbi, F2FS_DIO_WRITE);
+       } else {
+               ret = iomap_dio_complete(dio);
+       }
+
+       if (whint_mode == WHINT_MODE_OFF)
+               iocb->ki_hint = hint;
+       if (do_opu)
+               up_read(&fi->i_gc_rwsem[READ]);
+       up_read(&fi->i_gc_rwsem[WRITE]);
+
+       if (ret < 0)
+               goto out;
+       if (pos + ret > inode->i_size)
+               f2fs_i_size_write(inode, pos + ret);
+       if (!do_opu)
+               set_inode_flag(inode, FI_UPDATE_WRITE);
+
+       if (iov_iter_count(from)) {
+               ssize_t ret2;
+               loff_t bufio_start_pos = iocb->ki_pos;
+
+               /*
+                * The direct write was partial, so we need to fall back to a
+                * buffered write for the remainder.
+                */
+
+               ret2 = f2fs_buffered_write_iter(iocb, from);
+               if (iov_iter_count(from))
+                       f2fs_write_failed(inode, iocb->ki_pos);
+               if (ret2 < 0)
+                       goto out;
+
+               /*
+                * Ensure that the pagecache pages are written to disk and
+                * invalidated to preserve the expected O_DIRECT semantics.
+                */
+               if (ret2 > 0) {
+                       loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+                       ret += ret2;
+
+                       ret2 = filemap_write_and_wait_range(file->f_mapping,
+                                                           bufio_start_pos,
+                                                           bufio_end_pos);
+                       if (ret2 < 0)
+                               goto out;
+                       invalidate_mapping_pages(file->f_mapping,
+                                                bufio_start_pos >> PAGE_SHIFT,
+                                                bufio_end_pos >> PAGE_SHIFT);
+               }
+       } else {
+               /* iomap_dio_rw() already handled the generic_write_sync(). */
+               *may_need_sync = false;
+       }
+out:
+       trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+       return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       const loff_t orig_pos = iocb->ki_pos;
+       const size_t orig_count = iov_iter_count(from);
+       loff_t target_size;
+       bool dio;
+       bool may_need_sync = true;
+       int preallocated;
        ssize_t ret;
 
        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4260,91 +4624,42 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                inode_lock(inode);
        }
 
-       if (unlikely(IS_IMMUTABLE(inode))) {
-               ret = -EPERM;
-               goto unlock;
-       }
-
-       if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
-               ret = -EPERM;
-               goto unlock;
-       }
-
-       ret = generic_write_checks(iocb, from);
-       if (ret > 0) {
-               bool preallocated = false;
-               size_t target_size = 0;
-               int err;
-
-               if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
-                       set_inode_flag(inode, FI_NO_PREALLOC);
-
-               if ((iocb->ki_flags & IOCB_NOWAIT)) {
-                       if (!f2fs_overwrite_io(inode, iocb->ki_pos,
-                                               iov_iter_count(from)) ||
-                               f2fs_has_inline_data(inode) ||
-                               f2fs_force_buffered_io(inode, iocb, from)) {
-                               clear_inode_flag(inode, FI_NO_PREALLOC);
-                               inode_unlock(inode);
-                               ret = -EAGAIN;
-                               goto out;
-                       }
-                       goto write;
-               }
-
-               if (is_inode_flag_set(inode, FI_NO_PREALLOC))
-                       goto write;
-
-               if (iocb->ki_flags & IOCB_DIRECT) {
-                       /*
-                        * Convert inline data for Direct I/O before entering
-                        * f2fs_direct_IO().
-                        */
-                       err = f2fs_convert_inline_inode(inode);
-                       if (err)
-                               goto out_err;
-                       /*
-                        * If force_buffere_io() is true, we have to allocate
-                        * blocks all the time, since f2fs_direct_IO will fall
-                        * back to buffered IO.
-                        */
-                       if (!f2fs_force_buffered_io(inode, iocb, from) &&
-                                       f2fs_lfs_mode(F2FS_I_SB(inode)))
-                               goto write;
-               }
-               preallocated = true;
-               target_size = iocb->ki_pos + iov_iter_count(from);
+       ret = f2fs_write_checks(iocb, from);
+       if (ret <= 0)
+               goto out_unlock;
 
-               err = f2fs_preallocate_blocks(iocb, from);
-               if (err) {
-out_err:
-                       clear_inode_flag(inode, FI_NO_PREALLOC);
-                       inode_unlock(inode);
-                       ret = err;
-                       goto out;
-               }
-write:
-               ret = __generic_file_write_iter(iocb, from);
-               clear_inode_flag(inode, FI_NO_PREALLOC);
+       /* Determine whether we will do a direct write or a buffered write. */
+       dio = f2fs_should_use_dio(inode, iocb, from);
 
-               /* if we couldn't write data, we should deallocate blocks. */
-               if (preallocated && i_size_read(inode) < target_size) {
-                       down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-                       filemap_invalidate_lock(inode->i_mapping);
-                       f2fs_truncate(inode);
-                       filemap_invalidate_unlock(inode->i_mapping);
-                       up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-               }
+       /* Possibly preallocate the blocks for the write. */
+       target_size = iocb->ki_pos + iov_iter_count(from);
+       preallocated = f2fs_preallocate_blocks(iocb, from, dio);
+       if (preallocated < 0)
+               ret = preallocated;
+       else
+               /* Do the actual write. */
+               ret = dio ?
+                       f2fs_dio_write_iter(iocb, from, &may_need_sync):
+                       f2fs_buffered_write_iter(iocb, from);
 
-               if (ret > 0)
-                       f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+       /* Don't leave any preallocated blocks around past i_size. */
+       if (preallocated && i_size_read(inode) < target_size) {
+               down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+               filemap_invalidate_lock(inode->i_mapping);
+               if (!f2fs_truncate(inode))
+                       file_dont_truncate(inode);
+               filemap_invalidate_unlock(inode->i_mapping);
+               up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+       } else {
+               file_dont_truncate(inode);
        }
-unlock:
+
+       clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+out_unlock:
        inode_unlock(inode);
 out:
-       trace_f2fs_file_write_iter(inode, iocb->ki_pos,
-                                       iov_iter_count(from), ret);
-       if (ret > 0)
+       trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
+       if (ret > 0 && may_need_sync)
                ret = generic_write_sync(iocb, ret);
        return ret;
 }
@@ -4352,12 +4667,12 @@ out:
 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
                int advice)
 {
-       struct inode *inode;
        struct address_space *mapping;
        struct backing_dev_info *bdi;
+       struct inode *inode = file_inode(filp);
+       int err;
 
        if (advice == POSIX_FADV_SEQUENTIAL) {
-               inode = file_inode(filp);
                if (S_ISFIFO(inode->i_mode))
                        return -ESPIPE;
 
@@ -4374,7 +4689,13 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
                return 0;
        }
 
-       return generic_fadvise(filp, offset, len, advice);
+       err = generic_fadvise(filp, offset, len, advice);
+       if (!err && advice == POSIX_FADV_DONTNEED &&
+               test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
+               f2fs_compressed_file(inode))
+               f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+
+       return err;
 }
 
 #ifdef CONFIG_COMPAT
index 374bbb5..ee308a8 100644 (file)
@@ -92,6 +92,18 @@ static int gc_thread_func(void *data)
                 * So, I'd like to wait some time to collect dirty segments.
                 */
                if (sbi->gc_mode == GC_URGENT_HIGH) {
+                       spin_lock(&sbi->gc_urgent_high_lock);
+                       if (sbi->gc_urgent_high_limited) {
+                               if (!sbi->gc_urgent_high_remaining) {
+                                       sbi->gc_urgent_high_limited = false;
+                                       spin_unlock(&sbi->gc_urgent_high_lock);
+                                       sbi->gc_mode = GC_NORMAL;
+                                       continue;
+                               }
+                               sbi->gc_urgent_high_remaining--;
+                       }
+                       spin_unlock(&sbi->gc_urgent_high_lock);
+
                        wait_ms = gc_th->urgent_sleep_time;
                        down_write(&sbi->gc_lock);
                        goto do_gc;
@@ -947,7 +959,7 @@ next_step:
                        continue;
                }
 
-               if (f2fs_get_node_info(sbi, nid, &ni)) {
+               if (f2fs_get_node_info(sbi, nid, &ni, false)) {
                        f2fs_put_page(node_page, 1);
                        continue;
                }
@@ -1015,7 +1027,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        if (IS_ERR(node_page))
                return false;
 
-       if (f2fs_get_node_info(sbi, nid, dni)) {
+       if (f2fs_get_node_info(sbi, nid, dni, false)) {
                f2fs_put_page(node_page, 1);
                return false;
        }
@@ -1026,6 +1038,9 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                set_sbi_flag(sbi, SBI_NEED_FSCK);
        }
 
+       if (f2fs_check_nid_range(sbi, dni->ino))
+               return false;
+
        *nofs = ofs_of_node(node_page);
        source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
        f2fs_put_page(node_page, 1);
@@ -1039,7 +1054,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                        if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
                                f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
                                         blkaddr, source_blkaddr, segno);
-                               f2fs_bug_on(sbi, 1);
+                               set_sbi_flag(sbi, SBI_NEED_FSCK);
                        }
                }
 #endif
@@ -1206,7 +1221,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 
        f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
 
-       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
        if (err)
                goto put_out;
 
@@ -1456,7 +1471,8 @@ next_step:
 
                if (phase == 3) {
                        inode = f2fs_iget(sb, dni.ino);
-                       if (IS_ERR(inode) || is_bad_inode(inode))
+                       if (IS_ERR(inode) || is_bad_inode(inode) ||
+                                       special_file(inode->i_mode))
                                continue;
 
                        if (!down_write_trylock(
index ea08f0d..4b5cefa 100644 (file)
@@ -131,7 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
        if (err)
                return err;
 
-       err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+       err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false);
        if (err) {
                f2fs_truncate_data_blocks_range(dn, 1);
                f2fs_put_dnode(dn);
@@ -786,7 +786,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
                ilen = start + len;
        ilen -= start;
 
-       err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+       err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false);
        if (err)
                goto out;
 
index 4c11254..0ec8e32 100644 (file)
@@ -516,6 +516,11 @@ make_now:
        } else if (ino == F2FS_COMPRESS_INO(sbi)) {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
                inode->i_mapping->a_ops = &f2fs_compress_aops;
+               /*
+                * generic_error_remove_page only truncates pages of regular
+                * inode
+                */
+               inode->i_mode |= S_IFREG;
 #endif
                mapping_set_gfp_mask(inode->i_mapping,
                        GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
@@ -544,6 +549,14 @@ make_now:
                goto bad_inode;
        }
        f2fs_set_inode_flags(inode);
+
+       if (file_should_truncate(inode)) {
+               ret = f2fs_truncate(inode);
+               if (ret)
+                       goto bad_inode;
+               file_dont_truncate(inode);
+       }
+
        unlock_new_inode(inode);
        trace_f2fs_iget(inode);
        return inode;
@@ -738,7 +751,8 @@ void f2fs_evict_inode(struct inode *inode)
        trace_f2fs_evict_inode(inode);
        truncate_inode_pages_final(&inode->i_data);
 
-       if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
+       if ((inode->i_nlink || is_bad_inode(inode)) &&
+               test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
                f2fs_invalidate_compress_pages(sbi, inode->i_ino);
 
        if (inode->i_ino == F2FS_NODE_INO(sbi) ||
@@ -868,7 +882,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
         * so we can prevent losing this orphan when encoutering checkpoint
         * and following suddenly power-off.
         */
-       err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+       err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
        if (err) {
                set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
index cdcf54a..be599f3 100644 (file)
@@ -92,7 +92,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
        struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
        struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
 
-       spin_lock_irq(&sbi->iostat_lat_lock);
+       spin_lock_bh(&sbi->iostat_lat_lock);
        for (idx = 0; idx < MAX_IO_TYPE; idx++) {
                for (io = 0; io < NR_PAGE_TYPE; io++) {
                        cnt = io_lat->bio_cnt[idx][io];
@@ -106,7 +106,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
                        io_lat->bio_cnt[idx][io] = 0;
                }
        }
-       spin_unlock_irq(&sbi->iostat_lat_lock);
+       spin_unlock_bh(&sbi->iostat_lat_lock);
 
        trace_f2fs_iostat_latency(sbi, iostat_lat);
 }
@@ -120,9 +120,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
                return;
 
        /* Need double check under the lock */
-       spin_lock(&sbi->iostat_lock);
+       spin_lock_bh(&sbi->iostat_lock);
        if (time_is_after_jiffies(sbi->iostat_next_period)) {
-               spin_unlock(&sbi->iostat_lock);
+               spin_unlock_bh(&sbi->iostat_lock);
                return;
        }
        sbi->iostat_next_period = jiffies +
@@ -133,7 +133,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
                                sbi->prev_rw_iostat[i];
                sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
        }
-       spin_unlock(&sbi->iostat_lock);
+       spin_unlock_bh(&sbi->iostat_lock);
 
        trace_f2fs_iostat(sbi, iostat_diff);
 
@@ -145,16 +145,16 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
        struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
        int i;
 
-       spin_lock(&sbi->iostat_lock);
+       spin_lock_bh(&sbi->iostat_lock);
        for (i = 0; i < NR_IO_TYPE; i++) {
                sbi->rw_iostat[i] = 0;
                sbi->prev_rw_iostat[i] = 0;
        }
-       spin_unlock(&sbi->iostat_lock);
+       spin_unlock_bh(&sbi->iostat_lock);
 
-       spin_lock_irq(&sbi->iostat_lat_lock);
+       spin_lock_bh(&sbi->iostat_lat_lock);
        memset(io_lat, 0, sizeof(struct iostat_lat_info));
-       spin_unlock_irq(&sbi->iostat_lat_lock);
+       spin_unlock_bh(&sbi->iostat_lat_lock);
 }
 
 void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -163,19 +163,16 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
        if (!sbi->iostat_enable)
                return;
 
-       spin_lock(&sbi->iostat_lock);
+       spin_lock_bh(&sbi->iostat_lock);
        sbi->rw_iostat[type] += io_bytes;
 
-       if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
-               sbi->rw_iostat[APP_BUFFERED_IO] =
-                       sbi->rw_iostat[APP_WRITE_IO] -
-                       sbi->rw_iostat[APP_DIRECT_IO];
+       if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+               sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
 
-       if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
-               sbi->rw_iostat[APP_BUFFERED_READ_IO] =
-                       sbi->rw_iostat[APP_READ_IO] -
-                       sbi->rw_iostat[APP_DIRECT_READ_IO];
-       spin_unlock(&sbi->iostat_lock);
+       if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
+               sbi->rw_iostat[APP_READ_IO] += io_bytes;
+
+       spin_unlock_bh(&sbi->iostat_lock);
 
        f2fs_record_iostat(sbi);
 }
@@ -185,7 +182,6 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
 {
        unsigned long ts_diff;
        unsigned int iotype = iostat_ctx->type;
-       unsigned long flags;
        struct f2fs_sb_info *sbi = iostat_ctx->sbi;
        struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
        int idx;
@@ -206,12 +202,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
                        idx = WRITE_ASYNC_IO;
        }
 
-       spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+       spin_lock_bh(&sbi->iostat_lat_lock);
        io_lat->sum_lat[idx][iotype] += ts_diff;
        io_lat->bio_cnt[idx][iotype]++;
        if (ts_diff > io_lat->peak_lat[idx][iotype])
                io_lat->peak_lat[idx][iotype] = ts_diff;
-       spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+       spin_unlock_bh(&sbi->iostat_lat_lock);
 }
 
 void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
index 219506c..50b2874 100644 (file)
@@ -430,6 +430,10 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *new, *e;
 
+       /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
+       if (rwsem_is_locked(&sbi->cp_global_sem))
+               return;
+
        new = __alloc_nat_entry(sbi, nid, false);
        if (!new)
                return;
@@ -539,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
 }
 
 int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
-                                               struct node_info *ni)
+                               struct node_info *ni, bool checkpoint_context)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -572,9 +576,10 @@ retry:
         * nat_tree_lock. Therefore, we should retry, if we failed to grab here
         * while not bothering checkpoint.
         */
-       if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+       if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
                down_read(&curseg->journal_rwsem);
-       } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+       } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+                               !down_read_trylock(&curseg->journal_rwsem)) {
                up_read(&nm_i->nat_tree_lock);
                goto retry;
        }
@@ -887,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn)
        int err;
        pgoff_t index;
 
-       err = f2fs_get_node_info(sbi, dn->nid, &ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
        if (err)
                return err;
 
@@ -1286,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
                goto fail;
 
 #ifdef CONFIG_F2FS_CHECK_FS
-       err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+       err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
        if (err) {
                dec_valid_node_count(sbi, dn->inode, !ofs);
                goto fail;
@@ -1348,7 +1353,7 @@ static int read_node_page(struct page *page, int op_flags)
                return LOCKED_PAGE;
        }
 
-       err = f2fs_get_node_info(sbi, page->index, &ni);
+       err = f2fs_get_node_info(sbi, page->index, &ni, false);
        if (err)
                return err;
 
@@ -1600,7 +1605,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        nid = nid_of_node(page);
        f2fs_bug_on(sbi, page->index != nid);
 
-       if (f2fs_get_node_info(sbi, nid, &ni))
+       if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
                goto redirty_out;
 
        if (wbc->for_reclaim) {
@@ -2701,7 +2706,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
                goto recover_xnid;
 
        /* 1: invalidate the previous xattr nid */
-       err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+       err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
        if (err)
                return err;
 
@@ -2741,7 +2746,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
        struct page *ipage;
        int err;
 
-       err = f2fs_get_node_info(sbi, ino, &old_ni);
+       err = f2fs_get_node_info(sbi, ino, &old_ni, false);
        if (err)
                return err;
 
index d1664a0..9683c80 100644 (file)
@@ -596,7 +596,7 @@ retry_dn:
 
        f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
 
-       err = f2fs_get_node_info(sbi, dn.nid, &ni);
+       err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
        if (err)
                goto err;
 
index 40fdb4a..575d3dc 100644 (file)
@@ -253,7 +253,7 @@ retry:
                                goto next;
                        }
 
-                       err = f2fs_get_node_info(sbi, dn.nid, &ni);
+                       err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
                        if (err) {
                                f2fs_put_dnode(&dn);
                                return err;
index 46fde9f..0291cd5 100644 (file)
@@ -538,7 +538,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
 
 static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
 {
-       return SM_I(sbi)->reserved_segments;
+       return SM_I(sbi)->reserved_segments +
+                       SM_I(sbi)->additional_reserved_segments;
 }
 
 static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
index 15f12ec..76e6a3d 100644 (file)
@@ -59,6 +59,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
        [FAULT_WRITE_IO]        = "write IO error",
        [FAULT_SLAB_ALLOC]      = "slab alloc",
        [FAULT_DQUOT_INIT]      = "dquot initialize",
+       [FAULT_LOCK_OP]         = "lock_op",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -321,6 +322,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
                                           F2FS_OPTION(sbi).s_resgid));
 }
 
+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+       unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+       unsigned int avg_vblocks;
+       unsigned int wanted_reserved_segments;
+       block_t avail_user_block_count;
+
+       if (!F2FS_IO_ALIGNED(sbi))
+               return 0;
+
+       /* average valid block count in section in worst case */
+       avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+       /*
+        * we need enough free space when migrating one section in worst case
+        */
+       wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+                                               reserved_segments(sbi);
+       wanted_reserved_segments -= reserved_segments(sbi);
+
+       avail_user_block_count = sbi->user_block_count -
+                               sbi->current_reserved_blocks -
+                               F2FS_OPTION(sbi).root_reserved_blocks;
+
+       if (wanted_reserved_segments * sbi->blocks_per_seg >
+                                       avail_user_block_count) {
+               f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+                       wanted_reserved_segments,
+                       avail_user_block_count >> sbi->log_blocks_per_seg);
+               return -ENOSPC;
+       }
+
+       SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+       f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+                        wanted_reserved_segments);
+
+       return 0;
+}
+
 static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
 {
        if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -3540,6 +3581,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->seq_file_ra_mul = MIN_RA_MUL;
        sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
        sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
+       spin_lock_init(&sbi->gc_urgent_high_lock);
 
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -4179,6 +4221,10 @@ try_onemore:
                goto free_nm;
        }
 
+       err = adjust_reserved_segment(sbi);
+       if (err)
+               goto free_nm;
+
        /* For write statistics */
        sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
 
index 8408f77..df406c1 100644 (file)
@@ -118,6 +118,15 @@ static ssize_t sb_status_show(struct f2fs_attr *a,
        return sprintf(buf, "%lx\n", sbi->s_flag);
 }
 
+static ssize_t pending_discard_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+{
+       if (!SM_I(sbi)->dcc_info)
+               return -EINVAL;
+       return sprintf(buf, "%llu\n", (unsigned long long)atomic_read(
+                               &SM_I(sbi)->dcc_info->discard_cmd_cnt));
+}
+
 static ssize_t features_show(struct f2fs_attr *a,
                struct f2fs_sb_info *sbi, char *buf)
 {
@@ -414,7 +423,9 @@ out:
        if (a->struct_type == RESERVED_BLOCKS) {
                spin_lock(&sbi->stat_lock);
                if (t > (unsigned long)(sbi->user_block_count -
-                               F2FS_OPTION(sbi).root_reserved_blocks)) {
+                               F2FS_OPTION(sbi).root_reserved_blocks -
+                               sbi->blocks_per_seg *
+                               SM_I(sbi)->additional_reserved_segments)) {
                        spin_unlock(&sbi->stat_lock);
                        return -EINVAL;
                }
@@ -477,6 +488,15 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
+               spin_lock(&sbi->gc_urgent_high_lock);
+               sbi->gc_urgent_high_limited = t != 0;
+               sbi->gc_urgent_high_remaining = t;
+               spin_unlock(&sbi->gc_urgent_high_lock);
+
+               return count;
+       }
+
 #ifdef CONFIG_F2FS_IOSTAT
        if (!strcmp(a->attr.name, "iostat_enable")) {
                sbi->iostat_enable = !!t;
@@ -732,6 +752,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
 #endif
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
 F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
 F2FS_GENERAL_RO_ATTR(dirty_segments);
 F2FS_GENERAL_RO_ATTR(free_segments);
@@ -743,6 +764,7 @@ F2FS_GENERAL_RO_ATTR(unusable);
 F2FS_GENERAL_RO_ATTR(encoding);
 F2FS_GENERAL_RO_ATTR(mounted_time_sec);
 F2FS_GENERAL_RO_ATTR(main_blkaddr);
+F2FS_GENERAL_RO_ATTR(pending_discard);
 #ifdef CONFIG_F2FS_STAT_FS
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@ -811,6 +833,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(main_blkaddr),
        ATTR_LIST(max_small_discards),
        ATTR_LIST(discard_granularity),
+       ATTR_LIST(pending_discard),
        ATTR_LIST(batched_trim_sections),
        ATTR_LIST(ipu_policy),
        ATTR_LIST(min_ipu_util),
@@ -843,6 +866,7 @@ static struct attribute *f2fs_attrs[] = {
 #endif
        ATTR_LIST(data_io_flag),
        ATTR_LIST(node_io_flag),
+       ATTR_LIST(gc_urgent_high_remaining),
        ATTR_LIST(ckpt_thread_ioprio),
        ATTR_LIST(dirty_segments),
        ATTR_LIST(free_segments),
index e348f33..8e5cd9c 100644 (file)
@@ -226,15 +226,18 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
 }
 
 static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
-                               void *last_base_addr, int index,
-                               size_t len, const char *name)
+                               void *last_base_addr, void **last_addr,
+                               int index, size_t len, const char *name)
 {
        struct f2fs_xattr_entry *entry;
 
        list_for_each_xattr(entry, base_addr) {
                if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
-                       (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+                       (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
+                       if (last_addr)
+                               *last_addr = entry;
                        return NULL;
+               }
 
                if (entry->e_name_index != index)
                        continue;
@@ -254,19 +257,9 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
        unsigned int inline_size = inline_xattr_size(inode);
        void *max_addr = base_addr + inline_size;
 
-       list_for_each_xattr(entry, base_addr) {
-               if ((void *)entry + sizeof(__u32) > max_addr ||
-                       (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
-                       *last_addr = entry;
-                       return NULL;
-               }
-               if (entry->e_name_index != index)
-                       continue;
-               if (entry->e_name_len != len)
-                       continue;
-               if (!memcmp(entry->e_name, name, len))
-                       break;
-       }
+       entry = __find_xattr(base_addr, max_addr, last_addr, index, len, name);
+       if (!entry)
+               return NULL;
 
        /* inline xattr header or entry across max inline xattr size */
        if (IS_XATTR_LAST_ENTRY(entry) &&
@@ -368,7 +361,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
        else
                cur_addr = txattr_addr;
 
-       *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+       *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
        if (!*xe) {
                f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
                                                                inode->i_ino);
@@ -659,7 +652,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
        last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
 
        /* find entry with wanted name. */
-       here = __find_xattr(base_addr, last_base_addr, index, len, name);
+       here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
        if (!here) {
                f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
                                                                inode->i_ino);
@@ -684,8 +677,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
        }
 
        last = here;
-       while (!IS_XATTR_LAST_ENTRY(last))
+       while (!IS_XATTR_LAST_ENTRY(last)) {
+               if ((void *)(last) + sizeof(__u32) > last_base_addr ||
+                       (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
+                       f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
+                                       inode->i_ino, ENTRY_SIZE(last));
+                       set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+                       error = -EFSCORRUPTED;
+                       goto exit;
+               }
                last = XATTR_NEXT_ENTRY(last);
+       }
 
        newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
 
index 13855ba..a5a309f 100644 (file)
@@ -175,9 +175,10 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 static int fat_file_release(struct inode *inode, struct file *filp)
 {
        if ((filp->f_mode & FMODE_WRITE) &&
-            MSDOS_SB(inode->i_sb)->options.flush) {
+           MSDOS_SB(inode->i_sb)->options.flush) {
                fat_flush_inodes(inode->i_sb, inode, NULL);
-               congestion_wait(BLK_RW_ASYNC, HZ/10);
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               io_schedule_timeout(HZ/10);
        }
        return 0;
 }
index 456e87a..68b4240 100644 (file)
@@ -260,8 +260,10 @@ struct hfsplus_cat_folder {
        __be32 access_date;
        __be32 backup_date;
        struct hfsplus_perm permissions;
-       struct DInfo user_info;
-       struct DXInfo finder_info;
+       struct_group_attr(info, __packed,
+               struct DInfo user_info;
+               struct DXInfo finder_info;
+       );
        __be32 text_encoding;
        __be32 subfolders;      /* Subfolder count in HFSX. Reserved in HFS+. */
 } __packed;
@@ -294,8 +296,10 @@ struct hfsplus_cat_file {
        __be32 access_date;
        __be32 backup_date;
        struct hfsplus_perm permissions;
-       struct FInfo user_info;
-       struct FXInfo finder_info;
+       struct_group_attr(info, __packed,
+               struct FInfo user_info;
+               struct FXInfo finder_info;
+       );
        __be32 text_encoding;
        u32 reserved2;
 
index e2855ce..49891b1 100644 (file)
@@ -296,7 +296,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
                                        sizeof(hfsplus_cat_entry));
                if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
                        if (size == folder_finderinfo_len) {
-                               memcpy(&entry.folder.user_info, value,
+                               memcpy(&entry.folder.info, value,
                                                folder_finderinfo_len);
                                hfs_bnode_write(cat_fd.bnode, &entry,
                                        cat_fd.entryoffset,
@@ -309,7 +309,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
                        }
                } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
                        if (size == file_finderinfo_len) {
-                               memcpy(&entry.file.user_info, value,
+                               memcpy(&entry.file.info, value,
                                                file_finderinfo_len);
                                hfs_bnode_write(cat_fd.bnode, &entry,
                                        cat_fd.entryoffset,
index a776312..bb7f161 100644 (file)
@@ -48,7 +48,8 @@ struct io_worker {
        struct io_wqe *wqe;
 
        struct io_wq_work *cur_work;
-       spinlock_t lock;
+       struct io_wq_work *next_work;
+       raw_spinlock_t lock;
 
        struct completion ref_done;
 
@@ -405,8 +406,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
  * Worker will start processing some work. Move it to the busy list, if
  * it's currently on the freelist
  */
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
-                            struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
        __must_hold(wqe->lock)
 {
        if (worker->flags & IO_WORKER_F_FREE) {
@@ -529,9 +529,10 @@ static void io_assign_current_work(struct io_worker *worker,
                cond_resched();
        }
 
-       spin_lock(&worker->lock);
+       raw_spin_lock(&worker->lock);
        worker->cur_work = work;
-       spin_unlock(&worker->lock);
+       worker->next_work = NULL;
+       raw_spin_unlock(&worker->lock);
 }
 
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -546,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 
        do {
                struct io_wq_work *work;
-get_next:
+
                /*
                 * If we got some work, mark us as busy. If we didn't, but
                 * the list isn't empty, it means we stalled on hashed work.
@@ -555,9 +556,20 @@ get_next:
                 * clear the stalled flag.
                 */
                work = io_get_next_work(acct, worker);
-               if (work)
-                       __io_worker_busy(wqe, worker, work);
-
+               if (work) {
+                       __io_worker_busy(wqe, worker);
+
+                       /*
+                        * Make sure cancelation can find this, even before
+                        * it becomes the active work. That avoids a window
+                        * where the work has been removed from our general
+                        * work list, but isn't yet discoverable as the
+                        * current work item for this worker.
+                        */
+                       raw_spin_lock(&worker->lock);
+                       worker->next_work = work;
+                       raw_spin_unlock(&worker->lock);
+               }
                raw_spin_unlock(&wqe->lock);
                if (!work)
                        break;
@@ -594,11 +606,6 @@ get_next:
                                spin_unlock_irq(&wq->hash->wait.lock);
                                if (wq_has_sleeper(&wq->hash->wait))
                                        wake_up(&wq->hash->wait);
-                               raw_spin_lock(&wqe->lock);
-                               /* skip unnecessary unlock-lock wqe->lock */
-                               if (!work)
-                                       goto get_next;
-                               raw_spin_unlock(&wqe->lock);
                        }
                } while (work);
 
@@ -815,7 +822,7 @@ fail:
 
        refcount_set(&worker->ref, 1);
        worker->wqe = wqe;
-       spin_lock_init(&worker->lock);
+       raw_spin_lock_init(&worker->lock);
        init_completion(&worker->ref_done);
 
        if (index == IO_WQ_ACCT_BOUND)
@@ -973,6 +980,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
        work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
 }
 
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+                                 struct io_cb_cancel_data *match,
+                                 struct io_wq_work *work)
+{
+       if (work && match->fn(work, match->data)) {
+               work->flags |= IO_WQ_WORK_CANCEL;
+               set_notify_signal(worker->task);
+               return true;
+       }
+
+       return false;
+}
+
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
        struct io_cb_cancel_data *match = data;
@@ -981,13 +1001,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
         * Hold the lock to avoid ->cur_work going out of scope, caller
         * may dereference the passed in work.
         */
-       spin_lock(&worker->lock);
-       if (worker->cur_work &&
-           match->fn(worker->cur_work, match->data)) {
-               set_notify_signal(worker->task);
+       raw_spin_lock(&worker->lock);
+       if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+           __io_wq_worker_cancel(worker, match, worker->next_work))
                match->nr_running++;
-       }
-       spin_unlock(&worker->lock);
+       raw_spin_unlock(&worker->lock);
 
        return match->nr_running && !match->cancel_all;
 }
@@ -1039,17 +1057,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
 {
        int i;
 retry:
-       raw_spin_lock(&wqe->lock);
        for (i = 0; i < IO_WQ_ACCT_NR; i++) {
                struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
 
                if (io_acct_cancel_pending_work(wqe, acct, match)) {
+                       raw_spin_lock(&wqe->lock);
                        if (match->cancel_all)
                                goto retry;
-                       return;
+                       break;
                }
        }
-       raw_spin_unlock(&wqe->lock);
 }
 
 static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1074,25 +1091,27 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
         * First check pending list, if we're lucky we can just remove it
         * from there. CANCEL_OK means that the work is returned as-new,
         * no completion will be posted for it.
-        */
-       for_each_node(node) {
-               struct io_wqe *wqe = wq->wqes[node];
-
-               io_wqe_cancel_pending_work(wqe, &match);
-               if (match.nr_pending && !match.cancel_all)
-                       return IO_WQ_CANCEL_OK;
-       }
-
-       /*
-        * Now check if a free (going busy) or busy worker has the work
+        *
+        * Then check if a free (going busy) or busy worker has the work
         * currently running. If we find it there, we'll return CANCEL_RUNNING
         * as an indication that we attempt to signal cancellation. The
         * completion will run normally in this case.
+        *
+        * Do both of these while holding the wqe->lock, to ensure that
+        * we'll find a work item regardless of state.
         */
        for_each_node(node) {
                struct io_wqe *wqe = wq->wqes[node];
 
+               raw_spin_lock(&wqe->lock);
+               io_wqe_cancel_pending_work(wqe, &match);
+               if (match.nr_pending && !match.cancel_all) {
+                       raw_spin_unlock(&wqe->lock);
+                       return IO_WQ_CANCEL_OK;
+               }
+
                io_wqe_cancel_running_work(wqe, &match);
+               raw_spin_unlock(&wqe->lock);
                if (match.nr_running && !match.cancel_all)
                        return IO_WQ_CANCEL_RUNNING;
        }
@@ -1263,7 +1282,9 @@ static void io_wq_destroy(struct io_wq *wq)
                        .fn             = io_wq_work_match_all,
                        .cancel_all     = true,
                };
+               raw_spin_lock(&wqe->lock);
                io_wqe_cancel_pending_work(wqe, &match);
+               raw_spin_unlock(&wqe->lock);
                free_cpumask_var(wqe->cpu_mask);
                kfree(wqe);
        }
index de9c9de..e54c412 100644 (file)
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
        return atomic_dec_and_test(&req->refs);
 }
 
-static inline void req_ref_put(struct io_kiocb *req)
-{
-       WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
-       WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
 static inline void req_ref_get(struct io_kiocb *req)
 {
        WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 
 static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
 {
-       struct wait_queue_head *head = poll->head;
+       struct wait_queue_head *head = smp_load_acquire(&poll->head);
 
-       spin_lock_irq(&head->lock);
-       list_del_init(&poll->wait.entry);
-       poll->head = NULL;
-       spin_unlock_irq(&head->lock);
+       if (head) {
+               spin_lock_irq(&head->lock);
+               list_del_init(&poll->wait.entry);
+               poll->head = NULL;
+               spin_unlock_irq(&head->lock);
+       }
 }
 
 static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
        struct io_poll_iocb *poll = io_poll_get_single(req);
        struct io_poll_iocb *poll_double = io_poll_get_double(req);
 
-       if (poll->head)
-               io_poll_remove_entry(poll);
-       if (poll_double && poll_double->head)
+       /*
+        * While we hold the waitqueue lock and the waitqueue is nonempty,
+        * wake_up_pollfree() will wait for us.  However, taking the waitqueue
+        * lock in the first place can race with the waitqueue being freed.
+        *
+        * We solve this as eventpoll does: by taking advantage of the fact that
+        * all users of wake_up_pollfree() will RCU-delay the actual free.  If
+        * we enter rcu_read_lock() and see that the pointer to the queue is
+        * non-NULL, we can then lock it without the memory being freed out from
+        * under us.
+        *
+        * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+        * case the caller deletes the entry from the queue, leaving it empty.
+        * In that case, only RCU prevents the queue memory from being freed.
+        */
+       rcu_read_lock();
+       io_poll_remove_entry(poll);
+       if (poll_double)
                io_poll_remove_entry(poll_double);
+       rcu_read_unlock();
 }
 
 /*
@@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                                                 wait);
        __poll_t mask = key_to_poll(key);
 
+       if (unlikely(mask & POLLFREE)) {
+               io_poll_mark_cancelled(req);
+               /* we have to kick tw in case it's not already */
+               io_poll_execute(req, 0);
+
+               /*
+                * If the waitqueue is being freed early but someone is already
+                * holds ownership over it, we have to tear down the request as
+                * best we can. That means immediately removing the request from
+                * its waitqueue and preventing all further accesses to the
+                * waitqueue via the request.
+                */
+               list_del_init(&poll->wait.entry);
+
+               /*
+                * Careful: this *must* be the last step, since as soon
+                * as req->head is NULL'ed out, the request can be
+                * completed and freed, since aio_poll_complete_work()
+                * will no longer need to take the waitqueue lock.
+                */
+               smp_store_release(&poll->head, NULL);
+               return 1;
+       }
+
        /* for instances that support it check for an event match first */
        if (mask && !(mask & poll->events))
                return 0;
@@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
        WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
 
        ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
-       if (ret != -ENOENT)
-               return ret;
+       /*
+        * Fall-through even for -EALREADY, as we may have poll armed
+        * that need unarming.
+        */
+       if (!ret)
+               return 0;
 
        spin_lock(&ctx->completion_lock);
+       ret = io_poll_cancel(ctx, sqe_addr, false);
+       if (ret != -ENOENT)
+               goto out;
+
        spin_lock_irq(&ctx->timeout_lock);
        ret = io_timeout_cancel(ctx, sqe_addr);
        spin_unlock_irq(&ctx->timeout_lock);
-       if (ret != -ENOENT)
-               goto out;
-       ret = io_poll_cancel(ctx, sqe_addr, false);
 out:
        spin_unlock(&ctx->completion_lock);
        return ret;
index b014f46..c03eba0 100644 (file)
 #include "ksmbd_spnego_negtokeninit.asn1.h"
 #include "ksmbd_spnego_negtokentarg.asn1.h"
 
-#define SPNEGO_OID_LEN 7
 #define NTLMSSP_OID_LEN  10
-#define KRB5_OID_LEN  7
-#define KRB5U2U_OID_LEN  8
-#define MSKRB5_OID_LEN  7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
 
 static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
        0x82, 0x37, 0x02, 0x02, 0x0a };
 
-static bool
-asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
-                 unsigned long *subid)
-{
-       const unsigned char *ptr = *begin;
-       unsigned char ch;
-
-       *subid = 0;
-
-       do {
-               if (ptr >= end)
-                       return false;
-
-               ch = *ptr++;
-               *subid <<= 7;
-               *subid |= ch & 0x7F;
-       } while ((ch & 0x80) == 0x80);
-
-       *begin = ptr;
-       return true;
-}
-
-static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
-                           unsigned long **oid, size_t *oidlen)
-{
-       const unsigned char *iptr = value, *end = value + vlen;
-       unsigned long *optr;
-       unsigned long subid;
-
-       vlen += 1;
-       if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
-               goto fail_nullify;
-
-       *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
-       if (!*oid)
-               return false;
-
-       optr = *oid;
-
-       if (!asn1_subid_decode(&iptr, end, &subid))
-               goto fail;
-
-       if (subid < 40) {
-               optr[0] = 0;
-               optr[1] = subid;
-       } else if (subid < 80) {
-               optr[0] = 1;
-               optr[1] = subid - 40;
-       } else {
-               optr[0] = 2;
-               optr[1] = subid - 80;
-       }
-
-       *oidlen = 2;
-       optr += 2;
-
-       while (iptr < end) {
-               if (++(*oidlen) > vlen)
-                       goto fail;
-
-               if (!asn1_subid_decode(&iptr, end, optr++))
-                       goto fail;
-       }
-       return true;
-
-fail:
-       kfree(*oid);
-fail_nullify:
-       *oid = NULL;
-       return false;
-}
-
-static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
-                  unsigned long *oid2, unsigned int oid2len)
-{
-       if (oid1len != oid2len)
-               return false;
-
-       return memcmp(oid1, oid2, oid1len) == 0;
-}
-
 int
 ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
                          struct ksmbd_conn *conn)
@@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
 int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
                           const void *value, size_t vlen)
 {
-       unsigned long *oid;
-       size_t oidlen;
-       int err = 0;
-
-       if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
-               err = -EBADMSG;
-               goto out;
-       }
+       enum OID oid;
 
-       if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
-               err = -EBADMSG;
-       kfree(oid);
-out:
-       if (err) {
+       oid = look_up_OID(value, vlen);
+       if (oid != OID_spnego) {
                char buf[50];
 
                sprint_oid(value, vlen, buf, sizeof(buf));
                ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+               return -EBADMSG;
        }
-       return err;
+
+       return 0;
 }
 
 int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
@@ -279,37 +181,31 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
                                   size_t vlen)
 {
        struct ksmbd_conn *conn = context;
-       unsigned long *oid;
-       size_t oidlen;
+       enum OID oid;
        int mech_type;
-       char buf[50];
 
-       if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
-               goto fail;
-
-       if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+       oid = look_up_OID(value, vlen);
+       if (oid == OID_ntlmssp) {
                mech_type = KSMBD_AUTH_NTLMSSP;
-       else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+       } else if (oid == OID_mskrb5) {
                mech_type = KSMBD_AUTH_MSKRB5;
-       else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+       } else if (oid == OID_krb5) {
                mech_type = KSMBD_AUTH_KRB5;
-       else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+       } else if (oid == OID_krb5u2u) {
                mech_type = KSMBD_AUTH_KRB5U2U;
-       else
-               goto fail;
+       } else {
+               char buf[50];
+
+               sprint_oid(value, vlen, buf, sizeof(buf));
+               ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+               return -EBADMSG;
+       }
 
        conn->auth_mechs |= mech_type;
        if (conn->preferred_auth_mech == 0)
                conn->preferred_auth_mech = mech_type;
 
-       kfree(oid);
        return 0;
-
-fail:
-       kfree(oid);
-       sprint_oid(value, vlen, buf, sizeof(buf));
-       ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
-       return -EBADMSG;
 }
 
 int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
index 3503b1c..dc3d061 100644 (file)
@@ -215,7 +215,7 @@ out:
  * Return:     0 on success, error number on error
  */
 int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
-                     int blen, char *domain_name)
+                     int blen, char *domain_name, char *cryptkey)
 {
        char ntlmv2_hash[CIFS_ENCPWD_SIZE];
        char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
@@ -256,7 +256,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
                goto out;
        }
 
-       memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+       memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE);
        memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
 
        rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
@@ -295,7 +295,8 @@ out:
  * Return:     0 on success, error number on error
  */
 int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
-                                  int blob_len, struct ksmbd_session *sess)
+                                  int blob_len, struct ksmbd_conn *conn,
+                                  struct ksmbd_session *sess)
 {
        char *domain_name;
        unsigned int nt_off, dn_off;
@@ -324,7 +325,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
 
        /* TODO : use domain name that imported from configuration file */
        domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
-                                            dn_len, true, sess->conn->local_nls);
+                                            dn_len, true, conn->local_nls);
        if (IS_ERR(domain_name))
                return PTR_ERR(domain_name);
 
@@ -333,7 +334,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
                    domain_name);
        ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
                                nt_len - CIFS_ENCPWD_SIZE,
-                               domain_name);
+                               domain_name, conn->ntlmssp.cryptkey);
        kfree(domain_name);
        return ret;
 }
@@ -347,7 +348,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
  *
  */
 int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
-                                 int blob_len, struct ksmbd_session *sess)
+                                 int blob_len, struct ksmbd_conn *conn)
 {
        if (blob_len < sizeof(struct negotiate_message)) {
                ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
@@ -361,7 +362,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
                return -EINVAL;
        }
 
-       sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+       conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
        return 0;
 }
 
@@ -375,14 +376,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
  */
 unsigned int
 ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
-                                  struct ksmbd_session *sess)
+                                  struct ksmbd_conn *conn)
 {
        struct target_info *tinfo;
        wchar_t *name;
        __u8 *target_name;
        unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
        int len, uni_len, conv_len;
-       int cflags = sess->ntlmssp.client_flags;
+       int cflags = conn->ntlmssp.client_flags;
 
        memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
        chgblob->MessageType = NtLmChallenge;
@@ -403,7 +404,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
        if (cflags & NTLMSSP_REQUEST_TARGET)
                flags |= NTLMSSP_REQUEST_TARGET;
 
-       if (sess->conn->use_spnego &&
+       if (conn->use_spnego &&
            (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
                flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
 
@@ -414,7 +415,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
                return -ENOMEM;
 
        conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
-                                 sess->conn->local_nls);
+                                 conn->local_nls);
        if (conv_len < 0 || conv_len > len) {
                kfree(name);
                return -EINVAL;
@@ -430,8 +431,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
        chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
 
        /* Initialize random conn challenge */
-       get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
-       memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+       get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64));
+       memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey,
               CIFS_CRYPTO_KEY_SIZE);
 
        /* Add Target Information to security buffer */
index 9c2d4ba..9562965 100644 (file)
@@ -38,16 +38,16 @@ struct kvec;
 int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
                        unsigned int nvec, int enc);
 void ksmbd_copy_gss_neg_header(void *buf);
-int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
 int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
-                     int blen, char *domain_name);
+                     int blen, char *domain_name, char *cryptkey);
 int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
-                                  int blob_len, struct ksmbd_session *sess);
+                                  int blob_len, struct ksmbd_conn *conn,
+                                  struct ksmbd_session *sess);
 int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
-                                 int blob_len, struct ksmbd_session *sess);
+                                 int blob_len, struct ksmbd_conn *conn);
 unsigned int
 ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
-                                  struct ksmbd_session *sess);
+                                  struct ksmbd_conn *conn);
 int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
                            int in_len, char *out_blob, int *out_len);
 int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
index 83a94d0..208d2cf 100644 (file)
@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
        atomic_set(&conn->req_running, 0);
        atomic_set(&conn->r_count, 0);
        conn->total_credits = 1;
+       conn->outstanding_credits = 1;
 
        init_waitqueue_head(&conn->req_running_q);
        INIT_LIST_HEAD(&conn->conns_list);
@@ -386,17 +387,24 @@ out:
 static void stop_sessions(void)
 {
        struct ksmbd_conn *conn;
+       struct ksmbd_transport *t;
 
 again:
        read_lock(&conn_list_lock);
        list_for_each_entry(conn, &conn_list, conns_list) {
                struct task_struct *task;
 
-               task = conn->transport->handler;
+               t = conn->transport;
+               task = t->handler;
                if (task)
                        ksmbd_debug(CONN, "Stop session handler %s/%d\n",
                                    task->comm, task_pid_nr(task));
                conn->status = KSMBD_SESS_EXITING;
+               if (t->ops->shutdown) {
+                       read_unlock(&conn_list_lock);
+                       t->ops->shutdown(t);
+                       read_lock(&conn_list_lock);
+               }
        }
        read_unlock(&conn_list_lock);
 
index e5403c5..7a59aac 100644 (file)
@@ -61,8 +61,8 @@ struct ksmbd_conn {
        atomic_t                        req_running;
        /* References which are made for this Server object*/
        atomic_t                        r_count;
-       unsigned short                  total_credits;
-       unsigned short                  max_credits;
+       unsigned int                    total_credits;
+       unsigned int                    outstanding_credits;
        spinlock_t                      credits_lock;
        wait_queue_head_t               req_running_q;
        /* Lock to protect requests list*/
@@ -72,12 +72,7 @@ struct ksmbd_conn {
        int                             connection_type;
        struct ksmbd_stats              stats;
        char                            ClientGUID[SMB2_CLIENT_GUID_SIZE];
-       union {
-               /* pending trans request table */
-               struct trans_state      *recent_trans;
-               /* Used by ntlmssp */
-               char                    *ntlmssp_cryptkey;
-       };
+       struct ntlmssp_auth             ntlmssp;
 
        spinlock_t                      llist_lock;
        struct list_head                lock_list;
@@ -122,6 +117,7 @@ struct ksmbd_conn_ops {
 struct ksmbd_transport_ops {
        int (*prepare)(struct ksmbd_transport *t);
        void (*disconnect)(struct ksmbd_transport *t);
+       void (*shutdown)(struct ksmbd_transport *t);
        int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
        int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
                      int size, bool need_invalidate_rkey,
index c6718a0..71bfb7d 100644 (file)
@@ -103,6 +103,8 @@ struct ksmbd_startup_request {
                                         * we set the SPARSE_FILES bit (0x40).
                                         */
        __u32   sub_auth[3];            /* Subauth value for Security ID */
+       __u32   smb2_max_credits;       /* MAX credits */
+       __u32   reserved[128];          /* Reserved room */
        __u32   ifc_list_sz;            /* interfaces list size */
        __s8    ____payload[];
 };
@@ -113,7 +115,7 @@ struct ksmbd_startup_request {
  * IPC request to shutdown ksmbd server.
  */
 struct ksmbd_shutdown_request {
-       __s32   reserved;
+       __s32   reserved[16];
 };
 
 /*
@@ -122,6 +124,7 @@ struct ksmbd_shutdown_request {
 struct ksmbd_login_request {
        __u32   handle;
        __s8    account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+       __u32   reserved[16];                           /* Reserved room */
 };
 
 /*
@@ -135,6 +138,7 @@ struct ksmbd_login_response {
        __u16   status;
        __u16   hash_sz;                        /* hash size */
        __s8    hash[KSMBD_REQ_MAX_HASH_SZ];    /* password hash */
+       __u32   reserved[16];                   /* Reserved room */
 };
 
 /*
@@ -143,6 +147,7 @@ struct ksmbd_login_response {
 struct ksmbd_share_config_request {
        __u32   handle;
        __s8    share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+       __u32   reserved[16];           /* Reserved room */
 };
 
 /*
@@ -157,6 +162,7 @@ struct ksmbd_share_config_response {
        __u16   force_directory_mode;
        __u16   force_uid;
        __u16   force_gid;
+       __u32   reserved[128];          /* Reserved room */
        __u32   veto_list_sz;
        __s8    ____payload[];
 };
@@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request {
        __s8    account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
        __s8    share[KSMBD_REQ_MAX_SHARE_NAME];
        __s8    peer_addr[64];
+       __u32   reserved[16];           /* Reserved room */
 };
 
 /*
@@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response {
        __u32   handle;
        __u16   status;
        __u16   connection_flags;
+       __u32   reserved[16];           /* Reserved room */
 };
 
 /*
@@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response {
 struct ksmbd_tree_disconnect_request {
        __u64   session_id;     /* session id */
        __u64   connect_id;     /* tree connection id */
+       __u32   reserved[16];   /* Reserved room */
 };
 
 /*
@@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request {
 struct ksmbd_logout_request {
        __s8    account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
        __u32   account_flags;
+       __u32   reserved[16];                           /* Reserved room */
 };
 
 /*
index 1019d36..279d00f 100644 (file)
@@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user)
                return 1;
        return 0;
 }
+
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2)
+{
+       if (strcmp(u1->name, u2->name))
+               return false;
+       if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz))
+               return false;
+
+       return true;
+}
index aff80b0..6a44109 100644 (file)
@@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account);
 struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
 void ksmbd_free_user(struct ksmbd_user *user);
 int ksmbd_anonymous_user(struct ksmbd_user *user);
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2);
 #endif /* __USER_CONFIG_MANAGEMENT_H__ */
index 82289c3..e241f16 100644 (file)
@@ -45,7 +45,6 @@ struct ksmbd_session {
        int                             state;
        __u8                            *Preauth_HashValue;
 
-       struct ntlmssp_auth             ntlmssp;
        char                            sess_key[CIFS_KEY_SIZE];
 
        struct hlist_node               hlist;
index 50d0b10..4a94601 100644 (file)
@@ -289,7 +289,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
        unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
        unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
        void *__hdr = hdr;
-       int ret;
+       int ret = 0;
 
        switch (hdr->Command) {
        case SMB2_QUERY_INFO:
@@ -326,21 +326,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
                ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
                            credit_charge, calc_credit_num);
                return 1;
-       } else if (credit_charge > conn->max_credits) {
+       } else if (credit_charge > conn->vals->max_credits) {
                ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
                return 1;
        }
 
        spin_lock(&conn->credits_lock);
-       if (credit_charge <= conn->total_credits) {
-               conn->total_credits -= credit_charge;
-               ret = 0;
-       } else {
+       if (credit_charge > conn->total_credits) {
                ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
                            credit_charge, conn->total_credits);
                ret = 1;
        }
+
+       if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) {
+               ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n",
+                           credit_charge, conn->outstanding_credits);
+               ret = 1;
+       } else
+               conn->outstanding_credits += credit_charge;
+
        spin_unlock(&conn->credits_lock);
+
        return ret;
 }
 
index 02a44d2..ab23da2 100644 (file)
@@ -19,6 +19,7 @@ static struct smb_version_values smb21_server_values = {
        .max_read_size = SMB21_DEFAULT_IOSIZE,
        .max_write_size = SMB21_DEFAULT_IOSIZE,
        .max_trans_size = SMB21_DEFAULT_IOSIZE,
+       .max_credits = SMB2_MAX_CREDITS,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -44,6 +45,7 @@ static struct smb_version_values smb30_server_values = {
        .max_read_size = SMB3_DEFAULT_IOSIZE,
        .max_write_size = SMB3_DEFAULT_IOSIZE,
        .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+       .max_credits = SMB2_MAX_CREDITS,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -70,6 +72,7 @@ static struct smb_version_values smb302_server_values = {
        .max_read_size = SMB3_DEFAULT_IOSIZE,
        .max_write_size = SMB3_DEFAULT_IOSIZE,
        .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+       .max_credits = SMB2_MAX_CREDITS,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -96,6 +99,7 @@ static struct smb_version_values smb311_server_values = {
        .max_read_size = SMB3_DEFAULT_IOSIZE,
        .max_write_size = SMB3_DEFAULT_IOSIZE,
        .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+       .max_credits = SMB2_MAX_CREDITS,
        .large_lock_type = 0,
        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
        .shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -197,7 +201,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
        conn->ops = &smb2_0_server_ops;
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
-       conn->max_credits = SMB2_MAX_CREDITS;
        conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -215,7 +218,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
        conn->ops = &smb3_0_server_ops;
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
-       conn->max_credits = SMB2_MAX_CREDITS;
        conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -240,7 +242,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
        conn->ops = &smb3_0_server_ops;
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
-       conn->max_credits = SMB2_MAX_CREDITS;
        conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -265,7 +266,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
        conn->ops = &smb3_11_server_ops;
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
-       conn->max_credits = SMB2_MAX_CREDITS;
        conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -304,3 +304,11 @@ void init_smb2_max_trans_size(unsigned int sz)
        smb302_server_values.max_trans_size = sz;
        smb311_server_values.max_trans_size = sz;
 }
+
+void init_smb2_max_credits(unsigned int sz)
+{
+       smb21_server_values.max_credits = sz;
+       smb30_server_values.max_credits = sz;
+       smb302_server_values.max_credits = sz;
+       smb311_server_values.max_credits = sz;
+}
index b8b3a4c..1866c81 100644 (file)
@@ -299,16 +299,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
        struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
        struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
        struct ksmbd_conn *conn = work->conn;
-       unsigned short credits_requested;
+       unsigned short credits_requested, aux_max;
        unsigned short credit_charge, credits_granted = 0;
-       unsigned short aux_max, aux_credits;
 
        if (work->send_no_response)
                return 0;
 
        hdr->CreditCharge = req_hdr->CreditCharge;
 
-       if (conn->total_credits > conn->max_credits) {
+       if (conn->total_credits > conn->vals->max_credits) {
                hdr->CreditRequest = 0;
                pr_err("Total credits overflow: %d\n", conn->total_credits);
                return -EINVAL;
@@ -316,6 +315,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
 
        credit_charge = max_t(unsigned short,
                              le16_to_cpu(req_hdr->CreditCharge), 1);
+       if (credit_charge > conn->total_credits) {
+               ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+                           credit_charge, conn->total_credits);
+               return -EINVAL;
+       }
+
+       conn->total_credits -= credit_charge;
+       conn->outstanding_credits -= credit_charge;
        credits_requested = max_t(unsigned short,
                                  le16_to_cpu(req_hdr->CreditRequest), 1);
 
@@ -325,16 +332,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
         * TODO: Need to adjuct CreditRequest value according to
         * current cpu load
         */
-       aux_credits = credits_requested - 1;
        if (hdr->Command == SMB2_NEGOTIATE)
-               aux_max = 0;
+               aux_max = 1;
        else
-               aux_max = conn->max_credits - credit_charge;
-       aux_credits = min_t(unsigned short, aux_credits, aux_max);
-       credits_granted = credit_charge + aux_credits;
+               aux_max = conn->vals->max_credits - credit_charge;
+       credits_granted = min_t(unsigned short, credits_requested, aux_max);
 
-       if (conn->max_credits - conn->total_credits < credits_granted)
-               credits_granted = conn->max_credits -
+       if (conn->vals->max_credits - conn->total_credits < credits_granted)
+               credits_granted = conn->vals->max_credits -
                        conn->total_credits;
 
        conn->total_credits += credits_granted;
@@ -610,16 +615,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id)
 
 /**
  * smb2_get_name() - get filename string from on the wire smb format
- * @share:     ksmbd_share_config pointer
  * @src:       source buffer
  * @maxlen:    maxlen of source string
- * @nls_table: nls_table pointer
+ * @local_nls: nls_table pointer
  *
  * Return:      matching converted filename on success, otherwise error ptr
  */
 static char *
-smb2_get_name(struct ksmbd_share_config *share, const char *src,
-             const int maxlen, struct nls_table *local_nls)
+smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
 {
        char *name;
 
@@ -1303,7 +1306,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
        int sz, rc;
 
        ksmbd_debug(SMB, "negotiate phase\n");
-       rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
+       rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn);
        if (rc)
                return rc;
 
@@ -1313,7 +1316,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
        memset(chgblob, 0, sizeof(struct challenge_message));
 
        if (!work->conn->use_spnego) {
-               sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+               sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
                if (sz < 0)
                        return -ENOMEM;
 
@@ -1329,7 +1332,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
                return -ENOMEM;
 
        chgblob = (struct challenge_message *)neg_blob;
-       sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+       sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
        if (sz < 0) {
                rc = -ENOMEM;
                goto out;
@@ -1450,60 +1453,62 @@ static int ntlm_authenticate(struct ksmbd_work *work)
                        ksmbd_free_user(user);
                        return 0;
                }
-               ksmbd_free_user(sess->user);
-       }
 
-       sess->user = user;
-       if (user_guest(sess->user)) {
-               if (conn->sign) {
-                       ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+               if (!ksmbd_compare_user(sess->user, user)) {
+                       ksmbd_free_user(user);
                        return -EPERM;
                }
+               ksmbd_free_user(user);
+       } else {
+               sess->user = user;
+       }
 
+       if (user_guest(sess->user)) {
                rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
        } else {
                struct authenticate_message *authblob;
 
                authblob = user_authblob(conn, req);
                sz = le16_to_cpu(req->SecurityBufferLength);
-               rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+               rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
                if (rc) {
                        set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
                        ksmbd_debug(SMB, "authentication failed\n");
                        return -EPERM;
                }
+       }
 
-               /*
-                * If session state is SMB2_SESSION_VALID, We can assume
-                * that it is reauthentication. And the user/password
-                * has been verified, so return it here.
-                */
-               if (sess->state == SMB2_SESSION_VALID) {
-                       if (conn->binding)
-                               goto binding_session;
-                       return 0;
-               }
+       /*
+        * If session state is SMB2_SESSION_VALID, we can assume
+        * that it is a reauthentication. The user/password
+        * has already been verified, so return here.
+        */
+       if (sess->state == SMB2_SESSION_VALID) {
+               if (conn->binding)
+                       goto binding_session;
+               return 0;
+       }
 
-               if ((conn->sign || server_conf.enforced_signing) ||
-                   (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
-                       sess->sign = true;
+       if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE &&
+            (conn->sign || server_conf.enforced_signing)) ||
+           (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+               sess->sign = true;
 
-               if (smb3_encryption_negotiated(conn) &&
-                   !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
-                       rc = conn->ops->generate_encryptionkey(sess);
-                       if (rc) {
-                               ksmbd_debug(SMB,
-                                           "SMB3 encryption key generation failed\n");
-                               return -EINVAL;
-                       }
-                       sess->enc = true;
-                       rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
-                       /*
-                        * signing is disable if encryption is enable
-                        * on this session
-                        */
-                       sess->sign = false;
+       if (smb3_encryption_negotiated(conn) &&
+                       !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+               rc = conn->ops->generate_encryptionkey(sess);
+               if (rc) {
+                       ksmbd_debug(SMB,
+                                       "SMB3 encryption key generation failed\n");
+                       return -EINVAL;
                }
+               sess->enc = true;
+               rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+               /*
+                * signing is disabled if encryption is enabled
+                * on this session
+                */
+               sess->sign = false;
        }
 
 binding_session:
@@ -2057,9 +2062,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
 
        ksmbd_debug(SMB, "request\n");
 
-       /* Got a valid session, set connection state */
-       WARN_ON(sess->conn != conn);
-
        /* setting CifsExiting here may race with start_tcp_sess */
        ksmbd_conn_set_need_reconnect(work);
        ksmbd_close_session_fds(work);
@@ -2530,8 +2532,7 @@ int smb2_open(struct ksmbd_work *work)
                        goto err_out1;
                }
 
-               name = smb2_get_name(share,
-                                    req->Buffer,
+               name = smb2_get_name(req->Buffer,
                                     le16_to_cpu(req->NameLength),
                                     work->conn->local_nls);
                if (IS_ERR(name)) {
@@ -3392,7 +3393,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
  * @conn:      connection instance
  * @info_level:        smb information level
  * @d_info:    structure included variables for query dir
- * @user_ns:   user namespace
  * @ksmbd_kstat:       ksmbd wrapper of dirent stat information
  *
  * if directory has many entries, find first can't read it fully.
@@ -4018,6 +4018,7 @@ err_out2:
  * buffer_check_err() - helper function to check buffer errors
  * @reqOutputBufferLength:     max buffer length expected in command response
  * @rsp:               query info response buffer contains output buffer length
+ * @rsp_org:           base response buffer pointer in case of chained response
  * @infoclass_size:    query info class response buffer size
  *
  * Return:     0 on success, otherwise error
@@ -5398,8 +5399,7 @@ static int smb2_rename(struct ksmbd_work *work,
                goto out;
        }
 
-       new_name = smb2_get_name(share,
-                                file_info->FileName,
+       new_name = smb2_get_name(file_info->FileName,
                                 le32_to_cpu(file_info->FileNameLength),
                                 local_nls);
        if (IS_ERR(new_name)) {
@@ -5510,8 +5510,7 @@ static int smb2_create_link(struct ksmbd_work *work,
        if (!pathname)
                return -ENOMEM;
 
-       link_name = smb2_get_name(share,
-                                 file_info->FileName,
+       link_name = smb2_get_name(file_info->FileName,
                                  le32_to_cpu(file_info->FileNameLength),
                                  local_nls);
        if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
@@ -5849,7 +5848,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
  * smb2_set_info_file() - handler for smb2 set info command
  * @work:      smb work containing set info command buffer
  * @fp:                ksmbd_file pointer
- * @info_class:        smb2 set info class
+ * @req:       request buffer pointer
  * @share:     ksmbd_share_config pointer
  *
  * Return:     0 on success, otherwise error
@@ -6121,25 +6120,33 @@ out:
        return err;
 }
 
-static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
-                                     struct smb2_read_req *req, void *data_buf,
-                                     size_t length)
+static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+                                       struct smb2_buffer_desc_v1 *desc,
+                                       __le32 Channel,
+                                       __le16 ChannelInfoOffset,
+                                       __le16 ChannelInfoLength)
 {
-       struct smb2_buffer_desc_v1 *desc =
-               (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
-       int err;
-
        if (work->conn->dialect == SMB30_PROT_ID &&
-           req->Channel != SMB2_CHANNEL_RDMA_V1)
+           Channel != SMB2_CHANNEL_RDMA_V1)
                return -EINVAL;
 
-       if (req->ReadChannelInfoOffset == 0 ||
-           le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+       if (ChannelInfoOffset == 0 ||
+           le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
                return -EINVAL;
 
        work->need_invalidate_rkey =
-               (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+               (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
        work->remote_key = le32_to_cpu(desc->token);
+       return 0;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+                                     struct smb2_read_req *req, void *data_buf,
+                                     size_t length)
+{
+       struct smb2_buffer_desc_v1 *desc =
+               (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+       int err;
 
        err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
                                    le32_to_cpu(desc->token),
@@ -6162,7 +6169,7 @@ int smb2_read(struct ksmbd_work *work)
        struct ksmbd_conn *conn = work->conn;
        struct smb2_read_req *req;
        struct smb2_read_rsp *rsp;
-       struct ksmbd_file *fp;
+       struct ksmbd_file *fp = NULL;
        loff_t offset;
        size_t length, mincount;
        ssize_t nbytes = 0, remain_bytes = 0;
@@ -6176,6 +6183,18 @@ int smb2_read(struct ksmbd_work *work)
                return smb2_read_pipe(work);
        }
 
+       if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+           req->Channel == SMB2_CHANNEL_RDMA_V1) {
+               err = smb2_set_remote_key_for_rdma(work,
+                                                  (struct smb2_buffer_desc_v1 *)
+                                                  &req->Buffer[0],
+                                                  req->Channel,
+                                                  req->ReadChannelInfoOffset,
+                                                  req->ReadChannelInfoLength);
+               if (err)
+                       goto out;
+       }
+
        fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
                                  le64_to_cpu(req->PersistentFileId));
        if (!fp) {
@@ -6361,21 +6380,6 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
 
        desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
 
-       if (work->conn->dialect == SMB30_PROT_ID &&
-           req->Channel != SMB2_CHANNEL_RDMA_V1)
-               return -EINVAL;
-
-       if (req->Length != 0 || req->DataOffset != 0)
-               return -EINVAL;
-
-       if (req->WriteChannelInfoOffset == 0 ||
-           le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
-               return -EINVAL;
-
-       work->need_invalidate_rkey =
-               (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
-       work->remote_key = le32_to_cpu(desc->token);
-
        data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
        if (!data_buf)
                return -ENOMEM;
@@ -6422,6 +6426,20 @@ int smb2_write(struct ksmbd_work *work)
                return smb2_write_pipe(work);
        }
 
+       if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+           req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+               if (req->Length != 0 || req->DataOffset != 0)
+                       return -EINVAL;
+               err = smb2_set_remote_key_for_rdma(work,
+                                                  (struct smb2_buffer_desc_v1 *)
+                                                  &req->Buffer[0],
+                                                  req->Channel,
+                                                  req->WriteChannelInfoOffset,
+                                                  req->WriteChannelInfoLength);
+               if (err)
+                       goto out;
+       }
+
        if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
                ksmbd_debug(SMB, "User does not have write permission\n");
                err = -EACCES;
@@ -7243,15 +7261,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
        struct sockaddr_storage_rsp *sockaddr_storage;
        unsigned int flags;
        unsigned long long speed;
-       struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
 
        rtnl_lock();
        for_each_netdev(&init_net, netdev) {
-               if (out_buf_len <
-                   nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
-                       rtnl_unlock();
-                       return -ENOSPC;
-               }
+               bool ipv4_set = false;
 
                if (netdev->type == ARPHRD_LOOPBACK)
                        continue;
@@ -7259,12 +7272,20 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
                flags = dev_get_flags(netdev);
                if (!(flags & IFF_RUNNING))
                        continue;
+ipv6_retry:
+               if (out_buf_len <
+                   nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+                       rtnl_unlock();
+                       return -ENOSPC;
+               }
 
                nii_rsp = (struct network_interface_info_ioctl_rsp *)
                                &rsp->Buffer[nbytes];
                nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
 
                nii_rsp->Capability = 0;
+               if (netdev->real_num_tx_queues > 1)
+                       nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE);
                if (ksmbd_rdma_capable_netdev(netdev))
                        nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
 
@@ -7289,8 +7310,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
                                        nii_rsp->SockAddr_Storage;
                memset(sockaddr_storage, 0, 128);
 
-               if (conn->peer_addr.ss_family == PF_INET ||
-                   ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+               if (!ipv4_set) {
                        struct in_device *idev;
 
                        sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
@@ -7301,6 +7321,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
                                continue;
                        sockaddr_storage->addr4.IPv4address =
                                                idev_ipv4_address(idev);
+                       nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+                       ipv4_set = true;
+                       goto ipv6_retry;
                } else {
                        struct inet6_dev *idev6;
                        struct inet6_ifaddr *ifa;
@@ -7322,9 +7345,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
                                break;
                        }
                        sockaddr_storage->addr6.ScopeId = 0;
+                       nbytes += sizeof(struct network_interface_info_ioctl_rsp);
                }
-
-               nbytes += sizeof(struct network_interface_info_ioctl_rsp);
        }
        rtnl_unlock();
 
index 4a3e433..725b800 100644 (file)
@@ -980,6 +980,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn);
 void init_smb2_max_read_size(unsigned int sz);
 void init_smb2_max_write_size(unsigned int sz);
 void init_smb2_max_trans_size(unsigned int sz);
+void init_smb2_max_credits(unsigned int sz);
 
 bool is_smb2_neg_cmd(struct ksmbd_work *work);
 bool is_smb2_rsp(struct ksmbd_work *work);
index 5059084..e1369b4 100644 (file)
@@ -365,6 +365,7 @@ struct smb_version_values {
        __u32           max_read_size;
        __u32           max_write_size;
        __u32           max_trans_size;
+       __u32           max_credits;
        __u32           large_lock_type;
        __u32           exclusive_lock_type;
        __u32           shared_lock_type;
index 1acf189..3ad6881 100644 (file)
@@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
                init_smb2_max_write_size(req->smb2_max_write);
        if (req->smb2_max_trans)
                init_smb2_max_trans_size(req->smb2_max_trans);
+       if (req->smb2_max_credits)
+               init_smb2_max_credits(req->smb2_max_credits);
 
        ret = ksmbd_set_netbios_name(req->netbios_name);
        ret |= ksmbd_set_server_string(req->server_string);
index 7e57cbb..3c1ec1a 100644 (file)
@@ -34,7 +34,8 @@
 #include "smbstatus.h"
 #include "transport_rdma.h"
 
-#define SMB_DIRECT_PORT        5445
+#define SMB_DIRECT_PORT_IWARP          5445
+#define SMB_DIRECT_PORT_INFINIBAND     445
 
 #define SMB_DIRECT_VERSION_LE          cpu_to_le16(0x0100)
 
  * as defined in [MS-SMBD] 3.1.1.1
  * Those may change after a SMB_DIRECT negotiation
  */
+
+/* Set 445 port to SMB Direct port by default */
+static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
+
 /* The local peer's maximum number of credits to grant to the peer */
 static int smb_direct_receive_credit_max = 255;
 
@@ -75,10 +80,18 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
 /*  The maximum single-message size which can be received */
 static int smb_direct_max_receive_size = 8192;
 
-static int smb_direct_max_read_write_size = 1024 * 1024;
+static int smb_direct_max_read_write_size = 1048512;
 
 static int smb_direct_max_outstanding_rw_ops = 8;
 
+static LIST_HEAD(smb_direct_device_list);
+static DEFINE_RWLOCK(smb_direct_device_lock);
+
+struct smb_direct_device {
+       struct ib_device        *ib_dev;
+       struct list_head        list;
+};
+
 static struct smb_direct_listener {
        struct rdma_cm_id       *cm_id;
 } smb_direct_listener;
@@ -415,6 +428,7 @@ static void free_transport(struct smb_direct_transport *t)
 
        if (t->qp) {
                ib_drain_qp(t->qp);
+               ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
                ib_destroy_qp(t->qp);
        }
 
@@ -555,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
                }
                t->negotiation_requested = true;
                t->full_packet_received = true;
+               enqueue_reassembly(t, recvmsg, 0);
                wake_up_interruptible(&t->wait_status);
                break;
        case SMB_DIRECT_MSG_DATA_TRANSFER: {
@@ -1438,6 +1453,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t)
        free_transport(st);
 }
 
+static void smb_direct_shutdown(struct ksmbd_transport *t)
+{
+       struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+       ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
+
+       smb_direct_disconnect_rdma_work(&st->disconnect_work);
+}
+
 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
                                 struct rdma_cm_event *event)
 {
@@ -1581,19 +1605,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t)
                pr_err("error at rdma_accept: %d\n", ret);
                return ret;
        }
-
-       wait_event_interruptible(t->wait_status,
-                                t->status != SMB_DIRECT_CS_NEW);
-       if (t->status != SMB_DIRECT_CS_CONNECTED)
-               return -ENOTCONN;
        return 0;
 }
 
-static int smb_direct_negotiate(struct smb_direct_transport *t)
+static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
 {
        int ret;
        struct smb_direct_recvmsg *recvmsg;
-       struct smb_direct_negotiate_req *req;
 
        recvmsg = get_free_recvmsg(t);
        if (!recvmsg)
@@ -1603,44 +1621,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t)
        ret = smb_direct_post_recv(t, recvmsg);
        if (ret) {
                pr_err("Can't post recv: %d\n", ret);
-               goto out;
+               goto out_err;
        }
 
        t->negotiation_requested = false;
        ret = smb_direct_accept_client(t);
        if (ret) {
                pr_err("Can't accept client\n");
-               goto out;
+               goto out_err;
        }
 
        smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
-
-       ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
-       ret = wait_event_interruptible_timeout(t->wait_status,
-                                              t->negotiation_requested ||
-                                               t->status == SMB_DIRECT_CS_DISCONNECTED,
-                                              SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
-       if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
-               ret = ret < 0 ? ret : -ETIMEDOUT;
-               goto out;
-       }
-
-       ret = smb_direct_check_recvmsg(recvmsg);
-       if (ret == -ECONNABORTED)
-               goto out;
-
-       req = (struct smb_direct_negotiate_req *)recvmsg->packet;
-       t->max_recv_size = min_t(int, t->max_recv_size,
-                                le32_to_cpu(req->preferred_send_size));
-       t->max_send_size = min_t(int, t->max_send_size,
-                                le32_to_cpu(req->max_receive_size));
-       t->max_fragmented_send_size =
-                       le32_to_cpu(req->max_fragmented_size);
-
-       ret = smb_direct_send_negotiate_response(t, ret);
-out:
-       if (recvmsg)
-               put_recvmsg(t, recvmsg);
+       return 0;
+out_err:
+       put_recvmsg(t, recvmsg);
        return ret;
 }
 
@@ -1724,7 +1718,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
        cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
        cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
        cap->max_inline_data = 0;
-       cap->max_rdma_ctxs = 0;
+       cap->max_rdma_ctxs =
+               rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
+               smb_direct_max_outstanding_rw_ops;
        return 0;
 }
 
@@ -1806,6 +1802,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
 {
        int ret;
        struct ib_qp_init_attr qp_attr;
+       int pages_per_rw;
 
        t->pd = ib_alloc_pd(t->cm_id->device, 0);
        if (IS_ERR(t->pd)) {
@@ -1853,6 +1850,23 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
        t->qp = t->cm_id->qp;
        t->cm_id->event_handler = smb_direct_cm_handler;
 
+       pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+       if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+               int pages_per_mr, mr_count;
+
+               pages_per_mr = min_t(int, pages_per_rw,
+                                    t->cm_id->device->attrs.max_fast_reg_page_list_len);
+               mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
+                       atomic_read(&t->rw_avail_ops);
+               ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
+                                     IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
+               if (ret) {
+                       pr_err("failed to init mr pool count %d pages %d\n",
+                              mr_count, pages_per_mr);
+                       goto err;
+               }
+       }
+
        return 0;
 err:
        if (t->qp) {
@@ -1877,6 +1891,49 @@ err:
 static int smb_direct_prepare(struct ksmbd_transport *t)
 {
        struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+       struct smb_direct_recvmsg *recvmsg;
+       struct smb_direct_negotiate_req *req;
+       int ret;
+
+       ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+       ret = wait_event_interruptible_timeout(st->wait_status,
+                                              st->negotiation_requested ||
+                                              st->status == SMB_DIRECT_CS_DISCONNECTED,
+                                              SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+       if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
+               return ret < 0 ? ret : -ETIMEDOUT;
+
+       recvmsg = get_first_reassembly(st);
+       if (!recvmsg)
+               return -ECONNABORTED;
+
+       ret = smb_direct_check_recvmsg(recvmsg);
+       if (ret == -ECONNABORTED)
+               goto out;
+
+       req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+       st->max_recv_size = min_t(int, st->max_recv_size,
+                                 le32_to_cpu(req->preferred_send_size));
+       st->max_send_size = min_t(int, st->max_send_size,
+                                 le32_to_cpu(req->max_receive_size));
+       st->max_fragmented_send_size =
+               le32_to_cpu(req->max_fragmented_size);
+       st->max_fragmented_recv_size =
+               (st->recv_credit_max * st->max_recv_size) / 2;
+
+       ret = smb_direct_send_negotiate_response(st, ret);
+out:
+       spin_lock_irq(&st->reassembly_queue_lock);
+       st->reassembly_queue_length--;
+       list_del(&recvmsg->list);
+       spin_unlock_irq(&st->reassembly_queue_lock);
+       put_recvmsg(st, recvmsg);
+
+       return ret;
+}
+
+static int smb_direct_connect(struct smb_direct_transport *st)
+{
        int ret;
        struct ib_qp_cap qp_cap;
 
@@ -1898,13 +1955,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
                return ret;
        }
 
-       ret = smb_direct_negotiate(st);
+       ret = smb_direct_prepare_negotiation(st);
        if (ret) {
                pr_err("Can't negotiate: %d\n", ret);
                return ret;
        }
-
-       st->status = SMB_DIRECT_CS_CONNECTED;
        return 0;
 }
 
@@ -1920,6 +1975,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
 {
        struct smb_direct_transport *t;
+       int ret;
 
        if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
                ksmbd_debug(RDMA,
@@ -1932,18 +1988,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
        if (!t)
                return -ENOMEM;
 
+       ret = smb_direct_connect(t);
+       if (ret)
+               goto out_err;
+
        KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
                                              KSMBD_TRANS(t)->conn, "ksmbd:r%u",
-                                             SMB_DIRECT_PORT);
+                                             smb_direct_port);
        if (IS_ERR(KSMBD_TRANS(t)->handler)) {
-               int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
-
+               ret = PTR_ERR(KSMBD_TRANS(t)->handler);
                pr_err("Can't start thread\n");
-               free_transport(t);
-               return ret;
+               goto out_err;
        }
 
        return 0;
+out_err:
+       free_transport(t);
+       return ret;
 }
 
 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
@@ -2007,12 +2068,65 @@ err:
        return ret;
 }
 
+static int smb_direct_ib_client_add(struct ib_device *ib_dev)
+{
+       struct smb_direct_device *smb_dev;
+
+       /* Set 5445 port if device type is iWARP(No IB) */
+       if (ib_dev->node_type != RDMA_NODE_IB_CA)
+               smb_direct_port = SMB_DIRECT_PORT_IWARP;
+
+       if (!ib_dev->ops.get_netdev ||
+           !rdma_frwr_is_supported(&ib_dev->attrs))
+               return 0;
+
+       smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
+       if (!smb_dev)
+               return -ENOMEM;
+       smb_dev->ib_dev = ib_dev;
+
+       write_lock(&smb_direct_device_lock);
+       list_add(&smb_dev->list, &smb_direct_device_list);
+       write_unlock(&smb_direct_device_lock);
+
+       ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
+       return 0;
+}
+
+static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
+                                       void *client_data)
+{
+       struct smb_direct_device *smb_dev, *tmp;
+
+       write_lock(&smb_direct_device_lock);
+       list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
+               if (smb_dev->ib_dev == ib_dev) {
+                       list_del(&smb_dev->list);
+                       kfree(smb_dev);
+                       break;
+               }
+       }
+       write_unlock(&smb_direct_device_lock);
+}
+
+static struct ib_client smb_direct_ib_client = {
+       .name   = "ksmbd_smb_direct_ib",
+       .add    = smb_direct_ib_client_add,
+       .remove = smb_direct_ib_client_remove,
+};
+
 int ksmbd_rdma_init(void)
 {
        int ret;
 
        smb_direct_listener.cm_id = NULL;
 
+       ret = ib_register_client(&smb_direct_ib_client);
+       if (ret) {
+               pr_err("failed to ib_register_client\n");
+               return ret;
+       }
+
        /* When a client is running out of send credits, the credits are
         * granted by the server's sending a packet using this queue.
         * This avoids the situation that a clients cannot send packets
@@ -2023,7 +2137,7 @@ int ksmbd_rdma_init(void)
        if (!smb_direct_wq)
                return -ENOMEM;
 
-       ret = smb_direct_listen(SMB_DIRECT_PORT);
+       ret = smb_direct_listen(smb_direct_port);
        if (ret) {
                destroy_workqueue(smb_direct_wq);
                smb_direct_wq = NULL;
@@ -2036,36 +2150,67 @@ int ksmbd_rdma_init(void)
        return 0;
 }
 
-int ksmbd_rdma_destroy(void)
+void ksmbd_rdma_destroy(void)
 {
-       if (smb_direct_listener.cm_id)
-               rdma_destroy_id(smb_direct_listener.cm_id);
+       if (!smb_direct_listener.cm_id)
+               return;
+
+       ib_unregister_client(&smb_direct_ib_client);
+       rdma_destroy_id(smb_direct_listener.cm_id);
+
        smb_direct_listener.cm_id = NULL;
 
        if (smb_direct_wq) {
                destroy_workqueue(smb_direct_wq);
                smb_direct_wq = NULL;
        }
-       return 0;
 }
 
 bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
 {
-       struct ib_device *ibdev;
+       struct smb_direct_device *smb_dev;
+       int i;
        bool rdma_capable = false;
 
-       ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
-       if (ibdev) {
-               if (rdma_frwr_is_supported(&ibdev->attrs))
-                       rdma_capable = true;
-               ib_device_put(ibdev);
+       read_lock(&smb_direct_device_lock);
+       list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
+               for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
+                       struct net_device *ndev;
+
+                       ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
+                                                              i + 1);
+                       if (!ndev)
+                               continue;
+
+                       if (ndev == netdev) {
+                               dev_put(ndev);
+                               rdma_capable = true;
+                               goto out;
+                       }
+                       dev_put(ndev);
+               }
+       }
+out:
+       read_unlock(&smb_direct_device_lock);
+
+       if (rdma_capable == false) {
+               struct ib_device *ibdev;
+
+               ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+               if (ibdev) {
+                       if (rdma_frwr_is_supported(&ibdev->attrs))
+                               rdma_capable = true;
+                       ib_device_put(ibdev);
+               }
        }
+
        return rdma_capable;
 }
 
 static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
        .prepare        = smb_direct_prepare,
        .disconnect     = smb_direct_disconnect,
+       .shutdown       = smb_direct_shutdown,
        .writev         = smb_direct_writev,
        .read           = smb_direct_read,
        .rdma_read      = smb_direct_rdma_read,
index 0fa8adc..5567d93 100644 (file)
@@ -7,8 +7,6 @@
 #ifndef __KSMBD_TRANSPORT_RDMA_H__
 #define __KSMBD_TRANSPORT_RDMA_H__
 
-#define SMB_DIRECT_PORT        5445
-
 /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
 struct smb_direct_negotiate_req {
        __le16 min_version;
@@ -52,7 +50,7 @@ struct smb_direct_data_transfer {
 
 #ifdef CONFIG_SMB_SERVER_SMBDIRECT
 int ksmbd_rdma_init(void);
-int ksmbd_rdma_destroy(void);
+void ksmbd_rdma_destroy(void);
 bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
 #else
 static inline int ksmbd_rdma_init(void) { return 0; }
index c14320e..82a1429 100644 (file)
@@ -404,7 +404,7 @@ static int create_socket(struct interface *iface)
                                  &ksmbd_socket);
                if (ret) {
                        pr_err("Can't create socket for ipv4: %d\n", ret);
-                       goto out_error;
+                       goto out_clear;
                }
 
                sin.sin_family = PF_INET;
@@ -462,6 +462,7 @@ static int create_socket(struct interface *iface)
 
 out_error:
        tcp_destroy_socket(ksmbd_socket);
+out_clear:
        iface->ksmbd_socket = NULL;
        return ret;
 }
index 448576f..36239ce 100644 (file)
@@ -96,16 +96,6 @@ struct ksmbd_file {
 
        int                             durable_timeout;
 
-       /* for SMB1 */
-       int                             pid;
-
-       /* conflict lock fail count for SMB1 */
-       unsigned int                    cflock_cnt;
-       /* last lock failure start offset for SMB1 */
-       unsigned long long              llock_fstart;
-
-       int                             dirent_offset;
-
        /* if ls is happening on directory, below is valid*/
        struct ksmbd_readdir_data       readdir_data;
        int                             dot_dotdot[2];
index bc3e2cd..063dd16 100644 (file)
@@ -195,12 +195,12 @@ void nilfs_page_bug(struct page *page)
  */
 static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
 {
-       struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
+       struct buffer_head *dbh, *dbufs, *sbh;
        unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
 
        BUG_ON(PageWriteback(dst));
 
-       sbh = sbufs = page_buffers(src);
+       sbh = page_buffers(src);
        if (!page_has_buffers(dst))
                create_empty_buffers(dst, sbh->b_size, 0);
 
index 43a7abd..fd8b0c1 100644 (file)
@@ -92,6 +92,7 @@
 #include <linux/string_helpers.h>
 #include <linux/user_namespace.h>
 #include <linux/fs_struct.h>
+#include <linux/kthread.h>
 
 #include <asm/processor.h>
 #include "internal.h"
@@ -102,6 +103,8 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
 
        if (p->flags & PF_WQ_WORKER)
                wq_worker_comm(tcomm, sizeof(tcomm), p);
+       else if (p->flags & PF_KTHREAD)
+               get_kthread_comm(tcomm, sizeof(tcomm), p);
        else
                __get_task_comm(tcomm, sizeof(tcomm), p);
 
index 13eda8d..d654ce7 100644 (file)
@@ -670,10 +670,10 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 /************************************************************************/
 
 /* permission checks */
-static int proc_fd_access_allowed(struct inode *inode)
+static bool proc_fd_access_allowed(struct inode *inode)
 {
        struct task_struct *task;
-       int allowed = 0;
+       bool allowed = false;
        /* Allow access to a task's file descriptors if it is us or we
         * may use ptrace attach to the process and find out that
         * information.
index 5d66fae..389e1e4 100644 (file)
@@ -163,7 +163,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
                else {
                        pr_err("sysctl duplicate entry: ");
                        sysctl_print_dir(head->parent);
-                       pr_cont("/%s\n", entry->procname);
+                       pr_cont("%s\n", entry->procname);
                        return -EEXIST;
                }
        }
@@ -1020,8 +1020,8 @@ failed:
        if (IS_ERR(subdir)) {
                pr_err("sysctl could not get directory: ");
                sysctl_print_dir(dir);
-               pr_cont("/%*.*s %ld\n",
-                       namelen, namelen, name, PTR_ERR(subdir));
+               pr_cont("%*.*s %ld\n", namelen, namelen, name,
+                       PTR_ERR(subdir));
        }
        drop_sysctl_table(&dir->header);
        if (new)
@@ -1053,7 +1053,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
        struct ctl_dir *dir;
        int ret;
 
-       ret = 0;
        spin_lock(&sysctl_lock);
        root = (*pentry)->data;
        set = lookup_header_set(root);
@@ -1626,7 +1625,7 @@ static void put_links(struct ctl_table_header *header)
                else {
                        pr_err("sysctl link missing during unregister: ");
                        sysctl_print_dir(parent);
-                       pr_cont("/%s\n", name);
+                       pr_cont("%s\n", name);
                }
        }
 }
index 509f851..702754d 100644 (file)
@@ -65,8 +65,6 @@ static size_t vmcoredd_orig_sz;
 static DECLARE_RWSEM(vmcore_cb_rwsem);
 /* List of registered vmcore callbacks. */
 static LIST_HEAD(vmcore_cb_list);
-/* Whether we had a surprise unregistration of a callback. */
-static bool vmcore_cb_unstable;
 /* Whether the vmcore has been opened once. */
 static bool vmcore_opened;
 
@@ -94,10 +92,8 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
         * very unusual (e.g., forced driver removal), but we cannot stop
         * unregistering.
         */
-       if (vmcore_opened) {
+       if (vmcore_opened)
                pr_warn_once("Unexpected vmcore callback unregistration\n");
-               vmcore_cb_unstable = true;
-       }
        up_write(&vmcore_cb_rwsem);
 }
 EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
@@ -108,8 +104,6 @@ static bool pfn_is_ram(unsigned long pfn)
        bool ret = true;
 
        lockdep_assert_held_read(&vmcore_cb_rwsem);
-       if (unlikely(vmcore_cb_unstable))
-               return false;
 
        list_for_each_entry(cb, &vmcore_cb_list, next) {
                if (unlikely(!cb->pfn_is_ram))
@@ -581,7 +575,7 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
         * looping over all pages without a reason.
         */
        down_read(&vmcore_cb_rwsem);
-       if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+       if (!list_empty(&vmcore_cb_list))
                ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
        else
                ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
index c43877c..505533c 100644 (file)
@@ -93,21 +93,6 @@ struct getbmapx {
 #define XFS_FMR_OWN_DEFECTIVE  FMR_OWNER('X', 8) /* bad blocks */
 
 /*
- * Structure for XFS_IOC_FSSETDM.
- * For use by backup and restore programs to set the XFS on-disk inode
- * fields di_dmevmask and di_dmstate.  These must be set to exactly and
- * only values previously obtained via xfs_bulkstat!  (Specifically the
- * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
- */
-#ifndef HAVE_FSDMIDATA
-struct fsdmidata {
-       __u32           fsd_dmevmask;   /* corresponds to di_dmevmask */
-       __u16           fsd_padding;
-       __u16           fsd_dmstate;    /* corresponds to di_dmstate  */
-};
-#endif
-
-/*
  * File segment locking set data type for 64 bit access.
  * Also used for all the RESV/FREE interfaces.
  */
@@ -562,16 +547,10 @@ typedef struct xfs_fsop_handlereq {
 
 /*
  * Compound structures for passing args through Handle Request interfaces
- * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle
- * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and
- *          XFS_IOC_ATTRMULTI_BY_HANDLE
+ * xfs_attrlist_by_handle, xfs_attrmulti_by_handle
+ * - ioctls: XFS_IOC_ATTRLIST_BY_HANDLE, and XFS_IOC_ATTRMULTI_BY_HANDLE
  */
 
-typedef struct xfs_fsop_setdm_handlereq {
-       struct xfs_fsop_handlereq       hreq;   /* handle information   */
-       struct fsdmidata                __user *data;   /* DMAPI data   */
-} xfs_fsop_setdm_handlereq_t;
-
 /*
  * Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface.
  *
@@ -781,15 +760,15 @@ struct xfs_scrub_metadata {
  * For 'documentation' purposed more than anything else,
  * the "cmd #" field reflects the IRIX fcntl number.
  */
-#define XFS_IOC_ALLOCSP                _IOW ('X', 10, struct xfs_flock64)
-#define XFS_IOC_FREESP         _IOW ('X', 11, struct xfs_flock64)
+/*     XFS_IOC_ALLOCSP ------- deprecated 10    */
+/*     XFS_IOC_FREESP -------- deprecated 11    */
 #define XFS_IOC_DIOINFO                _IOR ('X', 30, struct dioattr)
 #define XFS_IOC_FSGETXATTR     FS_IOC_FSGETXATTR
 #define XFS_IOC_FSSETXATTR     FS_IOC_FSSETXATTR
-#define XFS_IOC_ALLOCSP64      _IOW ('X', 36, struct xfs_flock64)
-#define XFS_IOC_FREESP64       _IOW ('X', 37, struct xfs_flock64)
+/*     XFS_IOC_ALLOCSP64 ----- deprecated 36    */
+/*     XFS_IOC_FREESP64 ------ deprecated 37    */
 #define XFS_IOC_GETBMAP                _IOWR('X', 38, struct getbmap)
-#define XFS_IOC_FSSETDM                _IOW ('X', 39, struct fsdmidata)
+/*      XFS_IOC_FSSETDM ------- deprecated 39    */
 #define XFS_IOC_RESVSP         _IOW ('X', 40, struct xfs_flock64)
 #define XFS_IOC_UNRESVSP       _IOW ('X', 41, struct xfs_flock64)
 #define XFS_IOC_RESVSP64       _IOW ('X', 42, struct xfs_flock64)
@@ -831,7 +810,7 @@ struct xfs_scrub_metadata {
 #define XFS_IOC_FREEZE              _IOWR('X', 119, int)       /* aka FIFREEZE */
 #define XFS_IOC_THAW                _IOWR('X', 120, int)       /* aka FITHAW */
 
-#define XFS_IOC_FSSETDM_BY_HANDLE    _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
+/*      XFS_IOC_FSSETDM_BY_HANDLE -- deprecated 121      */
 #define XFS_IOC_ATTRLIST_BY_HANDLE   _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
 #define XFS_IOC_FSGEOMETRY_V4       _IOR ('X', 124, struct xfs_fsop_geom_v4)
index 797ea0c..d4a387d 100644 (file)
@@ -771,8 +771,7 @@ int
 xfs_alloc_file_space(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
-       xfs_off_t               len,
-       int                     alloc_type)
+       xfs_off_t               len)
 {
        xfs_mount_t             *mp = ip->i_mount;
        xfs_off_t               count;
@@ -865,8 +864,8 @@ xfs_alloc_file_space(
                        goto error;
 
                error = xfs_bmapi_write(tp, ip, startoffset_fsb,
-                                       allocatesize_fsb, alloc_type, 0, imapp,
-                                       &nimaps);
+                               allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+                               &nimaps);
                if (error)
                        goto error;
 
index 9f99316..24b37d2 100644 (file)
@@ -54,7 +54,7 @@ int   xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
 
 /* preallocation and hole punch interface */
 int    xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
-                            xfs_off_t len, int alloc_type);
+                            xfs_off_t len);
 int    xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
 int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
index 8d4c5ca..22ad207 100644 (file)
@@ -1051,8 +1051,7 @@ xfs_file_fallocate(
                }
 
                if (!xfs_is_always_cow_inode(ip)) {
-                       error = xfs_alloc_file_space(ip, offset, len,
-                                                    XFS_BMAPI_PREALLOC);
+                       error = xfs_alloc_file_space(ip, offset, len);
                        if (error)
                                goto out_unlock;
                }
index 8ea47a9..03a6198 100644 (file)
@@ -627,87 +627,6 @@ xfs_attrmulti_by_handle(
        return error;
 }
 
-int
-xfs_ioc_space(
-       struct file             *filp,
-       xfs_flock64_t           *bf)
-{
-       struct inode            *inode = file_inode(filp);
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct iattr            iattr;
-       enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR;
-       uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
-       int                     error;
-
-       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
-               return -EPERM;
-
-       if (!(filp->f_mode & FMODE_WRITE))
-               return -EBADF;
-
-       if (!S_ISREG(inode->i_mode))
-               return -EINVAL;
-
-       if (xfs_is_always_cow_inode(ip))
-               return -EOPNOTSUPP;
-
-       if (filp->f_flags & O_DSYNC)
-               flags |= XFS_PREALLOC_SYNC;
-       if (filp->f_mode & FMODE_NOCMTIME)
-               flags |= XFS_PREALLOC_INVISIBLE;
-
-       error = mnt_want_write_file(filp);
-       if (error)
-               return error;
-
-       xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
-       if (error)
-               goto out_unlock;
-       inode_dio_wait(inode);
-
-       switch (bf->l_whence) {
-       case 0: /*SEEK_SET*/
-               break;
-       case 1: /*SEEK_CUR*/
-               bf->l_start += filp->f_pos;
-               break;
-       case 2: /*SEEK_END*/
-               bf->l_start += XFS_ISIZE(ip);
-               break;
-       default:
-               error = -EINVAL;
-               goto out_unlock;
-       }
-
-       if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) {
-               error = -EINVAL;
-               goto out_unlock;
-       }
-
-       if (bf->l_start > XFS_ISIZE(ip)) {
-               error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
-                               bf->l_start - XFS_ISIZE(ip),
-                               XFS_BMAPI_PREALLOC);
-               if (error)
-                       goto out_unlock;
-       }
-
-       iattr.ia_valid = ATTR_SIZE;
-       iattr.ia_size = bf->l_start;
-       error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
-                                   &iattr);
-       if (error)
-               goto out_unlock;
-
-       error = xfs_update_prealloc_flags(ip, flags);
-
-out_unlock:
-       xfs_iunlock(ip, iolock);
-       mnt_drop_write_file(filp);
-       return error;
-}
-
 /* Return 0 on success or positive error */
 int
 xfs_fsbulkstat_one_fmt(
@@ -1936,6 +1855,15 @@ xfs_fs_eofblocks_from_user(
 }
 
 /*
+ * These long-unused ioctls were removed from the official ioctl API in 5.17,
+ * but retain these definitions so that we can log warnings about them.
+ */
+#define XFS_IOC_ALLOCSP                _IOW ('X', 10, struct xfs_flock64)
+#define XFS_IOC_FREESP         _IOW ('X', 11, struct xfs_flock64)
+#define XFS_IOC_ALLOCSP64      _IOW ('X', 36, struct xfs_flock64)
+#define XFS_IOC_FREESP64       _IOW ('X', 37, struct xfs_flock64)
+
+/*
  * Note: some of the ioctl's return positive numbers as a
  * byte count indicating success, such as readlink_by_handle.
  * So we don't "sign flip" like most other routines.  This means
@@ -1965,13 +1893,11 @@ xfs_file_ioctl(
        case XFS_IOC_ALLOCSP:
        case XFS_IOC_FREESP:
        case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64: {
-               xfs_flock64_t           bf;
-
-               if (copy_from_user(&bf, arg, sizeof(bf)))
-                       return -EFAULT;
-               return xfs_ioc_space(filp, &bf);
-       }
+       case XFS_IOC_FREESP64:
+               xfs_warn_once(mp,
+       "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported",
+                               current->comm);
+               return -ENOTTY;
        case XFS_IOC_DIOINFO: {
                struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
                struct dioattr          da;
index 845d3bc..d4abba2 100644 (file)
@@ -10,12 +10,6 @@ struct xfs_bstat;
 struct xfs_ibulk;
 struct xfs_inogrp;
 
-
-extern int
-xfs_ioc_space(
-       struct file             *filp,
-       xfs_flock64_t           *bf);
-
 int
 xfs_ioc_swapext(
        xfs_swapext_t   *sxp);
index 8783af2..004ed2a 100644 (file)
 
 #ifdef BROKEN_X86_ALIGNMENT
 STATIC int
-xfs_compat_flock64_copyin(
-       xfs_flock64_t           *bf,
-       compat_xfs_flock64_t    __user *arg32)
-{
-       if (get_user(bf->l_type,        &arg32->l_type) ||
-           get_user(bf->l_whence,      &arg32->l_whence) ||
-           get_user(bf->l_start,       &arg32->l_start) ||
-           get_user(bf->l_len,         &arg32->l_len) ||
-           get_user(bf->l_sysid,       &arg32->l_sysid) ||
-           get_user(bf->l_pid,         &arg32->l_pid) ||
-           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC int
 xfs_compat_ioc_fsgeometry_v1(
        struct xfs_mount          *mp,
        compat_xfs_fsop_geom_v1_t __user *arg32)
@@ -445,17 +429,6 @@ xfs_file_compat_ioctl(
 
        switch (cmd) {
 #if defined(BROKEN_X86_ALIGNMENT)
-       case XFS_IOC_ALLOCSP_32:
-       case XFS_IOC_FREESP_32:
-       case XFS_IOC_ALLOCSP64_32:
-       case XFS_IOC_FREESP64_32: {
-               struct xfs_flock64      bf;
-
-               if (xfs_compat_flock64_copyin(&bf, arg))
-                       return -EFAULT;
-               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-               return xfs_ioc_space(filp, &bf);
-       }
        case XFS_IOC_FSGEOMETRY_V1_32:
                return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
        case XFS_IOC_FSGROWFSDATA_32: {
index 9929482..fc5a91f 100644 (file)
@@ -154,10 +154,6 @@ typedef struct compat_xfs_flock64 {
        __s32           l_pad[4];       /* reserve area */
 } compat_xfs_flock64_t;
 
-#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
 #define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
 #define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
 #define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
index 3d503e7..fd7e8fb 100644 (file)
@@ -285,7 +285,7 @@ do {                                                                        \
  * write-combining memory accesses before this macro with those after it.
  */
 #ifndef io_stop_wc
-#define io_stop_wc do { } while (0)
+#define io_stop_wc() do { } while (0)
 #endif
 
 #endif /* !__ASSEMBLY__ */
index ad889b5..ccbc36c 100644 (file)
@@ -10,7 +10,7 @@
 #define _KUNIT_ASSERT_H
 
 #include <linux/err.h>
-#include <linux/kernel.h>
+#include <linux/printk.h>
 
 struct kunit;
 struct string_stream;
index 6e947cd..fa517ae 100644 (file)
@@ -316,7 +316,12 @@ enum bpf_type_flag {
         */
        MEM_RDONLY              = BIT(1 + BPF_BASE_TYPE_BITS),
 
-       __BPF_TYPE_LAST_FLAG    = MEM_RDONLY,
+       /* MEM was "allocated" from a different helper, and cannot be mixed
+        * with regular non-MEM_ALLOC'ed MEM types.
+        */
+       MEM_ALLOC               = BIT(2 + BPF_BASE_TYPE_BITS),
+
+       __BPF_TYPE_LAST_FLAG    = MEM_ALLOC,
 };
 
 /* Max number of base types. */
@@ -400,7 +405,7 @@ enum bpf_return_type {
        RET_PTR_TO_SOCKET_OR_NULL       = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
        RET_PTR_TO_TCP_SOCK_OR_NULL     = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
        RET_PTR_TO_SOCK_COMMON_OR_NULL  = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
-       RET_PTR_TO_ALLOC_MEM_OR_NULL    = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
+       RET_PTR_TO_ALLOC_MEM_OR_NULL    = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
        RET_PTR_TO_BTF_ID_OR_NULL       = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
 
        /* This must be the last entry. Its purpose is to ensure the enum is
index 143401d..e999317 100644 (file)
@@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
 void
 bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
-int check_ctx_reg(struct bpf_verifier_env *env,
-                 const struct bpf_reg_state *reg, int regno);
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+                     const struct bpf_reg_state *reg, int regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                   u32 regno, u32 mem_size);
 
index 309acbc..6a89ea4 100644 (file)
@@ -295,12 +295,13 @@ extern bool libceph_compatible(void *data);
 
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid);
 
 struct fs_parameter;
 struct fc_log;
 struct ceph_options *ceph_alloc_options(void);
 int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
-                      struct fc_log *l);
+                      struct fc_log *l, char delim);
 int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
                     struct fc_log *l);
 int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
index 0e6e9ad..ff99ce0 100644 (file)
@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
 
 extern int ceph_parse_ips(const char *c, const char *end,
                          struct ceph_entity_addr *addr,
-                         int max_count, int *count);
+                         int max_count, int *count, char delim);
 
 extern int ceph_msgr_init(void);
 extern void ceph_msgr_exit(void);
index af7e6eb..3e03d01 100644 (file)
@@ -9,18 +9,9 @@
 
 #include <uapi/linux/taskstats.h>
 
-/*
- * Per-task flags relevant to delay accounting
- * maintained privately to avoid exhausting similar flags in sched.h:PF_*
- * Used to set current->delays->flags
- */
-#define DELAYACCT_PF_SWAPIN    0x00000001      /* I am doing a swapin */
-#define DELAYACCT_PF_BLKIO     0x00000002      /* I am waiting on IO */
-
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
        raw_spinlock_t  lock;
-       unsigned int    flags;  /* Private per-task flags */
 
        /* For each stat XXX, add following, aligned appropriately
         *
@@ -37,13 +28,13 @@ struct task_delay_info {
         * associated with the operation is added to XXX_delay.
         * XXX_delay contains the accumulated delay time in nanoseconds.
         */
-       u64 blkio_start;        /* Shared by blkio, swapin */
+       u64 blkio_start;
        u64 blkio_delay;        /* wait for sync block io completion */
-       u64 swapin_delay;       /* wait for swapin block io completion */
+       u64 swapin_start;
+       u64 swapin_delay;       /* wait for swapin */
        u32 blkio_count;        /* total count of the number of sync block */
                                /* io operations performed */
-       u32 swapin_count;       /* total count of the number of swapin block */
-                               /* io operations performed */
+       u32 swapin_count;       /* total count of swapin */
 
        u64 freepages_start;
        u64 freepages_delay;    /* wait for memory reclaim */
@@ -51,8 +42,12 @@ struct task_delay_info {
        u64 thrashing_start;
        u64 thrashing_delay;    /* wait for thrashing page */
 
+       u64 compact_start;
+       u64 compact_delay;      /* wait for memory compact */
+
        u32 freepages_count;    /* total count of memory reclaim */
        u32 thrashing_count;    /* total count of thrash waits */
+       u32 compact_count;      /* total count of memory compact */
 };
 #endif
 
@@ -79,26 +74,10 @@ extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
 extern void __delayacct_thrashing_start(void);
 extern void __delayacct_thrashing_end(void);
-
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
-{
-       if (p->delays)
-               return (p->delays->flags & DELAYACCT_PF_BLKIO);
-       else
-               return 0;
-}
-
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{
-       if (p->delays)
-               p->delays->flags |= flag;
-}
-
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{
-       if (p->delays)
-               p->delays->flags &= ~flag;
-}
+extern void __delayacct_swapin_start(void);
+extern void __delayacct_swapin_end(void);
+extern void __delayacct_compact_start(void);
+extern void __delayacct_compact_end(void);
 
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
@@ -123,7 +102,6 @@ static inline void delayacct_blkio_start(void)
        if (!static_branch_unlikely(&delayacct_key))
                return;
 
-       delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
        if (current->delays)
                __delayacct_blkio_start();
 }
@@ -135,7 +113,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
 
        if (p->delays)
                __delayacct_blkio_end(p);
-       delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
 }
 
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@@ -147,33 +124,77 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 
 static inline void delayacct_freepages_start(void)
 {
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
        if (current->delays)
                __delayacct_freepages_start();
 }
 
 static inline void delayacct_freepages_end(void)
 {
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
        if (current->delays)
                __delayacct_freepages_end();
 }
 
 static inline void delayacct_thrashing_start(void)
 {
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
        if (current->delays)
                __delayacct_thrashing_start();
 }
 
 static inline void delayacct_thrashing_end(void)
 {
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
        if (current->delays)
                __delayacct_thrashing_end();
 }
 
+static inline void delayacct_swapin_start(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_swapin_start();
+}
+
+static inline void delayacct_swapin_end(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_swapin_end();
+}
+
+static inline void delayacct_compact_start(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_compact_start();
+}
+
+static inline void delayacct_compact_end(void)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (current->delays)
+               __delayacct_compact_end();
+}
+
 #else
-static inline void delayacct_set_flag(struct task_struct *p, int flag)
-{}
-static inline void delayacct_clear_flag(struct task_struct *p, int flag)
-{}
 static inline void delayacct_init(void)
 {}
 static inline void delayacct_tsk_init(struct task_struct *tsk)
@@ -199,6 +220,14 @@ static inline void delayacct_thrashing_start(void)
 {}
 static inline void delayacct_thrashing_end(void)
 {}
+static inline void delayacct_swapin_start(void)
+{}
+static inline void delayacct_swapin_end(void)
+{}
+static inline void delayacct_compact_start(void)
+{}
+static inline void delayacct_compact_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
index e272c3d..54feb64 100644 (file)
@@ -43,6 +43,11 @@ struct compat_elf_prpsinfo
        __compat_uid_t                  pr_uid;
        __compat_gid_t                  pr_gid;
        compat_pid_t                    pr_pid, pr_ppid, pr_pgrp, pr_sid;
+       /*
+        * The hard-coded 16 is derived from TASK_COMM_LEN, but it cannot be
+        * changed because it is exposed to userspace, so it is deliberately
+        * hard-coded here.
+        */
        char                            pr_fname[16];
        char                            pr_psargs[ELF_PRARGSZ];
 };
index 957ebec..746e081 100644 (file)
@@ -65,6 +65,11 @@ struct elf_prpsinfo
        __kernel_gid_t  pr_gid;
        pid_t   pr_pid, pr_ppid, pr_pgrp, pr_sid;
        /* Lots missing */
+       /*
+        * The hard-coded 16 is derived from TASK_COMM_LEN, but it cannot be
+        * changed because it is exposed to userspace, so it is deliberately
+        * hard-coded here.
+        */
        char    pr_fname[16];   /* filename of executable */
        char    pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
 };
index ad6fa21..38edaa0 100644 (file)
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
        return val * GOLDEN_RATIO_32;
 }
 
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
 {
        /* High bits are more random, so use them. */
        return __hash_32(val) >> (32 - bits);
index 055eb20..33f47a9 100644 (file)
@@ -1,4 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NOTE:
+ *
+ * This header has accumulated a lot of unrelated content.
+ * The process of splitting it up is in progress while preserving
+ * backward compatibility. That is why it is highly recommended NOT to
+ * include this header from another header file, especially one under
+ * a generic or architectural include/ directory.
+ */
 #ifndef _LINUX_KERNEL_H
 #define _LINUX_KERNEL_H
 
index b6c8aaf..3df4ea0 100644 (file)
@@ -33,6 +33,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
                                          unsigned int cpu,
                                          const char *namefmt);
 
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk);
 bool set_kthread_struct(struct task_struct *p);
 
 void kthread_set_per_cpu(struct task_struct *k, int cpu);
index 6636fc0..dd6c204 100644 (file)
@@ -258,8 +258,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
  * @list: the entry to test
  * @head: the head of the list
  */
-static inline int list_is_first(const struct list_head *list,
-                                       const struct list_head *head)
+static inline int list_is_first(const struct list_head *list, const struct list_head *head)
 {
        return list->prev == head;
 }
@@ -269,13 +268,22 @@ static inline int list_is_first(const struct list_head *list,
  * @list: the entry to test
  * @head: the head of the list
  */
-static inline int list_is_last(const struct list_head *list,
-                               const struct list_head *head)
+static inline int list_is_last(const struct list_head *list, const struct list_head *head)
 {
        return list->next == head;
 }
 
 /**
+ * list_is_head - tests whether @list is the list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_head(const struct list_head *list, const struct list_head *head)
+{
+       return list == head;
+}
+
+/**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
  */
@@ -318,7 +326,7 @@ static inline void list_del_init_careful(struct list_head *entry)
 static inline int list_empty_careful(const struct list_head *head)
 {
        struct list_head *next = smp_load_acquire(&head->next);
-       return (next == head) && (next == head->prev);
+       return list_is_head(next, head) && (next == head->prev);
 }
 
 /**
@@ -393,10 +401,9 @@ static inline void list_cut_position(struct list_head *list,
 {
        if (list_empty(head))
                return;
-       if (list_is_singular(head) &&
-               (head->next != entry && head != entry))
+       if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next))
                return;
-       if (entry == head)
+       if (list_is_head(entry, head))
                INIT_LIST_HEAD(list);
        else
                __list_cut_position(list, head, entry);
@@ -570,7 +577,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each(pos, head) \
-       for (pos = (head)->next; pos != (head); pos = pos->next)
+       for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
 
 /**
  * list_for_each_continue - continue iteration over a list
@@ -580,7 +587,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * Continue to iterate over a list, continuing after the current position.
  */
 #define list_for_each_continue(pos, head) \
-       for (pos = pos->next; pos != (head); pos = pos->next)
+       for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next)
 
 /**
  * list_for_each_prev  -       iterate over a list backwards
@@ -588,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev; pos != (head); pos = pos->prev)
+       for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
 
 /**
  * list_for_each_safe - iterate over a list safe against removal of list entry
@@ -597,8 +604,9 @@ static inline void list_splice_tail_init(struct list_head *list,
  * @head:      the head for your list.
  */
 #define list_for_each_safe(pos, n, head) \
-       for (pos = (head)->next, n = pos->next; pos != (head); \
-               pos = n, n = pos->next)
+       for (pos = (head)->next, n = pos->next; \
+            !list_is_head(pos, (head)); \
+            pos = n, n = pos->next)
 
 /**
  * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
@@ -608,7 +616,7 @@ static inline void list_splice_tail_init(struct list_head *list,
  */
 #define list_for_each_prev_safe(pos, n, head) \
        for (pos = (head)->prev, n = pos->prev; \
-            pos != (head); \
+            !list_is_head(pos, (head)); \
             pos = n, n = pos->prev)
 
 /**
index 0661af1..808bb4c 100644 (file)
@@ -123,7 +123,11 @@ struct cmos_rtc_board_info {
 #define RTC_IO_EXTENT_USED      RTC_IO_EXTENT
 #endif /* ARCH_RTC_LOCATION */
 
-unsigned int mc146818_get_time(struct rtc_time *time);
+bool mc146818_does_rtc_work(void);
+int mc146818_get_time(struct rtc_time *time);
 int mc146818_set_time(struct rtc_time *time);
 
+bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param),
+                       void *param);
+
 #endif /* _MC146818RTC_H */
index ae4004e..f1ec5ad 100644 (file)
@@ -94,10 +94,7 @@ extern const char * const pcpu_fc_names[PCPU_FC_NR];
 
 extern enum pcpu_fc pcpu_chosen_fc;
 
-typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
-                                    size_t align);
-typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size);
-typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
+typedef int (pcpu_fc_cpu_to_node_fn_t)(int cpu);
 typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
 
 extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
@@ -111,15 +108,13 @@ extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                                size_t atom_size,
                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
-                               pcpu_fc_alloc_fn_t alloc_fn,
-                               pcpu_fc_free_fn_t free_fn);
+                               pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+void __init pcpu_populate_pte(unsigned long addr);
 extern int __init pcpu_page_first_chunk(size_t reserved_size,
-                               pcpu_fc_alloc_fn_t alloc_fn,
-                               pcpu_fc_free_fn_t free_fn,
-                               pcpu_fc_populate_pte_fn_t populate_pte_fn);
+                               pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
index 069c7fd..01b9268 100644 (file)
@@ -178,8 +178,16 @@ static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
 #define proc_create_seq(name, mode, parent, ops) ({NULL;})
 #define proc_create_single(name, mode, parent, show) ({NULL;})
 #define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
-#define proc_create(name, mode, parent, proc_ops) ({NULL;})
-#define proc_create_data(name, mode, parent, proc_ops, data) ({NULL;})
+
+static inline struct proc_dir_entry *
+proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent,
+           const struct proc_ops *proc_ops)
+{ return NULL; }
+
+static inline struct proc_dir_entry *
+proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent,
+                const struct proc_ops *proc_ops, void *data)
+{ return NULL; }
 
 static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {}
 static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {}
index fc0357a..95df357 100644 (file)
@@ -416,6 +416,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
 }
 
 /**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate the wake
+ * batch based on the depth. This interface is for HCTX shared tags or
+ * queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+                                           unsigned int users);
+
+/**
  * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
  * @sbq: Bitmap queue to resize.
  * @depth: New number of bits to resize to.
index a6a2db5..508b91d 100644 (file)
@@ -274,8 +274,13 @@ struct task_group;
 
 #define get_current_state()    READ_ONCE(current->__state)
 
-/* Task command name length: */
-#define TASK_COMM_LEN                  16
+/*
+ * Define the task command name length as enum, then it can be visible to
+ * BPF programs.
+ */
+enum {
+       TASK_COMM_LEN = 16,
+};
 
 extern void scheduler_tick(void);
 
index c0d817d..f4c8eaf 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
 #define _LINUX_UNALIGNED_PACKED_STRUCT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 struct __una_u16 { u16 x; } __packed;
 struct __una_u32 { u32 x; } __packed;
index 48cc579..63540be 100644 (file)
@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
 
 static inline void fqdir_pre_exit(struct fqdir *fqdir)
 {
-       fqdir->high_thresh = 0; /* prevent creation of new frags */
-       fqdir->dead = true;
+       /* Prevent creation of new frags.
+        * Pairs with READ_ONCE() in inet_frag_find().
+        */
+       WRITE_ONCE(fqdir->high_thresh, 0);
+
+       /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+        * and ip6frag_expire_frag_queue().
+        */
+       WRITE_ONCE(fqdir->dead, true);
 }
 void fqdir_exit(struct fqdir *fqdir);
 
index 851029e..0a47791 100644 (file)
@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
        struct sk_buff *head;
 
        rcu_read_lock();
-       if (fq->q.fqdir->dead)
+       /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+       if (READ_ONCE(fq->q.fqdir->dead))
                goto out_rcu_unlock;
        spin_lock(&fq->q.lock);
 
index ebef45e..676cb8e 100644 (file)
@@ -218,8 +218,10 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
 #ifdef CONFIG_NET_CLS_ACT
        exts->type = 0;
        exts->nr_actions = 0;
+       /* Note: we do not yet own a reference on net.
+        * This reference might be taken later from tcf_exts_get_net().
+        */
        exts->net = net;
-       netns_tracker_alloc(net, &exts->ns_tracker, GFP_KERNEL);
        exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
                                GFP_KERNEL);
        if (!exts->actions)
index c11dbac..472843e 100644 (file)
@@ -1244,6 +1244,7 @@ struct psched_ratecfg {
        u64     rate_bytes_ps; /* bytes per second */
        u32     mult;
        u16     overhead;
+       u16     mpu;
        u8      linklayer;
        u8      shift;
 };
@@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
 {
        len += r->overhead;
 
+       if (len < r->mpu)
+               len = r->mpu;
+
        if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
                return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
 
@@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
        res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
 
        res->overhead = r->overhead;
+       res->mpu = r->mpu;
        res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
index 96f64bf..a1922a8 100644 (file)
 
 enum error_detector {
        ERROR_DETECTOR_KFENCE,
-       ERROR_DETECTOR_KASAN
+       ERROR_DETECTOR_KASAN,
+       ERROR_DETECTOR_WARN,
 };
 
 #endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
-#define error_detector_list    \
+#define error_detector_list                    \
        EM(ERROR_DETECTOR_KFENCE, "kfence")     \
-       EMe(ERROR_DETECTOR_KASAN, "kasan")
+       EM(ERROR_DETECTOR_KASAN, "kasan")       \
+       EMe(ERROR_DETECTOR_WARN, "warning")
 /* Always end the list with an EMe. */
 
 #undef EM
index f8cb916..f701bb2 100644 (file)
@@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
 
 TRACE_EVENT(f2fs_file_write_iter,
 
-       TP_PROTO(struct inode *inode, unsigned long offset,
-               unsigned long length, int ret),
+       TP_PROTO(struct inode *inode, loff_t offset, size_t length,
+                ssize_t ret),
 
        TP_ARGS(inode, offset, length, ret),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(ino_t,  ino)
-               __field(unsigned long, offset)
-               __field(unsigned long, length)
-               __field(int,    ret)
+               __field(loff_t, offset)
+               __field(size_t, length)
+               __field(ssize_t, ret)
        ),
 
        TP_fast_assign(
@@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter,
        ),
 
        TP_printk("dev = (%d,%d), ino = %lu, "
-               "offset = %lu, length = %lu, written(err) = %d",
+               "offset = %lld, length = %zu, written(err) = %zd",
                show_dev_ino(__entry),
                __entry->offset,
                __entry->length,
@@ -936,14 +936,14 @@ TRACE_EVENT(f2fs_fallocate,
 
 TRACE_EVENT(f2fs_direct_IO_enter,
 
-       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+       TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw),
 
-       TP_ARGS(inode, offset, len, rw),
+       TP_ARGS(inode, iocb, len, rw),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(ino_t,  ino)
-               __field(loff_t, pos)
+               __field(struct kiocb *, iocb)
                __field(unsigned long,  len)
                __field(int,    rw)
        ),
@@ -951,15 +951,18 @@ TRACE_EVENT(f2fs_direct_IO_enter,
        TP_fast_assign(
                __entry->dev    = inode->i_sb->s_dev;
                __entry->ino    = inode->i_ino;
-               __entry->pos    = offset;
+               __entry->iocb   = iocb;
                __entry->len    = len;
                __entry->rw     = rw;
        ),
 
-       TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
+       TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d",
                show_dev_ino(__entry),
-               __entry->pos,
+               __entry->iocb->ki_pos,
                __entry->len,
+               __entry->iocb->ki_flags,
+               __entry->iocb->ki_hint,
+               __entry->iocb->ki_ioprio,
                __entry->rw)
 );
 
index dae5df8..0425cd7 100644 (file)
@@ -6,6 +6,7 @@
 #define AFFS_SUPER_MAGIC       0xadff
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC     0x0187
+#define CEPH_SUPER_MAGIC       0x00c36400
 #define CODA_SUPER_MAGIC       0x73757245
 #define CRAMFS_MAGIC           0x28cd3d45      /* some random number */
 #define CRAMFS_MAGIC_WEND      0x453dcd28      /* magic number with the wrong endianess */
index ccbd087..12327d3 100644 (file)
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION      10
+#define TASKSTATS_VERSION      11
 #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                         * in linux/sched.h */
 
@@ -172,6 +172,10 @@ struct taskstats {
 
        /* v10: 64-bit btime to avoid overflow */
        __u64   ac_btime64;             /* 64-bit begin time */
+
+       /* Delay waiting for memory compact */
+       __u64   compact_count;
+       __u64   compact_delay_total;
 };
 
 
index e5a7eec..c0f4bd9 100644 (file)
@@ -1,18 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* DO NOT USE in new code! This is solely for MEI due to legacy reasons */
 /*
  * UUID/GUID definition
  *
  * Copyright (C) 2010, Intel Corp.
  *     Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation;
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _UAPI_LINUX_UUID_H_
index 33bb8ae..e16dafe 100644 (file)
@@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                        i, btf_type_str(t));
                                return -EINVAL;
                        }
-                       if (check_ctx_reg(env, reg, regno))
+                       if (check_ptr_off_reg(env, reg, regno))
                                return -EINVAL;
                } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
                        const struct btf_type *reg_ref_t;
index 80da1db..5a8d9f7 100644 (file)
@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
        int opt;
 
        opt = fs_parse(fc, bpf_fs_parameters, param, &result);
-       if (opt < 0)
+       if (opt < 0) {
                /* We might like to report bad mount options here, but
                 * traditionally we've ignored all mount options, so we'd
                 * better continue to ignore non-existing options for bpf.
                 */
-               return opt == -ENOPARAM ? 0 : opt;
+               if (opt == -ENOPARAM) {
+                       opt = vfs_parse_fs_param_source(fc, param);
+                       if (opt != -ENOPARAM)
+                               return opt;
+
+                       return 0;
+               }
+
+               if (opt < 0)
+                       return opt;
+       }
 
        switch (opt) {
        case OPT_MODE:
index bfb4538..a39eede 100644 (file)
@@ -570,6 +570,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
 
        if (type & MEM_RDONLY)
                strncpy(prefix, "rdonly_", 16);
+       if (type & MEM_ALLOC)
+               strncpy(prefix, "alloc_", 16);
 
        snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
                 prefix, str[base_type(type)], postfix);
@@ -616,7 +618,7 @@ static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
 
 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
 {
-       env->scratched_stack_slots |= 1UL << spi;
+       env->scratched_stack_slots |= 1ULL << spi;
 }
 
 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
@@ -637,14 +639,14 @@ static bool verifier_state_scratched(const struct bpf_verifier_env *env)
 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
 {
        env->scratched_regs = 0U;
-       env->scratched_stack_slots = 0UL;
+       env->scratched_stack_slots = 0ULL;
 }
 
 /* Used for printing the entire verifier state. */
 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
 {
        env->scratched_regs = ~0U;
-       env->scratched_stack_slots = ~0UL;
+       env->scratched_stack_slots = ~0ULL;
 }
 
 /* The reg state of a pointer or a bounded scalar was saved when
@@ -3969,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
 }
 #endif
 
-int check_ctx_reg(struct bpf_verifier_env *env,
-                 const struct bpf_reg_state *reg, int regno)
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+                              const struct bpf_reg_state *reg, int regno,
+                              bool fixed_off_ok)
 {
-       /* Access to ctx or passing it to a helper is only allowed in
-        * its original, unmodified form.
+       /* Access to this pointer-typed register or passing it to a helper
+        * is only allowed in its original, unmodified form.
         */
 
-       if (reg->off) {
-               verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
-                       regno, reg->off);
+       if (!fixed_off_ok && reg->off) {
+               verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+                       reg_type_str(env, reg->type), regno, reg->off);
                return -EACCES;
        }
 
@@ -3986,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env,
                char tn_buf[48];
 
                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-               verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
+               verbose(env, "variable %s access var_off=%s disallowed\n",
+                       reg_type_str(env, reg->type), tn_buf);
                return -EACCES;
        }
 
        return 0;
 }
 
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+                     const struct bpf_reg_state *reg, int regno)
+{
+       return __check_ptr_off_reg(env, reg, regno, false);
+}
+
 static int __check_buffer_access(struct bpf_verifier_env *env,
                                 const char *buf_info,
                                 const struct bpf_reg_state *reg,
@@ -4437,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                        return -EACCES;
                }
 
-               err = check_ctx_reg(env, reg, regno);
+               err = check_ptr_off_reg(env, reg, regno);
                if (err < 0)
                        return err;
 
@@ -5127,6 +5137,7 @@ static const struct bpf_reg_types mem_types = {
                PTR_TO_MAP_KEY,
                PTR_TO_MAP_VALUE,
                PTR_TO_MEM,
+               PTR_TO_MEM | MEM_ALLOC,
                PTR_TO_BUF,
        },
 };
@@ -5144,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = {
 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5244,12 +5255,6 @@ found:
                                kernel_type_name(btf_vmlinux, *arg_btf_id));
                        return -EACCES;
                }
-
-               if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
-                       verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
-                               regno);
-                       return -EACCES;
-               }
        }
 
        return 0;
@@ -5304,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
        if (err)
                return err;
 
-       if (type == PTR_TO_CTX) {
-               err = check_ctx_reg(env, reg, regno);
+       switch ((u32)type) {
+       case SCALAR_VALUE:
+       /* Pointer types where reg offset is explicitly allowed: */
+       case PTR_TO_PACKET:
+       case PTR_TO_PACKET_META:
+       case PTR_TO_MAP_KEY:
+       case PTR_TO_MAP_VALUE:
+       case PTR_TO_MEM:
+       case PTR_TO_MEM | MEM_RDONLY:
+       case PTR_TO_MEM | MEM_ALLOC:
+       case PTR_TO_BUF:
+       case PTR_TO_BUF | MEM_RDONLY:
+       case PTR_TO_STACK:
+               /* Some of the argument types nevertheless require a
+                * zero register offset.
+                */
+               if (arg_type == ARG_PTR_TO_ALLOC_MEM)
+                       goto force_off_check;
+               break;
+       /* All the rest must be rejected: */
+       default:
+force_off_check:
+               err = __check_ptr_off_reg(env, reg, regno,
+                                         type == PTR_TO_BTF_ID);
                if (err < 0)
                        return err;
+               break;
        }
 
 skip_type_check:
@@ -9507,9 +9535,13 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
                return 0;
        }
 
-       if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
-               mark_reg_known_zero(env, regs, insn->dst_reg);
+       /* All special src_reg cases are listed below. From this point onwards
+        * we either succeed and assign a corresponding dst_reg->type after
+        * zeroing the offset, or fail and reject the program.
+        */
+       mark_reg_known_zero(env, regs, insn->dst_reg);
 
+       if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
                dst_reg->type = aux->btf_var.reg_type;
                switch (base_type(dst_reg->type)) {
                case PTR_TO_MEM:
@@ -9547,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
        }
 
        map = env->used_maps[aux->map_index];
-       mark_reg_known_zero(env, regs, insn->dst_reg);
        dst_reg->map_ptr = map;
 
        if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
@@ -9651,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
                        return err;
        }
 
-       err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
+       err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
        if (err < 0)
                return err;
 
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
new file mode 100644 (file)
index 0000000..e9ffb0c
--- /dev/null
@@ -0,0 +1,105 @@
+# This config is based on running daily CI for enterprise Linux distros to
+# catch regressions in linux-next builds on different bare-metal and virtual
+# platforms. It can be used, for example, as follows:
+#
+# $ make ARCH=arm64 defconfig debug.config
+#
+# Keep alphabetically sorted inside each section.
+#
+# printk and dmesg options
+#
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_PRINTK_TIME=y
+CONFIG_SYMBOLIC_ERRNAME=y
+#
+# Compile-time checks and compiler options
+#
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_FRAME_WARN=2048
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+#
+# Generic Kernel Debugging Instruments
+#
+# CONFIG_UBSAN_ALIGNMENT is not set
+# CONFIG_UBSAN_DIV_ZERO is not set
+# CONFIG_UBSAN_TRAP is not set
+# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_FS_ALLOW_ALL=y
+CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_BOOL=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_ENUM=y
+CONFIG_UBSAN_SHIFT=y
+CONFIG_UBSAN_UNREACHABLE=y
+#
+# Memory Debugging
+#
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF is not set
+# CONFIG_DEBUG_RODATA_TEST is not set
+# CONFIG_DEBUG_WX is not set
+# CONFIG_KFENCE is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_SLUB_STATS is not set
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_OWNER=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VIRTUAL=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_GENERIC_PTDUMP=y
+CONFIG_KASAN=y
+CONFIG_KASAN_GENERIC=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SLUB_DEBUG_ON=y
+#
+# Debug Oops, Lockups and Hangs
+#
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_SOFTLOCKUP_DETECTOR=y
+#
+# Lock Debugging (spinlocks, mutexes, etc...)
+#
+# CONFIG_PROVE_RAW_LOCK_NESTING is not set
+CONFIG_PROVE_LOCKING=y
+#
+# Debug kernel data structures
+#
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+#
+# RCU Debugging
+#
+CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
+#
+# Tracers
+#
+CONFIG_BRANCH_PROFILE_NONE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
index 51530d5..c5e8cea 100644 (file)
@@ -100,19 +100,10 @@ void __delayacct_blkio_start(void)
  */
 void __delayacct_blkio_end(struct task_struct *p)
 {
-       struct task_delay_info *delays = p->delays;
-       u64 *total;
-       u32 *count;
-
-       if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
-               total = &delays->swapin_delay;
-               count = &delays->swapin_count;
-       } else {
-               total = &delays->blkio_delay;
-               count = &delays->blkio_count;
-       }
-
-       delayacct_end(&delays->lock, &delays->blkio_start, total, count);
+       delayacct_end(&p->delays->lock,
+                     &p->delays->blkio_start,
+                     &p->delays->blkio_delay,
+                     &p->delays->blkio_count);
 }
 
 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -164,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
        d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
        tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
        d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
+       tmp = d->compact_delay_total + tsk->delays->compact_delay;
+       d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
        d->blkio_count += tsk->delays->blkio_count;
        d->swapin_count += tsk->delays->swapin_count;
        d->freepages_count += tsk->delays->freepages_count;
        d->thrashing_count += tsk->delays->thrashing_count;
+       d->compact_count += tsk->delays->compact_count;
        raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
        return 0;
@@ -179,8 +173,7 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
        unsigned long flags;
 
        raw_spin_lock_irqsave(&tsk->delays->lock, flags);
-       ret = nsec_to_clock_t(tsk->delays->blkio_delay +
-                               tsk->delays->swapin_delay);
+       ret = nsec_to_clock_t(tsk->delays->blkio_delay);
        raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
        return ret;
 }
@@ -210,3 +203,29 @@ void __delayacct_thrashing_end(void)
                      &current->delays->thrashing_delay,
                      &current->delays->thrashing_count);
 }
+
+void __delayacct_swapin_start(void)
+{
+       current->delays->swapin_start = local_clock();
+}
+
+void __delayacct_swapin_end(void)
+{
+       delayacct_end(&current->delays->lock,
+                     &current->delays->swapin_start,
+                     &current->delays->swapin_delay,
+                     &current->delays->swapin_count);
+}
+
+void __delayacct_compact_start(void)
+{
+       current->delays->compact_start = local_clock();
+}
+
+void __delayacct_compact_end(void)
+{
+       delayacct_end(&current->delays->lock,
+                     &current->delays->compact_start,
+                     &current->delays->compact_delay,
+                     &current->delays->compact_count);
+}
index a2c156e..38c6dd8 100644 (file)
@@ -61,6 +61,8 @@ struct kthread {
 #ifdef CONFIG_BLK_CGROUP
        struct cgroup_subsys_state *blkcg_css;
 #endif
+       /* To store the full name if task comm is truncated. */
+       char *full_name;
 };
 
 enum KTHREAD_BITS {
@@ -94,6 +96,18 @@ static inline struct kthread *__to_kthread(struct task_struct *p)
        return kthread;
 }
 
+void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
+{
+       struct kthread *kthread = to_kthread(tsk);
+
+       if (!kthread || !kthread->full_name) {
+               __get_task_comm(buf, buf_size, tsk);
+               return;
+       }
+
+       strscpy_pad(buf, kthread->full_name, buf_size);
+}
+
 bool set_kthread_struct(struct task_struct *p)
 {
        struct kthread *kthread;
@@ -121,10 +135,14 @@ void free_kthread_struct(struct task_struct *k)
         * Can be NULL if kmalloc() in set_kthread_struct() failed.
         */
        kthread = to_kthread(k);
+       if (!kthread)
+               return;
+
 #ifdef CONFIG_BLK_CGROUP
-       WARN_ON_ONCE(kthread && kthread->blkcg_css);
+       WARN_ON_ONCE(kthread->blkcg_css);
 #endif
        k->worker_private = NULL;
+       kfree(kthread->full_name);
        kfree(kthread);
 }
 
@@ -438,12 +456,22 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
        task = create->result;
        if (!IS_ERR(task)) {
                char name[TASK_COMM_LEN];
+               va_list aq;
+               int len;
 
                /*
                 * task is already visible to other tasks, so updating
                 * COMM must be protected.
                 */
-               vsnprintf(name, sizeof(name), namefmt, args);
+               va_copy(aq, args);
+               len = vsnprintf(name, sizeof(name), namefmt, aq);
+               va_end(aq);
+               if (len >= TASK_COMM_LEN) {
+                       struct kthread *kthread = to_kthread(task);
+
+                       /* leave it truncated when out of memory. */
+                       kthread->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
+               }
                set_task_comm(task, name);
        }
        kfree(create);
index cefd7d8..55b50e0 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/bug.h>
 #include <linux/ratelimit.h>
 #include <linux/debugfs.h>
+#include <trace/events/error_report.h>
 #include <asm/sections.h>
 
 #define PANIC_TIMER_STEP 100
@@ -533,26 +534,9 @@ void oops_enter(void)
                trigger_all_cpu_backtrace();
 }
 
-/*
- * 64-bit random ID for oopses:
- */
-static u64 oops_id;
-
-static int init_oops_id(void)
-{
-       if (!oops_id)
-               get_random_bytes(&oops_id, sizeof(oops_id));
-       else
-               oops_id++;
-
-       return 0;
-}
-late_initcall(init_oops_id);
-
 static void print_oops_end_marker(void)
 {
-       init_oops_id();
-       pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
+       pr_warn("---[ end trace %016llx ]---\n", 0ULL);
 }
 
 /*
@@ -609,6 +593,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
        print_irqtrace_events(current);
 
        print_oops_end_marker();
+       trace_error_report_end(ERROR_DETECTOR_WARN, (unsigned long)caller);
 
        /* Just a warning, don't kill lockdep. */
        add_taint(taint, LOCKDEP_STILL_OK);
index 2450a9f..ecc4cf0 100644 (file)
@@ -220,7 +220,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
                niceval = MAX_NICE;
 
        rcu_read_lock();
-       read_lock(&tasklist_lock);
        switch (which) {
        case PRIO_PROCESS:
                if (who)
@@ -235,9 +234,11 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
                        pgrp = find_vpid(who);
                else
                        pgrp = task_pgrp(current);
+               read_lock(&tasklist_lock);
                do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
                        error = set_one_prio(p, niceval, error);
                } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+               read_unlock(&tasklist_lock);
                break;
        case PRIO_USER:
                uid = make_kuid(cred->user_ns, who);
@@ -249,16 +250,15 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
                        if (!user)
                                goto out_unlock;        /* No processes for this user */
                }
-               do_each_thread(g, p) {
+               for_each_process_thread(g, p) {
                        if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
                                error = set_one_prio(p, niceval, error);
-               } while_each_thread(g, p);
+               }
                if (!uid_eq(uid, cred->uid))
                        free_uid(user);         /* For find_user() */
                break;
        }
 out_unlock:
-       read_unlock(&tasklist_lock);
        rcu_read_unlock();
 out:
        return error;
@@ -283,7 +283,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
                return -EINVAL;
 
        rcu_read_lock();
-       read_lock(&tasklist_lock);
        switch (which) {
        case PRIO_PROCESS:
                if (who)
@@ -301,11 +300,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
                        pgrp = find_vpid(who);
                else
                        pgrp = task_pgrp(current);
+               read_lock(&tasklist_lock);
                do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
                        niceval = nice_to_rlimit(task_nice(p));
                        if (niceval > retval)
                                retval = niceval;
                } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+               read_unlock(&tasklist_lock);
                break;
        case PRIO_USER:
                uid = make_kuid(cred->user_ns, who);
@@ -317,19 +318,18 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
                        if (!user)
                                goto out_unlock;        /* No processes for this user */
                }
-               do_each_thread(g, p) {
+               for_each_process_thread(g, p) {
                        if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
                                niceval = nice_to_rlimit(task_nice(p));
                                if (niceval > retval)
                                        retval = niceval;
                        }
-               } while_each_thread(g, p);
+               }
                if (!uid_eq(uid, cred->uid))
                        free_uid(user);         /* for find_user() */
                break;
        }
 out_unlock:
-       read_unlock(&tasklist_lock);
        rcu_read_unlock();
 
        return retval;
index c77fe36..14b89aa 100644 (file)
@@ -1984,6 +1984,8 @@ config KCOV
        bool "Code coverage for fuzzing"
        depends on ARCH_HAS_KCOV
        depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
+       depends on !ARCH_WANTS_NO_INSTR || STACK_VALIDATION || \
+                  GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
        select DEBUG_FS
        select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
        help
@@ -2222,12 +2224,11 @@ config TEST_RHASHTABLE
 
          If unsure, say N.
 
-config TEST_HASH
-       tristate "Perform selftest on hash functions"
+config TEST_SIPHASH
+       tristate "Perform selftest on siphash functions"
        help
-         Enable this option to test the kernel's integer (<linux/hash.h>),
-         string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
-         hash functions on boot (or module load).
+         Enable this option to test the kernel's siphash (<linux/siphash.h>) hash
+         functions on boot (or module load).
 
          This is intended to help people writing architecture-specific
          optimized versions.  If unsure, say N.
@@ -2371,6 +2372,25 @@ config BITFIELD_KUNIT
 
          If unsure, say N.
 
+config HASH_KUNIT_TEST
+       tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         Enable this option to test the kernel's string (<linux/stringhash.h>), and
+         integer (<linux/hash.h>) hash functions on boot.
+
+         KUnit tests run during boot and output the results to the debug log
+         in TAP format (https://testanything.org/). Only useful for kernel devs
+         running the KUnit test harness, and not intended for inclusion into a
+         production build.
+
+         For more information on KUnit and unit tests in general please refer
+         to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+         This is intended to help people writing architecture-specific
+         optimized versions. If unsure, say N.
+
 config RESOURCE_KUNIT_TEST
        tristate "KUnit test for resource API"
        depends on KUNIT
@@ -2502,6 +2522,7 @@ config TEST_KMOD
        depends on m
        depends on NETDEVICES && NET_CORE && INET # for TUN
        depends on BLOCK
+       depends on PAGE_SIZE_LESS_THAN_256KB # for BTRFS
        select TEST_LKM
        select XFS_FS
        select TUN
index e5372a1..236c5ce 100644 (file)
@@ -112,19 +112,6 @@ config UBSAN_UNREACHABLE
          This option enables -fsanitize=unreachable which checks for control
          flow reaching an expected-to-be-unreachable position.
 
-config UBSAN_OBJECT_SIZE
-       bool "Perform checking for accesses beyond the end of objects"
-       default UBSAN
-       # gcc hugely expands stack usage with -fsanitize=object-size
-       # https://lore.kernel.org/lkml/CAHk-=wjPasyJrDuwDnpHJS2TuQfExwe=px-SzLeN8GFMAQJPmQ@mail.gmail.com/
-       depends on !CC_IS_GCC
-       depends on $(cc-option,-fsanitize=object-size)
-       help
-         This option enables -fsanitize=object-size which checks for accesses
-         beyond the end of objects where the optimizer can determine both the
-         object being operated on and its size, usually seen with bad downcasts,
-         or access to struct members from NULL pointers.
-
 config UBSAN_BOOL
        bool "Perform checking for non-boolean values used as boolean"
        default UBSAN
index b213a7b..300f569 100644 (file)
@@ -61,7 +61,8 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
 CFLAGS_test_bitops.o += -Werror
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
-obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
+obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o
+obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
 obj-$(CONFIG_TEST_IDA) += test_ida.o
 obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
 CFLAGS_test_kasan.o += -fno-builtin
index 059b8b0..886510d 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "kstrtox.h"
 
+noinline
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
 {
        if (*base == 0) {
@@ -47,6 +48,7 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
  *
  * Don't you dare use this function.
  */
+noinline
 unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
                                  size_t max_chars)
 {
@@ -85,6 +87,7 @@ unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned lon
        return rv;
 }
 
+noinline
 unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
 {
        return _parse_integer_limit(s, base, p, INT_MAX);
@@ -125,6 +128,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
  * Preferred over simple_strtoull(). Return code must be checked.
  */
+noinline
 int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
        if (s[0] == '+')
@@ -148,6 +152,7 @@ EXPORT_SYMBOL(kstrtoull);
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
  * Preferred over simple_strtoll(). Return code must be checked.
  */
+noinline
 int kstrtoll(const char *s, unsigned int base, long long *res)
 {
        unsigned long long tmp;
@@ -219,6 +224,7 @@ EXPORT_SYMBOL(_kstrtol);
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
  * Preferred over simple_strtoul(). Return code must be checked.
  */
+noinline
 int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 {
        unsigned long long tmp;
@@ -249,6 +255,7 @@ EXPORT_SYMBOL(kstrtouint);
  * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
  * Preferred over simple_strtol(). Return code must be checked.
  */
+noinline
 int kstrtoint(const char *s, unsigned int base, int *res)
 {
        long long tmp;
@@ -264,6 +271,7 @@ int kstrtoint(const char *s, unsigned int base, int *res)
 }
 EXPORT_SYMBOL(kstrtoint);
 
+noinline
 int kstrtou16(const char *s, unsigned int base, u16 *res)
 {
        unsigned long long tmp;
@@ -279,6 +287,7 @@ int kstrtou16(const char *s, unsigned int base, u16 *res)
 }
 EXPORT_SYMBOL(kstrtou16);
 
+noinline
 int kstrtos16(const char *s, unsigned int base, s16 *res)
 {
        long long tmp;
@@ -294,6 +303,7 @@ int kstrtos16(const char *s, unsigned int base, s16 *res)
 }
 EXPORT_SYMBOL(kstrtos16);
 
+noinline
 int kstrtou8(const char *s, unsigned int base, u8 *res)
 {
        unsigned long long tmp;
@@ -309,6 +319,7 @@ int kstrtou8(const char *s, unsigned int base, u8 *res)
 }
 EXPORT_SYMBOL(kstrtou8);
 
+noinline
 int kstrtos8(const char *s, unsigned int base, s8 *res)
 {
        long long tmp;
@@ -333,6 +344,7 @@ EXPORT_SYMBOL(kstrtos8);
  * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL.  Value
  * pointed to by res is updated upon finding a match.
  */
+noinline
 int kstrtobool(const char *s, bool *res)
 {
        if (!s)
index 5d5424b..9daa3fb 100644 (file)
@@ -49,11 +49,11 @@ bool __list_del_entry_valid(struct list_head *entry)
                        "list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
                        entry, LIST_POISON2) ||
            CHECK_DATA_CORRUPTION(prev->next != entry,
-                       "list_del corruption. prev->next should be %px, but was %px\n",
-                       entry, prev->next) ||
+                       "list_del corruption. prev->next should be %px, but was %px. (prev=%px)\n",
+                       entry, prev->next, prev) ||
            CHECK_DATA_CORRUPTION(next->prev != entry,
-                       "list_del corruption. next->prev should be %px, but was %px\n",
-                       entry, next->prev))
+                       "list_del corruption. next->prev should be %px, but was %px. (next=%px)\n",
+                       entry, next->prev, next))
                return false;
 
        return true;
index 673bd20..330aa53 100644 (file)
@@ -36,6 +36,8 @@
  */
 
 #include <asm/unaligned.h>
+
+#include <linux/bitops.h>
 #include <linux/string.h>       /* memset, memcpy */
 
 #define FORCE_INLINE __always_inline
index 0ae2e66..a6789c0 100644 (file)
@@ -69,9 +69,12 @@ int ref_tracker_alloc(struct ref_tracker_dir *dir,
        unsigned long entries[REF_TRACKER_STACK_ENTRIES];
        struct ref_tracker *tracker;
        unsigned int nr_entries;
+       gfp_t gfp_mask = gfp;
        unsigned long flags;
 
-       *trackerp = tracker = kzalloc(sizeof(*tracker), gfp | __GFP_NOFAIL);
+       if (gfp & __GFP_DIRECT_RECLAIM)
+               gfp_mask |= __GFP_NOFAIL;
+       *trackerp = tracker = kzalloc(sizeof(*tracker), gfp_mask);
        if (unlikely(!tracker)) {
                pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
                refcount_inc(&dir->untracked);
index 2709ab8..6220fa6 100644 (file)
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
-                                           unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+                                           unsigned int wake_batch)
 {
-       unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
        int i;
 
        if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
        }
 }
 
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+                                           unsigned int depth)
+{
+       unsigned int wake_batch;
+
+       wake_batch = sbq_calc_wake_batch(sbq, depth);
+       __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+                                           unsigned int users)
+{
+       unsigned int wake_batch;
+
+       wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+                       users, 4, SBQ_WAKE_BATCH);
+       __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
 {
        sbitmap_queue_update_wake_batch(sbq, depth);
index 0ee40b4..bb25fda 100644 (file)
  * and hash_64().
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
-
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/hash.h>
 #include <linux/stringhash.h>
-#include <linux/printk.h>
+#include <kunit/test.h>
 
 /* 32-bit XORSHIFT generator.  Seed must not be zero. */
-static u32 __init __attribute_const__
+static u32 __attribute_const__
 xorshift(u32 seed)
 {
        seed ^= seed << 13;
@@ -34,7 +32,7 @@ xorshift(u32 seed)
 }
 
 /* Given a non-zero x, returns a non-zero byte. */
-static u8 __init __attribute_const__
+static u8 __attribute_const__
 mod255(u32 x)
 {
        x = (x & 0xffff) + (x >> 16);   /* 1 <= x <= 0x1fffe */
@@ -45,8 +43,7 @@ mod255(u32 x)
 }
 
 /* Fill the buffer with non-zero bytes. */
-static void __init
-fill_buf(char *buf, size_t len, u32 seed)
+static void fill_buf(char *buf, size_t len, u32 seed)
 {
        size_t i;
 
@@ -56,6 +53,50 @@ fill_buf(char *buf, size_t len, u32 seed)
        }
 }
 
+/* Holds most testing variables for the int test. */
+struct test_hash_params {
+        /* Pointer to integer to be hashed. */
+       unsigned long long *h64;
+        /* Low 32-bits of integer to be hashed. */
+       u32 h0;
+        /* Arch-specific hash result. */
+       u32 h1;
+        /* Generic hash result. */
+       u32 h2;
+        /* ORed hashes of given size (in bits). */
+       u32 (*hash_or)[33];
+};
+
+#ifdef HAVE_ARCH__HASH_32
+static void
+test_int__hash_32(struct kunit *test, struct test_hash_params *params)
+{
+       params->hash_or[1][0] |= params->h2 = __hash_32_generic(params->h0);
+#if HAVE_ARCH__HASH_32 == 1
+       KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+                           "__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
+                           params->h0, params->h1, params->h2);
+#endif
+}
+#endif
+
+#ifdef HAVE_ARCH_HASH_64
+static void
+test_int_hash_64(struct kunit *test, struct test_hash_params *params, u32 const *m, int *k)
+{
+       params->h2 = hash_64_generic(*params->h64, *k);
+#if HAVE_ARCH_HASH_64 == 1
+       KUNIT_EXPECT_EQ_MSG(test, params->h1, params->h2,
+                           "hash_64(%#llx, %d) = %#x != hash_64_generic() = %#x",
+                           *params->h64, *k, params->h1, params->h2);
+#else
+       KUNIT_EXPECT_LE_MSG(test, params->h1, params->h2,
+                           "hash_64_generic(%#llx, %d) = %#x > %#x",
+                           *params->h64, *k, params->h1, *m);
+#endif
+}
+#endif
+
 /*
  * Test the various integer hash functions.  h64 (or its low-order bits)
  * is the integer to hash.  hash_or accumulates the OR of the hash values,
@@ -65,23 +106,16 @@ fill_buf(char *buf, size_t len, u32 seed)
  * inline, the code being tested is actually in the module, and you can
  * recompile and re-test the module without rebooting.
  */
-static bool __init
-test_int_hash(unsigned long long h64, u32 hash_or[2][33])
+static void
+test_int_hash(struct kunit *test, unsigned long long h64, u32 hash_or[2][33])
 {
        int k;
-       u32 h0 = (u32)h64, h1, h2;
+       struct test_hash_params params = { &h64, (u32)h64, 0, 0, hash_or };
 
        /* Test __hash32 */
-       hash_or[0][0] |= h1 = __hash_32(h0);
+       hash_or[0][0] |= params.h1 = __hash_32(params.h0);
 #ifdef HAVE_ARCH__HASH_32
-       hash_or[1][0] |= h2 = __hash_32_generic(h0);
-#if HAVE_ARCH__HASH_32 == 1
-       if (h1 != h2) {
-               pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
-                       h0, h1, h2);
-               return false;
-       }
-#endif
+       test_int__hash_32(test, &params);
 #endif
 
        /* Test k = 1..32 bits */
@@ -89,63 +123,53 @@ test_int_hash(unsigned long long h64, u32 hash_or[2][33])
                u32 const m = ((u32)2 << (k-1)) - 1;    /* Low k bits set */
 
                /* Test hash_32 */
-               hash_or[0][k] |= h1 = hash_32(h0, k);
-               if (h1 > m) {
-                       pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
-                       return false;
-               }
-#ifdef HAVE_ARCH_HASH_32
-               h2 = hash_32_generic(h0, k);
-#if HAVE_ARCH_HASH_32 == 1
-               if (h1 != h2) {
-                       pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
-                               " = %#x", h0, k, h1, h2);
-                       return false;
-               }
-#else
-               if (h2 > m) {
-                       pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
-                               h0, k, h1, m);
-                       return false;
-               }
-#endif
-#endif
+               hash_or[0][k] |= params.h1 = hash_32(params.h0, k);
+               KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+                                   "hash_32(%#x, %d) = %#x > %#x",
+                                   params.h0, k, params.h1, m);
+
                /* Test hash_64 */
-               hash_or[1][k] |= h1 = hash_64(h64, k);
-               if (h1 > m) {
-                       pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
-                       return false;
-               }
+               hash_or[1][k] |= params.h1 = hash_64(h64, k);
+               KUNIT_EXPECT_LE_MSG(test, params.h1, m,
+                                   "hash_64(%#llx, %d) = %#x > %#x",
+                                   h64, k, params.h1, m);
 #ifdef HAVE_ARCH_HASH_64
-               h2 = hash_64_generic(h64, k);
-#if HAVE_ARCH_HASH_64 == 1
-               if (h1 != h2) {
-                       pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
-                               "= %#x", h64, k, h1, h2);
-                       return false;
-               }
-#else
-               if (h2 > m) {
-                       pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
-                               h64, k, h1, m);
-                       return false;
-               }
-#endif
+               test_int_hash_64(test, &params, &m, &k);
 #endif
        }
-
-       (void)h2;       /* Suppress unused variable warning */
-       return true;
 }
 
 #define SIZE 256       /* Run time is cubic in SIZE */
 
-static int __init
-test_hash_init(void)
+static void test_string_or(struct kunit *test)
 {
        char buf[SIZE+1];
-       u32 string_or = 0, hash_or[2][33] = { { 0, } };
-       unsigned tests = 0;
+       u32 string_or = 0;
+       int i, j;
+
+       fill_buf(buf, SIZE, 1);
+
+       /* Test every possible non-empty substring in the buffer. */
+       for (j = SIZE; j > 0; --j) {
+               buf[j] = '\0';
+
+               for (i = 0; i <= j; i++) {
+                       u32 h0 = full_name_hash(buf+i, buf+i, j-i);
+
+                       string_or |= h0;
+               } /* i */
+       } /* j */
+
+       /* The OR of all the hash values should cover all the bits */
+       KUNIT_EXPECT_EQ_MSG(test, string_or, -1u,
+                           "OR of all string hash results = %#x != %#x",
+                           string_or, -1u);
+}
+
+static void test_hash_or(struct kunit *test)
+{
+       char buf[SIZE+1];
+       u32 hash_or[2][33] = { { 0, } };
        unsigned long long h64 = 0;
        int i, j;
 
@@ -160,46 +184,27 @@ test_hash_init(void)
                        u32 h0 = full_name_hash(buf+i, buf+i, j-i);
 
                        /* Check that hashlen_string gets the length right */
-                       if (hashlen_len(hashlen) != j-i) {
-                               pr_err("hashlen_string(%d..%d) returned length"
-                                       " %u, expected %d",
-                                       i, j, hashlen_len(hashlen), j-i);
-                               return -EINVAL;
-                       }
+                       KUNIT_EXPECT_EQ_MSG(test, hashlen_len(hashlen), j-i,
+                                           "hashlen_string(%d..%d) returned length %u, expected %d",
+                                           i, j, hashlen_len(hashlen), j-i);
                        /* Check that the hashes match */
-                       if (hashlen_hash(hashlen) != h0) {
-                               pr_err("hashlen_string(%d..%d) = %08x != "
-                                       "full_name_hash() = %08x",
-                                       i, j, hashlen_hash(hashlen), h0);
-                               return -EINVAL;
-                       }
+                       KUNIT_EXPECT_EQ_MSG(test, hashlen_hash(hashlen), h0,
+                                           "hashlen_string(%d..%d) = %08x != full_name_hash() = %08x",
+                                           i, j, hashlen_hash(hashlen), h0);
 
-                       string_or |= h0;
                        h64 = h64 << 32 | h0;   /* For use with hash_64 */
-                       if (!test_int_hash(h64, hash_or))
-                               return -EINVAL;
-                       tests++;
+                       test_int_hash(test, h64, hash_or);
                } /* i */
        } /* j */
 
-       /* The OR of all the hash values should cover all the bits */
-       if (~string_or) {
-               pr_err("OR of all string hash results = %#x != %#x",
-                       string_or, -1u);
-               return -EINVAL;
-       }
-       if (~hash_or[0][0]) {
-               pr_err("OR of all __hash_32 results = %#x != %#x",
-                       hash_or[0][0], -1u);
-               return -EINVAL;
-       }
+       KUNIT_EXPECT_EQ_MSG(test, hash_or[0][0], -1u,
+                           "OR of all __hash_32 results = %#x != %#x",
+                           hash_or[0][0], -1u);
 #ifdef HAVE_ARCH__HASH_32
 #if HAVE_ARCH__HASH_32 != 1    /* Test is pointless if results match */
-       if (~hash_or[1][0]) {
-               pr_err("OR of all __hash_32_generic results = %#x != %#x",
-                       hash_or[1][0], -1u);
-               return -EINVAL;
-       }
+       KUNIT_EXPECT_EQ_MSG(test, hash_or[1][0], -1u,
+                           "OR of all __hash_32_generic results = %#x != %#x",
+                           hash_or[1][0], -1u);
 #endif
 #endif
 
@@ -207,51 +212,27 @@ test_hash_init(void)
        for (i = 1; i <= 32; i++) {
                u32 const m = ((u32)2 << (i-1)) - 1;    /* Low i bits set */
 
-               if (hash_or[0][i] != m) {
-                       pr_err("OR of all hash_32(%d) results = %#x "
-                               "(%#x expected)", i, hash_or[0][i], m);
-                       return -EINVAL;
-               }
-               if (hash_or[1][i] != m) {
-                       pr_err("OR of all hash_64(%d) results = %#x "
-                               "(%#x expected)", i, hash_or[1][i], m);
-                       return -EINVAL;
-               }
+               KUNIT_EXPECT_EQ_MSG(test, hash_or[0][i], m,
+                                   "OR of all hash_32(%d) results = %#x (%#x expected)",
+                                   i, hash_or[0][i], m);
+               KUNIT_EXPECT_EQ_MSG(test, hash_or[1][i], m,
+                                   "OR of all hash_64(%d) results = %#x (%#x expected)",
+                                   i, hash_or[1][i], m);
        }
+}
 
-       /* Issue notices about skipped tests. */
-#ifdef HAVE_ARCH__HASH_32
-#if HAVE_ARCH__HASH_32 != 1
-       pr_info("__hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
-       pr_info("__hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_32
-#if HAVE_ARCH_HASH_32 != 1
-       pr_info("hash_32() is arch-specific; not compared to generic.");
-#endif
-#else
-       pr_info("hash_32() has no arch implementation to test.");
-#endif
-#ifdef HAVE_ARCH_HASH_64
-#if HAVE_ARCH_HASH_64 != 1
-       pr_info("hash_64() is arch-specific; not compared to generic.");
-#endif
-#else
-       pr_info("hash_64() has no arch implementation to test.");
-#endif
-
-       pr_notice("%u tests passed.", tests);
+static struct kunit_case hash_test_cases[] __refdata = {
+       KUNIT_CASE(test_string_or),
+       KUNIT_CASE(test_hash_or),
+       {}
+};
 
-       return 0;
-}
+static struct kunit_suite hash_test_suite = {
+       .name = "hash",
+       .test_cases = hash_test_cases,
+};
 
-static void __exit test_hash_exit(void)
-{
-}
 
-module_init(test_hash_init);   /* Does everything */
-module_exit(test_hash_exit);   /* Does nothing */
+kunit_test_suite(hash_test_suite);
 
 MODULE_LICENSE("GPL");
index e4f706a..3ca717f 100644 (file)
@@ -337,6 +337,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
                if (num)
                        kmem_cache_free_bulk(c, num, objects);
        }
+       kmem_cache_destroy(c);
        *total_failures += fail;
        return 1;
 }
index 7e7bbd0..2062be1 100644 (file)
@@ -79,15 +79,6 @@ static void test_ubsan_load_invalid_value(void)
        eval2 = eval;
 }
 
-static void test_ubsan_null_ptr_deref(void)
-{
-       volatile int *ptr = NULL;
-       int val;
-
-       UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
-       val = *ptr;
-}
-
 static void test_ubsan_misaligned_access(void)
 {
        volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
@@ -98,29 +89,16 @@ static void test_ubsan_misaligned_access(void)
        *ptr = val;
 }
 
-static void test_ubsan_object_size_mismatch(void)
-{
-       /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
-       volatile int val __aligned(8) = 4;
-       volatile long long *ptr, val2;
-
-       UBSAN_TEST(CONFIG_UBSAN_OBJECT_SIZE);
-       ptr = (long long *)&val;
-       val2 = *ptr;
-}
-
 static const test_ubsan_fp test_ubsan_array[] = {
        test_ubsan_shift_out_of_bounds,
        test_ubsan_out_of_bounds,
        test_ubsan_load_invalid_value,
        test_ubsan_misaligned_access,
-       test_ubsan_object_size_mismatch,
 };
 
 /* Excluded because they Oops the module. */
 static const test_ubsan_fp skip_ubsan_array[] = {
        test_ubsan_divrem_overflow,
-       test_ubsan_null_ptr_deref,
 };
 
 static int __init test_ubsan_init(void)
index 53d7485..a99bd49 100644 (file)
@@ -432,6 +432,18 @@ config NEED_PER_CPU_KM
        bool
        default y
 
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+       bool
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+       bool
+
+config USE_PERCPU_NUMA_NODE_ID
+       bool
+
+config HAVE_SETUP_PER_CPU_AREA
+       bool
+
 config CLEANCACHE
        bool "Enable cleancache driver to cache clean pages if tmem is present"
        help
index f306e69..c125c49 100644 (file)
@@ -3529,7 +3529,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        if (unlikely(!si))
                goto out;
 
-       delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
        page = lookup_swap_cache(entry, vma, vmf->address);
        swapcache = page;
 
@@ -3577,7 +3576,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                                        vmf->address, &vmf->ptl);
                        if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
                                ret = VM_FAULT_OOM;
-                       delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
                        goto unlock;
                }
 
@@ -3591,13 +3589,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                 * owner processes (which may be unknown at hwpoison time)
                 */
                ret = VM_FAULT_HWPOISON;
-               delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
                goto out_release;
        }
 
        locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
 
-       delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
        if (!locked) {
                ret |= VM_FAULT_RETRY;
                goto out_release;
index d4205e5..3589feb 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/padata.h>
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
+#include <linux/delayacct.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -4365,6 +4366,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                return NULL;
 
        psi_memstall_enter(&pflags);
+       delayacct_compact_start();
        noreclaim_flag = memalloc_noreclaim_save();
 
        *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
@@ -4372,6 +4374,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
        memalloc_noreclaim_restore(noreclaim_flag);
        psi_memstall_leave(&pflags);
+       delayacct_compact_end();
 
        if (*compact_result == COMPACT_SKIPPED)
                return NULL;
index 9725c7e..0bf8e40 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/psi.h>
 #include <linux/uio.h>
 #include <linux/sched/task.h>
+#include <linux/delayacct.h>
 
 void end_swap_bio_write(struct bio *bio)
 {
@@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
         * significant part of overall IO time.
         */
        psi_memstall_enter(&pflags);
+       delayacct_swapin_start();
 
        if (frontswap_load(page) == 0) {
                SetPageUptodate(page);
@@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)
 
 out:
        psi_memstall_leave(&pflags);
+       delayacct_swapin_end();
        return ret;
 }
 
index 4199a06..a188ac9 100644 (file)
@@ -2992,6 +2992,42 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 
        return ai;
 }
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
+                                  pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
+{
+       const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NUMA
+       int node = NUMA_NO_NODE;
+       void *ptr;
+
+       if (cpu_to_nd_fn)
+               node = cpu_to_nd_fn(cpu);
+
+       if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
+               ptr = memblock_alloc_from(size, align, goal);
+               pr_info("cpu %d has no node %d or node-local memory\n",
+                       cpu, node);
+               pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n",
+                        cpu, size, (u64)__pa(ptr));
+       } else {
+               ptr = memblock_alloc_try_nid(size, align, goal,
+                                            MEMBLOCK_ALLOC_ACCESSIBLE,
+                                            node);
+
+               pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n",
+                        cpu, size, node, (u64)__pa(ptr));
+       }
+       return ptr;
+#else
+       return memblock_alloc_from(size, align, goal);
+#endif
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+       memblock_free(ptr, size);
+}
 #endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
 
 #if defined(BUILD_EMBED_FIRST_CHUNK)
@@ -3001,14 +3037,13 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
  * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
- * @alloc_fn: function to allocate percpu page
- * @free_fn: function to free percpu page
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
  *
  * This is a helper to ease setting up embedded first percpu chunk and
  * can be called where pcpu_setup_first_chunk() is expected.
  *
  * If this function is used to setup the first chunk, it is allocated
- * by calling @alloc_fn and used as-is without being mapped into
+ * by calling pcpu_fc_alloc and used as-is without being mapped into
  * vmalloc area.  Allocations are always whole multiples of @atom_size
  * aligned to @atom_size.
  *
@@ -3022,7 +3057,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
  * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
- * size, the leftover is returned using @free_fn.
+ * size, the leftover is returned using pcpu_fc_free.
  *
  * RETURNS:
  * 0 on success, -errno on failure.
@@ -3030,8 +3065,7 @@ static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                                  size_t atom_size,
                                  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
-                                 pcpu_fc_alloc_fn_t alloc_fn,
-                                 pcpu_fc_free_fn_t free_fn)
+                                 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
        void *base = (void *)ULONG_MAX;
        void **areas = NULL;
@@ -3066,7 +3100,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                BUG_ON(cpu == NR_CPUS);
 
                /* allocate space for the whole group */
-               ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
+               ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
                if (!ptr) {
                        rc = -ENOMEM;
                        goto out_free_areas;
@@ -3105,12 +3139,12 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
                        if (gi->cpu_map[i] == NR_CPUS) {
                                /* unused unit, free whole */
-                               free_fn(ptr, ai->unit_size);
+                               pcpu_fc_free(ptr, ai->unit_size);
                                continue;
                        }
                        /* copy and return the unused part */
                        memcpy(ptr, __per_cpu_load, ai->static_size);
-                       free_fn(ptr + size_sum, ai->unit_size - size_sum);
+                       pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
                }
        }
 
@@ -3129,7 +3163,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 out_free_areas:
        for (group = 0; group < ai->nr_groups; group++)
                if (areas[group])
-                       free_fn(areas[group],
+                       pcpu_fc_free(areas[group],
                                ai->groups[group].nr_units * ai->unit_size);
 out_free:
        pcpu_free_alloc_info(ai);
@@ -3140,12 +3174,79 @@ out_free:
 #endif /* BUILD_EMBED_FIRST_CHUNK */
 
 #ifdef BUILD_PAGE_FIRST_CHUNK
+#include <asm/pgalloc.h>
+
+#ifndef P4D_TABLE_SIZE
+#define P4D_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PUD_TABLE_SIZE
+#define PUD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PMD_TABLE_SIZE
+#define PMD_TABLE_SIZE PAGE_SIZE
+#endif
+
+#ifndef PTE_TABLE_SIZE
+#define PTE_TABLE_SIZE PAGE_SIZE
+#endif
+void __init __weak pcpu_populate_pte(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       if (pgd_none(*pgd)) {
+               p4d_t *new;
+
+               new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pgd_populate(&init_mm, pgd, new);
+       }
+
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d)) {
+               pud_t *new;
+
+               new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               p4d_populate(&init_mm, p4d, new);
+       }
+
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud)) {
+               pmd_t *new;
+
+               new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pud_populate(&init_mm, pud, new);
+       }
+
+       pmd = pmd_offset(pud, addr);
+       if (!pmd_present(*pmd)) {
+               pte_t *new;
+
+               new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+               if (!new)
+                       goto err_alloc;
+               pmd_populate_kernel(&init_mm, pmd, new);
+       }
+
+       return;
+
+err_alloc:
+       panic("%s: Failed to allocate memory\n", __func__);
+}
+
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
- * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: function to free percpu page, always called with PAGE_SIZE
- * @populate_pte_fn: function to populate pte
+ * @cpu_to_nd_fn: callback to convert cpu to its node, optional
  *
  * This is a helper to ease setting up page-remapped first percpu
  * chunk and can be called where pcpu_setup_first_chunk() is expected.
@@ -3156,10 +3257,7 @@ out_free:
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_page_first_chunk(size_t reserved_size,
-                                pcpu_fc_alloc_fn_t alloc_fn,
-                                pcpu_fc_free_fn_t free_fn,
-                                pcpu_fc_populate_pte_fn_t populate_pte_fn)
+int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
 {
        static struct vm_struct vm;
        struct pcpu_alloc_info *ai;
@@ -3201,7 +3299,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
                for (i = 0; i < unit_pages; i++) {
                        void *ptr;
 
-                       ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
+                       ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
                        if (!ptr) {
                                pr_warn("failed to allocate %s page for cpu%u\n",
                                                psize_str, cpu);
@@ -3223,7 +3321,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
                        (unsigned long)vm.addr + unit * ai->unit_size;
 
                for (i = 0; i < unit_pages; i++)
-                       populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
+                       pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
 
                /* pte already populated, the following shouldn't fail */
                rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
@@ -3253,7 +3351,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 enomem:
        while (--j >= 0)
-               free_fn(page_address(pages[j]), PAGE_SIZE);
+               pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
        rc = -ENOMEM;
 out_free_ar:
        memblock_free(pages, pages_size);
@@ -3278,17 +3376,6 @@ out_free_ar:
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
-static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
-                                      size_t align)
-{
-       return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
-{
-       memblock_free(ptr, size);
-}
-
 void __init setup_per_cpu_areas(void)
 {
        unsigned long delta;
@@ -3299,9 +3386,8 @@ void __init setup_per_cpu_areas(void)
         * Always reserve area for module percpu variables.  That's
         * what the legacy allocator did.
         */
-       rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-                                   PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
-                                   pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
+       rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
+                                   PAGE_SIZE, NULL, NULL);
        if (rc < 0)
                panic("Failed to initialize percpu areas.");
 
index a52ad81..55f47ca 100644 (file)
@@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
        err = dev_set_allmulti(dev, 1);
        if (err) {
                br_multicast_del_port(p);
+               dev_put_track(dev, &p->dev_tracker);
                kfree(p);       /* kobject not yet init'd, manually free */
                goto err1;
        }
@@ -724,10 +725,10 @@ err3:
        sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
        br_multicast_del_port(p);
+       dev_put_track(dev, &p->dev_tracker);
        kobject_put(&p->kobj);
        dev_set_allmulti(dev, -1);
 err1:
-       dev_put(dev);
        return err;
 }
 
index 9441b4a..ecc400a 100644 (file)
@@ -190,14 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
        int i = 0;
        char tmp[3];
        int err = -EINVAL;
        int d;
 
-       dout("parse_fsid '%s'\n", str);
+       dout("%s '%s'\n", __func__, str);
        tmp[2] = 0;
        while (*str && i < 16) {
                if (ispunct(*str)) {
@@ -217,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 
        if (i == 16)
                err = 0;
-       dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+       dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
        return err;
 }
+EXPORT_SYMBOL(ceph_parse_fsid);
 
 /*
  * ceph options
@@ -395,14 +396,14 @@ out:
 }
 
 int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
-                      struct fc_log *l)
+                      struct fc_log *l, char delim)
 {
        struct p_log log = {.prefix = "libceph", .log = l};
        int ret;
 
-       /* ip1[:port1][,ip2[:port2]...] */
+       /* ip1[:port1][<delim>ip2[:port2]...] */
        ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
-                            &opt->num_mon);
+                            &opt->num_mon, delim);
        if (ret) {
                error_plog(&log, "Failed to parse monitor IPs: %d", ret);
                return ret;
@@ -428,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
        case Opt_ip:
                err = ceph_parse_ips(param->string,
                                     param->string + param->size,
-                                    &opt->my_addr,
-                                    1, NULL);
+                                    &opt->my_addr, 1, NULL, ',');
                if (err) {
                        error_plog(&log, "Failed to parse ip: %d", err);
                        return err;
@@ -438,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
                break;
 
        case Opt_fsid:
-               err = parse_fsid(param->string, &opt->fsid);
+               err = ceph_parse_fsid(param->string, &opt->fsid);
                if (err) {
                        error_plog(&log, "Failed to parse fsid: %d", err);
                        return err;
index 7b891be..45eba2d 100644 (file)
@@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
  */
 int ceph_parse_ips(const char *c, const char *end,
                   struct ceph_entity_addr *addr,
-                  int max_count, int *count)
+                  int max_count, int *count, char delim)
 {
        int i, ret = -EINVAL;
        const char *p = c;
 
        dout("parse_ips on '%.*s'\n", (int)(end-c), c);
        for (i = 0; i < max_count; i++) {
+               char cur_delim = delim;
                const char *ipend;
                int port;
-               char delim = ',';
 
                if (*p == '[') {
-                       delim = ']';
+                       cur_delim = ']';
                        p++;
                }
 
-               ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+               ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+                                            &ipend);
                if (ret)
                        goto bad;
                ret = -EINVAL;
 
                p = ipend;
 
-               if (delim == ']') {
+               if (cur_delim == ']') {
                        if (*p != ']') {
                                dout("missing matching ']'\n");
                                goto bad;
@@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end,
                addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
                addr[i].nonce = 0;
 
-               dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+               dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
 
                if (p == end)
                        break;
-               if (*p != ',')
+               if (*p != delim)
                        goto bad;
                p++;
        }
index 84a0d95..1baab07 100644 (file)
@@ -8981,6 +8981,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
                goto out_unlock;
        }
        old_prog = link->prog;
+       if (old_prog->type != new_prog->type ||
+           old_prog->expected_attach_type != new_prog->expected_attach_type) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
        if (old_prog == new_prog) {
                /* no-op, don't disturb drivers */
                bpf_prog_put(new_prog);
index 9b7171c..a5b5bb9 100644 (file)
@@ -164,8 +164,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
 {
        struct net *net;
        if (ops->exit) {
-               list_for_each_entry(net, net_exit_list, exit_list)
+               list_for_each_entry(net, net_exit_list, exit_list) {
                        ops->exit(net);
+                       cond_resched();
+               }
        }
        if (ops->exit_batch)
                ops->exit_batch(net_exit_list);
index 95a64c8..f1a9bf7 100644 (file)
@@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
 {
        struct platform_device *pdev = of_find_device_by_node(np);
        struct nvmem_cell *cell;
-       const void *buf;
+       const void *mac;
        size_t len;
        int ret;
 
@@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
        if (IS_ERR(cell))
                return PTR_ERR(cell);
 
-       buf = nvmem_cell_read(cell, &len);
+       mac = nvmem_cell_read(cell, &len);
        nvmem_cell_put(cell);
 
-       if (IS_ERR(buf))
-               return PTR_ERR(buf);
-
-       ret = 0;
-       if (len == ETH_ALEN) {
-               if (is_valid_ether_addr(buf))
-                       memcpy(addr, buf, ETH_ALEN);
-               else
-                       ret = -EINVAL;
-       } else if (len == 3 * ETH_ALEN - 1) {
-               u8 mac[ETH_ALEN];
-
-               if (mac_pton(buf, mac))
-                       memcpy(addr, mac, ETH_ALEN);
-               else
-                       ret = -EINVAL;
-       } else {
-               ret = -EINVAL;
+       if (IS_ERR(mac))
+               return PTR_ERR(mac);
+
+       if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+               kfree(mac);
+               return -EINVAL;
        }
 
-       kfree(buf);
+       memcpy(addr, mac, ETH_ALEN);
+       kfree(mac);
 
-       return ret;
+       return 0;
 }
 
 /**
index e21485a..4ff806d 100644 (file)
@@ -844,6 +844,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
        }
 
        num = ethtool_get_phc_vclocks(dev, &vclock_index);
+       dev_put(dev);
+
        for (i = 0; i < num; i++) {
                if (*(vclock_index + i) == phc_index) {
                        match = true;
@@ -2047,6 +2049,9 @@ void sk_destruct(struct sock *sk)
 {
        bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
 
+       WARN_ON_ONCE(!llist_empty(&sk->defer_list));
+       sk_defer_free_flush(sk);
+
        if (rcu_access_pointer(sk->sk_reuseport_cb)) {
                reuseport_detach_sock(sk);
                use_call_rcu = true;
index 828de17..b458986 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/netlink.h>
+#include <linux/hash.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
 static unsigned int fib_info_cnt;
 
 #define DEVINDEX_HASHBITS 8
@@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi)
                pr_warn("Freeing alive fib_info %p\n", fi);
                return;
        }
-       fib_info_cnt--;
 
        call_rcu(&fi->rcu, free_fib_info_rcu);
 }
@@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi)
        spin_lock_bh(&fib_info_lock);
        if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
                hlist_del(&fi->fib_hash);
+
+               /* Paired with READ_ONCE() in fib_create_info(). */
+               WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+
                if (fi->fib_prefsrc)
                        hlist_del(&fi->fib_lhash);
                if (fi->nh) {
@@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
 
 static inline unsigned int fib_devindex_hashfn(unsigned int val)
 {
-       unsigned int mask = DEVINDEX_HASHSIZE - 1;
+       return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+       u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
 
-       return (val ^
-               (val >> DEVINDEX_HASHBITS) ^
-               (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+       return &fib_info_devhash[fib_devindex_hashfn(val)];
 }
 
 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -430,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
        struct hlist_head *head;
        struct fib_nh *nh;
-       unsigned int hash;
 
        spin_lock(&fib_info_lock);
 
-       hash = fib_devindex_hashfn(dev->ifindex);
-       head = &fib_info_devhash[hash];
+       head = fib_info_devhash_bucket(dev);
+
        hlist_for_each_entry(nh, head, nh_hash) {
                if (nh->fib_nh_dev == dev &&
                    nh->fib_nh_gw4 == gw &&
@@ -1240,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
        return err;
 }
 
-static inline unsigned int fib_laddr_hashfn(__be32 val)
+static struct hlist_head *
+fib_info_laddrhash_bucket(const struct net *net, __be32 val)
 {
-       unsigned int mask = (fib_info_hash_size - 1);
+       u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+                          fib_info_hash_bits);
 
-       return ((__force u32)val ^
-               ((__force u32)val >> 7) ^
-               ((__force u32)val >> 14)) & mask;
+       return &fib_info_laddrhash[slot];
 }
 
 static struct hlist_head *fib_info_hash_alloc(int bytes)
@@ -1282,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
        old_info_hash = fib_info_hash;
        old_laddrhash = fib_info_laddrhash;
        fib_info_hash_size = new_size;
+       fib_info_hash_bits = ilog2(new_size);
 
        for (i = 0; i < old_size; i++) {
                struct hlist_head *head = &fib_info_hash[i];
@@ -1299,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
        }
        fib_info_hash = new_info_hash;
 
+       fib_info_laddrhash = new_laddrhash;
        for (i = 0; i < old_size; i++) {
-               struct hlist_head *lhead = &fib_info_laddrhash[i];
+               struct hlist_head *lhead = &old_laddrhash[i];
                struct hlist_node *n;
                struct fib_info *fi;
 
                hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
                        struct hlist_head *ldest;
-                       unsigned int new_hash;
 
-                       new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
-                       ldest = &new_laddrhash[new_hash];
+                       ldest = fib_info_laddrhash_bucket(fi->fib_net,
+                                                         fi->fib_prefsrc);
                        hlist_add_head(&fi->fib_lhash, ldest);
                }
        }
-       fib_info_laddrhash = new_laddrhash;
 
        spin_unlock_bh(&fib_info_lock);
 
@@ -1430,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 #endif
 
        err = -ENOBUFS;
-       if (fib_info_cnt >= fib_info_hash_size) {
+
+       /* Paired with WRITE_ONCE() in fib_release_info() */
+       if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
                unsigned int new_size = fib_info_hash_size << 1;
                struct hlist_head *new_info_hash;
                struct hlist_head *new_laddrhash;
@@ -1462,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                return ERR_PTR(err);
        }
 
-       fib_info_cnt++;
        fi->fib_net = net;
        fi->fib_protocol = cfg->fc_protocol;
        fi->fib_scope = cfg->fc_scope;
@@ -1591,12 +1600,13 @@ link_it:
        refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock_bh(&fib_info_lock);
+       fib_info_cnt++;
        hlist_add_head(&fi->fib_hash,
                       &fib_info_hash[fib_info_hashfn(fi)]);
        if (fi->fib_prefsrc) {
                struct hlist_head *head;
 
-               head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+               head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
                hlist_add_head(&fi->fib_lhash, head);
        }
        if (fi->nh) {
@@ -1604,12 +1614,10 @@ link_it:
        } else {
                change_nexthops(fi) {
                        struct hlist_head *head;
-                       unsigned int hash;
 
                        if (!nexthop_nh->fib_nh_dev)
                                continue;
-                       hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
-                       head = &fib_info_devhash[hash];
+                       head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
                        hlist_add_head(&nexthop_nh->nh_hash, head);
                } endfor_nexthops(fi)
        }
@@ -1870,16 +1878,16 @@ nla_put_failure:
  */
 int fib_sync_down_addr(struct net_device *dev, __be32 local)
 {
-       int ret = 0;
-       unsigned int hash = fib_laddr_hashfn(local);
-       struct hlist_head *head = &fib_info_laddrhash[hash];
        int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
        struct net *net = dev_net(dev);
+       struct hlist_head *head;
        struct fib_info *fi;
+       int ret = 0;
 
        if (!fib_info_laddrhash || local == 0)
                return 0;
 
+       head = fib_info_laddrhash_bucket(net, local);
        hlist_for_each_entry(fi, head, fib_lhash) {
                if (!net_eq(fi->fib_net, net) ||
                    fi->fib_tb_id != tb_id)
@@ -1961,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
-       unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-       struct hlist_head *head = &fib_info_devhash[hash];
+       struct hlist_head *head = fib_info_devhash_bucket(dev);
        struct fib_nh *nh;
 
        hlist_for_each_entry(nh, head, nh_hash) {
@@ -1981,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
-       int ret = 0;
-       int scope = RT_SCOPE_NOWHERE;
+       struct hlist_head *head = fib_info_devhash_bucket(dev);
        struct fib_info *prev_fi = NULL;
-       unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-       struct hlist_head *head = &fib_info_devhash[hash];
+       int scope = RT_SCOPE_NOWHERE;
        struct fib_nh *nh;
+       int ret = 0;
 
        if (force)
                scope = -1;
@@ -2131,7 +2137,6 @@ out:
 int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
        struct fib_info *prev_fi;
-       unsigned int hash;
        struct hlist_head *head;
        struct fib_nh *nh;
        int ret;
@@ -2147,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
        }
 
        prev_fi = NULL;
-       hash = fib_devindex_hashfn(dev->ifindex);
-       head = &fib_info_devhash[hash];
+       head = fib_info_devhash_bucket(dev);
        ret = 0;
 
        hlist_for_each_entry(nh, head, nh_hash) {
index 05cd198..3410968 100644 (file)
@@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
                /* The RCU read lock provides a memory barrier
                 * guaranteeing that if fqdir->dead is false then
                 * the hash table destruction will not start until
-                * after we unlock.  Paired with inet_frags_exit_net().
+                * after we unlock.  Paired with fqdir_pre_exit().
                 */
-               if (!fqdir->dead) {
+               if (!READ_ONCE(fqdir->dead)) {
                        rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
                                               fqdir->f->rhash_params);
                        refcount_dec(&fq->refcnt);
@@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
+       /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+       long high_thresh = READ_ONCE(fqdir->high_thresh);
        struct inet_frag_queue *fq = NULL, *prev;
 
-       if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+       if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
                return NULL;
 
        rcu_read_lock();
index cfeb889..fad803d 100644 (file)
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
 
        rcu_read_lock();
 
-       if (qp->q.fqdir->dead)
+       /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+       if (READ_ONCE(qp->q.fqdir->dead))
                goto out_rcu_unlock;
 
        spin_lock(&qp->q.lock);
index 2ac2b95..99db2e4 100644 (file)
@@ -604,8 +604,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 
        key = &info->key;
        ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
-                           tunnel_id_to_key32(key->tun_id), key->tos, 0,
-                           skb->mark, skb_get_hash(skb));
+                           tunnel_id_to_key32(key->tun_id),
+                           key->tos & ~INET_ECN_MASK, 0, skb->mark,
+                           skb_get_hash(skb));
        rt = ip_route_output_key(dev_net(dev), &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);
index a618dce..c0b138c 100644 (file)
@@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
        }
 
-       if (rt->rt_type != RTN_UNICAST) {
+       if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
                ip_rt_put(rt);
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
index 86ad15a..750f9f9 100644 (file)
@@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test,
                                   struct mctp_test_route **rtp,
                                   struct socket **sockp)
 {
-       struct sockaddr_mctp addr;
+       struct sockaddr_mctp addr = {0};
        struct mctp_test_route *rt;
        struct mctp_test_dev *dev;
        struct socket *sock;
index 58dcafe..7d00a14 100644 (file)
@@ -206,7 +206,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
        struct nft_connlimit *priv_src = nft_expr_priv(src);
 
        priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
-       if (priv_dst->list)
+       if (!priv_dst->list)
                return -ENOMEM;
 
        nf_conncount_list_init(priv_dst->list);
index 5ee33d0..4f745a4 100644 (file)
@@ -106,7 +106,7 @@ static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
        struct nft_last_priv *priv_dst = nft_expr_priv(dst);
 
        priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
-       if (priv_dst->last)
+       if (!priv_dst->last)
                return -ENOMEM;
 
        return 0;
index f04be5b..c4f3084 100644 (file)
@@ -145,7 +145,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst,
        priv_dst->invert = priv_src->invert;
 
        priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
-       if (priv_dst->limit)
+       if (!priv_dst->limit)
                return -ENOMEM;
 
        spin_lock_init(&priv_dst->limit->lock);
index 0484aef..f394a0b 100644 (file)
@@ -237,7 +237,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
        struct nft_quota *priv_dst = nft_expr_priv(dst);
 
        priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
-       if (priv_dst->consumed)
+       if (!priv_dst->consumed)
                return -ENOMEM;
 
        atomic64_set(priv_dst->consumed, 0);
index 6cfd30f..0b93a17 100644 (file)
@@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
 
+       if (!llcp_sock->local) {
+               release_sock(sk);
+               return -ENODEV;
+       }
+
        if (sk->sk_type == SOCK_DGRAM) {
                DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
                                 msg->msg_name);
index c9c6f49..2cb496c 100644 (file)
@@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 
                qdisc_offload_graft_root(dev, new, old, extack);
 
-               if (new && new->ops->attach)
+               if (new && new->ops->attach && !ingress)
                        goto skip;
 
                for (i = 0; i < num_q; i++) {
index b07bd1c..f893d9a 100644 (file)
@@ -1529,6 +1529,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 {
        memset(r, 0, sizeof(*r));
        r->overhead = conf->overhead;
+       r->mpu = conf->mpu;
        r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
        r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
        psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
index aa3bcaa..961854e 100644 (file)
@@ -634,9 +634,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first)
 {
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
+       bool lgr_valid = false;
+
+       if (smc_conn_lgr_valid(conn))
+               lgr_valid = true;
 
        smc_conn_free(conn);
-       if (local_first)
+       if (local_first && lgr_valid)
                smc_lgr_cleanup_early(lgr);
 }
 
index 1a4fc1c..3d0b8e3 100644 (file)
@@ -221,6 +221,7 @@ struct smc_connection {
                                                 */
        u64                     peer_token;     /* SMC-D token of peer */
        u8                      killed : 1;     /* abnormal termination */
+       u8                      freed : 1;      /* normal termiation */
        u8                      out_of_sync : 1; /* out of sync with peer */
 };
 
index 84c8a43..9d5a971 100644 (file)
@@ -197,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 {
        int rc;
 
-       if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
+       if (!smc_conn_lgr_valid(conn) ||
+           (conn->lgr->is_smcd && conn->lgr->peer_shutdown))
                return -EPIPE;
 
        if (conn->lgr->is_smcd) {
index 6be95a2..ce27399 100644 (file)
@@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version)
        dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX;
        dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ?
                                                SMC_FIRST_CONTACT_MASK : 0;
-       if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
+       if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) &&
            smc_ib_is_valid_local_systemid())
                memcpy(dclc.id_for_peer, local_systemid,
                       sizeof(local_systemid));
index 8935ef4..29525d0 100644 (file)
@@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
-       if (!lgr)
+       if (!smc_conn_lgr_valid(conn))
                return;
        write_lock_bh(&lgr->conns_lock);
        if (conn->alert_token_local) {
                __smc_lgr_unregister_conn(conn);
        }
        write_unlock_bh(&lgr->conns_lock);
-       conn->lgr = NULL;
 }
 
 int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
@@ -749,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
        }
        get_device(&lnk->smcibdev->ibdev->dev);
        atomic_inc(&lnk->smcibdev->lnk_cnt);
+       refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
+       lnk->clearing = 0;
        lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
        lnk->link_id = smcr_next_link_id(lgr);
        lnk->lgr = lgr;
+       smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
        lnk->link_idx = link_idx;
        smc_ibdev_cnt_inc(lnk);
        smcr_copy_dev_info_to_link(lnk);
@@ -806,6 +808,7 @@ out:
        lnk->state = SMC_LNK_UNUSED;
        if (!atomic_dec_return(&smcibdev->lnk_cnt))
                wake_up(&smcibdev->lnks_deleted);
+       smc_lgr_put(lgr); /* lgr_hold above */
        return rc;
 }
 
@@ -844,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
        lgr->terminating = 0;
        lgr->freeing = 0;
        lgr->vlan_id = ini->vlan_id;
+       refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
        mutex_init(&lgr->sndbufs_lock);
        mutex_init(&lgr->rmbs_lock);
        rwlock_init(&lgr->conns_lock);
@@ -996,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn,
                               struct smc_link *to_lnk)
 {
        atomic_dec(&conn->lnk->conn_cnt);
+       /* link_hold in smc_conn_create() */
+       smcr_link_put(conn->lnk);
        conn->lnk = to_lnk;
        atomic_inc(&conn->lnk->conn_cnt);
+       /* link_put in smc_conn_free() */
+       smcr_link_hold(conn->lnk);
 }
 
 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -1130,8 +1138,19 @@ void smc_conn_free(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
-       if (!lgr)
+       if (!lgr || conn->freed)
+               /* Connection has never been registered in a
+                * link group, or has already been freed.
+                */
                return;
+
+       conn->freed = 1;
+       if (!smc_conn_lgr_valid(conn))
+               /* Connection has already unregistered from
+                * link group.
+                */
+               goto lgr_put;
+
        if (lgr->is_smcd) {
                if (!list_empty(&lgr->list))
                        smc_ism_unset_conn(conn);
@@ -1148,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn)
 
        if (!lgr->conns_num)
                smc_lgr_schedule_free_work(lgr);
+lgr_put:
+       if (!lgr->is_smcd)
+               smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
+       smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
 }
 
 /* unregister a link from a buf_desc */
@@ -1203,13 +1226,29 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
        }
 }
 
-/* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk, bool log)
+static void __smcr_link_clear(struct smc_link *lnk)
 {
+       struct smc_link_group *lgr = lnk->lgr;
        struct smc_ib_device *smcibdev;
 
-       if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+       smc_wr_free_link_mem(lnk);
+       smc_ibdev_cnt_dec(lnk);
+       put_device(&lnk->smcibdev->ibdev->dev);
+       smcibdev = lnk->smcibdev;
+       memset(lnk, 0, sizeof(struct smc_link));
+       lnk->state = SMC_LNK_UNUSED;
+       if (!atomic_dec_return(&smcibdev->lnk_cnt))
+               wake_up(&smcibdev->lnks_deleted);
+       smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
+{
+       if (!lnk->lgr || lnk->clearing ||
+           lnk->state == SMC_LNK_UNUSED)
                return;
+       lnk->clearing = 1;
        lnk->peer_qpn = 0;
        smc_llc_link_clear(lnk, log);
        smcr_buf_unmap_lgr(lnk);
@@ -1218,14 +1257,18 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
        smc_wr_free_link(lnk);
        smc_ib_destroy_queue_pair(lnk);
        smc_ib_dealloc_protection_domain(lnk);
-       smc_wr_free_link_mem(lnk);
-       smc_ibdev_cnt_dec(lnk);
-       put_device(&lnk->smcibdev->ibdev->dev);
-       smcibdev = lnk->smcibdev;
-       memset(lnk, 0, sizeof(struct smc_link));
-       lnk->state = SMC_LNK_UNUSED;
-       if (!atomic_dec_return(&smcibdev->lnk_cnt))
-               wake_up(&smcibdev->lnks_deleted);
+       smcr_link_put(lnk); /* theoretically last link_put */
+}
+
+void smcr_link_hold(struct smc_link *lnk)
+{
+       refcount_inc(&lnk->refcnt);
+}
+
+void smcr_link_put(struct smc_link *lnk)
+{
+       if (refcount_dec_and_test(&lnk->refcnt))
+               __smcr_link_clear(lnk);
 }
 
 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1290,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
        __smc_lgr_free_bufs(lgr, true);
 }
 
+/* won't be freed until no one accesses to lgr anymore */
+static void __smc_lgr_free(struct smc_link_group *lgr)
+{
+       smc_lgr_free_bufs(lgr);
+       if (lgr->is_smcd) {
+               if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
+                       wake_up(&lgr->smcd->lgrs_deleted);
+       } else {
+               smc_wr_free_lgr_mem(lgr);
+               if (!atomic_dec_return(&lgr_cnt))
+                       wake_up(&lgrs_deleted);
+       }
+       kfree(lgr);
+}
+
 /* remove a link group */
 static void smc_lgr_free(struct smc_link_group *lgr)
 {
@@ -1305,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr)
                smc_llc_lgr_clear(lgr);
        }
 
-       smc_lgr_free_bufs(lgr);
        destroy_workqueue(lgr->tx_wq);
        if (lgr->is_smcd) {
                smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
                put_device(&lgr->smcd->dev);
-               if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
-                       wake_up(&lgr->smcd->lgrs_deleted);
-       } else {
-               smc_wr_free_lgr_mem(lgr);
-               if (!atomic_dec_return(&lgr_cnt))
-                       wake_up(&lgrs_deleted);
        }
-       kfree(lgr);
+       smc_lgr_put(lgr); /* theoretically last lgr_put */
+}
+
+void smc_lgr_hold(struct smc_link_group *lgr)
+{
+       refcount_inc(&lgr->refcnt);
+}
+
+void smc_lgr_put(struct smc_link_group *lgr)
+{
+       if (refcount_dec_and_test(&lgr->refcnt))
+               __smc_lgr_free(lgr);
 }
 
 static void smc_sk_wake_ups(struct smc_sock *smc)
@@ -1469,16 +1531,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
 /* Called when an SMCR device is removed or the smc module is unloaded.
  * If smcibdev is given, all SMCR link groups using this device are terminated.
  * If smcibdev is NULL, all SMCR link groups are terminated.
- *
- * We must wait here for QPs been destroyed before we destroy the CQs,
- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
- * smc_sock cannot be released.
  */
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 {
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);
-       LIST_HEAD(lgr_linkdown_list);
        int i;
 
        spin_lock_bh(&smc_lgr_list.lock);
@@ -1490,7 +1547,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
                list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
                        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                                if (lgr->lnk[i].smcibdev == smcibdev)
-                                       list_move_tail(&lgr->list, &lgr_linkdown_list);
+                                       smcr_link_down_cond_sched(&lgr->lnk[i]);
                        }
                }
        }
@@ -1502,16 +1559,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
                __smc_lgr_terminate(lgr, false);
        }
 
-       list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
-               for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-                       if (lgr->lnk[i].smcibdev == smcibdev) {
-                               mutex_lock(&lgr->llc_conf_mutex);
-                               smcr_link_down_cond(&lgr->lnk[i]);
-                               mutex_unlock(&lgr->llc_conf_mutex);
-                       }
-               }
-       }
-
        if (smcibdev) {
                if (atomic_read(&smcibdev->lnk_cnt))
                        wait_event(smcibdev->lnks_deleted,
@@ -1856,6 +1903,10 @@ create:
                        goto out;
                }
        }
+       smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
+       if (!conn->lgr->is_smcd)
+               smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
+       conn->freed = 0;
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
@@ -2240,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
-       if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+       if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+           !smc_link_active(conn->lnk))
                return;
        smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
-       if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
+       if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
+           !smc_link_active(conn->lnk))
                return;
        smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -2256,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
        int i;
 
-       if (!conn->lgr || conn->lgr->is_smcd)
+       if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&conn->lgr->lnk[i]))
@@ -2270,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
        int i;
 
-       if (!conn->lgr || conn->lgr->is_smcd)
+       if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&conn->lgr->lnk[i]))
index 521c64a..4cb03e9 100644 (file)
@@ -137,6 +137,8 @@ struct smc_link {
        u8                      peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
        u8                      link_idx;       /* index in lgr link array */
        u8                      link_is_asym;   /* is link asymmetric? */
+       u8                      clearing : 1;   /* link is being cleared */
+       refcount_t              refcnt;         /* link reference count */
        struct smc_link_group   *lgr;           /* parent link group */
        struct work_struct      link_down_wrk;  /* wrk to bring link down */
        char                    ibname[IB_DEVICE_NAME_MAX]; /* ib device name */
@@ -249,6 +251,7 @@ struct smc_link_group {
        u8                      terminating : 1;/* lgr is terminating */
        u8                      freeing : 1;    /* lgr is being freed */
 
+       refcount_t              refcnt;         /* lgr reference count */
        bool                    is_smcd;        /* SMC-R or SMC-D */
        u8                      smc_version;
        u8                      negotiated_eid[SMC_MAX_EID_LEN];
@@ -409,6 +412,11 @@ static inline struct smc_connection *smc_lgr_find_conn(
        return res;
 }
 
+static inline bool smc_conn_lgr_valid(struct smc_connection *conn)
+{
+       return conn->lgr && conn->alert_token_local;
+}
+
 /*
  * Returns true if the specified link is usable.
  *
@@ -487,6 +495,8 @@ struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_cleanup_early(struct smc_link_group *lgr);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
+void smc_lgr_hold(struct smc_link_group *lgr);
+void smc_lgr_put(struct smc_link_group *lgr);
 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -518,6 +528,8 @@ void smc_core_exit(void);
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
                   u8 link_idx, struct smc_init_info *ini);
 void smcr_link_clear(struct smc_link *lnk, bool log);
+void smcr_link_hold(struct smc_link *lnk);
+void smcr_link_put(struct smc_link *lnk);
 void smc_switch_link_and_count(struct smc_connection *conn,
                               struct smc_link *to_lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
index 7c8dad2..b8898c7 100644 (file)
@@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
        r->diag_state = sk->sk_state;
        if (smc->use_fallback)
                r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
-       else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+       else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd)
                r->diag_mode = SMC_DIAG_MODE_SMCD;
        else
                r->diag_mode = SMC_DIAG_MODE_SMCR;
@@ -142,7 +142,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                        goto errout;
        }
 
-       if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+       if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd &&
            (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
            !list_empty(&smc->conn.lgr->list)) {
                struct smc_link *link = smc->conn.lnk;
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
                        goto errout;
        }
-       if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+       if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
            (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
            !list_empty(&smc->conn.lgr->list)) {
                struct smc_connection *conn = &smc->conn;
index db9825c..291f148 100644 (file)
@@ -369,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
        memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
        strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
        new_pe->ndev = ndev;
-       netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
+       if (ndev)
+               netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL);
        rc = -EEXIST;
        new_netdev = true;
        write_lock(&pnettable->lock);
index 47512cc..a54e90a 100644 (file)
@@ -125,10 +125,6 @@ int smc_wr_tx_v2_send(struct smc_link *link,
 int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
                        unsigned long timeout);
 void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
-                            smc_wr_tx_filter filter,
-                            smc_wr_tx_dismisser dismisser,
-                            unsigned long data);
 void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
 
 int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
index 95e774f..efc8484 100644 (file)
@@ -2059,6 +2059,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
 splice_read_end:
        release_sock(sk);
+       sk_defer_free_flush(sk);
        return copied ? : err;
 }
 
index 12e2dda..d45d536 100644 (file)
@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
 {
        /* If number of inflight sockets is insane,
         * force a garbage collect right now.
+        * Paired with the WRITE_ONCE() in unix_inflight(),
+        * unix_notinflight() and gc_in_progress().
         */
-       if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+       if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+           !READ_ONCE(gc_in_progress))
                unix_gc();
        wait_event(unix_gc_wait, gc_in_progress == false);
 }
@@ -213,7 +216,9 @@ void unix_gc(void)
        if (gc_in_progress)
                goto out;
 
-       gc_in_progress = true;
+       /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+       WRITE_ONCE(gc_in_progress, true);
+
        /* First, select candidates for garbage collection.  Only
         * in-flight sockets are considered, and from those only ones
         * which don't have any external reference.
@@ -299,7 +304,10 @@ void unix_gc(void)
 
        /* All candidates should have been detached by now. */
        BUG_ON(!list_empty(&gc_candidates));
-       gc_in_progress = false;
+
+       /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+       WRITE_ONCE(gc_in_progress, false);
+
        wake_up(&unix_gc_wait);
 
  out:
index 052ae70..aa27a02 100644 (file)
@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp)
                } else {
                        BUG_ON(list_empty(&u->link));
                }
-               unix_tot_inflight++;
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
        }
        user->unix_inflight++;
        spin_unlock(&unix_gc_lock);
@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
 
                if (atomic_long_dec_and_test(&u->inflight))
                        list_del_init(&u->link);
-               unix_tot_inflight--;
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
        }
        user->unix_inflight--;
        spin_unlock(&unix_gc_lock);
index dccb8f3..04d1ce9 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/inet_ecn.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/gre.h>
@@ -3295,7 +3296,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
        fl4->flowi4_proto = iph->protocol;
        fl4->daddr = reverse ? iph->saddr : iph->daddr;
        fl4->saddr = reverse ? iph->daddr : iph->saddr;
-       fl4->flowi4_tos = iph->tos;
+       fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
 
        if (!ip_is_fragment(iph)) {
                switch (iph->protocol) {
index 4866afd..eb4d947 100644 (file)
@@ -113,11 +113,11 @@ static inline int update_counts(void *ctx, u32 pid, u64 delta)
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
        unsigned long long pad;
-       char prev_comm[16];
+       char prev_comm[TASK_COMM_LEN];
        int prev_pid;
        int prev_prio;
        long long prev_state;
-       char next_comm[16];
+       char next_comm[TASK_COMM_LEN];
        int next_pid;
        int next_prio;
 };
index f6d593e..8fdd2c9 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include <linux/version.h>
 #include <linux/ptrace.h>
+#include <linux/sched.h>
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
@@ -22,17 +23,17 @@ int prog(struct pt_regs *ctx)
 {
        struct signal_struct *signal;
        struct task_struct *tsk;
-       char oldcomm[16] = {};
-       char newcomm[16] = {};
+       char oldcomm[TASK_COMM_LEN] = {};
+       char newcomm[TASK_COMM_LEN] = {};
        u16 oom_score_adj;
        u32 pid;
 
        tsk = (void *)PT_REGS_PARM1(ctx);
 
        pid = _(tsk->pid);
-       bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm);
-       bpf_probe_read_kernel(newcomm, sizeof(newcomm),
-                             (void *)PT_REGS_PARM2(ctx));
+       bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm);
+       bpf_probe_read_kernel_str(newcomm, sizeof(newcomm),
+                                 (void *)PT_REGS_PARM2(ctx));
        signal = _(tsk->signal);
        oom_score_adj = _(signal->oom_score_adj);
        return 0;
index eaa3269..80edada 100644 (file)
@@ -4,6 +4,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#include <linux/sched.h>
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
@@ -11,8 +12,8 @@
 struct task_rename {
        __u64 pad;
        __u32 pid;
-       char oldcomm[16];
-       char newcomm[16];
+       char oldcomm[TASK_COMM_LEN];
+       char newcomm[TASK_COMM_LEN];
        __u16 oom_score_adj;
 };
 SEC("tracepoint/task/task_rename")
index 9e2092f..7099c60 100644 (file)
@@ -8,7 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS)       += -fsanitize=local-bounds
 ubsan-cflags-$(CONFIG_UBSAN_SHIFT)             += -fsanitize=shift
 ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO)          += -fsanitize=integer-divide-by-zero
 ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE)       += -fsanitize=unreachable
-ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE)       += -fsanitize=object-size
 ubsan-cflags-$(CONFIG_UBSAN_BOOL)              += -fsanitize=bool
 ubsan-cflags-$(CONFIG_UBSAN_ENUM)              += -fsanitize=enum
 ubsan-cflags-$(CONFIG_UBSAN_TRAP)              += -fsanitize-undefined-trap-on-error
index 1784921..b01c36a 100755 (executable)
@@ -3172,7 +3172,7 @@ sub process {
                    length($line) > 75 &&
                    !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ ||
                                        # file delta changes
-                     $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ ||
+                     $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ ||
                                        # filename then :
                      $line =~ /^\s*(?:Fixes:|Link:|$signature_tags)/i ||
                                        # A Fixes: or Link: line or signature tag line
@@ -3479,47 +3479,47 @@ sub process {
                    # Kconfig supports named choices), so use a word boundary
                    # (\b) rather than a whitespace character (\s)
                    $line =~ /^\+\s*(?:config|menuconfig|choice)\b/) {
-                       my $length = 0;
-                       my $cnt = $realcnt;
-                       my $ln = $linenr + 1;
-                       my $f;
-                       my $is_start = 0;
-                       my $is_end = 0;
-                       for (; $cnt > 0 && defined $lines[$ln - 1]; $ln++) {
-                               $f = $lines[$ln - 1];
-                               $cnt-- if ($lines[$ln - 1] !~ /^-/);
-                               $is_end = $lines[$ln - 1] =~ /^\+/;
+                       my $ln = $linenr;
+                       my $needs_help = 0;
+                       my $has_help = 0;
+                       my $help_length = 0;
+                       while (defined $lines[$ln]) {
+                               my $f = $lines[$ln++];
 
                                next if ($f =~ /^-/);
-                               last if (!$file && $f =~ /^\@\@/);
+                               last if ($f !~ /^[\+ ]/);       # !patch context
 
-                               if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
-                                       $is_start = 1;
-                               } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
-                                       $length = -1;
+                               if ($f =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) {
+                                       $needs_help = 1;
+                                       next;
+                               }
+                               if ($f =~ /^\+\s*help\s*$/) {
+                                       $has_help = 1;
+                                       next;
                                }
 
-                               $f =~ s/^.//;
-                               $f =~ s/#.*//;
-                               $f =~ s/^\s+//;
-                               next if ($f =~ /^$/);
+                               $f =~ s/^.//;   # strip patch context [+ ]
+                               $f =~ s/#.*//;  # strip # directives
+                               $f =~ s/^\s+//; # strip leading blanks
+                               next if ($f =~ /^$/);   # skip blank lines
 
+                               # At the end of this Kconfig block:
                                # This only checks context lines in the patch
                                # and so hopefully shouldn't trigger false
                                # positives, even though some of these are
                                # common words in help texts
-                               if ($f =~ /^\s*(?:config|menuconfig|choice|endchoice|
-                                                 if|endif|menu|endmenu|source)\b/x) {
-                                       $is_end = 1;
+                               if ($f =~ /^(?:config|menuconfig|choice|endchoice|
+                                              if|endif|menu|endmenu|source)\b/x) {
                                        last;
                                }
-                               $length++;
+                               $help_length++ if ($has_help);
                        }
-                       if ($is_start && $is_end && $length < $min_conf_desc_length) {
+                       if ($needs_help &&
+                           $help_length < $min_conf_desc_length) {
+                               my $stat_real = get_stat_real($linenr, $ln - 1);
                                WARN("CONFIG_DESCRIPTION",
-                                    "please write a paragraph that describes the config symbol fully\n" . $herecurr);
+                                    "please write a help paragraph that fully describes the config symbol\n" . "$here\n$stat_real\n");
                        }
-                       #print "is_start<$is_start> is_end<$is_end> length<$length>\n";
                }
 
 # check MAINTAINERS entries
index 3980985..1eeb7b4 100644 (file)
@@ -12,19 +12,27 @@ driver_info
 drm_connector_funcs
 drm_encoder_funcs
 drm_encoder_helper_funcs
+dvb_frontend_ops
+dvb_tuner_ops
 ethtool_ops
 extent_io_ops
+fb_ops
 file_lock_operations
 file_operations
 hv_ops
+hwmon_ops
+ib_device_ops
 ide_dma_ops
 ide_port_ops
+ieee80211_ops
+iio_buffer_setup_ops
 inode_operations
 intel_dvo_dev_ops
 irq_domain_ops
 item_operations
 iwl_cfg
 iwl_ops
+kernel_param_ops
 kgdb_arch
 kgdb_io
 kset_uevent_ops
@@ -32,25 +40,33 @@ lock_manager_operations
 machine_desc
 microcode_ops
 mlxsw_reg_info
+mtd_ooblayout_ops
 mtrr_ops
+nand_controller_ops
 neigh_ops
 net_device_ops
+nft_expr_ops
 nlmsvc_binding
 nvkm_device_chip
 of_device_id
 pci_raw_ops
 phy_ops
+pinconf_ops
 pinctrl_ops
 pinmux_ops
 pipe_buf_operations
 platform_hibernation_ops
 platform_suspend_ops
+proc_ops
 proto_ops
+pwm_ops
 regmap_access_table
 regulator_ops
+reset_control_ops
 rpc_pipe_ops
 rtc_class_ops
 sd_desc
+sdhci_ops
 seq_operations
 sirfsoc_padmux
 snd_ac97_build_ops
@@ -67,6 +83,13 @@ uart_ops
 usb_mon_operations
 v4l2_ctrl_ops
 v4l2_ioctl_ops
+v4l2_subdev_core_ops
+v4l2_subdev_internal_ops
+v4l2_subdev_ops
+v4l2_subdev_pad_ops
+v4l2_subdev_video_ops
+vb2_ops
 vm_operations_struct
 wacom_features
+watchdog_ops
 wd_ops
index 2075db0..6bd5221 100755 (executable)
@@ -1718,7 +1718,7 @@ sub vcs_exists {
     %VCS_cmds = %VCS_cmds_hg;
     return 2 if eval $VCS_cmds{"available"};
     %VCS_cmds = ();
-    if (!$printed_novcs) {
+    if (!$printed_novcs && $email_git) {
        warn("$P: No supported VCS found.  Add --nogit to options?\n");
        warn("Using a git repository produces better results.\n");
        warn("Try Linus Torvalds' latest git repository using:\n");
index 1e8b779..deb7c1d 100644 (file)
@@ -199,6 +199,8 @@ static int compare_extable(const void *a, const void *b)
        return 0;
 }
 #ifdef MCOUNT_SORT_ENABLED
+pthread_t mcount_sort_thread;
+
 struct elf_mcount_loc {
        Elf_Ehdr *ehdr;
        Elf_Shdr *init_data_sec;
@@ -282,10 +284,9 @@ static int do_sort(Elf_Ehdr *ehdr,
        unsigned int shnum;
        unsigned int shstrndx;
 #ifdef MCOUNT_SORT_ENABLED
-       struct elf_mcount_loc mstruct;
+       struct elf_mcount_loc mstruct = {0};
        uint_t _start_mcount_loc = 0;
        uint_t _stop_mcount_loc = 0;
-       pthread_t mcount_sort_thread;
 #endif
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
        unsigned int orc_ip_size = 0;
index ac335f5..31ba702 100644 (file)
@@ -1111,29 +1111,14 @@ EXPORT_SYMBOL(snd_card_file_remove);
  */
 int snd_power_ref_and_wait(struct snd_card *card)
 {
-       wait_queue_entry_t wait;
-       int result = 0;
-
        snd_power_ref(card);
-       /* fastpath */
        if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
                return 0;
-       init_waitqueue_entry(&wait, current);
-       add_wait_queue(&card->power_sleep, &wait);
-       while (1) {
-               if (card->shutdown) {
-                       result = -ENODEV;
-                       break;
-               }
-               if (snd_power_get_state(card) == SNDRV_CTL_POWER_D0)
-                       break;
-               snd_power_unref(card);
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(30 * HZ);
-               snd_power_ref(card);
-       }
-       remove_wait_queue(&card->power_sleep, &wait);
-       return result;
+       wait_event_cmd(card->power_sleep,
+                      card->shutdown ||
+                      snd_power_get_state(card) == SNDRV_CTL_POWER_D0,
+                      snd_power_unref(card), snd_power_ref(card));
+       return card->shutdown ? -ENODEV : 0;
 }
 EXPORT_SYMBOL_GPL(snd_power_ref_and_wait);
 
index 3579dd7..50e4aaa 100644 (file)
@@ -112,7 +112,7 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device,
 {
        const struct snd_pci_quirk *q;
 
-       for (q = list; q->subvendor; q++) {
+       for (q = list; q->subvendor || q->subdevice; q++) {
                if (q->subvendor != vendor)
                        continue;
                if (!q->subdevice ||
index 30b40d8..7185953 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 //
-// cs35l41.c -- CS35l41 ALSA HDA audio driver
+// CS35l41 ALSA HDA audio driver
 //
 // Copyright 2021 Cirrus Logic, Inc.
 //
 #include "cs35l41_hda.h"
 
 static const struct reg_sequence cs35l41_hda_config[] = {
-       { CS35L41_PLL_CLK_CTRL,         0x00000430 }, //3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
-       { CS35L41_GLOBAL_CLK_CTRL,      0x00000003 }, //GLOBAL_FS = 48 kHz
-       { CS35L41_SP_ENABLES,           0x00010000 }, //ASP_RX1_EN = 1
-       { CS35L41_SP_RATE_CTRL,         0x00000021 }, //ASP_BCLK_FREQ = 3.072 MHz
-       { CS35L41_SP_FORMAT,            0x20200200 }, //24 bits, I2S, BCLK Slave, FSYNC Slave
-       { CS35L41_DAC_PCM1_SRC,         0x00000008 }, //DACPCM1_SRC = ASPRX1
-       { CS35L41_AMP_DIG_VOL_CTRL,     0x00000000 }, //AMP_VOL_PCM  0.0 dB
-       { CS35L41_AMP_GAIN_CTRL,        0x00000084 }, //AMP_GAIN_PCM 4.5 dB
-       { CS35L41_PWR_CTRL2,            0x00000001 }, //AMP_EN = 1
+       { CS35L41_PLL_CLK_CTRL,         0x00000430 }, // 3200000Hz, BCLK Input, PLL_REFCLK_EN = 1
+       { CS35L41_GLOBAL_CLK_CTRL,      0x00000003 }, // GLOBAL_FS = 48 kHz
+       { CS35L41_SP_ENABLES,           0x00010000 }, // ASP_RX1_EN = 1
+       { CS35L41_SP_RATE_CTRL,         0x00000021 }, // ASP_BCLK_FREQ = 3.072 MHz
+       { CS35L41_SP_FORMAT,            0x20200200 }, // 24 bits, I2S, BCLK Slave, FSYNC Slave
+       { CS35L41_DAC_PCM1_SRC,         0x00000008 }, // DACPCM1_SRC = ASPRX1
+       { CS35L41_AMP_DIG_VOL_CTRL,     0x00000000 }, // AMP_VOL_PCM  0.0 dB
+       { CS35L41_AMP_GAIN_CTRL,        0x00000084 }, // AMP_GAIN_PCM 4.5 dB
+       { CS35L41_PWR_CTRL2,            0x00000001 }, // AMP_EN = 1
 };
 
 static const struct reg_sequence cs35l41_hda_start_bst[] = {
-       { CS35L41_PWR_CTRL2,            0x00000021 }, //BST_EN = 10, AMP_EN = 1
+       { CS35L41_PWR_CTRL2,            0x00000021 }, // BST_EN = 10, AMP_EN = 1
        { CS35L41_PWR_CTRL1,            0x00000001, 3000}, // set GLOBAL_EN = 1
 };
 
@@ -60,7 +60,7 @@ static const struct reg_sequence cs35l41_stop_ext_vspk[] = {
        { 0x00000040,                   0x00000055 },
        { 0x00000040,                   0x000000AA },
        { 0x00007438,                   0x00585941 },
-       { 0x00002014,                   0x00000000, 3000}, //set GLOBAL_EN = 0
+       { 0x00002014,                   0x00000000, 3000}, // set GLOBAL_EN = 0
        { 0x0000742C,                   0x00000009 },
        { 0x00007438,                   0x00580941 },
        { 0x00011008,                   0x00000001 },
@@ -78,7 +78,7 @@ static const struct reg_sequence cs35l41_safe_to_active[] = {
        { 0x0000742C,                   0x0000000F },
        { 0x0000742C,                   0x00000079 },
        { 0x00007438,                   0x00585941 },
-       { CS35L41_PWR_CTRL1,            0x00000001, 2000 }, //GLOBAL_EN = 1
+       { CS35L41_PWR_CTRL1,            0x00000001, 2000 }, // GLOBAL_EN = 1
        { 0x0000742C,                   0x000000F9 },
        { 0x00007438,                   0x00580941 },
        { 0x00000040,                   0x000000CC },
@@ -89,8 +89,8 @@ static const struct reg_sequence cs35l41_active_to_safe[] = {
        { 0x00000040,                   0x00000055 },
        { 0x00000040,                   0x000000AA },
        { 0x00007438,                   0x00585941 },
-       { CS35L41_AMP_DIG_VOL_CTRL,     0x0000A678 }, //AMP_VOL_PCM Mute
-       { CS35L41_PWR_CTRL2,            0x00000000 }, //AMP_EN = 0
+       { CS35L41_AMP_DIG_VOL_CTRL,     0x0000A678 }, // AMP_VOL_PCM Mute
+       { CS35L41_PWR_CTRL2,            0x00000000 }, // AMP_EN = 0
        { CS35L41_PWR_CTRL1,            0x00000000 },
        { 0x0000742C,                   0x00000009, 2000 },
        { 0x00007438,                   0x00580941 },
@@ -161,11 +161,13 @@ static void cs35l41_hda_playback_hook(struct device *dev, int action)
                if (reg_seq->close)
                        ret = regmap_multi_reg_write(reg, reg_seq->close, reg_seq->num_close);
                break;
+       default:
+               ret = -EINVAL;
+               break;
        }
 
        if (ret)
                dev_warn(cs35l41->dev, "Failed to apply multi reg write: %d\n", ret);
-
 }
 
 static int cs35l41_hda_channel_map(struct device *dev, unsigned int tx_num, unsigned int *tx_slot,
@@ -182,20 +184,19 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas
        struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
        struct hda_component *comps = master_data;
 
-       if (comps && cs35l41->index >= 0 && cs35l41->index < HDA_MAX_COMPONENTS)
-               comps = &comps[cs35l41->index];
-       else
+       if (!comps || cs35l41->index < 0 || cs35l41->index >= HDA_MAX_COMPONENTS)
                return -EINVAL;
 
-       if (!comps->dev) {
-               comps->dev = dev;
-               strscpy(comps->name, dev_name(dev), sizeof(comps->name));
-               comps->playback_hook = cs35l41_hda_playback_hook;
-               comps->set_channel_map = cs35l41_hda_channel_map;
-               return 0;
-       }
+       comps = &comps[cs35l41->index];
+       if (comps->dev)
+               return -EBUSY;
+
+       comps->dev = dev;
+       strscpy(comps->name, dev_name(dev), sizeof(comps->name));
+       comps->playback_hook = cs35l41_hda_playback_hook;
+       comps->set_channel_map = cs35l41_hda_channel_map;
 
-       return -EBUSY;
+       return 0;
 }
 
 static void cs35l41_hda_unbind(struct device *dev, struct device *master, void *master_data)
@@ -227,6 +228,8 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
                internal_boost = true;
 
        switch (hw_cfg->gpio1_func) {
+       case CS35L41_NOT_USED:
+               break;
        case CS35l41_VSPK_SWITCH:
                regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
                                   CS35L41_GPIO1_CTRL_MASK, 1 << CS35L41_GPIO1_CTRL_SHIFT);
@@ -235,13 +238,21 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
                regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
                                   CS35L41_GPIO1_CTRL_MASK, 2 << CS35L41_GPIO1_CTRL_SHIFT);
                break;
+       default:
+               dev_err(cs35l41->dev, "Invalid function %d for GPIO1\n", hw_cfg->gpio1_func);
+               return -EINVAL;
        }
 
        switch (hw_cfg->gpio2_func) {
+       case CS35L41_NOT_USED:
+               break;
        case CS35L41_INTERRUPT:
                regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL,
                                   CS35L41_GPIO2_CTRL_MASK, 2 << CS35L41_GPIO2_CTRL_SHIFT);
                break;
+       default:
+               dev_err(cs35l41->dev, "Invalid function %d for GPIO2\n", hw_cfg->gpio2_func);
+               return -EINVAL;
        }
 
        if (internal_boost) {
@@ -256,11 +267,7 @@ static int cs35l41_hda_apply_properties(struct cs35l41_hda *cs35l41,
                cs35l41->reg_seq = &cs35l41_hda_reg_seq_ext_bst;
        }
 
-       ret = cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
-       if (ret)
-               return ret;
-
-       return 0;
+       return cs35l41_hda_channel_map(cs35l41->dev, 0, NULL, 1, (unsigned int *)&hw_cfg->spk_pos);
 }
 
 static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41,
@@ -269,7 +276,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
        struct cs35l41_hda_hw_config *hw_cfg;
        u32 values[HDA_MAX_COMPONENTS];
        struct acpi_device *adev;
-       struct device *acpi_dev;
+       struct device *physdev;
        char *property;
        size_t nval;
        int i, ret;
@@ -280,11 +287,11 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
                return ERR_PTR(-ENODEV);
        }
 
-       acpi_dev = get_device(acpi_get_first_physical_node(adev));
+       physdev = get_device(acpi_get_first_physical_node(adev));
        acpi_dev_put(adev);
 
        property = "cirrus,dev-index";
-       ret = device_property_count_u32(acpi_dev, property);
+       ret = device_property_count_u32(physdev, property);
        if (ret <= 0)
                goto no_acpi_dsd;
 
@@ -294,7 +301,7 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
        }
        nval = ret;
 
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret)
                goto err;
 
@@ -311,7 +318,9 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
                goto err;
        }
 
-       /* No devm_ version as CLSA0100, in no_acpi_dsd case, can't use devm version */
+       /* To use the same release code for all laptop variants we can't use devm_ version of
+        * gpiod_get here, as CLSA010* don't have a fully functional bios with an _DSD node
+        */
        cs35l41->reset_gpio = fwnode_gpiod_get_index(&adev->fwnode, "reset", cs35l41->index,
                                                     GPIOD_OUT_LOW, "cs35l41-reset");
 
@@ -322,46 +331,46 @@ static struct cs35l41_hda_hw_config *cs35l41_hda_read_acpi(struct cs35l41_hda *c
        }
 
        property = "cirrus,speaker-position";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret)
                goto err_free;
        hw_cfg->spk_pos = values[cs35l41->index];
 
        property = "cirrus,gpio1-func";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret)
                goto err_free;
        hw_cfg->gpio1_func = values[cs35l41->index];
 
        property = "cirrus,gpio2-func";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret)
                goto err_free;
        hw_cfg->gpio2_func = values[cs35l41->index];
 
        property = "cirrus,boost-peak-milliamp";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret == 0)
                hw_cfg->bst_ipk = values[cs35l41->index];
 
        property = "cirrus,boost-ind-nanohenry";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret == 0)
                hw_cfg->bst_ind = values[cs35l41->index];
 
        property = "cirrus,boost-cap-microfarad";
-       ret = device_property_read_u32_array(acpi_dev, property, values, nval);
+       ret = device_property_read_u32_array(physdev, property, values, nval);
        if (ret == 0)
                hw_cfg->bst_cap = values[cs35l41->index];
 
-       put_device(acpi_dev);
+       put_device(physdev);
 
        return hw_cfg;
 
 err_free:
        kfree(hw_cfg);
 err:
-       put_device(acpi_dev);
+       put_device(physdev);
        dev_err(cs35l41->dev, "Failed property %s: %d\n", property, ret);
 
        return ERR_PTR(ret);
@@ -370,18 +379,18 @@ no_acpi_dsd:
        /*
         * Device CLSA0100 doesn't have _DSD so a gpiod_get by the label reset won't work.
         * And devices created by i2c-multi-instantiate don't have their device struct pointing to
-        * the correct fwnode, so acpi_dev must be used here
+        * the correct fwnode, so acpi_dev must be used here.
         * And devm functions expect that the device requesting the resource has the correct
-        * fwnode
+        * fwnode.
         */
        if (strncmp(hid, "CLSA0100", 8) != 0)
                return ERR_PTR(-EINVAL);
 
        /* check I2C address to assign the index */
        cs35l41->index = id == 0x40 ? 0 : 1;
-       cs35l41->reset_gpio = gpiod_get_index(acpi_dev, NULL, 0, GPIOD_OUT_HIGH);
+       cs35l41->reset_gpio = gpiod_get_index(physdev, NULL, 0, GPIOD_OUT_HIGH);
        cs35l41->vspk_always_on = true;
-       put_device(acpi_dev);
+       put_device(physdev);
 
        return NULL;
 }
@@ -416,8 +425,7 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
                if (ret == -EBUSY) {
                        dev_info(cs35l41->dev, "Reset line busy, assuming shared reset\n");
                } else {
-                       if (ret != -EPROBE_DEFER)
-                               dev_err(cs35l41->dev, "Failed to get reset GPIO: %d\n", ret);
+                       dev_err_probe(cs35l41->dev, ret, "Failed to get reset GPIO: %d\n", ret);
                        goto err;
                }
        }
@@ -437,7 +445,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
 
        ret = regmap_read(cs35l41->regmap, CS35L41_IRQ1_STATUS3, &int_sts);
        if (ret || (int_sts & CS35L41_OTP_BOOT_ERR)) {
-               dev_err(cs35l41->dev, "OTP Boot error\n");
+               dev_err(cs35l41->dev, "OTP Boot status %x error: %d\n",
+                       int_sts & CS35L41_OTP_BOOT_ERR, ret);
                ret = -EIO;
                goto err;
        }
@@ -463,6 +472,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
                goto err;
        }
 
+       ret = cs35l41_test_key_unlock(cs35l41->dev, cs35l41->regmap);
+       if (ret)
+               goto err;
+
        ret = cs35l41_register_errata_patch(cs35l41->dev, cs35l41->regmap, reg_revid);
        if (ret)
                goto err;
@@ -473,6 +486,10 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
                goto err;
        }
 
+       ret = cs35l41_test_key_lock(cs35l41->dev, cs35l41->regmap);
+       if (ret)
+               goto err;
+
        ret = cs35l41_hda_apply_properties(cs35l41, acpi_hw_cfg);
        if (ret)
                goto err;
@@ -480,8 +497,8 @@ int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int i
        acpi_hw_cfg = NULL;
 
        if (cs35l41->reg_seq->probe) {
-               ret = regmap_register_patch(cs35l41->regmap, cs35l41->reg_seq->probe,
-                                           cs35l41->reg_seq->num_probe);
+               ret = regmap_multi_reg_write(cs35l41->regmap, cs35l41->reg_seq->probe,
+                                            cs35l41->reg_seq->num_probe);
                if (ret) {
                        dev_err(cs35l41->dev, "Fail to apply probe reg patch: %d\n", ret);
                        goto err;
@@ -506,9 +523,9 @@ err:
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(cs35l41_hda_probe);
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_probe, SND_HDA_SCODEC_CS35L41);
 
-int cs35l41_hda_remove(struct device *dev)
+void cs35l41_hda_remove(struct device *dev)
 {
        struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev);
 
@@ -517,11 +534,8 @@ int cs35l41_hda_remove(struct device *dev)
        if (!cs35l41->vspk_always_on)
                gpiod_set_value_cansleep(cs35l41->reset_gpio, 0);
        gpiod_put(cs35l41->reset_gpio);
-
-       return 0;
 }
-EXPORT_SYMBOL_GPL(cs35l41_hda_remove);
-
+EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41);
 
 MODULE_DESCRIPTION("CS35L41 HDA Driver");
 MODULE_AUTHOR("Lucas Tanure, Cirrus Logic Inc, <tanureal@opensource.cirrus.com>");
index 76c69a8..7495100 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * cs35l41_hda.h -- CS35L41 ALSA HDA audio driver
+ * CS35L41 ALSA HDA audio driver
  *
  * Copyright 2021 Cirrus Logic, Inc.
  *
@@ -64,6 +64,6 @@ struct cs35l41_hda {
 
 int cs35l41_hda_probe(struct device *dev, const char *device_name, int id, int irq,
                      struct regmap *regmap);
-int cs35l41_hda_remove(struct device *dev);
+void cs35l41_hda_remove(struct device *dev);
 
 #endif /*__CS35L41_HDA_H__*/
index 4a9462f..e810b27 100644 (file)
@@ -32,7 +32,9 @@ static int cs35l41_hda_i2c_probe(struct i2c_client *clt, const struct i2c_device
 
 static int cs35l41_hda_i2c_remove(struct i2c_client *clt)
 {
-       return cs35l41_hda_remove(&clt->dev);
+       cs35l41_hda_remove(&clt->dev);
+
+       return 0;
 }
 
 static const struct i2c_device_id cs35l41_hda_i2c_id[] = {
@@ -58,9 +60,9 @@ static struct i2c_driver cs35l41_i2c_driver = {
        .probe          = cs35l41_hda_i2c_probe,
        .remove         = cs35l41_hda_i2c_remove,
 };
-
 module_i2c_driver(cs35l41_i2c_driver);
 
 MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
 MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
 MODULE_LICENSE("GPL");
index 77426e9..9f81238 100644 (file)
@@ -30,7 +30,9 @@ static int cs35l41_hda_spi_probe(struct spi_device *spi)
 
 static int cs35l41_hda_spi_remove(struct spi_device *spi)
 {
-       return cs35l41_hda_remove(&spi->dev);
+       cs35l41_hda_remove(&spi->dev);
+
+       return 0;
 }
 
 static const struct spi_device_id cs35l41_hda_spi_id[] = {
@@ -55,9 +57,9 @@ static struct spi_driver cs35l41_spi_driver = {
        .probe          = cs35l41_hda_spi_probe,
        .remove         = cs35l41_hda_spi_remove,
 };
-
 module_spi_driver(cs35l41_spi_driver);
 
 MODULE_DESCRIPTION("HDA CS35L41 driver");
+MODULE_IMPORT_NS(SND_HDA_SCODEC_CS35L41);
 MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
 MODULE_LICENSE("GPL");
index df0b452..2d1fa70 100644 (file)
@@ -490,6 +490,8 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0ADC, "Warlock", CS8409_WARLOCK),
        SND_PCI_QUIRK(0x1028, 0x0AF4, "Warlock", CS8409_WARLOCK),
        SND_PCI_QUIRK(0x1028, 0x0AF5, "Warlock", CS8409_WARLOCK),
+       SND_PCI_QUIRK(0x1028, 0x0BB5, "Warlock N3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
+       SND_PCI_QUIRK(0x1028, 0x0BB6, "Warlock V3 15 TGL-U Nuvoton EC", CS8409_WARLOCK),
        SND_PCI_QUIRK(0x1028, 0x0A77, "Cyborg", CS8409_CYBORG),
        SND_PCI_QUIRK(0x1028, 0x0A78, "Cyborg", CS8409_CYBORG),
        SND_PCI_QUIRK(0x1028, 0x0A79, "Cyborg", CS8409_CYBORG),
index eef9736..668274e 100644 (file)
@@ -6948,6 +6948,7 @@ enum {
        ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
        ALC287_FIXUP_LEGION_16ACHG6,
        ALC287_FIXUP_CS35L41_I2C_2,
+       ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8698,6 +8699,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs35l41_fixup_i2c_two,
        },
+       [ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                        { 0x20, AC_VERB_SET_COEF_INDEX, 0x19 },
+                        { 0x20, AC_VERB_SET_PROC_COEF, 0x8e11 },
+                        { }
+               },
+               .chained = true,
+               .chain_id = ALC285_FIXUP_HP_MUTE_LED,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8911,6 +8922,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
index 5d391f6..96991dd 100644 (file)
@@ -431,6 +431,14 @@ static const struct usbmix_name_map aorus_master_alc1220vb_map[] = {
        {}
 };
 
+/* MSI MPG X570S Carbon Max Wifi with ALC4080  */
+static const struct usbmix_name_map msi_mpg_x570s_carbon_max_wifi_alc4080_map[] = {
+       { 29, "Speaker Playback" },
+       { 30, "Front Headphone Playback" },
+       { 32, "IEC958 Playback" },
+       {}
+};
+
 /*
  * Control map entries
  */
@@ -577,6 +585,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .map = trx40_mobo_map,
                .connector_map = trx40_mobo_connector_map,
        },
+       {       /* MSI MPG X570S Carbon Max Wifi */
+               .id = USB_ID(0x0db0, 0x419c),
+               .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
+       },
        {       /* MSI TRX40 */
                .id = USB_ID(0x0db0, 0x543d),
                .map = trx40_mobo_map,
index 5ef1c15..11e8673 100644 (file)
@@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
               "RECLAIM  %12s%15s%15s\n"
               "      %15llu%15llu%15llums\n"
               "THRASHING%12s%15s%15s\n"
+              "      %15llu%15llu%15llums\n"
+              "COMPACT  %12s%15s%15s\n"
               "      %15llu%15llu%15llums\n",
               "count", "real total", "virtual total",
               "delay total", "delay average",
@@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
               "count", "delay total", "delay average",
               (unsigned long long)t->thrashing_count,
               (unsigned long long)t->thrashing_delay_total,
-              average_ms(t->thrashing_delay_total, t->thrashing_count));
+              average_ms(t->thrashing_delay_total, t->thrashing_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->compact_count,
+              (unsigned long long)t->compact_delay_total,
+              average_ms(t->compact_delay_total, t->compact_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
index d9b4209..f70702f 100644 (file)
@@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
 
        e.pid = task->tgid;
        e.id = get_obj_id(file->private_data, obj_type);
-       bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
-                             task->group_leader->comm);
+       bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm),
+                                 task->group_leader->comm);
        bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
 
        return 0;
index ad6fa21..38edaa0 100644 (file)
@@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val)
        return val * GOLDEN_RATIO_32;
 }
 
-#ifndef HAVE_ARCH_HASH_32
-#define hash_32 hash_32_generic
-#endif
-static inline u32 hash_32_generic(u32 val, unsigned int bits)
+static inline u32 hash_32(u32 val, unsigned int bits)
 {
        /* High bits are more random, so use them. */
        return __hash_32(val) >> (32 - bits);
index 32fc5b3..911345c 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "test_d_path.skel.h"
 #include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
 
 static int duration;
 
@@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem(void)
        test_d_path_check_rdonly_mem__destroy(skel);
 }
 
+static void test_d_path_check_types(void)
+{
+       struct test_d_path_check_types *skel;
+
+       skel = test_d_path_check_types__open_and_load();
+       ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+       test_d_path_check_types__destroy(skel);
+}
+
 void test_d_path(void)
 {
        if (test__start_subtest("basic"))
@@ -174,4 +185,7 @@ void test_d_path(void)
 
        if (test__start_subtest("check_rdonly_mem"))
                test_d_path_check_rdonly_mem();
+
+       if (test__start_subtest("check_alloc_mem"))
+               test_d_path_check_types();
 }
index 983ab0b..b2b357f 100644 (file)
@@ -8,46 +8,47 @@
 
 void serial_test_xdp_link(void)
 {
-       __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
        DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
        struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+       __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
        struct bpf_link_info link_info;
        struct bpf_prog_info prog_info;
        struct bpf_link *link;
+       int err;
        __u32 link_info_len = sizeof(link_info);
        __u32 prog_info_len = sizeof(prog_info);
 
        skel1 = test_xdp_link__open_and_load();
-       if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+       if (!ASSERT_OK_PTR(skel1, "skel_load"))
                goto cleanup;
        prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
 
        skel2 = test_xdp_link__open_and_load();
-       if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+       if (!ASSERT_OK_PTR(skel2, "skel_load"))
                goto cleanup;
        prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
 
        memset(&prog_info, 0, sizeof(prog_info));
        err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
-       if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+       if (!ASSERT_OK(err, "fd_info1"))
                goto cleanup;
        id1 = prog_info.id;
 
        memset(&prog_info, 0, sizeof(prog_info));
        err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
-       if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+       if (!ASSERT_OK(err, "fd_info2"))
                goto cleanup;
        id2 = prog_info.id;
 
        /* set initial prog attachment */
        err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
-       if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+       if (!ASSERT_OK(err, "fd_attach"))
                goto cleanup;
 
        /* validate prog ID */
        err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
-       CHECK(err || id0 != id1, "id1_check",
-             "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+       if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+               goto cleanup;
 
        /* BPF link is not allowed to replace prog attachment */
        link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
@@ -62,7 +63,7 @@ void serial_test_xdp_link(void)
        /* detach BPF program */
        opts.old_fd = prog_fd1;
        err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
-       if (CHECK(err, "prog_detach", "failed %d\n", err))
+       if (!ASSERT_OK(err, "prog_detach"))
                goto cleanup;
 
        /* now BPF link should attach successfully */
@@ -73,24 +74,23 @@ void serial_test_xdp_link(void)
 
        /* validate prog ID */
        err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
-       if (CHECK(err || id0 != id1, "id1_check",
-                 "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+       if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
                goto cleanup;
 
        /* BPF prog attach is not allowed to replace BPF link */
        opts.old_fd = prog_fd1;
        err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
-       if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+       if (!ASSERT_ERR(err, "prog_attach_fail"))
                goto cleanup;
 
        /* Can't force-update when BPF link is active */
        err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
-       if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+       if (!ASSERT_ERR(err, "prog_update_fail"))
                goto cleanup;
 
        /* Can't force-detach when BPF link is active */
        err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
-       if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+       if (!ASSERT_ERR(err, "prog_detach_fail"))
                goto cleanup;
 
        /* BPF link is not allowed to replace another BPF link */
@@ -110,40 +110,39 @@ void serial_test_xdp_link(void)
        skel2->links.xdp_handler = link;
 
        err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
-       if (CHECK(err || id0 != id2, "id2_check",
-                 "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+       if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
                goto cleanup;
 
        /* updating program under active BPF link works as expected */
        err = bpf_link__update_program(link, skel1->progs.xdp_handler);
-       if (CHECK(err, "link_upd", "failed: %d\n", err))
+       if (!ASSERT_OK(err, "link_upd"))
                goto cleanup;
 
        memset(&link_info, 0, sizeof(link_info));
        err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
-       if (CHECK(err, "link_info", "failed: %d\n", err))
+       if (!ASSERT_OK(err, "link_info"))
                goto cleanup;
 
-       CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
-             "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
-       CHECK(link_info.prog_id != id1, "link_prog_id",
-             "got %u != exp %u\n", link_info.prog_id, id1);
-       CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
-             "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+       ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+       ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+       ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+       /* updating program under active BPF link with different type fails */
+       err = bpf_link__update_program(link, skel1->progs.tc_handler);
+       if (!ASSERT_ERR(err, "link_upd_invalid"))
+               goto cleanup;
 
        err = bpf_link__detach(link);
-       if (CHECK(err, "link_detach", "failed %d\n", err))
+       if (!ASSERT_OK(err, "link_detach"))
                goto cleanup;
 
        memset(&link_info, 0, sizeof(link_info));
        err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
-       if (CHECK(err, "link_info", "failed: %d\n", err))
-               goto cleanup;
-       CHECK(link_info.prog_id != id1, "link_prog_id",
-             "got %u != exp %u\n", link_info.prog_id, id1);
+
+       ASSERT_OK(err, "link_info");
+       ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
        /* ifindex should be zeroed out */
-       CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
-             "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+       ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
 
 cleanup:
        test_xdp_link__destroy(skel1);
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644 (file)
index 0000000..7e02b73
--- /dev/null
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_RINGBUF);
+       __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+            __u32 request_mask, unsigned int query_flags)
+{
+       void *active;
+       u32 cpu;
+
+       cpu = bpf_get_smp_processor_id();
+       active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+       if (active) {
+               /* FAIL here! 'active' points to 'regular' memory. It
+                * cannot be submitted to ring buffer.
+                */
+               bpf_ringbuf_submit(active, 0);
+       }
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index a8233e7..728dbd3 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2018 Facebook
 
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 
 #ifndef PERF_MAX_STACK_DEPTH
@@ -41,11 +41,11 @@ struct {
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
        unsigned long long pad;
-       char prev_comm[16];
+       char prev_comm[TASK_COMM_LEN];
        int prev_pid;
        int prev_prio;
        long long prev_state;
-       char next_comm[16];
+       char next_comm[TASK_COMM_LEN];
        int next_pid;
        int next_prio;
 };
index ce69740..43bd7a2 100644 (file)
@@ -1,17 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2017 Facebook
 
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
        unsigned long long pad;
-       char prev_comm[16];
+       char prev_comm[TASK_COMM_LEN];
        int prev_pid;
        int prev_prio;
        long long prev_state;
-       char next_comm[16];
+       char next_comm[TASK_COMM_LEN];
        int next_pid;
        int next_prio;
 };
index ee7d6ac..64ff32e 100644 (file)
@@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp)
 {
        return 0;
 }
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c
new file mode 100644 (file)
index 0000000..b64d33e
--- /dev/null
@@ -0,0 +1,95 @@
+{
+       "ringbuf: invalid reservation offset 1",
+       .insns = {
+       /* reserve 8 byte ringbuf memory */
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_2, 8),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+       /* store a pointer to the reserved memory in R6 */
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       /* check whether the reservation was successful */
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       /* spill R6(mem) into the stack */
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+       /* fill it back in R7 */
+       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+       /* should be able to access *(R7) = 0 */
+       BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+       /* submit the reserved ringbuf memory */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+       /* add invalid offset to reserved ringbuf memory */
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_ringbuf = { 1 },
+       .result = REJECT,
+       .errstr = "dereference of modified alloc_mem ptr R1",
+},
+{
+       "ringbuf: invalid reservation offset 2",
+       .insns = {
+       /* reserve 8 byte ringbuf memory */
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_2, 8),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+       /* store a pointer to the reserved memory in R6 */
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       /* check whether the reservation was successful */
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       /* spill R6(mem) into the stack */
+       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+       /* fill it back in R7 */
+       BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),
+       /* add invalid offset to reserved ringbuf memory */
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe),
+       /* should be able to access *(R7) = 0 */
+       BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0),
+       /* submit the reserved ringbuf memory */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_ringbuf = { 1 },
+       .result = REJECT,
+       .errstr = "R7 min value is outside of the allowed memory range",
+},
+{
+       "ringbuf: check passing rb mem to helpers",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+       /* reserve 8 byte ringbuf memory */
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_2, 8),
+       BPF_MOV64_IMM(BPF_REG_3, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       /* check whether the reservation was successful */
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       /* pass allocated ring buffer memory to fib lookup */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+       BPF_MOV64_IMM(BPF_REG_3, 8),
+       BPF_MOV64_IMM(BPF_REG_4, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup),
+       /* submit the ringbuf memory */
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+       BPF_MOV64_IMM(BPF_REG_2, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_ringbuf = { 2 },
+       .prog_type = BPF_PROG_TYPE_XDP,
+       .result = ACCEPT,
+},
index 1a8eb96..8cfc534 100644 (file)
@@ -84,7 +84,7 @@
        },
        .fixup_map_ringbuf = { 1 },
        .result = REJECT,
-       .errstr = "R0 pointer arithmetic on mem_or_null prohibited",
+       .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited",
 },
 {
        "check corrupted spill/fill",
index 412d852..3f4c8cf 100755 (executable)
@@ -4059,6 +4059,9 @@ usage: ${0##*/} OPTS
        -p          Pause on fail
        -P          Pause after each test
        -v          Be verbose
+
+Tests:
+       $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
 EOF
 }