Merge tag 'driver-core-6.6-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Sep 2023 18:49:05 +0000 (11:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Sep 2023 18:49:05 +0000 (11:49 -0700)
Pull driver symbol lookup fix from Greg KH:
 "Here is one last fixup for your tree for 6.6-rc1. It resolves a
  problem with the way that symbol_get was changed in the module tree
  merge in your tree to fix up the DVB drivers which rely on this old
  api to attach new devices.

  As the changelog comment says:

    In commit 9011e49d54dc ("modules: only allow symbol_get of
    EXPORT_SYMBOL_GPL modules") the use of symbol_get is properly
    restricted to GPL-only marked symbols. This interacts oddly with
    the DVB logic, which only uses dvb_attach() to load the DVB
    driver, which then uses symbol_get().

    Fix this up by properly marking all of the dvb_attach attach symbols
    as EXPORT_SYMBOL_GPL().

  This has been acked by Hans from the V4L driver side, Luis from the
  module side, and Mauro on the media side; Christoph said it was the
  correct solution; and it was tested by the original reporter of the
  issue.

  It has passed 0-day testing, but has not been in linux-next due to it
  only being sent yesterday"

* tag 'driver-core-6.6-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core:
  media: dvb: symbol fixup for dvb_attach()
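
For readers unfamiliar with the mechanism: dvb_attach() looks up a frontend
driver's attach function by name via symbol_get() at runtime, so after commit
9011e49d54dc those attach symbols must be exported GPL-only for the lookup to
keep working. A minimal sketch of the pattern, with hypothetical driver and
symbol names (the real exports are spread across drivers/media):

    /* Demodulator module: export the attach entry point GPL-only so that
     * symbol_get(), and therefore dvb_attach(), can still resolve it. */
    struct dvb_frontend *demod_xyz_attach(const struct demod_xyz_config *cfg,
                                          struct i2c_adapter *i2c);
    EXPORT_SYMBOL_GPL(demod_xyz_attach);

    /* Bridge driver: dvb_attach() expands to a symbol_get() lookup of
     * "demod_xyz_attach" and pins the module while the frontend is in use. */
    fe = dvb_attach(demod_xyz_attach, &cfg, i2c);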

149 files changed:
Documentation/admin-guide/sysctl/kernel.rst
Documentation/dev-tools/kasan.rst
Documentation/devicetree/bindings/sound/fsl,easrc.yaml
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/kcov/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/translations/zh_CN/dev-tools/kasan.rst
MAINTAINERS
arch/arm64/kernel/idreg-override.c
arch/arm64/lib/csum.c
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/asm-prototypes.h
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/kasan.h [new file with mode: 0644]
arch/loongarch/include/asm/kfence.h [new file with mode: 0644]
arch/loongarch/include/asm/kgdb.h [new file with mode: 0644]
arch/loongarch/include/asm/lbt.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/mmzone.h
arch/loongarch/include/asm/page.h
arch/loongarch/include/asm/pgalloc.h
arch/loongarch/include/asm/pgtable.h
arch/loongarch/include/asm/processor.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/stackframe.h
arch/loongarch/include/asm/string.h
arch/loongarch/include/asm/switch_to.h
arch/loongarch/include/asm/thread_info.h
arch/loongarch/include/asm/xor.h [new file with mode: 0644]
arch/loongarch/include/asm/xor_simd.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/ptrace.h
arch/loongarch/include/uapi/asm/sigcontext.h
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/asm-offsets.c
arch/loongarch/kernel/cpu-probe.c
arch/loongarch/kernel/entry.S
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/head.S
arch/loongarch/kernel/kfpu.c
arch/loongarch/kernel/kgdb.c [new file with mode: 0644]
arch/loongarch/kernel/lbt.S [new file with mode: 0644]
arch/loongarch/kernel/numa.c
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/signal.c
arch/loongarch/kernel/stacktrace.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/Makefile
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/xor_simd.c [new file with mode: 0644]
arch/loongarch/lib/xor_simd.h [new file with mode: 0644]
arch/loongarch/lib/xor_simd_glue.c [new file with mode: 0644]
arch/loongarch/lib/xor_template.c [new file with mode: 0644]
arch/loongarch/mm/Makefile
arch/loongarch/mm/cache.c
arch/loongarch/mm/fault.c
arch/loongarch/mm/init.c
arch/loongarch/mm/kasan_init.c [new file with mode: 0644]
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/loongarch/vdso/Makefile
block/blk-map.c
block/blk-throttle.c
block/blk-throttle.h
block/fops.c
block/ioctl.c
drivers/block/drbd/drbd_main.c
drivers/block/null_blk/main.c
drivers/gpio/gpio-zynq.c
drivers/ntb/hw/amd/ntb_hw_amd.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/ntb/test/ntb_tool.c
drivers/pci/Kconfig
drivers/pci/probe.c
drivers/pci/quirks.c
drivers/perf/cxl_pmu.c
drivers/power/supply/power_supply_core.c
drivers/powercap/intel_rapl_common.c
drivers/s390/block/dasd_devmap.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_int.h
drivers/thermal/armada_thermal.c
drivers/thermal/dove_thermal.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/kirkwood_thermal.c
drivers/thermal/spear_thermal.c
drivers/thermal/thermal_core.c
fs/smb/server/Kconfig
fs/smb/server/server.c
include/linux/kasan.h
include/linux/raid/pq.h
include/linux/thermal.h
include/linux/xarray.h
include/sound/dmaengine_pcm.h
include/sound/soc-component.h
io_uring/fdinfo.c
io_uring/io-wq.c
io_uring/io-wq.h
io_uring/io_uring.c
io_uring/sqpoll.c
kernel/dma/Kconfig
kernel/dma/contiguous.c
kernel/dma/debug.c
kernel/dma/pool.c
kernel/printk/printk.c
lib/idr.c
lib/raid6/Makefile
lib/raid6/algos.c
lib/raid6/loongarch.h [new file with mode: 0644]
lib/raid6/loongarch_simd.c [new file with mode: 0644]
lib/raid6/recov_loongarch_simd.c [new file with mode: 0644]
lib/raid6/test/Makefile
lib/xarray.c
mm/kasan/init.c
mm/kasan/kasan.h
mm/kfence/core.c
security/landlock/ruleset.h
sound/core/pcm_lib.c
sound/core/seq/seq_memory.c
sound/isa/sb/emu8000_pcm.c
sound/pci/hda/patch_cs8409.c
sound/pci/hda/patch_cs8409.h
sound/pci/hda/patch_realtek.c
sound/pci/hda/tas2781_hda_i2c.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/atmel/mchp-pdmc.c
sound/soc/codecs/Kconfig
sound/soc/codecs/Makefile
sound/soc/codecs/cs35l45.c
sound/soc/codecs/cs35l56-shared.c
sound/soc/codecs/cs42l43.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/wcd-clsh-v2.c
sound/soc/intel/avs/pcm.c
sound/soc/soc-component.c
sound/soc/soc-generic-dmaengine-pcm.c
sound/soc/stm/stm32_sai_sub.c
sound/usb/midi2.c
tools/testing/radix-tree/multiorder.c
tools/testing/selftests/landlock/fs_test.c

index 8019103..cf33de5 100644 (file)
@@ -450,6 +450,35 @@ this allows system administrators to override the
 ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded.
 
 
+io_uring_disabled
+=================
+
+Prevents all processes from creating new io_uring instances. Enabling this
+shrinks the kernel's attack surface.
+
+= ======================================================================
+0 All processes can create io_uring instances as normal. This is the
+  default setting.
+1 io_uring creation is disabled (io_uring_setup() will fail with
+  -EPERM) for unprivileged processes not in the io_uring_group group.
+  Existing io_uring instances can still be used.  See the
+  documentation for io_uring_group for more information.
+2 io_uring creation is disabled for all processes. io_uring_setup()
+  always fails with -EPERM. Existing io_uring instances can still be
+  used.
+= ======================================================================
+
+
+io_uring_group
+==============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance.  If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
+
+
 kexec_load_disabled
 ===================
 
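As an aside (not part of the patch): the effect of the new knob is easy to
observe from userspace. A minimal C test that should report EPERM once
io_uring_disabled is set to 2:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    int main(void)
    {
            struct io_uring_params p;
            long fd;

            memset(&p, 0, sizeof(p));
            /* With kernel.io_uring_disabled=2 this fails with EPERM for
             * every process; with =1, only for unprivileged processes
             * outside the io_uring_group gid. */
            fd = syscall(__NR_io_uring_setup, 8, &p);
            if (fd < 0)
                    printf("io_uring_setup: %s\n", strerror(errno));
            else
                    close((int)fd);
            return 0;
    }
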
index f4acf9c..382818a 100644 (file)
@@ -41,8 +41,8 @@ Support
 Architectures
 ~~~~~~~~~~~~~
 
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
 
 Compilers
 ~~~~~~~~~
index bdde68a..a680d7a 100644 (file)
@@ -14,7 +14,13 @@ properties:
     pattern: "^easrc@.*"
 
   compatible:
-    const: fsl,imx8mn-easrc
+    oneOf:
+      - enum:
+          - fsl,imx8mn-easrc
+      - items:
+          - enum:
+              - fsl,imx8mp-easrc
+          - const: fsl,imx8mn-easrc
 
   reg:
     maxItems: 1
index bf0124f..c4581c2 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
index ffcc9f2..de84cef 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
index 958498f..5e91ec7 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: |  ok  |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
index 05ef904..8fdb20c 100644 (file)
@@ -42,7 +42,7 @@ KASAN有三种模式:
 体系架构
 ~~~~~~~~
 
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
 而基于标签的KASAN模式只在arm64上支持。
 
 编译器
index 389fe9e..a62f5a2 100644 (file)
@@ -21244,7 +21244,7 @@ F:      sound/soc/ti/
 TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
 M:     Shenghao Ding <shenghao-ding@ti.com>
 M:     Kevin Lu <kevin-lu@ti.com>
-M:     Baojun Xu <x1077012@ti.com>
+M:     Baojun Xu <baojun.xu@ti.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/sound/tas2552.txt
index aee12c7..3addc09 100644 (file)
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
                if (!len)
                        return;
 
-               len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
-               if (len == -E2BIG)
-                       len = ARRAY_SIZE(buf) - 1;
+               len = min(len, ARRAY_SIZE(buf) - 1);
+               memcpy(buf, cmdline, len);
+               buf[len] = '\0';
 
                if (strcmp(buf, "--") == 0)
                        return;
index 78b87a6..2432683 100644 (file)
@@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
        const u64 *ptr;
        u64 data, sum64 = 0;
 
-       if (unlikely(len == 0))
+       if (unlikely(len <= 0))
                return 0;
 
        offset = (unsigned long)buff & 7;
index ecf282d..e14396a 100644 (file)
@@ -8,11 +8,13 @@ config LOONGARCH
        select ACPI_PPTT if ACPI
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_BINFMT_ELF_STATE
+       select ARCH_DISABLE_KASAN_INLINE
        select ARCH_ENABLE_MEMORY_HOTPLUG
        select ARCH_ENABLE_MEMORY_HOTREMOVE
        select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_FORTIFY_SOURCE
+       select ARCH_HAS_KCOV
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
+       select HAVE_ARCH_KASAN
+       select HAVE_ARCH_KFENCE
+       select HAVE_ARCH_KGDB if PERF_EVENTS
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
 config AS_HAS_LASX_EXTENSION
        def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
 
+config AS_HAS_LBT_EXTENSION
+       def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
 
          If unsure, say Y.
 
+config CPU_HAS_LBT
+       bool "Support for the Loongson Binary Translation Extension"
+       depends on AS_HAS_LBT_EXTENSION
+       help
+         Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+         to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+         Enabling this option allows the kernel to allocate and switch registers
+         specific to LBT.
+
+         If you want to use this feature, such as the Loongson Architecture
+         Translator (LAT), say Y.
+
 config CPU_HAS_PREFETCH
        bool
        default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
+config KASAN_SHADOW_OFFSET
+       hex
+       default 0x0
+       depends on KASAN
+
 menu "Power management options"
 
 config ARCH_SUSPEND_POSSIBLE
index ef87bab..fb0fada 100644 (file)
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux                      += -static -pie --no-dynamic-linker -z notext
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
index d64849b..a3b52aa 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
 CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
 CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_NFCT=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
 CONFIG_IP_NF_TARGET_REDIRECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
 CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_TEHUTI is not set
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
 CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
 CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_RESTART=y
 CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AST=y
 CONFIG_DRM_QXL=m
 CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
 CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
 CONFIG_OVERLAY_FS_XINO_AUTO=y
 CONFIG_OVERLAY_FS_METACOPY=y
 CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=936
 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_936=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
 CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
index ed06d39..cf8e1a4 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/ftrace.h>
index 79e1d53..c9544f3 100644 (file)
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 
-       .macro  parse_v var val
-       \var    = \val
-       .endm
-
-       .macro  parse_r var r
-       \var    = -1
-       .ifc    \r, $r0
-       \var    = 0
-       .endif
-       .ifc    \r, $r1
-       \var    = 1
-       .endif
-       .ifc    \r, $r2
-       \var    = 2
-       .endif
-       .ifc    \r, $r3
-       \var    = 3
-       .endif
-       .ifc    \r, $r4
-       \var    = 4
-       .endif
-       .ifc    \r, $r5
-       \var    = 5
-       .endif
-       .ifc    \r, $r6
-       \var    = 6
-       .endif
-       .ifc    \r, $r7
-       \var    = 7
-       .endif
-       .ifc    \r, $r8
-       \var    = 8
-       .endif
-       .ifc    \r, $r9
-       \var    = 9
-       .endif
-       .ifc    \r, $r10
-       \var    = 10
-       .endif
-       .ifc    \r, $r11
-       \var    = 11
-       .endif
-       .ifc    \r, $r12
-       \var    = 12
-       .endif
-       .ifc    \r, $r13
-       \var    = 13
-       .endif
-       .ifc    \r, $r14
-       \var    = 14
-       .endif
-       .ifc    \r, $r15
-       \var    = 15
-       .endif
-       .ifc    \r, $r16
-       \var    = 16
-       .endif
-       .ifc    \r, $r17
-       \var    = 17
-       .endif
-       .ifc    \r, $r18
-       \var    = 18
-       .endif
-       .ifc    \r, $r19
-       \var    = 19
-       .endif
-       .ifc    \r, $r20
-       \var    = 20
-       .endif
-       .ifc    \r, $r21
-       \var    = 21
-       .endif
-       .ifc    \r, $r22
-       \var    = 22
-       .endif
-       .ifc    \r, $r23
-       \var    = 23
-       .endif
-       .ifc    \r, $r24
-       \var    = 24
-       .endif
-       .ifc    \r, $r25
-       \var    = 25
-       .endif
-       .ifc    \r, $r26
-       \var    = 26
-       .endif
-       .ifc    \r, $r27
-       \var    = 27
-       .endif
-       .ifc    \r, $r28
-       \var    = 28
-       .endif
-       .ifc    \r, $r29
-       \var    = 29
-       .endif
-       .ifc    \r, $r30
-       \var    = 30
-       .endif
-       .ifc    \r, $r31
-       \var    = 31
-       .endif
-       .iflt   \var
-       .error  "Unable to parse register name \r"
-       .endif
-       .endm
-
        .macro  cpu_save_nonscratch thread
        stptr.d s0, \thread, THREAD_REG23
        stptr.d s1, \thread, THREAD_REG24
 
        .macro fpu_save_csr thread tmp
        movfcsr2gr      \tmp, fcsr0
-       stptr.w \tmp, \thread, THREAD_FCSR
+       stptr.w         \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp, \tmp, FPU_CSR_TM
+       beqz            \tmp, 1f
+       /* Save FTOP */
+       x86mftop        \tmp
+       stptr.w         \tmp, \thread, THREAD_FTOP
+       /* Turn off TM so the FPR order in memory is independent of TM */
+       x86clrtm
+1:
+#endif
        .endm
 
-       .macro fpu_restore_csr thread tmp
-       ldptr.w \tmp, \thread, THREAD_FCSR
-       movgr2fcsr      fcsr0, \tmp
+       .macro fpu_restore_csr thread tmp0 tmp1
+       ldptr.w         \tmp0, \thread, THREAD_FCSR
+       movgr2fcsr      fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 2f
+       /* Restore FTOP */
+       ldptr.w         \tmp0, \thread, THREAD_FTOP
+       andi            \tmp0, \tmp0, 0x7
+       la.pcrel        \tmp1, 1f
+       alsl.d          \tmp1, \tmp0, \tmp1, 3
+       jr              \tmp1
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+#endif
        .endm
 
        .macro fpu_save_cc thread tmp0 tmp1
        .macro  lsx_restore_all thread tmp0 tmp1
        lsx_restore_data        \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lsx_save_upper vd base tmp off
        .macro  lasx_restore_all thread tmp0 tmp1
        lasx_restore_data       \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..deeff81
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Number of valid virtual-address bits */
+#define XRANGE_SHADOW_SHIFT    (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Mask for extracting the valid address bits */
+#define XRANGE_SHADOW_MASK     GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* Whole address-space size of one segment */
+#define XRANGE_SIZE            (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG                (0x8000)
+#define XKPRANGE_CC_SEG                (0x9000)
+#define XKVRANGE_VC_SEG                (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START              CACHE_BASE
+#define XKPRANGE_CC_SIZE               XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET       (0)
+#define XKPRANGE_CC_SHADOW_SIZE                (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END         (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START              UNCACHE_BASE
+#define XKPRANGE_UC_SIZE               XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET       XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE                (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END         (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached)  */
+#define XKVRANGE_VC_START              MODULES_VADDR
+#define XKVRANGE_VC_SIZE               round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET       XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE                (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END         (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KASAN shadow memory starts right after the vmalloc area. */
+#define KASAN_SHADOW_START             round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE              (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END               round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+       return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+       if (!kasan_arch_is_ready()) {
+               return (void *)(kasan_early_shadow_page);
+       } else {
+               unsigned long maddr = (unsigned long)addr;
+               unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+               unsigned long offset = 0;
+
+               maddr &= XRANGE_SHADOW_MASK;
+               switch (xrange) {
+               case XKPRANGE_CC_SEG:
+                       offset = XKPRANGE_CC_SHADOW_OFFSET;
+                       break;
+               case XKPRANGE_UC_SEG:
+                       offset = XKPRANGE_UC_SHADOW_OFFSET;
+                       break;
+               case XKVRANGE_VC_SEG:
+                       offset = XKVRANGE_VC_SHADOW_OFFSET;
+                       break;
+               default:
+                       WARN_ON(1);
+                       return NULL;
+               }
+
+               return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+       }
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+       unsigned long addr = (unsigned long)shadow_addr;
+
+       if (unlikely(addr > KASAN_SHADOW_END) ||
+               unlikely(addr < KASAN_SHADOW_START)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+               return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+       else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+       else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+       else {
+               WARN_ON(1);
+               return NULL;
+       }
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
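
To make the segment dispatch in kasan_mem_to_shadow() concrete, here is a
standalone sketch of the arithmetic (userspace; the offsets are illustrative
stand-ins, and the real code masks with XRANGE_SHADOW_MASK rather than the
full 48 bits):

    #include <stdio.h>

    #define SCALE_SHIFT     3               /* 8 bytes of memory per shadow byte */
    #define XRANGE_SHIFT    48              /* top 16 bits select the segment */
    #define CC_OFF          0x1000000000UL  /* stand-ins for the real shadow */
    #define UC_OFF          0x3000000000UL  /* offsets derived from */
    #define VC_OFF          0x5000000000UL  /* KASAN_SHADOW_START above */

    static unsigned long mem_to_shadow(unsigned long addr)
    {
            unsigned long off;

            switch (addr >> XRANGE_SHIFT) {
            case 0x9000: off = CC_OFF; break;   /* cached XKPRANGE */
            case 0x8000: off = UC_OFF; break;   /* uncached XKPRANGE */
            case 0xffff: off = VC_OFF; break;   /* vmalloc XKVRANGE */
            default: return 0;
            }
            addr &= (1UL << XRANGE_SHIFT) - 1;  /* keep the valid bits */
            return (addr >> SCALE_SHIFT) + off; /* 8:1 memory-to-shadow */
    }

    int main(void)
    {
            unsigned long a = 0x9000000012345678UL;

            printf("%#lx -> %#lx\n", a, mem_to_shadow(a));
            return 0;
    }
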
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..6c82aea
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+       int err;
+       char *kfence_pool = __kfence_pool;
+       struct vm_struct *area;
+
+       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+                                   KFENCE_AREA_START, KFENCE_AREA_END,
+                                   __builtin_return_address(0));
+       if (!area)
+               return false;
+
+       __kfence_pool = (char *)area->addr;
+       err = ioremap_page_range((unsigned long)__kfence_pool,
+                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+                                virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+       if (err) {
+               free_vm_area(area);
+               __kfence_pool = kfence_pool;
+               return false;
+       }
+
+       return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       pte_t *pte = virt_to_kpte(addr);
+
+       if (WARN_ON(!pte) || pte_none(*pte))
+               return false;
+
+       if (protect)
+               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+       else
+               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+       preempt_disable();
+       local_flush_tlb_one(addr);
+       preempt_enable();
+
+       return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644 (file)
index 0000000..2041ae5
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG         sizeof(u64)
+
+/* The gdb remote protocol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ *     r0-r31: 64 bit
+ *     orig_a0: 64 bit
+ *     pc : 64 bit
+ *     csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE       0
+#define DBG_PT_REGS_NUM                35
+#define DBG_PT_REGS_END                (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ *     f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE           (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM            32
+#define DBG_FPR_END            (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ *     fcc0-fcc7: 8 bit
+ */
+#define DBG_FCC_BASE           (DBG_FPR_END + 1)
+#define DBG_FCC_NUM            8
+#define DBG_FCC_END            (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ *     fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM           1
+#define DBG_FCSR               (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM                (DBG_FCSR + 1)
+
+/*
+ * Size of the I/O buffer for a gdb packet, sized to hold the
+ * contents of all registers.
+ */
+#define BUFMAX                 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes each; FCSR: 4 bytes; FCC: 1 byte each.
+ * GDB fails to connect for sizes beyond this with the error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES            ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE       4
+#define CACHE_FLUSH_IS_SAFE    0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+       DBG_LOONGARCH_ZERO = 0,
+       DBG_LOONGARCH_RA,
+       DBG_LOONGARCH_TP,
+       DBG_LOONGARCH_SP,
+       DBG_LOONGARCH_A0,
+       DBG_LOONGARCH_FP = 22,
+       DBG_LOONGARCH_S0,
+       DBG_LOONGARCH_S1,
+       DBG_LOONGARCH_S2,
+       DBG_LOONGARCH_S3,
+       DBG_LOONGARCH_S4,
+       DBG_LOONGARCH_S5,
+       DBG_LOONGARCH_S6,
+       DBG_LOONGARCH_S7,
+       DBG_LOONGARCH_S8,
+       DBG_LOONGARCH_ORIG_A0,
+       DBG_LOONGARCH_PC,
+       DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* _ASM_LOONGARCH_KGDB_H */
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644 (file)
index 0000000..e671978
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+               1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+       return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+       enable_lbt();
+       set_thread_flag(TIF_USEDLBT);
+       KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+       if (cpu_has_lbt && !is_lbt_owner()) {
+               __own_lbt();
+               if (restore)
+                       _restore_lbt(&current->thread.lbt);
+       }
+}
+
+static inline void own_lbt(int restore)
+{
+       preempt_disable();
+       own_lbt_inatomic(restore);
+       preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+       if (cpu_has_lbt && is_lbt_owner()) {
+               if (save)
+                       _save_lbt(&tsk->thread.lbt);
+
+               disable_lbt();
+               clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+       }
+       KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+       preempt_disable();
+       lose_lbt_inatomic(save, current);
+       preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+       __own_lbt();
+       _init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
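
The helpers above follow the same lazy-ownership scheme the kernel already
uses for the FPU: LBT stays disabled until a task first touches it, the
resulting exception grants ownership, and state is saved only at context
switch. A hedged sketch of the trap-side flow (the real handler lives in
traps.c; names follow this header):

    /* Sketch only: first LBT use traps because CSR_EUEN_LBTEN is clear. */
    static void handle_lbt_unavailable(void)
    {
            if (!thread_lbt_context_live()) {
                    /* First use ever: enable LBT, start from clean state. */
                    init_lbt();
                    set_thread_flag(TIF_LBT_CTX_LIVE);
            } else {
                    /* Used before: re-enable and restore saved registers. */
                    own_lbt(1);
            }
    }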
index 10748a2..33531d4 100644 (file)
 #ifndef __ASSEMBLY__
 #include <larchintrin.h>
 
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- *                     "#invtlb op, 0, %0\n\t"
- *                     ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- *                     : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n)                            \
-       ".ifc   \\r, $r" #n "\n\t"              \
-       "\\var  = " #n "\n\t"                   \
-       ".endif\n\t"
-
-__asm__(".macro        parse_r var r\n\t"
-       "\\var  = -1\n\t"
-       _IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
-       _IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
-       _IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
-       _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
-       _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
-       _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
-       _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
-       _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
-       ".iflt  \\var\n\t"
-       ".error \"Unable to parse register name \\r\"\n\t"
-       ".endif\n\t"
-       ".endm");
-
-#undef _IFC_REG
-
 /* CPUCFG */
 #define read_cpucfg(reg) __cpucfg(reg)
 
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
 #define FPU_CSR_RU     0x200   /* towards +Infinity */
 #define FPU_CSR_RD     0x300   /* towards -Infinity */
 
+/* Bit 6 of the FPU Status Register specifies the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT       0x6
+#define FPU_CSR_TM             (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
 #define read_fcsr(source)      \
 ({     \
        unsigned int __res;     \
index fe67d0b..2b9a907 100644 (file)
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid) (node_data[(nid)])
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
index 26e8dcc..63f137c 100644 (file)
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define sym_to_pfn(x)          __phys_to_pfn(__pa_symbol(x))
 
 #define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)kaddr < vm_map_base)) ?                                  \
+       dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
 
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
index 23f5b11..79470f0 100644 (file)
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
 #endif /* _ASM_PGALLOC_H */
index 06963a1..29d9b12 100644 (file)
@@ -70,12 +70,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */
 
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) \
-       (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)       virt_to_page(empty_zero_page)
 
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
 #define MODULES_VADDR  (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
 #define MODULES_END    (MODULES_VADDR + SZ_256M)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE       0
+#endif
+
 #define VMALLOC_START  MODULES_END
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_END    \
        (vm_map_base +  \
-        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END    \
+       (vm_map_base +  \
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
 
 #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#define KFENCE_AREA_START      (VMEMMAP_END + 1)
+#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
 #define pte_ERROR(e) \
        pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define pmd_leaf(pmd)          ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)          ((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
index 636e1c6..c3bc44b 100644 (file)
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
-       unsigned int    fcsr;
        uint64_t        fcc;    /* 8x8 */
+       uint32_t        fcsr;
+       uint32_t        ftop;
        union fpureg    fpr[NUM_FPU_REGS];
 };
 
+struct loongarch_lbt {
+       /* Scratch registers */
+       unsigned long scr0;
+       unsigned long scr1;
+       unsigned long scr2;
+       unsigned long scr3;
+       /* Eflags register */
+       unsigned long eflags;
+};
+
 #define INIT_CPUMASK { \
        {0,} \
 }
@@ -113,15 +124,6 @@ struct thread_struct {
        unsigned long csr_ecfg;
        unsigned long csr_badvaddr;     /* Last user fault */
 
-       /* Scratch registers */
-       unsigned long scr0;
-       unsigned long scr1;
-       unsigned long scr2;
-       unsigned long scr3;
-
-       /* Eflags register */
-       unsigned long eflags;
-
        /* Other stuff associated with the thread. */
        unsigned long trap_nr;
        unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
         * context because they are conditionally copied at fork().
         */
        struct loongarch_fpu fpu FPU_ALIGN;
+       struct loongarch_lbt lbt; /* Also conditionally copied */
 
        /* Hardware breakpoints pinned to this task. */
        struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
         * FPU & vector registers                               \
         */                                                     \
        .fpu                    = {                             \
-               .fcsr           = 0,                            \
                .fcc            = 0,                            \
+               .fcsr           = 0,                            \
+               .ftop           = 0,                            \
                .fpr            = {{{0,},},},                   \
        },                                                      \
        .hbp_break              = {0},                          \
index be05c0e..a0bc159 100644 (file)
@@ -7,6 +7,7 @@
 #define _LOONGARCH_SETUP_H
 
 #include <linux/types.h>
+#include <asm/sections.h>
 #include <uapi/asm/setup.h>
 
 #define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
 extern long __rela_dyn_begin;
 extern long __rela_dyn_end;
 
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
 
 #endif
 
+static inline unsigned long kaslr_offset(void)
+{
+       return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
 #endif /* __SETUP_H */
index 7df80e6..4fb1e64 100644 (file)
        cfi_st  u0, PT_R21, \docfi
        csrrd   u0, PERCPU_BASE_KS
 9:
+#ifdef CONFIG_KGDB
+       li.w    t0, CSR_CRMD_WE
+       csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
        .endm
 
        .macro  SAVE_ALL docfi=0
index 7b29cc9..5bb5a90 100644 (file)
@@ -7,11 +7,31 @@
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
 
 #endif /* _ASM_STRING_H */
index 24e3094..5b225af 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)                                            \
 do {                                                                           \
        lose_fpu_inatomic(1, prev);                                             \
+       lose_lbt_inatomic(1, prev);                                             \
        hw_breakpoint_thread_switch(next);                                      \
        (last) = __switch_to(prev, next, task_thread_info(next),                \
                 __builtin_return_address(0), __builtin_frame_address(0));      \
index 1a3354c..8cb653d 100644 (file)
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_SINGLESTEP         16      /* Single Step */
 #define TIF_LSX_CTX_LIVE       17      /* LSX context must be preserved */
 #define TIF_LASX_CTX_LIVE      18      /* LASX context must be preserved */
+#define TIF_USEDLBT            19      /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE       20      /* LBT context must be preserved */
 
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
 #define _TIF_LSX_CTX_LIVE      (1<<TIF_LSX_CTX_LIVE)
 #define _TIF_LASX_CTX_LIVE     (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT           (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE      (1<<TIF_LBT_CTX_LIVE)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644 (file)
index 0000000..12467ff
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+       .name = "lsx",
+       .do_2 = xor_lsx_2,
+       .do_3 = xor_lsx_3,
+       .do_4 = xor_lsx_4,
+       .do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX()                                        \
+       do {                                            \
+               if (cpu_has_lsx)                        \
+                       xor_speed(&xor_block_lsx);      \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+       .name = "lasx",
+       .do_2 = xor_lasx_2,
+       .do_3 = xor_lasx_3,
+       .do_4 = xor_lasx_4,
+       .do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX()                                       \
+       do {                                                    \
+               if (cpu_has_lasx)                               \
+                       xor_speed(&xor_block_lasx);             \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point in time that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+do {                                                   \
+       xor_speed(&xor_block_8regs);                    \
+       xor_speed(&xor_block_8regs_p);                  \
+       xor_speed(&xor_block_32regs);                   \
+       xor_speed(&xor_block_32regs_p);                 \
+       XOR_SPEED_LSX();                                \
+       XOR_SPEED_LASX();                               \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
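
For context on how XOR_TRY_TEMPLATES is consumed: crypto/xor.c benchmarks
each template once at boot via xor_speed() and keeps the fastest for all
subsequent RAID parity work. A simplified sketch of the caller side
(active_template is this sketch's stand-in for the calibration winner, using
the xor_block_template type from this header):

    /* Sketch: XOR two PAGE_SIZE blocks with whichever template won the
     * boot-time calibration; do_2..do_5 handle 2..5 source buffers. */
    static struct xor_block_template *active_template;

    static void xor_two_blocks(unsigned long *dst, unsigned long *src)
    {
            active_template->do_2(PAGE_SIZE, dst, src);
    }
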
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644 (file)
index 0000000..471b963
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
index 06e3be5..ac915f8 100644 (file)
@@ -56,6 +56,12 @@ struct user_lasx_state {
        uint64_t vregs[32*4];
 };
 
+struct user_lbt_state {
+       uint64_t scr[4];
+       uint32_t eflags;
+       uint32_t ftop;
+};
+
 struct user_watch_state {
        uint64_t dbg_info;
        struct {
index 4cd7d16..6c22f61 100644 (file)
@@ -59,4 +59,14 @@ struct lasx_context {
        __u32   fcsr;
 };
 
+/* LBT context */
+#define LBT_CTX_MAGIC          0x42540001
+#define LBT_CTX_ALIGN          8
+struct lbt_context {
+       __u64   regs[4];
+       __u32   eflags;
+       __u32   ftop;
+};
+
+
 #endif /* _UAPI_ASM_SIGCONTEXT_H */
index 8e279f0..c56ea0b 100644 (file)
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI)             += efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o kfpu.o
 
+obj-$(CONFIG_CPU_HAS_LBT)      += lbt.o
+
 obj-$(CONFIG_ARCH_STRICT_ALIGN)        += unaligned.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
   CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
 endif
 
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
 obj-$(CONFIG_MODULES)          += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 
+obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_RETHOOK)          += rethook.o rethook_trampoline.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
index 505e4bf..8da0726 100644 (file)
@@ -118,13 +118,6 @@ void output_thread_defines(void)
        OFFSET(THREAD_CSRECFG, task_struct,
               thread.csr_ecfg);
 
-       OFFSET(THREAD_SCR0, task_struct, thread.scr0);
-       OFFSET(THREAD_SCR1, task_struct, thread.scr1);
-       OFFSET(THREAD_SCR2, task_struct, thread.scr2);
-       OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
-       OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
        OFFSET(THREAD_FPU, task_struct, thread.fpu);
 
        OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
 
        OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
        OFFSET(THREAD_FCC,  loongarch_fpu, fcc);
+       OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+       BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+       OFFSET(THREAD_SCR0,  loongarch_lbt, scr0);
+       OFFSET(THREAD_SCR1,  loongarch_lbt, scr1);
+       OFFSET(THREAD_SCR2,  loongarch_lbt, scr2);
+       OFFSET(THREAD_SCR3,  loongarch_lbt, scr3);
+       OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
        BLANK();
 }
 
index e925579..5532081 100644 (file)
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
                c->options |= LOONGARCH_CPU_LVZ;
                elf_hwcap |= HWCAP_LOONGARCH_LVZ;
        }
+#ifdef CONFIG_CPU_HAS_LBT
+       if (config & CPUCFG2_X86BT) {
+               c->options |= LOONGARCH_CPU_LBT_X86;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+       }
+       if (config & CPUCFG2_ARMBT) {
+               c->options |= LOONGARCH_CPU_LBT_ARM;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+       }
+       if (config & CPUCFG2_MIPSBT) {
+               c->options |= LOONGARCH_CPU_LBT_MIPS;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+       }
+#endif
 
        config = read_cpucfg(LOONGARCH_CPUCFG6);
        if (config & CPUCFG6_PMP)
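
Userspace can test for the translation capabilities probed above through the ELF hwcap bits. A minimal sketch, assuming glibc's getauxval() and the HWCAP_LOONGARCH_LBT_* constants from the uapi hwcap header:

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>      /* HWCAP_LOONGARCH_LBT_* (LoongArch uapi) */

    int main(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);

            if (hwcap & HWCAP_LOONGARCH_LBT_X86)
                    puts("x86 binary translation supported");
            if (hwcap & HWCAP_LOONGARCH_LBT_ARM)
                    puts("ARM binary translation supported");
            if (hwcap & HWCAP_LOONGARCH_LBT_MIPS)
                    puts("MIPS binary translation supported");
            return 0;
    }
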
index d737e3c..65518bb 100644 (file)
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
 
        SAVE_STATIC
 
+#ifdef CONFIG_KGDB
+       li.w            t1, CSR_CRMD_WE
+       csrxchg         t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
        move            u0, t0
        li.d            tp, ~_THREAD_MASK
        and             tp, tp, sp
index 501094a..d53ab10 100644 (file)
@@ -22,7 +22,7 @@
 
        .macro  EX insn, reg, src, offs
 .ex\@: \insn   \reg, \src, \offs
-       _asm_extable .ex\@, fault
+       _asm_extable .ex\@, .L_fpu_fault
        .endm
 
        .macro sc_save_fp base
        .macro sc_save_fcsr base, tmp0
        movfcsr2gr      \tmp0, fcsr0
        EX      st.w    \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 1f
+       x86clrtm
+1:
+#endif
        .endm
 
        .macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
  */
 SYM_FUNC_START(_restore_fp)
        fpu_restore_double      a0 t1           # clobbers t1
-       fpu_restore_csr         a0 t1
+       fpu_restore_csr         a0 t1 t2
        fpu_restore_cc          a0 t1 t2        # clobbers t1, t2
        jr                      ra
 SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
        jr      ra
 SYM_FUNC_END(_restore_lasx_context)
 
-SYM_FUNC_START(fault)
+.L_fpu_fault:
        li.w    a0, -EFAULT                             # failure
        jr      ra
-SYM_FUNC_END(fault)
index 5e828a8..53b883d 100644 (file)
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry)                        # kernel entry point
        PTR_LI          sp, (_THREAD_SIZE - PT_SIZE)
        PTR_ADD         sp, sp, tp
        set_saved_sp    sp, t0, t1
-#endif
 
-       /* relocate_kernel() returns the new kernel entry point */
-       jr              a0
-       ASM_BUG()
+       /* Jump to the new kernel: new_pc = current_pc + random_offset */
+       pcaddi          t0, 0
+       add.d           t0, t0, a0
+       jirl            zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
 
+#ifdef CONFIG_KASAN
+       bl              kasan_early_init
 #endif
 
        bl              start_kernel
index 5c46ae8..ec5b28e 100644 (file)
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
 
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 
 void kernel_fpu_begin(void)
 {
+       unsigned int *euen_curr;
+
        preempt_disable();
 
        WARN_ON(this_cpu_read(in_kernel_fpu));
 
        this_cpu_write(in_kernel_fpu, true);
+       euen_curr = this_cpu_ptr(&euen_current);
 
-       if (!is_fpu_owner())
-               enable_fpu();
+       *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _save_lasx(&current->thread.fpu);
+       else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _save_lsx(&current->thread.fpu);
        else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _save_fp(&current->thread.fpu);
 
        write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
+       unsigned int *euen_curr;
+
        WARN_ON(!this_cpu_read(in_kernel_fpu));
 
-       if (!is_fpu_owner())
-               disable_fpu();
+       euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _restore_lasx(&current->thread.fpu);
        else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _restore_lsx(&current->thread.fpu);
+       else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _restore_fp(&current->thread.fpu);
 
+       *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
        this_cpu_write(in_kernel_fpu, false);
 
        preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+static int __init init_euen_mask(void)
+{
+       if (cpu_has_lsx)
+               euen_mask |= CSR_EUEN_LSXEN;
+
+       if (cpu_has_lasx)
+               euen_mask |= CSR_EUEN_LASXEN;
+
+       return 0;
+}
+arch_initcall(init_euen_mask);
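
For context, a hypothetical in-kernel caller of the reworked pair above; the function and its vector body are illustrative, only kernel_fpu_begin()/kernel_fpu_end() are real:

    #include <asm/fpu.h>

    static void xor_blocks_simd(unsigned long *dst, const unsigned long *src,
                                unsigned long bytes)
    {
            /* Disables preemption and saves any live FP/LSX/LASX state
             * before the kernel touches vector registers. */
            kernel_fpu_begin();

            /* ... LSX/LASX code clobbering vector registers goes here ... */

            /* Restores the saved user state and re-enables preemption.
             * The section is non-reentrant, per the comment above. */
            kernel_fpu_end();
    }
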
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644 (file)
index 0000000..445c452
--- /dev/null
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+       { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+       { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+       { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+       { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+       { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+       { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+       { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+       { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+       { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+       { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+       { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+       { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+       { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+       { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+       { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+       { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+       { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+       { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+       { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+       { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+       { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+       { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+       { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+       { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+       { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+       { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+       { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+       { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+       { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+       { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+       { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+       { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+       { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+       { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+       { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+       { "f0", GDB_SIZEOF_REG, 0 },
+       { "f1", GDB_SIZEOF_REG, 1 },
+       { "f2", GDB_SIZEOF_REG, 2 },
+       { "f3", GDB_SIZEOF_REG, 3 },
+       { "f4", GDB_SIZEOF_REG, 4 },
+       { "f5", GDB_SIZEOF_REG, 5 },
+       { "f6", GDB_SIZEOF_REG, 6 },
+       { "f7", GDB_SIZEOF_REG, 7 },
+       { "f8", GDB_SIZEOF_REG, 8 },
+       { "f9", GDB_SIZEOF_REG, 9 },
+       { "f10", GDB_SIZEOF_REG, 10 },
+       { "f11", GDB_SIZEOF_REG, 11 },
+       { "f12", GDB_SIZEOF_REG, 12 },
+       { "f13", GDB_SIZEOF_REG, 13 },
+       { "f14", GDB_SIZEOF_REG, 14 },
+       { "f15", GDB_SIZEOF_REG, 15 },
+       { "f16", GDB_SIZEOF_REG, 16 },
+       { "f17", GDB_SIZEOF_REG, 17 },
+       { "f18", GDB_SIZEOF_REG, 18 },
+       { "f19", GDB_SIZEOF_REG, 19 },
+       { "f20", GDB_SIZEOF_REG, 20 },
+       { "f21", GDB_SIZEOF_REG, 21 },
+       { "f22", GDB_SIZEOF_REG, 22 },
+       { "f23", GDB_SIZEOF_REG, 23 },
+       { "f24", GDB_SIZEOF_REG, 24 },
+       { "f25", GDB_SIZEOF_REG, 25 },
+       { "f26", GDB_SIZEOF_REG, 26 },
+       { "f27", GDB_SIZEOF_REG, 27 },
+       { "f28", GDB_SIZEOF_REG, 28 },
+       { "f29", GDB_SIZEOF_REG, 29 },
+       { "f30", GDB_SIZEOF_REG, 30 },
+       { "f31", GDB_SIZEOF_REG, 31 },
+       { "fcc0", 1, 0 },
+       { "fcc1", 1, 1 },
+       { "fcc2", 1, 2 },
+       { "fcc3", 1, 3 },
+       { "fcc4", 1, 4 },
+       { "fcc5", 1, 5 },
+       { "fcc6", 1, 6 },
+       { "fcc7", 1, 7 },
+       { "fcsr", 4, 0 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return NULL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       if (reg_offset == -1)
+               goto out;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy(mem, (void *)regs + reg_offset, reg_size);
+               goto out;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               goto out;
+
+       save_fp(current);
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+               break;
+       default:
+               break;
+       }
+
+out:
+       return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return -EINVAL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       if (reg_offset == -1)
+               return 0;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy((void *)regs + reg_offset, mem, reg_size);
+               return 0;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               return 0;
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+               break;
+       default:
+               break;
+       }
+
+       restore_fp(current);
+
+       return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that the process is sleeping, so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+       /* Initialize to zero */
+       memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+       gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+       gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+       gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+       /* S0 - S8 */
+       gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+       gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+       gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+       gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+       gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+       gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+       gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+       gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+       gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+       /*
+        * PC uses the return address (RA), i.e. the moment after returning from __switch_to()
+        */
+       gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+       regs->csr_era = pc;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+       __asm__ __volatile__ (                  \
+               ".globl kgdb_breakinst\n\t"     \
+               "nop\n"                         \
+               "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Called via the die notifier chain before the kernel dies. If KGDB is
+ * enabled, try to fall into the debugger.
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+
+       /* Userspace events, ignore. */
+       if (user_mode(regs))
+               return NOTIFY_DONE;
+
+       if (!kgdb_io_module_registered)
+               return NOTIFY_DONE;
+
+       if (atomic_read(&kgdb_active) != -1)
+               kgdb_nmicallback(smp_processor_id(), regs);
+
+       if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+               return NOTIFY_DONE;
+
+       if (atomic_read(&kgdb_setting_breakpoint))
+               if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+                       regs->csr_era += LOONGARCH_INSN_SIZE;
+
+       return NOTIFY_STOP;
+}
+
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+       struct die_args args = {
+               .regs   = regs,
+               .str    = "Break",
+               .err    = BRK_KDB,
+               .trapnr = read_csr_excode(),
+               .signr  = SIGTRAP,
+       };
+
+       return kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP;
+}
+
+static struct notifier_block kgdb_notifier = {
+       .notifier_call = kgdb_loongarch_notify,
+};
+
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+                                        char *remcom_in_buffer)
+{
+       unsigned long addr;
+       char *ptr;
+
+       ptr = &remcom_in_buffer[1];
+       if (kgdb_hex2long(&ptr, &addr))
+               regs->csr_era = addr;
+}
+
+/* Calculate the new address for after a step */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+       char cj_val;
+       unsigned int si, si_l, si_h, rd, rj, cj;
+       unsigned long pc = instruction_pointer(regs);
+       union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return -EINVAL;
+       }
+
+       *next_addr = pc + LOONGARCH_INSN_SIZE;
+
+       si_h = ip->reg0i26_format.immediate_h;
+       si_l = ip->reg0i26_format.immediate_l;
+       switch (ip->reg0i26_format.opcode) {
+       case b_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               return 0;
+       case bl_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       rj = ip->reg1i21_format.rj;
+       cj = (rj & 0x07) + DBG_FCC_BASE;
+       si_l = ip->reg1i21_format.immediate_l;
+       si_h = ip->reg1i21_format.immediate_h;
+       dbg_get_reg(cj, &cj_val, regs);
+       switch (ip->reg1i21_format.opcode) {
+       case beqz_op:
+               if (regs->regs[rj] == 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bnez_op:
+               if (regs->regs[rj] != 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bceqz_op: /* bceqz_op = bcnez_op */
+               if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       }
+
+       rj = ip->reg2i16_format.rj;
+       rd = ip->reg2i16_format.rd;
+       si = ip->reg2i16_format.immediate;
+       switch (ip->reg2i16_format.opcode) {
+       case beq_op:
+               if (regs->regs[rj] == regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bne_op:
+               if (regs->regs[rj] != regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case blt_op:
+               if ((long)regs->regs[rj] < (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bge_op:
+               if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bltu_op:
+               if (regs->regs[rj] < regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bgeu_op:
+               if (regs->regs[rj] >= regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case jirl_op:
+               regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+               *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+               return 0;
+       }
+
+       return 0;
+}
+
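The sign_extend64(si << 2, 17) calls above widen a 16-bit branch displacement (counted in instructions) into a byte offset. An equivalent standalone helper, for illustration:

    #include <stdint.h>

    /* Equivalent of the kernel's sign_extend64(value, index): treat bit
     * 'index' as the sign bit and extend it through bit 63. */
    static inline int64_t sext64(uint64_t value, int index)
    {
            int shift = 63 - index;
            return (int64_t)(value << shift) >> shift;
    }

    /* For a reg2i16 branch, si = 0x8000 (the most negative displacement)
     * gives sext64(0x8000 << 2, 17) == -0x20000, i.e. PC - 128 KiB. */
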
+static int do_single_step(struct pt_regs *regs)
+{
+       int error = 0;
+       unsigned long addr = 0; /* Where the target instruction will branch to */
+
+       error = get_step_address(regs, &addr);
+       if (error)
+               return error;
+
+       /* Store the opcode in the stepped address */
+       error = get_kernel_nofault(stepped_opcode, (void *)addr);
+       if (error)
+               return error;
+
+       stepped_address = addr;
+
+       /* Replace the opcode with the break instruction */
+       error = copy_to_kernel_nofault((void *)stepped_address,
+                                      arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+       flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+       if (error) {
+               stepped_opcode = 0;
+               stepped_address = 0;
+       } else {
+               kgdb_single_step = 1;
+               atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+       }
+
+       return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+       if (stepped_opcode) {
+               copy_to_kernel_nofault((void *)stepped_address,
+                                      (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+               flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+       }
+
+       stepped_opcode = 0;
+       stepped_address = 0;
+       kgdb_single_step = 0;
+       atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+                              char *remcom_in_buffer, char *remcom_out_buffer,
+                              struct pt_regs *regs)
+{
+       int ret = 0;
+
+       undo_single_step(regs);
+       regs->csr_prmd |= CSR_PRMD_PWE;
+
+       switch (remcom_in_buffer[0]) {
+       case 'D':
+       case 'k':
+               regs->csr_prmd &= ~CSR_PRMD_PWE;
+               fallthrough;
+       case 'c':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               break;
+       case 's':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               ret = do_single_step(regs);
+               break;
+       default:
+               ret = -1;
+       }
+
+       return ret;
+}
+
+static struct hw_breakpoint {
+       unsigned int            enabled;
+       unsigned long           addr;
+       int                     len;
+       int                     type;
+       struct perf_event       * __percpu *pev;
+} breakinfo[LOONGARCH_MAX_BRP];
+
+static int hw_break_reserve_slot(int breakno)
+{
+       int cpu, cnt = 0;
+       struct perf_event **pevent;
+
+       for_each_online_cpu(cpu) {
+               cnt++;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_reserve_bp_slot(*pevent))
+                       goto fail;
+       }
+
+       return 0;
+
+fail:
+       for_each_online_cpu(cpu) {
+               cnt--;
+               if (!cnt)
+                       break;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               dbg_release_bp_slot(*pevent);
+       }
+
+       return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+       int cpu;
+       struct perf_event **pevent;
+
+       if (dbg_is_early)
+               return 0;
+
+       for_each_online_cpu(cpu) {
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_release_bp_slot(*pevent))
+                       /*
+                        * The debugger is responsible for handling the retry on
+                        * remove failure.
+                        */
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (!breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       switch (bptype) {
+       case BP_HARDWARE_BREAKPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_X;
+               break;
+       case BP_READ_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_R;
+               break;
+       case BP_WRITE_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_W;
+               break;
+       case BP_ACCESS_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return -1;
+       }
+
+       switch (len) {
+       case 1:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+               break;
+       case 2:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+               break;
+       case 4:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+               break;
+       case 8:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -1;
+       }
+
+       breakinfo[i].addr = addr;
+       if (hw_break_reserve_slot(i)) {
+               breakinfo[i].addr = 0;
+               return -1;
+       }
+       breakinfo[i].enabled = 1;
+
+       return 0;
+}
+
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       if (hw_break_release_slot(i)) {
+               pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+               return -1;
+       }
+       breakinfo[i].enabled = 0;
+
+       return 0;
+}
+
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled == 1)
+                       continue;
+
+               arch_uninstall_hw_breakpoint(bp);
+               bp->attr.disabled = 1;
+       }
+
+       /* Disable hardware debugging while we are in kgdb */
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (!bp->attr.disabled) {
+                       arch_uninstall_hw_breakpoint(bp);
+                       bp->attr.disabled = 1;
+                       continue;
+               }
+
+               if (hw_break_release_slot(i))
+                       pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+               breakinfo[i].enabled = 0;
+       }
+
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = 0;
+}
+
+static void kgdb_correct_hw_break(void)
+{
+       int i, activated = 0;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               struct perf_event *bp;
+               int val;
+               int cpu = raw_smp_processor_id();
+
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled != 1)
+                       continue;
+
+               bp->attr.bp_addr = breakinfo[i].addr;
+               bp->attr.bp_len = breakinfo[i].len;
+               bp->attr.bp_type = breakinfo[i].type;
+
+               val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+               if (val)
+                       return;
+
+               val = arch_install_hw_breakpoint(bp);
+               if (!val)
+                       bp->attr.disabled = 0;
+               activated = 1;
+       }
+
+       csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+       .gdb_bpt_instr          = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+       .flags                  = KGDB_HW_BREAKPOINT,
+       .set_hw_breakpoint      = kgdb_set_hw_break,
+       .remove_hw_breakpoint   = kgdb_remove_hw_break,
+       .disable_hw_break       = kgdb_disable_hw_break,
+       .remove_all_hw_break    = kgdb_remove_all_hw_break,
+       .correct_hw_break       = kgdb_correct_hw_break,
+};
+
+int kgdb_arch_init(void)
+{
+       return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
+{
+       int i, cpu;
+       struct perf_event_attr attr;
+       struct perf_event **pevent;
+
+       hw_breakpoint_init(&attr);
+
+       attr.bp_addr = (unsigned long)kgdb_arch_init;
+       attr.bp_len = HW_BREAKPOINT_LEN_4;
+       attr.bp_type = HW_BREAKPOINT_W;
+       attr.disabled = 1;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (breakinfo[i].pev)
+                       continue;
+
+               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+               if (IS_ERR((void * __force)breakinfo[i].pev)) {
+                       pr_err("kgdb: Could not allocate hw breakpoints.\n");
+                       breakinfo[i].pev = NULL;
+                       return;
+               }
+
+               for_each_online_cpu(cpu) {
+                       pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+                       if (pevent[0]->destroy) {
+                               pevent[0]->destroy = NULL;
+                               release_bp_slot(*pevent);
+                       }
+               }
+       }
+}
+
+void kgdb_arch_exit(void)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (breakinfo[i].pev) {
+                       unregister_wide_hw_breakpoint(breakinfo[i].pev);
+                       breakinfo[i].pev = NULL;
+               }
+       }
+
+       unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644 (file)
index 0000000..9c75120
--- /dev/null
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+       .macro  EX insn, reg, src, offs
+.ex\@: \insn   \reg, \src, \offs
+       _asm_extable .ex\@, .L_lbt_fault
+       .endm
+
+/*
+ * Save a thread's lbt context.
+ */
+SYM_FUNC_START(_save_lbt)
+       movscr2gr       t1, $scr0               # save scr
+       stptr.d         t1, a0, THREAD_SCR0
+       movscr2gr       t1, $scr1
+       stptr.d         t1, a0, THREAD_SCR1
+       movscr2gr       t1, $scr2
+       stptr.d         t1, a0, THREAD_SCR2
+       movscr2gr       t1, $scr3
+       stptr.d         t1, a0, THREAD_SCR3
+
+       x86mfflag       t1, 0x3f                # save eflags
+       stptr.d         t1, a0, THREAD_EFLAGS
+       jr              ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ */
+SYM_FUNC_START(_restore_lbt)
+       ldptr.d         t1, a0, THREAD_SCR0     # restore scr
+       movgr2scr       $scr0, t1
+       ldptr.d         t1, a0, THREAD_SCR1
+       movgr2scr       $scr1, t1
+       ldptr.d         t1, a0, THREAD_SCR2
+       movgr2scr       $scr2, t1
+       ldptr.d         t1, a0, THREAD_SCR3
+       movgr2scr       $scr3, t1
+
+       ldptr.d         t1, a0, THREAD_EFLAGS   # restore eflags
+       x86mtflag       t1, 0x3f
+       jr              ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+       movgr2scr       $scr0, zero
+       movgr2scr       $scr1, zero
+       movgr2scr       $scr2, zero
+       movgr2scr       $scr3, zero
+
+       x86mtflag       zero, 0x3f
+       jr              ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_save_lbt_context)
+       movscr2gr       t1, $scr0               # save scr
+       EX      st.d    t1, a0, (0 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr1
+       EX      st.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr2
+       EX      st.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr3
+       EX      st.d    t1, a0, (3 * SCR_REG_WIDTH)
+
+       x86mfflag       t1, 0x3f                # save eflags
+       EX      st.w    t1, a1, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_restore_lbt_context)
+       EX      ld.d    t1, a0, (0 * SCR_REG_WIDTH)     # restore scr
+       movgr2scr       $scr0, t1
+       EX      ld.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movgr2scr       $scr1, t1
+       EX      ld.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movgr2scr       $scr2, t1
+       EX      ld.d    t1, a0, (3 * SCR_REG_WIDTH)
+       movgr2scr       $scr3, t1
+
+       EX      ld.w    t1, a1, 0                       # restore eflags
+       x86mtflag       t1, 0x3f
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_save_ftop_context)
+       x86mftop        t1
+       st.w            t1, a0, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_restore_ftop_context)
+       ld.w            t1, a0, 0
+       andi            t1, t1, 0x7
+       la.pcrel        a0, 1f
+       alsl.d          a0, t1, a0, 3
+       jr              a0
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_ftop_context)
+
+.L_lbt_fault:
+       li.w            a0, -EFAULT             # failure
+       jr              ra
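
Because x86mttop only takes an immediate operand, _restore_ftop_context above dispatches through a table of 8-byte slots (one x86mttop plus one branch each, hence the alsl.d shift of 3). A hypothetical C rendering of the same dispatch, with the instruction stubbed out:

    /* Stub standing in for the x86mttop instruction, which sets the
     * x87-style top-of-stack pointer and has no C-level equivalent. */
    static void set_top(int n) { (void)n; }

    static void restore_ftop(unsigned int ftop)
    {
            /* Mirrors: andi t1, t1, 0x7; alsl.d a0, t1, a0, 3; jr a0 */
            switch (ftop & 0x7) {
            case 0: set_top(0); break;
            case 1: set_top(1); break;
            case 2: set_top(2); break;
            case 3: set_top(3); break;
            case 4: set_top(4); break;
            case 5: set_top(5); break;
            case 6: set_top(6); break;
            case 7: set_top(7); break;
            }
    }
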
index 7086658..c7d33c4 100644 (file)
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 
 void __init pcpu_populate_pte(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d = p4d_offset(pgd, addr);
-       pud_t *pud;
-       pmd_t *pmd;
-
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
-#endif
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
-#endif
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
+       populate_kernel_pte(addr);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
 {
        high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
        memblock_free_all();
-       setup_zero_pages();     /* This comes from node 0 */
 }
 
 int pcibus_to_node(struct pci_bus *bus)
index ba457e4..3cb082e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/elf.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
        euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
        regs->csr_euen = euen;
        lose_fpu(0);
+       lose_lbt(0);
 
        clear_thread_flag(TIF_LSX_CTX_LIVE);
        clear_thread_flag(TIF_LASX_CTX_LIVE);
+       clear_thread_flag(TIF_LBT_CTX_LIVE);
        clear_used_math();
        regs->csr_era = pc;
        regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        preempt_enable();
 
-       if (used_math())
-               memcpy(dst, src, sizeof(struct task_struct));
-       else
+       if (!used_math())
                memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+       else
+               memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+       memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
 
        return 0;
 }
@@ -189,8 +196,10 @@ out:
        ptrace_hw_copy_thread(p);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
        clear_tsk_thread_flag(p, TIF_USEDSIMD);
+       clear_tsk_thread_flag(p, TIF_USEDLBT);
        clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
        clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+       clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
        return 0;
 }
index f72adbf..c114c5e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
 
 #endif /* CONFIG_CPU_HAS_LSX */
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int lbt_get(struct task_struct *target,
+                  const struct user_regset *regset,
+                  struct membuf to)
+{
+       int r;
+
+       r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+       r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+       r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+       r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+       r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+       r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+       return r;
+}
+
+static int lbt_set(struct task_struct *target,
+                  const struct user_regset *regset,
+                  unsigned int pos, unsigned int count,
+                  const void *kbuf, const void __user *ubuf)
+{
+       int err = 0;
+       const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+       const int ftop_start = eflags_start + sizeof(u32);
+
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.scr0,
+                                 0, 4 * sizeof(target->thread.lbt.scr0));
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.eflags,
+                                 eflags_start, ftop_start);
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpu.ftop,
+                                 ftop_start, ftop_start + sizeof(u32));
+
+       return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 /*
@@ -802,6 +843,9 @@ enum loongarch_regset {
 #ifdef CONFIG_CPU_HAS_LASX
        REGSET_LASX,
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       REGSET_LBT,
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        REGSET_HW_BREAK,
        REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
                .set            = simd_set,
        },
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       [REGSET_LBT] = {
+               .core_note_type = NT_LOONGARCH_LBT,
+               .n              = 5,
+               .size           = sizeof(u64),
+               .align          = sizeof(u64),
+               .regset_get     = lbt_get,
+               .set            = lbt_set,
+       },
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        [REGSET_HW_BREAK] = {
                .core_note_type = NT_LOONGARCH_HW_BREAK,
index 01f94d1..6c3eff9 100644 (file)
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
        *new_addr = (unsigned long)reloc_offset;
 }
 
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
 {
        unsigned long kernel_length;
        unsigned long random_offset = 0;
        void *location_new = _text; /* Default to original kernel start */
-       void *kernel_entry = start_kernel; /* Default to original kernel entry point */
        char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
 
        strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
 
                reloc_offset += random_offset;
 
-               /* Return the new kernel's entry point */
-               kernel_entry = RELOCATED_KASLR(start_kernel);
-
                /* The current thread is now within the relocated kernel */
                __current_thread_info = RELOCATED_KASLR(__current_thread_info);
 
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
 
        relocate_absolute(random_offset);
 
-       return kernel_entry;
+       return random_offset;
 }
 
 /*
index 9d830ab..7783f0a 100644 (file)
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        paging_init();
+
+#ifdef CONFIG_KASAN
+       kasan_init();
+#endif
 }
index ceb8993..504fdfe 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
 
@@ -44,6 +45,9 @@
 /* Make sure we will not lose FPU ownership */
 #define lock_fpu_owner()       ({ preempt_disable(); pagefault_disable(); })
 #define unlock_fpu_owner()     ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner()       ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner()     ({ pagefault_enable(); preempt_enable(); })
 
 /* Assembly functions to move context to/from the FPU */
 extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
 struct rt_sigframe {
        struct siginfo rs_info;
        struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
        struct _ctx_layout fpu;
        struct _ctx_layout lsx;
        struct _ctx_layout lasx;
+       struct _ctx_layout lbt;
        struct _ctx_layout end;
 };
 
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
        return err;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __put_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __get_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
        return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _save_lbt_context(regs, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _restore_lbt_context(regs, eflags);
+}
+
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _save_ftop_context(ftop);
+}
+
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _restore_ftop_context(ftop);
+}
+#endif
+
 static int fcsr_pending(unsigned int __user *fcsr)
 {
        int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
        return err ?: sig;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= save_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_to_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= save_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_to_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+               err |= __put_user(extctx->lbt.size, &info->size);
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0, tmp __maybe_unused;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= restore_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_from_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= restore_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_from_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+#endif
+
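The while (1) loops above implement the usual touch-and-retry pattern for sigframe access: the fast path may fault if the user stack page is not yet present, so on failure the page is touched with a dummy access and the copy is retried. In generic form (the helpers are stand-ins for the real save routines and the __put_user() dummy write):

    /* Shape of the touch-and-retry loop used by the protected helpers. */
    static int save_ctx(void *ctx)  { (void)ctx; return 0; }  /* may be -EFAULT */
    static int touch_ctx(void *ctx) { (void)ctx; return 0; }  /* faults page in */

    static int protected_save(void *ctx)
    {
            int err;

            while (1) {
                    err = save_ctx(ctx);        /* fast path */
                    if (!err)
                            break;
                    err = touch_ctx(ctx);       /* make the page present */
                    if (err)
                            return err;         /* a genuine fault: give up */
            }
            return err;
    }
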
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                            struct extctx_layout *extctx)
 {
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        else if (extctx->fpu.addr)
                err |= protected_save_fpu_context(extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx->lbt.addr)
+               err |= protected_save_lbt_context(extctx);
+#endif
+
        /* Set the "end" magic */
        info = (struct sctx_info *)extctx->end.addr;
        err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
                        extctx->lasx.addr = info;
                        break;
 
+               case LBT_CTX_MAGIC:
+                       if (size < (sizeof(struct sctx_info) +
+                                   sizeof(struct lbt_context)))
+                               goto invalid;
+                       extctx->lbt.addr = info;
+                       break;
+
                default:
                        goto invalid;
                }
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
        else if (extctx.fpu.addr)
                err |= protected_restore_fpu_context(&extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx.lbt.addr)
+               err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
        return err;
 }
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
                          sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
        }
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (cpu_has_lbt && thread_lbt_context_live()) {
+               new_sp = extframe_alloc(extctx, &extctx->lbt,
+                         sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+       }
+#endif
+
        return new_sp;
 }
 
index 2463d2f..92270f1 100644 (file)
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
        struct pt_regs dummyregs;
        struct unwind_state state;
 
-       regs = &dummyregs;
+       if (!regs) {
+               regs = &dummyregs;
 
-       if (task == current) {
-               regs->regs[3] = (unsigned long)__builtin_frame_address(0);
-               regs->csr_era = (unsigned long)__builtin_return_address(0);
-       } else {
-               regs->regs[3] = thread_saved_fp(task);
-               regs->csr_era = thread_saved_ra(task);
+               if (task == current) {
+                       regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+                       regs->csr_era = (unsigned long)__builtin_return_address(0);
+               } else {
+                       regs->regs[3] = thread_saved_fp(task);
+                       regs->csr_era = thread_saved_ra(task);
+               }
+               regs->regs[1] = 0;
        }
 
-       regs->regs[1] = 0;
        for (unwind_start(&state, task, regs);
             !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
index 89699db..6521477 100644 (file)
@@ -36,7 +36,9 @@
 #include <asm/break.h>
 #include <asm/cpu.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/inst.h>
+#include <asm/kgdb.h>
 #include <asm/loongarch.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
         * pertain to them.
         */
        switch (bcode) {
+       case BRK_KDB:
+               if (kgdb_breakpoint_handler(regs))
+                       goto out;
+               else
+                       break;
        case BRK_KPROBE_BP:
                if (kprobe_breakpoint_handler(regs))
                        goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
        pr_warn("Hardware watch point handler not implemented!\n");
 #else
+       if (kgdb_breakpoint_handler(regs))
+               goto out;
+
        if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
                int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
                unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
        irqentry_exit(regs, state);
 }
 
+static void init_restore_lbt(void)
+{
+       if (!thread_lbt_context_live()) {
+               /* First time LBT context user */
+               init_lbt();
+               set_thread_flag(TIF_LBT_CTX_LIVE);
+       } else {
+               if (!is_lbt_owner())
+                       own_lbt_inatomic(1);
+       }
+
+       BUG_ON(!is_lbt_enabled());
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
        irqentry_state_t state = irqentry_enter(regs);
 
-       local_irq_enable();
-       force_sig(SIGILL);
-       local_irq_disable();
+       /*
+        * BTD (Binary Translation Disable exception) can be triggered
+        * during FP save/restore if TM (Top Mode) is on, which may
+        * enable interrupts during 'switch_to'. To avoid this situation
+        * (including the case where userspace uses 'MOVGR2GCSR' to turn
+        * on TM, which does not trigger BTD), we need to check PRMD first.
+        */
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_enable();
+
+       if (!cpu_has_lbt) {
+               force_sig(SIGILL);
+               goto out;
+       }
+       BUG_ON(is_lbt_enabled());
+
+       preempt_disable();
+       init_restore_lbt();
+       preempt_enable();
+
+out:
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_disable();
 
        irqentry_exit(regs, state);
 }
index d60d4e0..a77bf16 100644 (file)
@@ -6,4 +6,6 @@
 lib-y  += delay.o memset.o memcpy.o memmove.o \
           clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
index 0790ead..be74154 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a1, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__clear_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:     move    a0, a1
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
+       _asm_extable 1b, 2b
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_1
-       _asm_extable 3b, .L_fixup_handle_2
-       _asm_extable 4b, .L_fixup_handle_3
-       _asm_extable 5b, .L_fixup_handle_4
-       _asm_extable 6b, .L_fixup_handle_5
-       _asm_extable 7b, .L_fixup_handle_6
-       _asm_extable 8b, .L_fixup_handle_7
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_1
-       _asm_extable 11b, .L_fixup_handle_2
-       _asm_extable 12b, .L_fixup_handle_3
-       _asm_extable 13b, .L_fixup_handle_0
-       _asm_extable 14b, .L_fixup_handle_1
-       _asm_extable 15b, .L_fixup_handle_0
-       _asm_extable 16b, .L_fixup_handle_0
-       _asm_extable 17b, .L_fixup_handle_s0
-       _asm_extable 18b, .L_fixup_handle_s0
-       _asm_extable 19b, .L_fixup_handle_s0
-       _asm_extable 20b, .L_fixup_handle_s2
-       _asm_extable 21b, .L_fixup_handle_s0
-       _asm_extable 22b, .L_fixup_handle_s0
-       _asm_extable 23b, .L_fixup_handle_s4
-       _asm_extable 24b, .L_fixup_handle_s0
-       _asm_extable 25b, .L_fixup_handle_s4
-       _asm_extable 26b, .L_fixup_handle_s0
-       _asm_extable 27b, .L_fixup_handle_s4
-       _asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a1, a2, a0
+
+.Lsmall_fixup:
+29:    st.b    zero, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, -1
+       bgt     a1, zero, 29b
+
+.Lexit:
+       move    a0, a1
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Llarge_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Lexit
+       _asm_extable 18b, .Lsmall_fixup
+       _asm_extable 19b, .Lsmall_fixup
+       _asm_extable 20b, .Lsmall_fixup
+       _asm_extable 21b, .Lsmall_fixup
+       _asm_extable 22b, .Lsmall_fixup
+       _asm_extable 23b, .Lsmall_fixup
+       _asm_extable 24b, .Lsmall_fixup
+       _asm_extable 25b, .Lsmall_fixup
+       _asm_extable 26b, .Lsmall_fixup
+       _asm_extable 27b, .Lsmall_fixup
+       _asm_extable 28b, .Lsmall_fixup
+       _asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
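
The rework above replaces the many per-site fixup handlers with two shared
labels: .Llarge_fixup recomputes the outstanding byte count, and .Lsmall_fixup
retries byte by byte so the returned residue is exact. A C model of that
semantics (helper names are illustrative, not kernel API):

    #include <stddef.h>
    #include <stdio.h>

    static int store_ok(unsigned char *p) { *p = 0; return 0; }

    /* Models __clear_user()'s fixup: 'remaining' bytes were not yet cleared
     * when the fast path faulted; retry one byte at a time and return how
     * many bytes are still left (0 on full success). */
    static size_t clear_user_fixup_model(unsigned char *dst, size_t remaining,
                                         int (*store_byte)(unsigned char *))
    {
        while (remaining > 0) {
            if (store_byte(dst))    /* nonzero models a faulting store */
                break;
            dst++;
            remaining--;
        }
        return remaining;
    }

    int main(void)
    {
        unsigned char buf[16] = { 0xff };

        printf("left=%zu\n",
               clear_user_fixup_model(buf, sizeof(buf), store_ok));
        return 0;
    }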
index bfe3d27..feec3d3 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a2, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__copy_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:     move    a0, a2
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
-       _asm_extable 2b, .L_fixup_handle_s0
+       _asm_extable 1b, 3b
+       _asm_extable 2b, 3b
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
        sltui   t0, a2, 9
        bnez    t0, .Lsmall
 
-       add.d   a3, a1, a2
-       add.d   a2, a0, a2
 0:     ld.d    t0, a1, 0
 1:     st.d    t0, a0, 0
+       add.d   a3, a1, a2
+       add.d   a2, a0, a2
 
        /* align up destination address */
        andi    t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
 7:     ld.d    t5, a1, 40
 8:     ld.d    t6, a1, 48
 9:     ld.d    t7, a1, 56
-       addi.d  a1, a1, 64
 10:    st.d    t0, a0, 0
 11:    st.d    t1, a0, 8
 12:    st.d    t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
 15:    st.d    t5, a0, 40
 16:    st.d    t6, a0, 48
 17:    st.d    t7, a0, 56
+       addi.d  a1, a1, 64
        addi.d  a0, a0, 64
        bltu    a1, a4, .Lloop64
 
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
 19:    ld.d    t1, a1, 8
 20:    ld.d    t2, a1, 16
 21:    ld.d    t3, a1, 24
-       addi.d  a1, a1, 32
 22:    st.d    t0, a0, 0
 23:    st.d    t1, a0, 8
 24:    st.d    t2, a0, 16
 25:    st.d    t3, a0, 24
+       addi.d  a1, a1, 32
        addi.d  a0, a0, 32
 
 .Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt16
 26:    ld.d    t0, a1, 0
 27:    ld.d    t1, a1, 8
-       addi.d  a1, a1, 16
 28:    st.d    t0, a0, 0
 29:    st.d    t1, a0, 8
+       addi.d  a1, a1, 16
        addi.d  a0, a0, 16
 
 .Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt8
 30:    ld.d    t0, a1, 0
 31:    st.d    t0, a0, 0
+       addi.d  a1, a1, 8
        addi.d  a0, a0, 8
 
 .Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_0
-       _asm_extable 3b, .L_fixup_handle_0
-       _asm_extable 4b, .L_fixup_handle_0
-       _asm_extable 5b, .L_fixup_handle_0
-       _asm_extable 6b, .L_fixup_handle_0
-       _asm_extable 7b, .L_fixup_handle_0
-       _asm_extable 8b, .L_fixup_handle_0
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_0
-       _asm_extable 11b, .L_fixup_handle_1
-       _asm_extable 12b, .L_fixup_handle_2
-       _asm_extable 13b, .L_fixup_handle_3
-       _asm_extable 14b, .L_fixup_handle_4
-       _asm_extable 15b, .L_fixup_handle_5
-       _asm_extable 16b, .L_fixup_handle_6
-       _asm_extable 17b, .L_fixup_handle_7
-       _asm_extable 18b, .L_fixup_handle_0
-       _asm_extable 19b, .L_fixup_handle_0
-       _asm_extable 20b, .L_fixup_handle_0
-       _asm_extable 21b, .L_fixup_handle_0
-       _asm_extable 22b, .L_fixup_handle_0
-       _asm_extable 23b, .L_fixup_handle_1
-       _asm_extable 24b, .L_fixup_handle_2
-       _asm_extable 25b, .L_fixup_handle_3
-       _asm_extable 26b, .L_fixup_handle_0
-       _asm_extable 27b, .L_fixup_handle_0
-       _asm_extable 28b, .L_fixup_handle_0
-       _asm_extable 29b, .L_fixup_handle_1
-       _asm_extable 30b, .L_fixup_handle_0
-       _asm_extable 31b, .L_fixup_handle_0
-       _asm_extable 32b, .L_fixup_handle_0
-       _asm_extable 33b, .L_fixup_handle_0
-       _asm_extable 34b, .L_fixup_handle_s0
-       _asm_extable 35b, .L_fixup_handle_s0
-       _asm_extable 36b, .L_fixup_handle_s0
-       _asm_extable 37b, .L_fixup_handle_s0
-       _asm_extable 38b, .L_fixup_handle_s0
-       _asm_extable 39b, .L_fixup_handle_s0
-       _asm_extable 40b, .L_fixup_handle_s0
-       _asm_extable 41b, .L_fixup_handle_s2
-       _asm_extable 42b, .L_fixup_handle_s0
-       _asm_extable 43b, .L_fixup_handle_s0
-       _asm_extable 44b, .L_fixup_handle_s0
-       _asm_extable 45b, .L_fixup_handle_s0
-       _asm_extable 46b, .L_fixup_handle_s0
-       _asm_extable 47b, .L_fixup_handle_s4
-       _asm_extable 48b, .L_fixup_handle_s0
-       _asm_extable 49b, .L_fixup_handle_s0
-       _asm_extable 50b, .L_fixup_handle_s0
-       _asm_extable 51b, .L_fixup_handle_s4
-       _asm_extable 52b, .L_fixup_handle_s0
-       _asm_extable 53b, .L_fixup_handle_s0
-       _asm_extable 54b, .L_fixup_handle_s0
-       _asm_extable 55b, .L_fixup_handle_s4
-       _asm_extable 56b, .L_fixup_handle_s0
-       _asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a2, a2, a0
+
+.Lsmall_fixup:
+58:    ld.b    t0, a1, 0
+59:    st.b    t0, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, 1
+       addi.d  a2, a2, -1
+       bgt     a2, zero, 58b
+
+.Lexit:
+       move    a0, a2
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Lsmall_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Llarge_fixup
+       _asm_extable 18b, .Llarge_fixup
+       _asm_extable 19b, .Llarge_fixup
+       _asm_extable 20b, .Llarge_fixup
+       _asm_extable 21b, .Llarge_fixup
+       _asm_extable 22b, .Llarge_fixup
+       _asm_extable 23b, .Llarge_fixup
+       _asm_extable 24b, .Llarge_fixup
+       _asm_extable 25b, .Llarge_fixup
+       _asm_extable 26b, .Llarge_fixup
+       _asm_extable 27b, .Llarge_fixup
+       _asm_extable 28b, .Llarge_fixup
+       _asm_extable 29b, .Llarge_fixup
+       _asm_extable 30b, .Llarge_fixup
+       _asm_extable 31b, .Llarge_fixup
+       _asm_extable 32b, .Llarge_fixup
+       _asm_extable 33b, .Llarge_fixup
+       _asm_extable 34b, .Lexit
+       _asm_extable 35b, .Lexit
+       _asm_extable 36b, .Lsmall_fixup
+       _asm_extable 37b, .Lsmall_fixup
+       _asm_extable 38b, .Lsmall_fixup
+       _asm_extable 39b, .Lsmall_fixup
+       _asm_extable 40b, .Lsmall_fixup
+       _asm_extable 41b, .Lsmall_fixup
+       _asm_extable 42b, .Lsmall_fixup
+       _asm_extable 43b, .Lsmall_fixup
+       _asm_extable 44b, .Lsmall_fixup
+       _asm_extable 45b, .Lsmall_fixup
+       _asm_extable 46b, .Lsmall_fixup
+       _asm_extable 47b, .Lsmall_fixup
+       _asm_extable 48b, .Lsmall_fixup
+       _asm_extable 49b, .Lsmall_fixup
+       _asm_extable 50b, .Lsmall_fixup
+       _asm_extable 51b, .Lsmall_fixup
+       _asm_extable 52b, .Lsmall_fixup
+       _asm_extable 53b, .Lsmall_fixup
+       _asm_extable 54b, .Lsmall_fixup
+       _asm_extable 55b, .Lsmall_fixup
+       _asm_extable 56b, .Lsmall_fixup
+       _asm_extable 57b, .Lsmall_fixup
+       _asm_extable 58b, .Lexit
+       _asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
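
Note that the hunks above also move the source/destination increments to after
the corresponding loads and stores: if an access faults, .Llarge_fixup derives
the remaining count from the current register values, so the pointers must not
have been advanced past data that was not actually transferred. The byte-wise
retry then mirrors the __clear_user fixup; a sketch with a simulated fault:

    #include <stddef.h>
    #include <stdio.h>

    /* Models __copy_user()'s fixup: copy the remaining bytes one at a time;
     * the return value is the count not copied (0 on full success). */
    static size_t copy_user_fixup_model(unsigned char *dst,
                                        const unsigned char *src,
                                        size_t remaining, size_t fault_at)
    {
        while (remaining > 0) {
            if (fault_at-- == 0)    /* simulate a fault at this byte */
                break;
            *dst++ = *src++;
            remaining--;
        }
        return remaining;
    }

    int main(void)
    {
        unsigned char src[8] = "abcdefg", dst[8];

        /* fault after 5 of 8 bytes: 3 bytes remain uncopied */
        printf("left=%zu\n",
               copy_user_fixup_model(dst, src, sizeof(src), 5));
        return 0;
    }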
index cc30b3b..fa11488 100644 (file)
@@ -10,6 +10,8 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memcpy)
        /*
         * Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
        ALTERNATIVE     "b __memcpy_generic", \
                        "b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
 
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
 
 /*
  * void *__memcpy_generic(void *dst, const void *src, size_t n)
index 7dc76d1..82dae06 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memmove)
-       blt     a0, a1, memcpy  /* dst < src, memcpy */
-       blt     a1, a0, rmemcpy /* src < dst, rmemcpy */
-       jr      ra              /* dst == src, return */
+       blt     a0, a1, __memcpy        /* dst < src, memcpy */
+       blt     a1, a0, __rmemcpy       /* src < dst, rmemcpy */
+       jr      ra                      /* dst == src, return */
 SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
 
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
 
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
        /*
         * Some CPUs support hardware unaligned access
         */
        ALTERNATIVE     "b __rmemcpy_generic", \
                        "b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
index 3f20f79..06d3ca5 100644 (file)
@@ -16,6 +16,8 @@
        bstrins.d \r0, \r0, 63, 32
 .endm
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memset)
        /*
         * Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
        ALTERNATIVE     "b __memset_generic", \
                        "b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
 
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
 
 /*
  * void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644 (file)
index 0000000..84cd24b
--- /dev/null
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop iteration. This assumes that
+ * future mainstream LoongArch cores will have performance characteristics
+ * similar to the current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset)  \
+       "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 16)         \
+       LD(2, base, 32)         \
+       LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(4, base, 0)          \
+       LD(5, base, 16)         \
+       LD(6, base, 32)         \
+       LD(7, base, 48)         \
+       XOR(0, 4)               \
+       XOR(1, 5)               \
+       XOR(2, 6)               \
+       XOR(3, 7)
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 16)         \
+       ST(2, base, 32)         \
+       ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset)  \
+       "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(2, base, 0)          \
+       LD(3, base, 32)         \
+       XOR(0, 2)               \
+       XOR(1, 3)
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
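
As a plain-C reference for what one iteration of the kernels above computes:
each 64-byte line of v1 is XORed in place with the corresponding line of v2
(LSX does this as four 16-byte vectors, LASX as two 32-byte vectors):

    #include <stddef.h>
    #include <stdio.h>

    #define LINE_WIDTH 64

    /* Scalar equivalent of one LD_INOUT_LINE/LD_AND_XOR_LINE/ST_LINE pass */
    static void xor_line_ref(unsigned long *v1, const unsigned long *v2)
    {
        for (size_t i = 0; i < LINE_WIDTH / sizeof(unsigned long); i++)
            v1[i] ^= v2[i];
    }

    int main(void)
    {
        unsigned long a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        unsigned long b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };

        xor_line_ref(a, b);
        printf("a[0]=%lu\n", a[0]); /* 1 ^ 8 = 9 */
        return 0;
    }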
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644 (file)
index 0000000..f50f325
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644 (file)
index 0000000..393f689
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor)                                                        \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_2(bytes, p1, p2);                                      \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor)                                                        \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_3(bytes, p1, p2, p3);                                  \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor)                                                        \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_4(bytes, p1, p2, p3, p4);                              \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor)                                                        \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4,                      \
+                     const unsigned long * __restrict p5)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);                          \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor)         \
+       MAKE_XOR_GLUE_2(flavor);        \
+       MAKE_XOR_GLUE_3(flavor);        \
+       MAKE_XOR_GLUE_4(flavor);        \
+       MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644 (file)
index 0000000..0358ced
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+               : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4,
+                     const unsigned long * __restrict v5)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       LD_AND_XOR_LINE(v5)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+                   [v5] "r"(v5) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+               v5 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
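
The template is instantiated by defining the LD/ST/XOR macro family and then
including this file once per SIMD flavour, as xor_simd.c does. A self-contained
miniature of the same include-and-redefine technique (all names illustrative):

    #include <stdio.h>

    /* One body, stamped out twice with different operation macros; the
     * kernel gets the same effect by #including xor_template.c twice. */
    #define MAKE_COMBINE(name, OP)                                  \
    static int combine_##name(int a, int b) { return OP(a, b); }

    #define OP_ADD(a, b) ((a) + (b))
    #define OP_XOR(a, b) ((a) ^ (b))

    MAKE_COMBINE(add, OP_ADD)
    MAKE_COMBINE(xor, OP_XOR)

    int main(void)
    {
        printf("%d %d\n", combine_add(3, 5), combine_xor(3, 5));
        return 0;
    }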
index 8ffc638..e4d1e58 100644 (file)
@@ -7,3 +7,6 @@ obj-y                           += init.o cache.o tlb.o tlbex.o extable.o \
                                   fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
 
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
+obj-$(CONFIG_KASAN)            += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o     := n
index 72685a4..6be04d3 100644 (file)
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
 
        current_cpu_data.cache_leaves_present = leaf;
        current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-       shm_align_mask = PAGE_SIZE - 1;
 }
 
 static const pgprot_t protection_map[16] = {
index da5b6d5..e6376e3 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        if (fixup_exception(regs))
                return;
 
+       if (kfence_handle_page_fault(address, write, regs))
+               return;
+
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        /*
         * We ran out of memory, call the OOM killer, and return the userspace
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
        pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
 {
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
 
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
         */
        if (address & __UA_LIMIT) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                else
                        do_sigsegv(regs, write, address, si_code);
                return;
@@ -211,7 +217,7 @@ good_area:
 
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                return;
        }
 
@@ -232,7 +238,7 @@ good_area:
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mmap_read_unlock(mm);
                if (fault & VM_FAULT_OOM) {
-                       do_out_of_memory(regs, address);
+                       do_out_of_memory(regs, write, address);
                        return;
                } else if (fault & VM_FAULT_SIGSEGV) {
                        do_sigsegv(regs, write, address, si_code);
index 3b7d812..f3fe8c0 100644 (file)
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed.         Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-       unsigned int order, i;
-       struct page *page;
-
-       order = 0;
-
-       empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-       if (!empty_zero_page)
-               panic("Oh boy, that early out of memory?");
-
-       page = virt_to_page((void *)empty_zero_page);
-       split_page(page, order);
-       for (i = 0; i < (1 << order); i++, page++)
-               mark_page_reserved(page);
-
-       zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
 
 void copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
        high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
        memblock_free_all();
-       setup_zero_pages();     /* Setup zeroed pages.  */
 }
 #endif /* !CONFIG_NUMA */
 
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
 #endif
 #endif
 
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud;
        pmd_t *pmd;
 
-       pgd = pgd_offset_k(addr);
-       p4d = p4d_offset(pgd, addr);
-
-       if (pgd_none(*pgd)) {
-               pud_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
+       if (p4d_none(*p4d)) {
+               pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pud)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
+               pud_init(pud);
 #endif
        }
 
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
-               pmd_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
+               pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pmd)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
+               pmd_init(pmd);
 #endif
        }
 
        pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd)) {
-               pte_t *new __maybe_unused;
+       if (!pmd_present(*pmd)) {
+               pte_t *pte;
 
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
+               pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pte)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pmd_populate_kernel(&init_mm, pmd, pte);
        }
 
        return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       ptep = fixmap_pte(addr);
+       ptep = populate_kernel_pte(addr);
        if (!pte_none(*ptep)) {
                pte_ERROR(*ptep);
                return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..da68bc1
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
+
+bool kasan_early_stage = true;
+
+/*
+ * Allocate memory for the shadow memory page tables.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+       void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+                                       __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+       if (!p)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+                       __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+       return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+       if (__pmd_none(early, READ_ONCE(*pmdp))) {
+               phys_addr_t pte_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+               pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+       }
+
+       return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+       if (__pud_none(early, READ_ONCE(*pudp))) {
+               phys_addr_t pmd_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+               pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+       }
+
+       return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+       if (__p4d_none(early, READ_ONCE(*p4dp))) {
+               phys_addr_t pud_phys = early ?
+                       __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+               p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+       }
+
+       return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
+       do {
+               phys_addr_t page_phys = early ?
+                                       __pa_symbol(kasan_early_shadow_page)
+                                             : kasan_alloc_zeroed_page(node);
+               next = addr + PAGE_SIZE;
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+       do {
+               next = pmd_addr_end(addr, end);
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+       do {
+               next = pud_addr_end(addr, end);
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+       do {
+               next = p4d_addr_end(addr, end);
+               kasan_pud_populate(p4dp, addr, next, node, early);
+       } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+                                     int node, bool early)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+
+       pgdp = pgd_offset_k(addr);
+
+       do {
+               next = pgd_addr_end(addr, end);
+               kasan_p4d_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+                                     int node)
+{
+       kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+       WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+       /*
+        * Remove references to the kasan page tables from
+        * swapper_pg_dir. pgd_clear() can't be used here
+        * because it is a no-op on 2- and 3-level page
+        * table setups.
+        */
+       for (; start < end; start += PGDIR_SIZE)
+               kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+       u64 i;
+       phys_addr_t pa_start, pa_end;
+
+       /*
+        * The PGD was populated with invalid_pmd_table or invalid_pud_table
+        * in pagetable_init(), depending on how many page table levels are
+        * in use, so we have to clear the pgd entries of the kasan shadow
+        * region: their values are non-zero, pgd_none() would therefore be
+        * false, and the populate pass below would not create any new pgd
+        * at all.
+        */
+       memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+       csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       /* Maps everything to a single page of zeroes */
+       kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+       kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+                                       kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+       kasan_early_stage = false;
+
+       /* Populate the linear mapping */
+       for_each_mem_range(i, &pa_start, &pa_end) {
+               void *start = (void *)phys_to_virt(pa_start);
+               void *end   = (void *)phys_to_virt(pa_end);
+
+               if (start >= end)
+                       break;
+
+               kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+                       (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+       }
+
+       /* Populate modules mapping */
+       kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+               (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+       /*
+        * KASAN may reuse the contents of kasan_early_shadow_pte directly, so
+        * we should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_early_shadow_pte[i],
+                       pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       /* At this point kasan is fully initialized. Enable error messages */
+       init_task.kasan_depth = 0;
+       pr_info("KernelAddressSanitizer initialized.\n");
+}
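
For orientation, generic KASAN maps every 8 bytes of memory to one shadow
byte; that 1/8 scaling is why a single zeroed page, mapped repeatedly in the
early stage above, can back huge unpopulated regions. A sketch of the address
arithmetic, using an illustrative offset rather than LoongArch's real one:

    #include <stdint.h>
    #include <stdio.h>

    #define KASAN_SHADOW_SCALE_SHIFT 3          /* 1 shadow byte per 8 bytes */
    #define KASAN_SHADOW_OFFSET 0x1000000000UL  /* illustrative value only */

    static uintptr_t mem_to_shadow(uintptr_t addr)
    {
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
    }

    int main(void)
    {
        uintptr_t a = 0x120000000UL;

        printf("shadow(%#lx) = %#lx\n", (unsigned long)a,
               (unsigned long)mem_to_shadow(a));
        return 0;
    }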
index fbe1a48..a9630a8 100644 (file)
@@ -8,12 +8,11 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
 
-#define COLOUR_ALIGN(addr, pgoff)                              \
-       ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
-        (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)                      \
+       ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)  \
+        + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
 
 enum mmap_allocation_direction {UP, DOWN};
 
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
                 * cache aliasing constraints.
                 */
                if ((flags & MAP_SHARED) &&
-                   ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+                   ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
                        return -EINVAL;
                return addr;
        }
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        }
 
        info.length = len;
-       info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
        info.align_offset = pgoff << PAGE_SHIFT;
 
        if (dir == DOWN) {
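
COLOUR_ALIGN() rounds the address up to the next SHMLBA boundary and then adds
the cache colour of the file offset, so shared mappings of the same page end
up with the same colour. A worked example under assumed page and SHMLBA sizes:

    #include <stdio.h>

    #define PAGE_SHIFT 14              /* assumed: 16 KiB pages */
    #define SHMLBA (1UL << 16)         /* assumed: 64 KiB colour granule */
    #define SHM_ALIGN_MASK (SHMLBA - 1)

    #define COLOUR_ALIGN(addr, pgoff)                       \
        ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)      \
         + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))

    int main(void)
    {
        unsigned long addr = 0x12345, pgoff = 3;

        /* 0x12345 rounds up to 0x20000; colour (3 << 14) adds 0xc000 */
        printf("%#lx -> %#lx\n", addr, COLOUR_ALIGN(addr, pgoff));
        return 0;
    }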
index b14343e..71d0539 100644 (file)
@@ -9,6 +9,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
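
Two helpers are needed because LoongArch kernel virtual addresses come in two
kinds: DMW (direct mapping window) addresses translate to a page frame by pure
arithmetic, while TLB-mapped addresses (vmalloc and friends) require a page
table walk. A sketch of the arithmetic half, with illustrative constants:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 14                      /* assumed: 16 KiB pages */
    #define DMW_BASE   0x9000000000000000UL    /* illustrative DMW offset */

    /* DMW addresses need no page table walk; subtract the window base and
     * shift. tlb_virt_to_page() above must look up the PTE instead. */
    static unsigned long dmw_virt_to_pfn(uintptr_t kaddr)
    {
        return (kaddr - DMW_BASE) >> PAGE_SHIFT;
    }

    int main(void)
    {
        printf("pfn=%lu\n", dmw_virt_to_pfn(DMW_BASE + 0x8000));
        return 0;
    }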
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        pgd_t *init, *ret = NULL;
index a50308b..5c97d14 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
 
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
index 44d74a3..8584bab 100644 (file)
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                                        n = bytes;
 
                                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
-                                                    max_sectors, &same_page)) {
-                                       if (same_page)
-                                               bio_release_page(bio, page);
+                                                    max_sectors, &same_page))
                                        break;
-                               }
 
+                               if (same_page)
+                                       bio_release_page(bio, page);
                                bytes -= n;
                                offs = 0;
                        }
index 7397ff1..38a881c 100644 (file)
@@ -697,11 +697,41 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
        return true;
 }
 
+static unsigned int calculate_io_allowed(u32 iops_limit,
+                                        unsigned long jiffy_elapsed)
+{
+       unsigned int io_allowed;
+       u64 tmp;
+
+       /*
+        * jiffy_elapsed should not be a big value: the minimum iops is 1,
+        * so at most jiffy_elapsed should be the equivalent of one second,
+        * as we allow a dispatch after one second and by then the slice
+        * should have been trimmed.
+        */
+
+       tmp = (u64)iops_limit * jiffy_elapsed;
+       do_div(tmp, HZ);
+
+       if (tmp > UINT_MAX)
+               io_allowed = UINT_MAX;
+       else
+               io_allowed = tmp;
+
+       return io_allowed;
+}
+
+static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+{
+       return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+}
+
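
In userspace terms the helper above is a saturating limit * elapsed / HZ,
shown here with do_div() replaced by plain division and an assumed CONFIG_HZ:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    #define HZ 250  /* assumed CONFIG_HZ */

    static unsigned int calc_io_allowed(uint32_t iops_limit,
                                        unsigned long jiffy_elapsed)
    {
        uint64_t tmp = (uint64_t)iops_limit * jiffy_elapsed / HZ;

        return tmp > UINT_MAX ? UINT_MAX : (unsigned int)tmp;
    }

    int main(void)
    {
        /* 1000 IOPS over half a second (HZ/2 jiffies) allows 500 I/Os */
        printf("%u\n", calc_io_allowed(1000, HZ / 2));
        return 0;
    }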
 /* Trim the used slices and adjust slice start accordingly */
 static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 {
-       unsigned long nr_slices, time_elapsed, io_trim;
-       u64 bytes_trim, tmp;
+       unsigned long time_elapsed;
+       long long bytes_trim;
+       int io_trim;
 
        BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
 
@@ -723,67 +753,38 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 
        throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
 
-       time_elapsed = jiffies - tg->slice_start[rw];
-
-       nr_slices = time_elapsed / tg->td->throtl_slice;
-
-       if (!nr_slices)
+       time_elapsed = rounddown(jiffies - tg->slice_start[rw],
+                                tg->td->throtl_slice);
+       if (!time_elapsed)
                return;
-       tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
-       do_div(tmp, HZ);
-       bytes_trim = tmp;
 
-       io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
-               HZ;
-
-       if (!bytes_trim && !io_trim)
+       bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
+                                            time_elapsed) +
+                    tg->carryover_bytes[rw];
+       io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
+                 tg->carryover_ios[rw];
+       if (bytes_trim <= 0 && io_trim <= 0)
                return;
 
-       if (tg->bytes_disp[rw] >= bytes_trim)
+       tg->carryover_bytes[rw] = 0;
+       if ((long long)tg->bytes_disp[rw] >= bytes_trim)
                tg->bytes_disp[rw] -= bytes_trim;
        else
                tg->bytes_disp[rw] = 0;
 
-       if (tg->io_disp[rw] >= io_trim)
+       tg->carryover_ios[rw] = 0;
+       if ((int)tg->io_disp[rw] >= io_trim)
                tg->io_disp[rw] -= io_trim;
        else
                tg->io_disp[rw] = 0;
 
-       tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
+       tg->slice_start[rw] += time_elapsed;
 
        throtl_log(&tg->service_queue,
-                  "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
-                  rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
-                  tg->slice_start[rw], tg->slice_end[rw], jiffies);
-}
-
-static unsigned int calculate_io_allowed(u32 iops_limit,
-                                        unsigned long jiffy_elapsed)
-{
-       unsigned int io_allowed;
-       u64 tmp;
-
-       /*
-        * jiffy_elapsed should not be a big value as minimum iops can be
-        * 1 then at max jiffy elapsed should be equivalent of 1 second as we
-        * will allow dispatch after 1 second and after that slice should
-        * have been trimmed.
-        */
-
-       tmp = (u64)iops_limit * jiffy_elapsed;
-       do_div(tmp, HZ);
-
-       if (tmp > UINT_MAX)
-               io_allowed = UINT_MAX;
-       else
-               io_allowed = tmp;
-
-       return io_allowed;
-}
-
-static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
-{
-       return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+                  "[%c] trim slice nr=%lu bytes=%lld io=%d start=%lu end=%lu jiffies=%lu",
+                  rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice,
+                  bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw],
+                  jiffies);
 }
 
 static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
@@ -816,7 +817,7 @@ static void tg_update_carryover(struct throtl_grp *tg)
                __tg_update_carryover(tg, WRITE);
 
        /* see comments in struct throtl_grp for meaning of these fields. */
-       throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
+       throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
                   tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
                   tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
 }
@@ -825,7 +826,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
                                 u32 iops_limit)
 {
        bool rw = bio_data_dir(bio);
-       unsigned int io_allowed;
+       int io_allowed;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 
        if (iops_limit == UINT_MAX) {
@@ -838,9 +839,8 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
        jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
        io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
                     tg->carryover_ios[rw];
-       if (tg->io_disp[rw] + 1 <= io_allowed) {
+       if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
                return 0;
-       }
 
        /* Calc approx time to dispatch */
        jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
@@ -851,7 +851,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
                                u64 bps_limit)
 {
        bool rw = bio_data_dir(bio);
-       u64 bytes_allowed, extra_bytes;
+       long long bytes_allowed;
+       u64 extra_bytes;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
        unsigned int bio_size = throtl_bio_data_size(bio);
 
@@ -869,9 +870,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
        jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
        bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
                        tg->carryover_bytes[rw];
-       if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
+       if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
                return 0;
-       }
 
        /* Calc approx time to dispatch */
        extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
index d1ccbfe..bffbc9c 100644 (file)
@@ -127,8 +127,8 @@ struct throtl_grp {
         * bytes/ios are waited already in previous configuration, and they will
         * be used to calculate wait time under new configuration.
         */
-       uint64_t carryover_bytes[2];
-       unsigned int carryover_ios[2];
+       long long carryover_bytes[2];
+       int carryover_ios[2];
 
        unsigned long last_check_time;
 
index a24a624..acff3d5 100644 (file)
@@ -671,10 +671,6 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
                iov_iter_truncate(from, size);
        }
 
-       ret = file_remove_privs(file);
-       if (ret)
-               return ret;
-
        ret = file_update_time(file);
        if (ret)
                return ret;
index 648670d..d5f5cd6 100644 (file)
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
        struct blkpg_partition p;
        long long start, length;
 
+       if (disk->flags & GENHD_FL_NO_PART)
+               return -EINVAL;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
index 79ab532..6bc8610 100644 (file)
@@ -1557,7 +1557,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
        do {
                int sent;
 
-               bvec_set_page(&bvec, page, offset, len);
+               bvec_set_page(&bvec, page, len, offset);
                iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
 
                sent = sock_sendmsg(socket, &msg);
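
The fix above transposes the last two arguments: bvec_set_page() takes the
length before the offset, as the corrected call shows. A tiny mock making the
parameter order visible (struct layout illustrative, not the kernel's):

    #include <stdio.h>

    struct bvec_mock { unsigned int len, offset; };

    /* same parameter order as the corrected call: len first, then offset */
    static void bvec_set_mock(struct bvec_mock *bv,
                              unsigned int len, unsigned int offset)
    {
        bv->len = len;
        bv->offset = offset;
    }

    int main(void)
    {
        struct bvec_mock bv;

        bvec_set_mock(&bv, 512, 0); /* len=512, offset=0 */
        printf("len=%u offset=%u\n", bv.len, bv.offset);
        return 0;
    }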
index 8640130..9680909 100644 (file)
@@ -1643,9 +1643,12 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
        struct nullb_queue *nq = hctx->driver_data;
        LIST_HEAD(list);
        int nr = 0;
+       struct request *rq;
 
        spin_lock(&nq->poll_lock);
        list_splice_init(&nq->poll_list, &list);
+       list_for_each_entry(rq, &list, queuelist)
+               blk_mq_set_request_complete(rq);
        spin_unlock(&nq->poll_lock);
 
        while (!list_empty(&list)) {
@@ -1671,16 +1674,21 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
        struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-       pr_info("rq %p timed out\n", rq);
-
        if (hctx->type == HCTX_TYPE_POLL) {
                struct nullb_queue *nq = hctx->driver_data;
 
                spin_lock(&nq->poll_lock);
+               /* The request may have completed meanwhile. */
+               if (blk_mq_request_completed(rq)) {
+                       spin_unlock(&nq->poll_lock);
+                       return BLK_EH_DONE;
+               }
                list_del_init(&rq->queuelist);
                spin_unlock(&nq->poll_lock);
        }
 
+       pr_info("rq %p timed out\n", rq);
+
        /*
         * If the device is marked as blocking (i.e. memory backed or zoned
         * device), the submission path may be blocked waiting for resources
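
The race being closed above: a polled request can complete concurrently with
the timeout handler, so the timeout path now takes the same lock as the poll
path and re-checks completion before acting. The pattern in miniature:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool completed;

    /* poll side: mark the request complete under the lock */
    static void complete_rq(void)
    {
        pthread_mutex_lock(&lock);
        completed = true;
        pthread_mutex_unlock(&lock);
    }

    /* timeout side: re-check under the same lock before acting */
    static const char *timeout_rq(void)
    {
        const char *verdict;

        pthread_mutex_lock(&lock);
        verdict = completed ? "completed, ignore timeout" : "timed out";
        pthread_mutex_unlock(&lock);
        return verdict;
    }

    int main(void)
    {
        complete_rq();
        puts(timeout_rq());
        return 0;
    }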
index 0a7264a..324e942 100644 (file)
@@ -575,6 +575,26 @@ static int zynq_gpio_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static int zynq_gpio_irq_reqres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+       int ret;
+
+       ret = pm_runtime_resume_and_get(chip->parent);
+       if (ret < 0)
+               return ret;
+
+       return gpiochip_reqres_irq(chip, d->hwirq);
+}
+
+static void zynq_gpio_irq_relres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       gpiochip_relres_irq(chip, d->hwirq);
+       pm_runtime_put(chip->parent);
+}
+
 /* irq chip descriptor */
 static const struct irq_chip zynq_gpio_level_irqchip = {
        .name           = DRIVER_NAME,
@@ -584,9 +604,10 @@ static const struct irq_chip zynq_gpio_level_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED |
                          IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static const struct irq_chip zynq_gpio_edge_irqchip = {
@@ -597,8 +618,9 @@ static const struct irq_chip zynq_gpio_edge_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
index 4940b63..d687e8c 100644 (file)
@@ -941,13 +941,10 @@ static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
                ndev->debugfs_dir =
                        debugfs_create_dir(pci_name(ndev->ntb.pdev),
                                           debugfs_dir);
-               if (IS_ERR(ndev->debugfs_dir))
-                       ndev->debugfs_info = NULL;
-               else
-                       ndev->debugfs_info =
-                               debugfs_create_file("info", S_IRUSR,
-                                                   ndev->debugfs_dir, ndev,
-                                                   &amd_ntb_debugfs_info);
+               ndev->debugfs_info =
+                       debugfs_create_file("info", S_IRUSR,
+                                           ndev->debugfs_dir, ndev,
+                                           &amd_ntb_debugfs_info);
        }
 }
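This deletion leans on debugfs semantics worth spelling out: debugfs_create_dir() returns an ERR_PTR (never NULL) on failure, and the create helpers accept an error pointer as parent and quietly do nothing, so callers are not expected to check at all (the perf and tool hunks below make the same cleanup). A minimal sketch of the intended idiom, with hypothetical example_* names:

#include <linux/debugfs.h>

static struct dentry *example_dir;
static u32 example_counter;

static void example_init_debugfs(void)
{
	/* No error checks: a failed dir is an ERR_PTR, and children
	 * created under an ERR_PTR parent become no-ops. */
	example_dir = debugfs_create_dir("example", NULL);
	debugfs_create_u32("counter", 0444, example_dir, &example_counter);
}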
 
index 2abd223..f9e7847 100644 (file)
@@ -909,7 +909,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
        return 0;
 }
 
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp)
 {
        qp->link_is_up = false;
        qp->active = false;
@@ -932,6 +932,13 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_async = 0;
 }
 
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+       ntb_qp_link_context_reset(qp);
+       if (qp->remote_rx_info)
+               qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+}
+
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
 {
        struct ntb_transport_ctx *nt = qp->transport;
@@ -1174,7 +1181,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
        qp->ndev = nt->ndev;
        qp->client_ready = false;
        qp->event_handler = NULL;
-       ntb_qp_link_down_reset(qp);
+       ntb_qp_link_context_reset(qp);
 
        if (mw_num < qp_count % mw_count)
                num_qps_mw = qp_count / mw_count + 1;
@@ -1894,7 +1901,7 @@ err:
 static int ntb_process_tx(struct ntb_transport_qp *qp,
                          struct ntb_queue_entry *entry)
 {
-       if (qp->tx_index == qp->remote_rx_info->entry) {
+       if (!ntb_transport_tx_free_entry(qp)) {
                qp->tx_ring_full++;
                return -EAGAIN;
        }
@@ -2276,9 +2283,13 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
        struct ntb_queue_entry *entry;
        int rc;
 
-       if (!qp || !qp->link_is_up || !len)
+       if (!qp || !len)
                return -EINVAL;
 
+       /* If the qp link is down already, just ignore. */
+       if (!qp->link_is_up)
+               return 0;
+
        entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
        if (!entry) {
                qp->tx_err_no_buf++;
@@ -2418,7 +2429,7 @@ unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
        unsigned int head = qp->tx_index;
        unsigned int tail = qp->remote_rx_info->entry;
 
-       return tail > head ? tail - head : qp->tx_max_entry + tail - head;
+       return tail >= head ? tail - head : qp->tx_max_entry + tail - head;
 }
 EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
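Read together with the ntb_process_tx() and link-down-reset hunks above, the `>` to `>=` change makes the accounting self-consistent: head == tail now means a full ring (zero free entries), matching the full-queue test, while a freshly reset link (head 0, tail rx_max_entry - 1) reports max - 1 free slots. A runnable model of the fixed arithmetic:

#include <stdio.h>

/* tail >= head now means tail - head free entries; equality is "ring full". */
static unsigned int tx_free(unsigned int head, unsigned int tail,
			    unsigned int max)
{
	return tail >= head ? tail - head : max + tail - head;
}

int main(void)
{
	printf("%u\n", tx_free(0, 15, 16));	/* after reset: 15 free */
	printf("%u\n", tx_free(7, 7, 16));	/* caught up: 0 free (old formula said 16) */
	printf("%u\n", tx_free(10, 3, 16));	/* wrapped: 9 free */
	return 0;
}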
 
index 65e1e5c..553f1f4 100644 (file)
@@ -1355,7 +1355,7 @@ static void perf_setup_dbgfs(struct perf_ctx *perf)
        struct pci_dev *pdev = perf->ntb->pdev;
 
        perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
-       if (!perf->dbgfs_dir) {
+       if (IS_ERR(perf->dbgfs_dir)) {
                dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
                return;
        }
index eeeb4b1..641cb7e 100644 (file)
@@ -370,16 +370,9 @@ static ssize_t tool_fn_write(struct tool_ctx *tc,
        if (*offp)
                return 0;
 
-       buf = kmalloc(size + 1, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       if (copy_from_user(buf, ubuf, size)) {
-               kfree(buf);
-               return -EFAULT;
-       }
-
-       buf[size] = 0;
+       buf = memdup_user_nul(ubuf, size);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
 
        n = sscanf(buf, "%c %lli", &cmd, &bits);
 
@@ -1495,8 +1488,6 @@ static void tool_setup_dbgfs(struct tool_ctx *tc)
 
        tc->dbgfs_dir = debugfs_create_dir(dev_name(&tc->ntb->dev),
                                           tool_dbgfs_topdir);
-       if (!tc->dbgfs_dir)
-               return;
 
        debugfs_create_file("port", 0600, tc->dbgfs_dir,
                            tc, &tool_port_fops);
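memdup_user_nul() collapses the old kmalloc/copy_from_user/terminate/unwind dance into one call that returns an ERR_PTR on failure. A sketch of a write handler built on it (the handler and its parsing are hypothetical; memdup_user_nul() itself is the API used above):

#include <linux/string.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/fs.h>

static ssize_t example_write(struct file *file, const char __user *ubuf,
			     size_t size, loff_t *offp)
{
	char *buf;
	int val, rc;

	buf = memdup_user_nul(ubuf, size);	/* alloc + copy + '\0' in one step */
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	rc = kstrtoint(buf, 0, &val);
	kfree(buf);
	return rc ? rc : size;
}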
index 49bd09c..e9ae66c 100644 (file)
@@ -196,7 +196,7 @@ config PCI_HYPERV
 
 config PCI_DYNAMIC_OF_NODES
        bool "Create Device tree nodes for PCI devices"
-       depends on OF
+       depends on OF_IRQ
        select OF_DYNAMIC
        help
          This option enables support for generating device tree nodes for some
index ab2a4a3..7955345 100644 (file)
@@ -997,6 +997,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
                res = window->res;
                if (!res->flags && !res->start && !res->end) {
                        release_resource(res);
+                       resource_list_destroy_entry(window);
                        continue;
                }
 
index 5de09d2..eeec1d6 100644 (file)
@@ -3726,7 +3726,7 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
  */
 static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
 {
-       if ((dev->device & 0xffc0) == 0x2340 || dev->device == 0x1eb8)
+       if ((dev->device & 0xffc0) == 0x2340)
                quirk_no_bus_reset(dev);
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
index 0a8f597..365d964 100644 (file)
@@ -25,7 +25,7 @@
 #include "../cxl/pmu.h"
 
 #define CXL_PMU_CAP_REG                        0x0
-#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(4, 0)
+#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(5, 0)
 #define   CXL_PMU_CAP_COUNTER_WIDTH_MSK                        GENMASK_ULL(15, 8)
 #define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK          GENMASK_ULL(24, 20)
 #define   CXL_PMU_CAP_FILTERS_SUP_MSK                  GENMASK_ULL(39, 32)
index 4aa466c..0b69fb7 100644 (file)
@@ -1309,8 +1309,8 @@ static int psy_register_thermal(struct power_supply *psy)
                struct thermal_zone_params tzp = {
                        .no_hwmon = IS_ENABLED(CONFIG_POWER_SUPPLY_HWMON)
                };
-               psy->tzd = thermal_zone_device_register(psy->desc->name,
-                               0, 0, psy, &psy_tzd_ops, &tzp, 0, 0);
+               psy->tzd = thermal_tripless_zone_device_register(psy->desc->name,
+                               psy, &psy_tzd_ops, &tzp);
                if (IS_ERR(psy->tzd))
                        return PTR_ERR(psy->tzd);
                ret = thermal_zone_device_enable(psy->tzd);
index 5c2e6d5..40a2cc6 100644 (file)
@@ -658,8 +658,6 @@ static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
        [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
-       [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
-                               RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
        [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
                            RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
        [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
@@ -1458,7 +1456,7 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
                        }
                }
 
-               if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
+               if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
                        rd->rpl[i].name = NULL;
        }
 }
index 620fab0..c4e3665 100644 (file)
@@ -1378,16 +1378,12 @@ static ssize_t dasd_vendor_show(struct device *dev,
 
 static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
 
-#define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial    */ 14 + 1 +\
-                    /* SSID   */ 4 + 1 + /* unit addr */ 2 + 1 +\
-                    /* vduit */ 32 + 1)
-
 static ssize_t
 dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
+       char uid_string[DASD_UID_STRLEN];
        struct dasd_device *device;
        struct dasd_uid uid;
-       char uid_string[UID_STRLEN];
        char ua_string[3];
 
        device = dasd_device_from_cdev(to_ccwdev(dev));
index 8587e42..bd89b03 100644 (file)
@@ -1079,12 +1079,12 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
 
        create_uid(conf, &uid);
        if (strlen(uid.vduit) > 0)
-               snprintf(print_uid, sizeof(*print_uid),
+               snprintf(print_uid, DASD_UID_STRLEN,
                         "%s.%s.%04x.%02x.%s",
                         uid.vendor, uid.serial, uid.ssid,
                         uid.real_unit_addr, uid.vduit);
        else
-               snprintf(print_uid, sizeof(*print_uid),
+               snprintf(print_uid, DASD_UID_STRLEN,
                         "%s.%s.%04x.%02x",
                         uid.vendor, uid.serial, uid.ssid,
                         uid.real_unit_addr);
@@ -1093,8 +1093,8 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
 static int dasd_eckd_check_cabling(struct dasd_device *device,
                                   void *conf_data, __u8 lpm)
 {
+       char print_path_uid[DASD_UID_STRLEN], print_device_uid[DASD_UID_STRLEN];
        struct dasd_eckd_private *private = device->private;
-       char print_path_uid[60], print_device_uid[60];
        struct dasd_conf path_conf;
 
        path_conf.data = conf_data;
@@ -1293,9 +1293,9 @@ static void dasd_eckd_path_available_action(struct dasd_device *device,
        __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE];
        __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm;
        struct dasd_conf_data *conf_data;
+       char print_uid[DASD_UID_STRLEN];
        struct dasd_conf path_conf;
        unsigned long flags;
-       char print_uid[60];
        int rc, pos;
 
        opm = 0;
@@ -5855,8 +5855,8 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
 static int dasd_eckd_reload_device(struct dasd_device *device)
 {
        struct dasd_eckd_private *private = device->private;
+       char print_uid[DASD_UID_STRLEN];
        int rc, old_base;
-       char print_uid[60];
        struct dasd_uid uid;
        unsigned long flags;
 
index 0aa5635..8a4dbe9 100644 (file)
@@ -259,6 +259,10 @@ struct dasd_uid {
        char vduit[33];
 };
 
+#define DASD_UID_STRLEN ( /* vendor */ 3 + 1 + /* serial    */ 14 + 1 +        \
+                         /* SSID   */ 4 + 1 + /* unit addr */ 2 + 1 +  \
+                         /* vduit */ 32 + 1)
+
 /*
  * PPRC Status data
  */
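The sizeof() bugs this macro replaces are worth a second look: in dasd_eckd_get_uid_string() the destination is a char * parameter, so sizeof(*print_uid) is 1 and every snprintf() produced an empty string. A runnable illustration of the pitfall and the fix (the UID string is a made-up example):

#include <stdio.h>

#define UID_STRLEN 60	/* stands in for DASD_UID_STRLEN */

static void fill(char *out)
{
	/* BUG: out is a pointer, so sizeof(*out) == sizeof(char) == 1,
	 * and snprintf() writes nothing but the terminating '\0'. */
	snprintf(out, sizeof(*out), "IBM.75000000012345.0100.0f");
	printf("sizeof(*out): \"%s\"\n", out);

	/* FIX: carry the real buffer length as a named constant. */
	snprintf(out, UID_STRLEN, "IBM.75000000012345.0100.0f");
	printf("UID_STRLEN:   \"%s\"\n", out);
}

int main(void)
{
	char buf[UID_STRLEN];

	fill(buf);
	return 0;
}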
index 9f6dc4f..f00765b 100644 (file)
@@ -876,8 +876,9 @@ static int armada_thermal_probe(struct platform_device *pdev)
                /* Wait for the sensors to be valid */
                armada_wait_sensor_validity(priv);
 
-               tz = thermal_zone_device_register(priv->zone_name, 0, 0, priv,
-                                                 &legacy_ops, NULL, 0, 0);
+               tz = thermal_tripless_zone_device_register(priv->zone_name,
+                                                          priv, &legacy_ops,
+                                                          NULL);
                if (IS_ERR(tz)) {
                        dev_err(&pdev->dev,
                                "Failed to register thermal zone device\n");
index 9954040..7a18cb9 100644 (file)
@@ -139,8 +139,8 @@ static int dove_thermal_probe(struct platform_device *pdev)
                return ret;
        }
 
-       thermal = thermal_zone_device_register("dove_thermal", 0, 0,
-                                              priv, &ops, NULL, 0, 0);
+       thermal = thermal_tripless_zone_device_register("dove_thermal", priv,
+                                                       &ops, NULL);
        if (IS_ERR(thermal)) {
                dev_err(&pdev->dev,
                        "Failed to register thermal zone device\n");
index ddd6008..ffc2871 100644 (file)
@@ -609,9 +609,9 @@ static int int3400_thermal_probe(struct platform_device *pdev)
 
        evaluate_odvp(priv);
 
-       priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0,
-                                               priv, &int3400_thermal_ops,
-                                               &int3400_thermal_params, 0, 0);
+       priv->thermal = thermal_tripless_zone_device_register("INT3400 Thermal", priv,
+                                                             &int3400_thermal_ops,
+                                                             &int3400_thermal_params);
        if (IS_ERR(priv->thermal)) {
                result = PTR_ERR(priv->thermal);
                goto free_art_trt;
index 668747b..acb10d2 100644 (file)
@@ -71,8 +71,8 @@ static int kirkwood_thermal_probe(struct platform_device *pdev)
        if (IS_ERR(priv->sensor))
                return PTR_ERR(priv->sensor);
 
-       thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0,
-                                              priv, &ops, NULL, 0, 0);
+       thermal = thermal_tripless_zone_device_register("kirkwood_thermal",
+                                                       priv, &ops, NULL);
        if (IS_ERR(thermal)) {
                dev_err(&pdev->dev,
                        "Failed to register thermal zone device\n");
index 6e78616..96d9928 100644 (file)
@@ -122,8 +122,8 @@ static int spear_thermal_probe(struct platform_device *pdev)
        stdev->flags = val;
        writel_relaxed(stdev->flags, stdev->thermal_base);
 
-       spear_thermal = thermal_zone_device_register("spear_thermal", 0, 0,
-                               stdev, &ops, NULL, 0, 0);
+       spear_thermal = thermal_tripless_zone_device_register("spear_thermal",
+                                                             stdev, &ops, NULL);
        if (IS_ERR(spear_thermal)) {
                dev_err(&pdev->dev, "thermal zone device is NULL\n");
                ret = PTR_ERR(spear_thermal);
index 0bdde1a..8717a33 100644 (file)
@@ -1389,16 +1389,16 @@ free_tz:
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips);
 
-struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask,
-                                                        void *devdata, struct thermal_zone_device_ops *ops,
-                                                        const struct thermal_zone_params *tzp, int passive_delay,
-                                                        int polling_delay)
+struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp)
 {
-       return thermal_zone_device_register_with_trips(type, NULL, ntrips, mask,
-                                                      devdata, ops, tzp,
-                                                      passive_delay, polling_delay);
+       return thermal_zone_device_register_with_trips(type, NULL, 0, 0, devdata,
+                                                      ops, tzp, 0, 0);
 }
-EXPORT_SYMBOL_GPL(thermal_zone_device_register);
+EXPORT_SYMBOL_GPL(thermal_tripless_zone_device_register);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd)
 {
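The new helper is sugar for thermal_zone_device_register_with_trips() with zero trips, mask and delays, which is exactly the shape every converted driver above had open-coded. A sketch of the call from a driver probe path (example_* names hypothetical; signature as declared in the thermal.h hunk below):

#include <linux/thermal.h>
#include <linux/err.h>

static int example_register_tz(void *priv, struct thermal_zone_device_ops *ops)
{
	struct thermal_zone_device *tz;

	/* Was: thermal_zone_device_register(name, 0, 0, priv, ops, NULL, 0, 0) */
	tz = thermal_tripless_zone_device_register("example_thermal", priv,
						   ops, NULL);
	if (IS_ERR(tz))
		return PTR_ERR(tz);

	return thermal_zone_device_enable(tz);
}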
index 793151d..cabe6a8 100644 (file)
@@ -1,5 +1,5 @@
 config SMB_SERVER
-       tristate "SMB3 server support (EXPERIMENTAL)"
+       tristate "SMB3 server support"
        depends on INET
        depends on MULTIUSER
        depends on FILE_LOCKING
index 801cd09..5ab2f52 100644 (file)
@@ -590,8 +590,6 @@ static int __init ksmbd_server_init(void)
        if (ret)
                goto err_crypto_destroy;
 
-       pr_warn_once("The ksmbd server is experimental\n");
-
        return 0;
 
 err_crypto_destroy:
index 819b6bc..3df5499 100644 (file)
@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 int kasan_populate_early_shadow(const void *shadow_start,
                                const void *shadow_end);
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline void *kasan_mem_to_shadow(const void *addr)
 {
        return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                + KASAN_SHADOW_OFFSET;
 }
+#endif
 
 int kasan_add_zero_shadow(void *start, unsigned long size);
 void kasan_remove_zero_shadow(void *start, unsigned long size);
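The generic mapping this guard lets LoongArch replace turns every 1 << KASAN_SHADOW_SCALE_SHIFT bytes of address space into one shadow byte at a fixed offset. A worked userspace model, assuming the usual shift of 3 and an illustrative (x86-64-style) offset:

#include <stdio.h>
#include <stdint.h>

#define SHADOW_SCALE_SHIFT 3			/* 8 bytes per shadow byte */
#define SHADOW_OFFSET 0xdffffc0000000000ULL	/* illustrative value only */

static uint64_t mem_to_shadow(uint64_t addr)
{
	return (addr >> SHADOW_SCALE_SHIFT) + SHADOW_OFFSET;
}

int main(void)
{
	/* Addresses 8 bytes apart land on adjacent shadow bytes. */
	printf("%#llx\n", (unsigned long long)mem_to_shadow(0xffff888000000000ULL));
	printf("%#llx\n", (unsigned long long)mem_to_shadow(0xffff888000000008ULL));
	return 0;
}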
index f29aaaf..006e18d 100644 (file)
@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
 extern const struct raid6_calls raid6_vpermxor2;
 extern const struct raid6_calls raid6_vpermxor4;
 extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
 
 struct raid6_recov_calls {
        void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
 extern const struct raid6_recov_calls raid6_recov_avx512;
 extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
index eb17495..c99440a 100644 (file)
@@ -300,16 +300,22 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
 #endif
 
 #ifdef CONFIG_THERMAL
-struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
-               void *, struct thermal_zone_device_ops *,
-               const struct thermal_zone_params *, int, int);
-
-void thermal_zone_device_unregister(struct thermal_zone_device *);
-
-struct thermal_zone_device *
-thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
-                                       void *, struct thermal_zone_device_ops *,
-                                       const struct thermal_zone_params *, int, int);
+struct thermal_zone_device *thermal_zone_device_register_with_trips(
+                                       const char *type,
+                                       struct thermal_trip *trips,
+                                       int num_trips, int mask,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp,
+                                       int passive_delay, int polling_delay);
+
+struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp);
+
+void thermal_zone_device_unregister(struct thermal_zone_device *tz);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
 const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
@@ -350,15 +356,26 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz);
 int thermal_zone_device_disable(struct thermal_zone_device *tz);
 void thermal_zone_device_critical(struct thermal_zone_device *tz);
 #else
-static inline struct thermal_zone_device *thermal_zone_device_register(
-       const char *type, int trips, int mask, void *devdata,
-       struct thermal_zone_device_ops *ops,
-       const struct thermal_zone_params *tzp,
-       int passive_delay, int polling_delay)
+static inline struct thermal_zone_device *thermal_zone_device_register_with_trips(
+                                       const char *type,
+                                       struct thermal_trip *trips,
+                                       int num_trips, int mask,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp,
+                                       int passive_delay, int polling_delay)
+{ return ERR_PTR(-ENODEV); }
+
+static inline struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp)
 { return ERR_PTR(-ENODEV); }
-static inline void thermal_zone_device_unregister(
-       struct thermal_zone_device *tz)
+
+static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 { }
+
 static inline struct thermal_cooling_device *
 thermal_cooling_device_register(const char *type, void *devdata,
        const struct thermal_cooling_device_ops *ops)
index 741703b..cb571df 100644 (file)
@@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
@@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
@@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
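The note being added six times is easy to trip over in practice: the xa_alloc*() family only works on an xarray whose flags include XA_FLAGS_ALLOC (or XA_FLAGS_ALLOC1), whether set via xa_init_flags() or baked in by DEFINE_XARRAY_ALLOC(). A minimal sketch, with example_* names hypothetical:

#include <linux/xarray.h>
#include <linux/gfp.h>

static DEFINE_XARRAY_ALLOC(example_ids);	/* XA_FLAGS_ALLOC built in */

static int example_track(void *object, u32 *id)
{
	/* Dynamic equivalent: xa_init_flags(&xa, XA_FLAGS_ALLOC); */
	return xa_alloc(&example_ids, id, object, xa_limit_32b, GFP_KERNEL);
}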
index c9a8bce..d70c55f 100644 (file)
@@ -142,7 +142,7 @@ struct snd_dmaengine_pcm_config {
                        struct snd_pcm_substream *substream);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes);
+                      unsigned long bytes);
        dma_filter_fn compat_filter_fn;
        struct device *dma_dev;
        const char *chan_names[SNDRV_PCM_STREAM_LAST + 1];
index 17bea31..ceca69b 100644 (file)
@@ -139,7 +139,7 @@ struct snd_soc_component_driver {
                struct snd_pcm_audio_tstamp_report *audio_tstamp_report);
        int (*copy)(struct snd_soc_component *component,
                    struct snd_pcm_substream *substream, int channel,
-                   unsigned long pos, struct iov_iter *buf,
+                   unsigned long pos, struct iov_iter *iter,
                    unsigned long bytes);
        struct page *(*page)(struct snd_soc_component *component,
                             struct snd_pcm_substream *substream,
@@ -511,7 +511,7 @@ int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream);
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes);
+                              struct iov_iter *iter, unsigned long bytes);
 struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
                                        unsigned long offset);
 int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
index 300455b..c536788 100644 (file)
@@ -93,6 +93,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
                struct io_uring_sqe *sqe;
                unsigned int sq_idx;
 
+               if (ctx->flags & IORING_SETUP_NO_SQARRAY)
+                       break;
                sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
                if (sq_idx > sq_mask)
                        continue;
index 62f3455..1ecc8c7 100644 (file)
@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
                complete(&wq->worker_done);
 }
 
+bool io_wq_worker_stopped(void)
+{
+       struct io_worker *worker = current->worker_private;
+
+       if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+               return true;
+
+       return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
 static void io_worker_cancel_cb(struct io_worker *worker)
 {
        struct io_wq_acct *acct = io_wq_get_acct(worker);
index 06d9ca9..2b2a640 100644 (file)
@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
 
 int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
 int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
 
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
index e767535..783ed0f 100644 (file)
@@ -150,6 +150,31 @@ static void io_queue_sqe(struct io_kiocb *req);
 
 struct kmem_cache *req_cachep;
 
+static int __read_mostly sysctl_io_uring_disabled;
+static int __read_mostly sysctl_io_uring_group = -1;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kernel_io_uring_disabled_table[] = {
+       {
+               .procname       = "io_uring_disabled",
+               .data           = &sysctl_io_uring_disabled,
+               .maxlen         = sizeof(sysctl_io_uring_disabled),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_TWO,
+       },
+       {
+               .procname       = "io_uring_group",
+               .data           = &sysctl_io_uring_group,
+               .maxlen         = sizeof(gid_t),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {},
+};
+#endif
+
 struct sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)
@@ -883,7 +908,7 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
                struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
 
                if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
-                       if (ctx->task_complete) {
+                       if (ctx->lockless_cq) {
                                spin_lock(&ctx->completion_lock);
                                io_cqring_event_overflow(ctx, cqe->user_data,
                                                        cqe->res, cqe->flags, 0, 0);
@@ -1541,7 +1566,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 
                if (!(req->flags & REQ_F_CQE_SKIP) &&
                    unlikely(!io_fill_cqe_req(ctx, req))) {
-                       if (ctx->task_complete) {
+                       if (ctx->lockless_cq) {
                                spin_lock(&ctx->completion_lock);
                                io_req_cqe_overflow(req);
                                spin_unlock(&ctx->completion_lock);
@@ -1950,6 +1975,8 @@ fail:
                if (!needs_poll) {
                        if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
                                break;
+                       if (io_wq_worker_stopped())
+                               break;
                        cond_resched();
                        continue;
                }
@@ -4038,9 +4065,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
        return io_uring_create(entries, &p, params);
 }
 
+static inline bool io_uring_allowed(void)
+{
+       int disabled = READ_ONCE(sysctl_io_uring_disabled);
+       kgid_t io_uring_group;
+
+       if (disabled == 2)
+               return false;
+
+       if (disabled == 0 || capable(CAP_SYS_ADMIN))
+               return true;
+
+       io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group);
+       if (!gid_valid(io_uring_group))
+               return false;
+
+       return in_group_p(io_uring_group);
+}
+
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
                struct io_uring_params __user *, params)
 {
+       if (!io_uring_allowed())
+               return -EPERM;
+
        return io_uring_setup(entries, params);
 }
 
@@ -4634,6 +4682,10 @@ static int __init io_uring_init(void)
                                offsetof(struct io_kiocb, cmd.data),
                                sizeof_field(struct io_kiocb, cmd.data), NULL);
 
+#ifdef CONFIG_SYSCTL
+       register_sysctl_init("kernel", kernel_io_uring_disabled_table);
+#endif
+
        return 0;
 };
 __initcall(io_uring_init);
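Taken together with io_uring_allowed(), the new sysctls give three policy levels, and the table clamps writes to the 0..2 range: 0 leaves io_uring_setup() open to everyone, 2 makes it fail with -EPERM unconditionally, and 1 restricts it to CAP_SYS_ADMIN or members of the group named by kernel.io_uring_group. A small userspace probe of the knob (the /proc path follows from the table above):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/io_uring_disabled", "r");
	int mode;

	if (!f || fscanf(f, "%d", &mode) != 1)
		mode = -1;
	if (f)
		fclose(f);

	/* 0: allowed for all
	 * 1: CAP_SYS_ADMIN or kernel.io_uring_group members only
	 * 2: io_uring_setup() returns -EPERM for everyone */
	printf("io_uring_disabled = %d\n", mode);
	return 0;
}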
index ee2d2c6..bd6c2c7 100644 (file)
@@ -430,7 +430,9 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
 
        if (sqd) {
                io_sq_thread_park(sqd);
-               ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
+               /* Don't set affinity for a dying thread */
+               if (sqd->thread)
+                       ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
                io_sq_thread_unpark(sqd);
        }
 
index 4c1e9a3..f488997 100644 (file)
@@ -160,7 +160,7 @@ if  DMA_CMA
 
 config DMA_NUMA_CMA
        bool "Enable separate DMA Contiguous Memory Area for NUMA Node"
-       default NUMA
+       depends on NUMA
        help
          Enable this option to get NUMA CMA areas so that NUMA devices
          can get local memory via DMA coherent APIs.
index 88c595e..f005c66 100644 (file)
@@ -473,11 +473,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
                return -EBUSY;
        }
 
-       if (memblock_is_region_reserved(rmem->base, rmem->size)) {
-               pr_info("Reserved memory: overlap with other memblock reserved region\n");
-               return -EBUSY;
-       }
-
        if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
            of_get_flat_dt_prop(node, "no-map", NULL))
                return -EINVAL;
index f190651..06366ac 100644 (file)
@@ -637,15 +637,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void)
        return entry;
 }
 
-static void __dma_entry_alloc_check_leak(void)
+/*
+ * This should be called outside of free_entries_lock scope to avoid potential
+ * deadlocks with serial consoles that use DMA.
+ */
+static void __dma_entry_alloc_check_leak(u32 nr_entries)
 {
-       u32 tmp = nr_total_entries % nr_prealloc_entries;
+       u32 tmp = nr_entries % nr_prealloc_entries;
 
        /* Shout each time we tick over some multiple of the initial pool */
        if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) {
                pr_info("dma_debug_entry pool grown to %u (%u00%%)\n",
-                       nr_total_entries,
-                       (nr_total_entries / nr_prealloc_entries));
+                       nr_entries,
+                       (nr_entries / nr_prealloc_entries));
        }
 }
 
@@ -656,8 +660,10 @@ static void __dma_entry_alloc_check_leak(void)
  */
 static struct dma_debug_entry *dma_entry_alloc(void)
 {
+       bool alloc_check_leak = false;
        struct dma_debug_entry *entry;
        unsigned long flags;
+       u32 nr_entries;
 
        spin_lock_irqsave(&free_entries_lock, flags);
        if (num_free_entries == 0) {
@@ -667,13 +673,17 @@ static struct dma_debug_entry *dma_entry_alloc(void)
                        pr_err("debugging out of memory - disabling\n");
                        return NULL;
                }
-               __dma_entry_alloc_check_leak();
+               alloc_check_leak = true;
+               nr_entries = nr_total_entries;
        }
 
        entry = __dma_entry_alloc();
 
        spin_unlock_irqrestore(&free_entries_lock, flags);
 
+       if (alloc_check_leak)
+               __dma_entry_alloc_check_leak(nr_entries);
+
 #ifdef CONFIG_STACKTRACE
        entry->stack_len = stack_trace_save(entry->stack_entries,
                                            ARRAY_SIZE(entry->stack_entries),
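The reshuffle above is the standard recipe for printing from code whose lock a console driver might need: decide and snapshot under the spinlock, then report after dropping it. A condensed userspace model of the pattern:

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int nr_total;

static void pool_grow(unsigned int n)
{
	bool report = false;
	unsigned int snapshot = 0;

	pthread_mutex_lock(&pool_lock);
	nr_total += n;
	if (nr_total % 128 == 0) {	/* decide under the lock... */
		report = true;
		snapshot = nr_total;	/* ...capture what to print */
	}
	pthread_mutex_unlock(&pool_lock);

	if (report)			/* ...print outside it */
		printf("pool grown to %u\n", snapshot);
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		pool_grow(64);
	return 0;
}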
index 1acec2e..b481c48 100644 (file)
@@ -135,9 +135,9 @@ encrypt_mapping:
 remove_mapping:
 #ifdef CONFIG_DMA_DIRECT_REMAP
        dma_common_free_remap(addr, pool_size);
-#endif
-free_page: __maybe_unused
+free_page:
        __free_pages(page, order);
+#endif
 out:
        return ret;
 }
index 96fc38c..7e0b4dd 100644 (file)
@@ -538,14 +538,12 @@ char *log_buf_addr_get(void)
 {
        return log_buf;
 }
-EXPORT_SYMBOL_GPL(log_buf_addr_get);
 
 /* Return log buffer size */
 u32 log_buf_len_get(void)
 {
        return log_buf_len;
 }
-EXPORT_SYMBOL_GPL(log_buf_len_get);
 
 /*
  * Define how much of the log buffer we could take at maximum. The value
index 7ecdfdb..13f2758 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc);
  * @end: The maximum ID (exclusive).
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @nextid and @end.  If
+ * Allocates an unused ID in the range specified by @start and @end.  If
  * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
  * callers to use @start + N as @end as long as N is within integer range.
  * The search for an unused ID will start at the last ID allocated and will
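The corrected wording matches the cyclic allocator's contract: IDs come from [@start, @end), @end <= 0 stands for INT_MAX + 1, and the search resumes just past the last ID handed out. A sketch, assuming the documented function is idr_alloc_cyclic(), which the "last ID allocated" wording points to:

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDR(example_idr);

static int example_new_id(void *object)
{
	/* end = 0 means "up to INT_MAX", so this cycles through [1, INT_MAX],
	 * starting each search after the previously allocated ID. */
	return idr_alloc_cyclic(&example_idr, object, 1, 0, GFP_KERNEL);
}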
index 45e1761..035b0a4 100644 (file)
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                               vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs      += mktables
 
index a22a05c..0ec534f 100644 (file)
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
        &raid6_neonx2,
        &raid6_neonx1,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_lsx,
+#endif
+#endif
 #if defined(__ia64__)
        &raid6_intx32,
        &raid6_intx16,
@@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if defined(CONFIG_KERNEL_MODE_NEON)
        &raid6_recov_neon,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_recov_lsx,
+#endif
+#endif
        &raid6_recov_intx1,
        NULL
 };
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644 (file)
index 0000000..acfc33c
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37 and earlier, and for musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX    (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX   (1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx    (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx   (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644 (file)
index 0000000..aa5d9f9
--- /dev/null
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not being disabled if vector support is present.
+ * This is like the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void)
+{
+       return cpu_has_lsx;
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+               asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+               asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+               asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+               asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+                                  size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr12");
+                       asm volatile("vxor.v $vr5, $vr17, $vr13");
+                       asm volatile("vxor.v $vr6, $vr18, $vr14");
+                       asm volatile("vxor.v $vr7, $vr19, $vr15");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "vld $vr20, %0\n\t"
+                       "vld $vr21, %1\n\t"
+                       "vld $vr22, %2\n\t"
+                       "vld $vr23, %3\n\t"
+                       "vld $vr24, %4\n\t"
+                       "vld $vr25, %5\n\t"
+                       "vld $vr26, %6\n\t"
+                       "vld $vr27, %7\n\t"
+                       "vxor.v $vr20, $vr20, $vr0\n\t"
+                       "vxor.v $vr21, $vr21, $vr1\n\t"
+                       "vxor.v $vr22, $vr22, $vr2\n\t"
+                       "vxor.v $vr23, $vr23, $vr3\n\t"
+                       "vxor.v $vr24, $vr24, $vr4\n\t"
+                       "vxor.v $vr25, $vr25, $vr5\n\t"
+                       "vxor.v $vr26, $vr26, $vr6\n\t"
+                       "vxor.v $vr27, $vr27, $vr7\n\t"
+                       "vst $vr20, %0\n\t"
+                       "vst $vr21, %1\n\t"
+                       "vst $vr22, %2\n\t"
+                       "vst $vr23, %3\n\t"
+                       "vst $vr24, %4\n\t"
+                       "vst $vr25, %5\n\t"
+                       "vst $vr26, %6\n\t"
+                       "vst $vr27, %7\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {
+       raid6_lsx_gen_syndrome,
+       raid6_lsx_xor_syndrome,
+       raid6_has_lsx,
+       "lsx",
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+                                   size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr7");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "xvld $xr10, %0\n\t"
+                       "xvld $xr11, %1\n\t"
+                       "xvld $xr12, %2\n\t"
+                       "xvld $xr13, %3\n\t"
+                       "xvxor.v $xr10, $xr10, $xr0\n\t"
+                       "xvxor.v $xr11, $xr11, $xr1\n\t"
+                       "xvxor.v $xr12, $xr12, $xr2\n\t"
+                       "xvxor.v $xr13, $xr13, $xr3\n\t"
+                       "xvst $xr10, %0\n\t"
+                       "xvst $xr11, %1\n\t"
+                       "xvst $xr12, %2\n\t"
+                       "xvst $xr13, %3\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = {
+       raid6_lasx_gen_syndrome,
+       raid6_lasx_xor_syndrome,
+       raid6_has_lasx,
+       "lasx",
+       .priority = 0 /* see the comment near the top of the file for the reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
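
For reference, the MASK/SHLBYTE/0x1d sequence in the syndrome loops above is the standard RAID-6 step of multiplying the running Q value by 2 in GF(2^8), 32 bytes at a time per LASX register. A minimal scalar sketch of what one byte lane computes (gf_mul2 is a hypothetical helper name, not part of this patch):

    #include <linux/types.h>

    /*
     * Multiply one GF(2^8) element by x (i.e. by 2) modulo the RAID-6
     * polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x11d).
     */
    static inline u8 gf_mul2(u8 b)
    {
            u8 mask = (b & 0x80) ? 0xff : 0x00;  /* xvslti.b: MASK(wq)  */

            /* xvslli.b (SHLBYTE) xor'd with (mask & 0x1d) via xvandi.b */
            return (u8)(b << 1) ^ (mask & 0x1d);
    }
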
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644 (file)
index 0000000..94aeac8
--- /dev/null
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+       return cpu_has_lsx;
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+                                 int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * vr20, vr21: qmul
+        * vr22, vr23: pbmul
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+       while (bytes) {
+               /* vr4 - vr7: Q */
+               asm volatile("vld $vr4, %0" : : "m" (q[0]));
+               asm volatile("vld $vr5, %0" : : "m" (q[16]));
+               asm volatile("vld $vr6, %0" : : "m" (q[32]));
+               asm volatile("vld $vr7, %0" : : "m" (q[48]));
+               /* vr4 - vr7: Q + Qxy */
+               asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               /* vr0 - vr3: P */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr0 - vr3: P + Pxy */
+               asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+               asm volatile("vxor.v $vr0, $vr0, $vr8");
+               asm volatile("vxor.v $vr1, $vr1, $vr9");
+               asm volatile("vxor.v $vr2, $vr2, $vr10");
+               asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+               asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+               asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+               asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+               /* lookup from qmul[16] */
+               asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+               asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+               asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+               asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+               /* vr16 - vr19: B(Q + Qxy) */
+               asm volatile("vxor.v $vr16, $vr8, $vr4");
+               asm volatile("vxor.v $vr17, $vr9, $vr5");
+               asm volatile("vxor.v $vr18, $vr10, $vr6");
+               asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+               /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("vsrli.b $vr4, $vr0, 4");
+               asm volatile("vsrli.b $vr5, $vr1, 4");
+               asm volatile("vsrli.b $vr6, $vr2, 4");
+               asm volatile("vsrli.b $vr7, $vr3, 4");
+               /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("vandi.b $vr12, $vr0, 0x0f");
+               asm volatile("vandi.b $vr13, $vr1, 0x0f");
+               asm volatile("vandi.b $vr14, $vr2, 0x0f");
+               asm volatile("vandi.b $vr15, $vr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+               asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+               asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+               asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+               /* lookup from pbmul[16] */
+               asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+               asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+               asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+               asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+               /* vr4 - vr7: A(P + Pxy) */
+               asm volatile("vxor.v $vr4, $vr4, $vr12");
+               asm volatile("vxor.v $vr5, $vr5, $vr13");
+               asm volatile("vxor.v $vr6, $vr6, $vr14");
+               asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+               /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
+               asm volatile("vxor.v $vr4, $vr4, $vr16");
+               asm volatile("vxor.v $vr5, $vr5, $vr17");
+               asm volatile("vxor.v $vr6, $vr6, $vr18");
+               asm volatile("vxor.v $vr7, $vr7, $vr19");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Pxy + Dx = Dy */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+               asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+               asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+               asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+                                 void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /* vr22, vr23: qmul */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+       while (bytes) {
+               /* vr0 - vr3: P + Dx */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr4 - vr7: Qx */
+               asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+               /* vr4 - vr7: Q + Qx */
+               asm volatile("vld $vr8, %0" : : "m" (q[0]));
+               asm volatile("vld $vr9, %0" : : "m" (q[16]));
+               asm volatile("vld $vr10, %0" : : "m" (q[32]));
+               asm volatile("vld $vr11, %0" : : "m" (q[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+               asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+               asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+               asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+               /* lookup from qmul[16] */
+               asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+               asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+               asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+               asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+               /* vr4 - vr7: qmul(Q + Qx) = Dx */
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Dx + Dx = P */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (p[0]));
+               asm volatile("vst $vr1, %0" : "=m" (p[16]));
+               asm volatile("vst $vr2, %0" : "=m" (p[32]));
+               asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+       .data2 = raid6_2data_recov_lsx,
+       .datap = raid6_datap_recov_lsx,
+       .valid = raid6_has_lsx,
+       .name = "lsx",
+       .priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+                                  int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * xr20, xr21: qmul
+        * xr22, xr23: pbmul
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+       asm volatile("xvreplve0.q $xr20, $xr20");
+       asm volatile("xvreplve0.q $xr21, $xr21");
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /* xr0, xr1: Q */
+               asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+               /* xr0, xr1: Q + Qxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+               /* xr2, xr3: P */
+               asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+               /* xr2, xr3: P + Pxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvsrli.b $xr4, $xr0, 4");
+               asm volatile("xvsrli.b $xr5, $xr1, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+               /* xr6, xr7: B(Q + Qxy) */
+               asm volatile("xvxor.v $xr6, $xr4, $xr0");
+               asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+               /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+               /* lookup from pbmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr0, xr1: A(P + Pxy) */
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+               /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+               asm volatile("xvxor.v $xr0, $xr0, $xr6");
+               asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+               /* xr2, xr3: P + Pxy + Dx = Dy */
+               asm volatile("xvxor.v $xr2, $xr2, $xr0");
+               asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+               asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+                                  void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /* xr22, xr23: qmul */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /* xr0, xr1: P + Dx */
+               asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+               /* xr2, xr3: Qx */
+               asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+               /* xr2, xr3: Q + Qx */
+               asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+               asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr2, xr3: qmul(Q + Qx) = Dx */
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr0, xr1: P + Dx + Dx = P */
+               asm volatile("xvxor.v $xr0, $xr0, $xr2");
+               asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+               asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+       .data2 = raid6_2data_recov_lasx,
+       .datap = raid6_datap_recov_lasx,
+       .valid = raid6_has_lasx,
+       .name = "lasx",
+       .priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
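
The vshuf.b/xvshuf.b lookups above implement a full GF(2^8) multiply by a constant as two 16-entry nibble-table lookups plus an XOR, using the raid6_vgfmul[] row layout (low-nibble products in bytes 0-15, high-nibble products in bytes 16-31). A scalar sketch of one byte of the recovery math, with hypothetical helper names:

    #include <linux/types.h>

    /* tbl points at one 32-byte raid6_vgfmul[] row. */
    static inline u8 gf_mul_tbl(const u8 *tbl, u8 b)
    {
            return tbl[b & 0x0f] ^ tbl[16 + (b >> 4)];
    }

    /*
     * Two-data recovery per byte, matching the register comments in
     * raid6_2data_recov_lsx() above:
     *   Dx = A * (P ^ Pxy) ^ B * (Q ^ Qxy), with A = pbmul, B = qmul
     *   Dy = (P ^ Pxy) ^ Dx
     */
    static inline void recov_2data_byte(const u8 *pbmul, const u8 *qmul,
                                        u8 p_pxy, u8 q_qxy,
                                        u8 *dx, u8 *dy)
    {
            *dx = gf_mul_tbl(pbmul, p_pxy) ^ gf_mul_tbl(qmul, q_qxy);
            *dy = p_pxy ^ *dx;
    }
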
index 1f693ea..2abe007 100644 (file)
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 endif
 
+ifeq ($(ARCH),loongarch64)
+        CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+        CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' |         \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+        CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' |        \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
         CFLAGS += -DCONFIG_ALTIVEC
         OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+        OBJS += loongarch_simd.o recov_loongarch_simd.o
 endif
 
 .c.o:
index 2071a37..39f07bf 100644 (file)
@@ -206,7 +206,7 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node)
        void *entry = xa_entry(xas->xa, node, offset);
 
        xas->xa_node = node;
-       if (xa_is_sibling(entry)) {
+       while (xa_is_sibling(entry)) {
                offset = xa_to_sibling(entry);
                entry = xa_entry(xas->xa, node, offset);
                if (node->shift && xa_is_node(entry))
@@ -1802,6 +1802,9 @@ EXPORT_SYMBOL(xa_get_order);
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -1850,6 +1853,9 @@ EXPORT_SYMBOL(__xa_alloc);
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be called on an xarray initialized with the XA_FLAGS_ALLOC flag
+ * set in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
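
A minimal usage sketch for the requirement documented above (struct my_obj and the surrounding code are hypothetical): the xarray must carry XA_FLAGS_ALLOC, e.g. via DEFINE_XARRAY_ALLOC(), before xa_alloc()/__xa_alloc() may be used on it.

    #include <linux/xarray.h>

    static DEFINE_XARRAY_ALLOC(my_ids);  /* xarray with XA_FLAGS_ALLOC set */

    static int my_obj_track(struct my_obj *obj)
    {
            u32 id;
            int err;

            /* Allocate a free index, store obj there, return the id. */
            err = xa_alloc(&my_ids, &id, obj, xa_limit_32b, GFP_KERNEL);
            if (err)
                    return err;

            obj->id = id;
            return 0;
    }
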
index dcfec27..89895f3 100644 (file)
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                unsigned long end)
 {
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               pud_populate(&init_mm, pud,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pmd_init(p);
+                               pud_populate(&init_mm, pud, p);
                        }
                }
                zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                unsigned long end)
 {
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               p4d_populate(&init_mm, p4d,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pud_init(p);
+                               p4d_populate(&init_mm, p4d, p);
                        }
                }
                zero_pud_populate(p4d, addr, next);
index 2e973b3..f70e3d7 100644 (file)
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
        return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
                << KASAN_SHADOW_SCALE_SHIFT);
 }
+#endif
 
 static __always_inline bool addr_has_metadata(const void *addr)
 {
+#ifdef __HAVE_ARCH_SHADOW_MAP
+       return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
        return (kasan_reset_tag(addr) >=
                kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
 }
 
 /**
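
The generic KASAN code assumes a linear mem-to-shadow mapping; __HAVE_ARCH_SHADOW_MAP lets an architecture substitute a non-linear kasan_mem_to_shadow() that may return NULL for regions without shadow, which is why addr_has_metadata() gains the NULL test above. For reference, a sketch of the generic linear mapping that kasan_shadow_to_mem() inverts (renamed here to avoid clashing with the in-tree definition):

    /* One shadow byte covers 1 << KASAN_SHADOW_SCALE_SHIFT bytes of memory. */
    static inline void *generic_mem_to_shadow(const void *addr)
    {
            return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                    + KASAN_SHADOW_OFFSET;
    }
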
index 96fd041..3872528 100644 (file)
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
  */
 static unsigned long kfence_init_pool(void)
 {
-       unsigned long addr = (unsigned long)__kfence_pool;
+       unsigned long addr;
        struct page *pages;
        int i;
 
        if (!arch_kfence_init_pool())
-               return addr;
+               return (unsigned long)__kfence_pool;
 
+       addr = (unsigned long)__kfence_pool;
        pages = virt_to_page(__kfence_pool);
 
        /*
index d43231b..55b1df8 100644 (file)
@@ -67,7 +67,7 @@ struct landlock_rule {
         * @layers: Stack of layers, from the latest to the newest, implemented
         * as a flexible array member (FAM).
         */
-       struct landlock_layer layers[];
+       struct landlock_layer layers[] __counted_by(num_layers);
 };
 
 /**
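
The __counted_by(num_layers) annotation added above tells the compiler (and FORTIFY/UBSAN bounds checking) which member holds the runtime element count of the flexible array. A generic sketch of the allocation pattern it expects, with hypothetical types, not the Landlock code itself:

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct item { int v; };              /* placeholder element type */

    struct flex {
            int nr;
            struct item items[] __counted_by(nr);
    };

    static struct flex *flex_alloc(int n)
    {
            struct flex *f = kzalloc(struct_size(f, items, n), GFP_KERNEL);

            if (!f)
                    return NULL;
            f->nr = n;  /* set the counter before any items[] access */
            return f;
    }
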
index 4859fb1..a11cd7d 100644 (file)
@@ -1992,8 +1992,8 @@ static int default_write_copy(struct snd_pcm_substream *substream,
                              int channel, unsigned long hwoff,
                              struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                           bytes, iter))
+       if (copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                          bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
@@ -2025,8 +2025,8 @@ static int default_read_copy(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
                             struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                         bytes, iter))
+       if (copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                        bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
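
Both hunks above fix the same bug class: copy_from_iter()/copy_to_iter() return the number of bytes actually copied, so the old `!copy_...` test flagged only the zero-byte case and let short copies through, while also treating a fully successful (non-zero) copy as the error path. The corrected shape, as a hypothetical helper:

    #include <linux/uio.h>

    static int copy_in_exact(void *dst, struct iov_iter *iter, size_t bytes)
    {
            /* A short copy (e.g. a faulting user iovec) must become -EFAULT. */
            if (copy_from_iter(dst, bytes, iter) != bytes)
                    return -EFAULT;
            return 0;
    }
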
index 174585b..b603bb9 100644 (file)
@@ -187,8 +187,13 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
        err = expand_var_event(event, 0, len, buf, in_kernel);
        if (err < 0)
                return err;
-       if (len != newlen)
-               memset(buf + len, 0, newlen - len);
+       if (len != newlen) {
+               if (in_kernel)
+                       memset(buf + len, 0, newlen - len);
+               else if (clear_user((__force void __user *)buf + len,
+                                   newlen - len))
+                       return -EFAULT;
+       }
        return newlen;
 }
 EXPORT_SYMBOL(snd_seq_expand_var_event);
index c05935c..9234d4f 100644 (file)
@@ -456,7 +456,7 @@ static int emu8k_pcm_silence(struct snd_pcm_substream *subs,
        /* convert to word unit */
        pos = (pos << 1) + rec->loop_start[voice];
        count <<= 1;
-       LOOP_WRITE(rec, pos, USER_SOCKPTR(NULL), count);
+       LOOP_WRITE(rec, pos, NULL, count);
        return 0;
 }
 
index 0ba1fbc..6278999 100644 (file)
@@ -888,7 +888,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
 
        /* Initialize CS42L42 companion codec */
        cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num);
-       usleep_range(30000, 35000);
+       msleep(CS42L42_INIT_TIMEOUT_MS);
 
        /* Clear interrupts, by reading interrupt status registers */
        cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs));
index 2a8dfb4..937e938 100644 (file)
@@ -229,6 +229,7 @@ enum cs8409_coefficient_index_registers {
 #define CS42L42_I2C_SLEEP_US                   (2000)
 #define CS42L42_PDN_TIMEOUT_US                 (250000)
 #define CS42L42_PDN_SLEEP_US                   (2000)
+#define CS42L42_INIT_TIMEOUT_MS                        (45)
 #define CS42L42_FULL_SCALE_VOL_MASK            (2)
 #define CS42L42_FULL_SCALE_VOL_0DB             (1)
 #define CS42L42_FULL_SCALE_VOL_MINUS6DB                (0)
index a07df6f..b7e78bf 100644 (file)
@@ -7057,6 +7057,27 @@ static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP while NID 0x02 to SPK */
+static void alc287_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */
+       static const hda_nid_t preferred_pairs[] = {
+               0x17, 0x02, 0x21, 0x03, 0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
+       spec->gen.preferred_dacs = preferred_pairs;
+       spec->gen.auto_mute_via_amp = 1;
+       snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                           0x0); /* Make sure 0x14 is disabled */
+}
+
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -7319,6 +7340,7 @@ enum {
        ALC287_FIXUP_TAS2781_I2C,
        ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
        ALC245_FIXUP_HP_X360_MUTE_LEDS,
+       ALC287_FIXUP_THINKPAD_I2S_SPK,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9413,6 +9435,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC245_FIXUP_HP_GPIO_LED
        },
+       [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_bind_dacs,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10544,6 +10570,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x90170111},
                {0x19, 0x03a11030},
                {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
+               {0x17, 0x90170110},
+               {0x19, 0x03a11030},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
index 37114fd..fb80280 100644 (file)
@@ -173,16 +173,6 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
        return 0;
 }
 
-static int tasdevice_hda_clamp(int val, int max)
-{
-       if (val > max)
-               val = max;
-
-       if (val < 0)
-               val = 0;
-       return val;
-}
-
 static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
                struct snd_ctl_elem_value *ucontrol)
 {
@@ -191,7 +181,7 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
        int max = tas_priv->rcabin.ncfgs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_profile, max);
+       val = clamp(nr_profile, 0, max);
 
        if (tas_priv->rcabin.profile_cfg_id != val) {
                tas_priv->rcabin.profile_cfg_id = val;
@@ -248,7 +238,7 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_programs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_program, max);
+       val = clamp(nr_program, 0, max);
 
        if (tas_priv->cur_prog != val) {
                tas_priv->cur_prog = val;
@@ -277,7 +267,7 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_configurations - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_config, max);
+       val = clamp(nr_config, 0, max);
 
        if (tas_priv->cur_conf != val) {
                tas_priv->cur_conf = val;
index b304b35..3ec15b4 100644 (file)
@@ -217,6 +217,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                .driver_data = &acp6x_card,
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82TL"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
                }
        },
@@ -328,6 +335,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
        {
                .driver_data = &acp6x_card,
                .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+                       DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
                }
index afe213a..dcc4e14 100644 (file)
@@ -954,7 +954,7 @@ static int mchp_pdmc_dt_init(struct mchp_pdmc *dd)
 /* used to clean the channel index found on RHR's MSB */
 static int mchp_pdmc_process(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
-                            struct iov_iter *buf, unsigned long bytes)
+                            unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        u8 *dma_ptr = runtime->dma_area + hwoff +
index 95b5bd8..f1e1dbc 100644 (file)
@@ -1968,11 +1968,15 @@ config SND_SOC_UDA1380
        tristate
        depends on I2C
 
+config SND_SOC_WCD_CLASSH
+       tristate
+
 config SND_SOC_WCD9335
        tristate "WCD9335 Codec"
        depends on SLIMBUS
        select REGMAP_SLIMBUS
        select REGMAP_IRQ
+       select SND_SOC_WCD_CLASSH
        help
          The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports
          Qualcomm Technologies, Inc. (QTI) multimedia solutions,
@@ -1987,6 +1991,7 @@ config SND_SOC_WCD934X
        depends on SLIMBUS
        select REGMAP_IRQ
        select REGMAP_SLIMBUS
+       select SND_SOC_WCD_CLASSH
        select SND_SOC_WCD_MBHC
        depends on MFD_WCD934X || COMPILE_TEST
        help
@@ -1997,6 +2002,7 @@ config SND_SOC_WCD938X
        depends on SND_SOC_WCD938X_SDW
        tristate
        depends on SOUNDWIRE || !SOUNDWIRE
+       select SND_SOC_WCD_CLASSH
 
 config SND_SOC_WCD938X_SDW
        tristate "WCD9380/WCD9385 Codec - SDW"
index c8502a4..a87e569 100644 (file)
@@ -303,10 +303,11 @@ snd-soc-twl4030-objs := twl4030.o
 snd-soc-twl6040-objs := twl6040.o
 snd-soc-uda1334-objs := uda1334.o
 snd-soc-uda1380-objs := uda1380.o
+snd-soc-wcd-classh-objs := wcd-clsh-v2.o
 snd-soc-wcd-mbhc-objs := wcd-mbhc-v2.o
-snd-soc-wcd9335-objs := wcd-clsh-v2.o wcd9335.o
-snd-soc-wcd934x-objs := wcd-clsh-v2.o wcd934x.o
-snd-soc-wcd938x-objs := wcd938x.o wcd-clsh-v2.o
+snd-soc-wcd9335-objs := wcd9335.o
+snd-soc-wcd934x-objs := wcd934x.o
+snd-soc-wcd938x-objs := wcd938x.o
 snd-soc-wcd938x-sdw-objs := wcd938x-sdw.o
 snd-soc-wl1273-objs := wl1273.o
 snd-soc-wm-adsp-objs := wm_adsp.o
@@ -685,6 +686,7 @@ obj-$(CONFIG_SND_SOC_TWL4030)       += snd-soc-twl4030.o
 obj-$(CONFIG_SND_SOC_TWL6040)  += snd-soc-twl6040.o
 obj-$(CONFIG_SND_SOC_UDA1334)  += snd-soc-uda1334.o
 obj-$(CONFIG_SND_SOC_UDA1380)  += snd-soc-uda1380.o
+obj-$(CONFIG_SND_SOC_WCD_CLASSH)       += snd-soc-wcd-classh.o
 obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o
 obj-$(CONFIG_SND_SOC_WCD9335)  += snd-soc-wcd9335.o
 obj-$(CONFIG_SND_SOC_WCD934X)  += snd-soc-wcd934x.o
index d1edb98..be4f422 100644 (file)
@@ -279,7 +279,7 @@ static const struct snd_kcontrol_new cs35l45_dsp_muxes[] = {
 };
 
 static const struct snd_kcontrol_new cs35l45_dac_muxes[] = {
-       SOC_DAPM_ENUM("DACPCM1 Source", cs35l45_dacpcm_enums[0]),
+       SOC_DAPM_ENUM("DACPCM Source", cs35l45_dacpcm_enums[0]),
 };
 
 static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
@@ -333,7 +333,7 @@ static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
        SND_SOC_DAPM_MUX("DSP_RX7 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[6]),
        SND_SOC_DAPM_MUX("DSP_RX8 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[7]),
 
-       SND_SOC_DAPM_MUX("DACPCM1 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
+       SND_SOC_DAPM_MUX("DACPCM Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
 
        SND_SOC_DAPM_OUT_DRV("AMP", SND_SOC_NOPM, 0, 0, NULL, 0),
 
@@ -403,7 +403,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        { "ASP_RX1", NULL, "ASP_EN" },
        { "ASP_RX2", NULL, "ASP_EN" },
 
-       { "AMP", NULL, "DACPCM1 Source"},
+       { "AMP", NULL, "DACPCM Source"},
        { "AMP", NULL, "GLOBAL_EN"},
 
        CS35L45_DSP_MUX_ROUTE("DSP_RX1"),
@@ -427,7 +427,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        {"DSP1 Preload", NULL, "DSP1 Preloader"},
        {"DSP1", NULL, "DSP1 Preloader"},
 
-       CS35L45_DAC_MUX_ROUTE("DACPCM1"),
+       CS35L45_DAC_MUX_ROUTE("DACPCM"),
 
        { "SPK", NULL, "AMP"},
 };
@@ -969,7 +969,7 @@ static irqreturn_t cs35l45_dsp_virt2_mbox_cb(int irq, void *data)
 
        ret = regmap_read(cs35l45->regmap, CS35L45_DSP_VIRT2_MBOX_3, &mbox_val);
        if (!ret && mbox_val)
-               ret = cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
+               cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
                                (mbox_val & CS35L45_MBOX3_DATA_MASK) >> CS35L45_MBOX3_DATA_SHIFT);
 
        /* Handle DSP trace log IRQ */
@@ -1078,6 +1078,7 @@ static int cs35l45_initialize(struct cs35l45_private *cs35l45)
 
        switch (dev_id[0]) {
        case 0x35A450:
+       case 0x35A460:
                break;
        default:
                dev_err(cs35l45->dev, "Bad DEVID 0x%x\n", dev_id[0]);
index ae373f3..98b1e63 100644 (file)
@@ -243,26 +243,27 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base)
 {
        unsigned int reg;
        unsigned int val;
-       int ret;
+       int read_ret, poll_ret;
 
        if (cs35l56_base->rev < CS35L56_REVID_B0)
                reg = CS35L56_DSP1_HALO_STATE_A1;
        else
                reg = CS35L56_DSP1_HALO_STATE;
 
-       ret = regmap_read_poll_timeout(cs35l56_base->regmap, reg,
-                                      val,
-                                      (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
-                                      CS35L56_HALO_STATE_POLL_US,
-                                      CS35L56_HALO_STATE_TIMEOUT_US);
-
-       if ((ret < 0) && (ret != -ETIMEDOUT)) {
-               dev_err(cs35l56_base->dev, "Failed to read HALO_STATE: %d\n", ret);
-               return ret;
-       }
-
-       if ((ret == -ETIMEDOUT) || (val != CS35L56_HALO_STATE_BOOT_DONE)) {
-               dev_err(cs35l56_base->dev, "Firmware boot fail: HALO_STATE=%#x\n", val);
+       /*
+        * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK
+        * I2C until it has booted which would terminate the poll
+        */
+       poll_ret = read_poll_timeout(regmap_read, read_ret,
+                                    (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
+                                    CS35L56_HALO_STATE_POLL_US,
+                                    CS35L56_HALO_STATE_TIMEOUT_US,
+                                    false,
+                                    cs35l56_base->regmap, reg, &val);
+
+       if (poll_ret) {
+               dev_err(cs35l56_base->dev, "Firmware boot timed out (%d): HALO_STATE=%#x\n",
+                       read_ret, val);
                return -EIO;
        }
 
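
The replacement above leans on a difference worth spelling out: regmap_read_poll_timeout() aborts as soon as regmap_read() returns an error, while plain read_poll_timeout() from linux/iopoll.h only stores the op's return value and keeps evaluating the condition until timeout. A minimal sketch with hypothetical register and state values:

    #include <linux/iopoll.h>
    #include <linux/regmap.h>

    #define MY_STATUS_REG   0x100        /* hypothetical */
    #define MY_BOOT_DONE    0x2          /* hypothetical */

    static int wait_for_boot(struct regmap *map)
    {
            int read_ret;
            unsigned int val = 0;

            /* read_ret captures regmap_read()'s status on each pass; the
             * poll ends only on the condition or the timeout, so transient
             * I2C NAKs from an unbooted device do not abort it. */
            return read_poll_timeout(regmap_read, read_ret,
                                     val >= MY_BOOT_DONE,
                                     1000, 500000, false,
                                     map, MY_STATUS_REG, &val);
    }
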
index 24e718e..1a95c37 100644 (file)
@@ -2205,7 +2205,8 @@ static int cs42l43_codec_probe(struct platform_device *pdev)
        // Don't use devm as we need to get against the MFD device
        priv->mclk = clk_get_optional(cs42l43->dev, "mclk");
        if (IS_ERR(priv->mclk)) {
-               dev_err_probe(priv->dev, PTR_ERR(priv->mclk), "Failed to get mclk\n");
+               ret = PTR_ERR(priv->mclk);
+               dev_err_probe(priv->dev, ret, "Failed to get mclk\n");
                goto err_pm;
        }
 
index 038d93e..1a137ca 100644 (file)
@@ -3269,13 +3269,17 @@ static int rt5645_component_set_jack(struct snd_soc_component *component,
 {
        struct snd_soc_jack *mic_jack = NULL;
        struct snd_soc_jack *btn_jack = NULL;
-       int *type = (int *)data;
+       int type;
 
-       if (*type & SND_JACK_MICROPHONE)
-               mic_jack = hs_jack;
-       if (*type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-               SND_JACK_BTN_2 | SND_JACK_BTN_3))
-               btn_jack = hs_jack;
+       if (hs_jack) {
+               type = *(int *)data;
+
+               if (type & SND_JACK_MICROPHONE)
+                       mic_jack = hs_jack;
+               if (type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+                       SND_JACK_BTN_2 | SND_JACK_BTN_3))
+                       btn_jack = hs_jack;
+       }
 
        return rt5645_set_jack_detect(component, hs_jack, mic_jack, btn_jack);
 }
index a75db27..d96e23e 100644 (file)
@@ -355,6 +355,7 @@ void wcd_clsh_set_hph_mode(struct wcd_clsh_ctrl *ctrl, int mode)
                wcd_clsh_v2_set_hph_mode(comp, mode);
 
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_set_hph_mode);
 
 static void wcd_clsh_set_flyback_current(struct snd_soc_component *comp,
                                         int mode)
@@ -869,11 +870,13 @@ int wcd_clsh_ctrl_set_state(struct wcd_clsh_ctrl *ctrl,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_set_state);
 
 int wcd_clsh_ctrl_get_state(struct wcd_clsh_ctrl *ctrl)
 {
        return ctrl->state;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_get_state);
 
 struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
                                          int version)
@@ -890,8 +893,13 @@ struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
 
        return ctrl;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_alloc);
 
 void wcd_clsh_ctrl_free(struct wcd_clsh_ctrl *ctrl)
 {
        kfree(ctrl);
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_free);
+
+MODULE_DESCRIPTION("WCD93XX Class-H driver");
+MODULE_LICENSE("GPL");
index 1fbb2c2..8565a53 100644 (file)
@@ -796,6 +796,28 @@ static int avs_component_probe(struct snd_soc_component *component)
 
        ret = avs_load_topology(component, filename);
        kfree(filename);
+       if (ret == -ENOENT && !strncmp(mach->tplg_filename, "hda-", 4)) {
+               unsigned int vendor_id;
+
+               if (sscanf(mach->tplg_filename, "hda-%08x-tplg.bin", &vendor_id) != 1)
+                       return ret;
+
+               if (((vendor_id >> 16) & 0xFFFF) == 0x8086)
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-8086-generic-tplg.bin");
+               else
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-generic-tplg.bin");
+
+               filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix,
+                                    mach->tplg_filename);
+               if (!filename)
+                       return -ENOMEM;
+
+               dev_info(card->dev, "trying to load fallback topology %s\n", mach->tplg_filename);
+               ret = avs_load_topology(component, filename);
+               kfree(filename);
+       }
        if (ret < 0)
                return ret;
 
index f18406d..ba7c0ae 100644 (file)
@@ -1054,7 +1054,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream)
 
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes)
+                              struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
        struct snd_soc_component *component;
@@ -1065,7 +1065,7 @@ int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                if (component->driver->copy)
                        return soc_component_ret(component,
                                component->driver->copy(component, substream,
-                                       channel, pos, buf, bytes));
+                                       channel, pos, iter, bytes));
 
        return -EINVAL;
 }
index ff21665..d0653d7 100644 (file)
@@ -290,29 +290,29 @@ static snd_pcm_uframes_t dmaengine_pcm_pointer(
 static int dmaengine_copy(struct snd_soc_component *component,
                          struct snd_pcm_substream *substream,
                          int channel, unsigned long hwoff,
-                         struct iov_iter *buf, unsigned long bytes)
+                         struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct dmaengine_pcm *pcm = soc_component_to_pcm(component);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes) = pcm->config->process;
+                      unsigned long bytes) = pcm->config->process;
        bool is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
        void *dma_ptr = runtime->dma_area + hwoff +
                        channel * (runtime->dma_bytes / runtime->channels);
 
        if (is_playback)
-               if (copy_from_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_from_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        if (process) {
-               int ret = process(substream, channel, hwoff, buf, bytes);
+               int ret = process(substream, channel, hwoff, bytes);
                if (ret < 0)
                        return ret;
        }
 
        if (!is_playback)
-               if (copy_to_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_to_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        return 0;
index f9b5d59..0acc848 100644 (file)
@@ -1246,7 +1246,7 @@ static const struct snd_soc_dai_ops stm32_sai_pcm_dai_ops2 = {
 
 static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream,
                                       int channel, unsigned long hwoff,
-                                      struct iov_iter *buf, unsigned long bytes)
+                                      unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
index a27e244..1ec177f 100644 (file)
@@ -265,7 +265,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 
        if (!ep)
                return;
-       for (i = 0; i < ep->num_urbs; ++i) {
+       for (i = 0; i < NUM_URBS; ++i) {
                ctx = &ep->urbs[i];
                if (!ctx->urb)
                        break;
@@ -279,6 +279,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 }
 
 /* allocate URBs for an EP */
+/* the callers should handle allocation errors via free_midi_urbs() */
 static int alloc_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 {
        struct snd_usb_midi2_urb *ctx;
@@ -351,8 +352,10 @@ static int snd_usb_midi_v2_open(struct snd_ump_endpoint *ump, int dir)
                return -EIO;
        if (ep->direction == STR_OUT) {
                err = alloc_midi_urbs(ep);
-               if (err)
+               if (err) {
+                       free_midi_urbs(ep);
                        return err;
+               }
        }
        return 0;
 }
index e00520c..cffaf22 100644 (file)
@@ -159,7 +159,7 @@ void multiorder_tagged_iteration(struct xarray *xa)
        item_kill_tree(xa);
 }
 
-bool stop_iteration = false;
+bool stop_iteration;
 
 static void *creator_func(void *ptr)
 {
@@ -201,6 +201,7 @@ static void multiorder_iteration_race(struct xarray *xa)
        pthread_t worker_thread[num_threads];
        int i;
 
+       stop_iteration = false;
        pthread_create(&worker_thread[0], NULL, &creator_func, xa);
        for (i = 1; i < num_threads; i++)
                pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
@@ -211,6 +212,61 @@ static void multiorder_iteration_race(struct xarray *xa)
        item_kill_tree(xa);
 }
 
+static void *load_creator(void *ptr)
+{
+       /* 'order' is set up to ensure we have sibling entries */
+       unsigned int order;
+       struct radix_tree_root *tree = ptr;
+       int i;
+
+       rcu_register_thread();
+       item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0);
+       item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0);
+       for (i = 0; i < 10000; i++) {
+               for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) {
+                       unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
+                                               (1 << order);
+                       item_insert_order(tree, index, order);
+                       item_delete_rcu(tree, index);
+               }
+       }
+       rcu_unregister_thread();
+
+       stop_iteration = true;
+       return NULL;
+}
+
+static void *load_worker(void *ptr)
+{
+       unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1;
+
+       rcu_register_thread();
+       while (!stop_iteration) {
+               struct item *item = xa_load(ptr, index);
+               assert(!xa_is_internal(item));
+       }
+       rcu_unregister_thread();
+
+       return NULL;
+}
+
+static void load_race(struct xarray *xa)
+{
+       const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+       pthread_t worker_thread[num_threads];
+       int i;
+
+       stop_iteration = false;
+       pthread_create(&worker_thread[0], NULL, &load_creator, xa);
+       for (i = 1; i < num_threads; i++)
+               pthread_create(&worker_thread[i], NULL, &load_worker, xa);
+
+       for (i = 0; i < num_threads; i++)
+               pthread_join(worker_thread[i], NULL);
+
+       item_kill_tree(xa);
+}
+
 static DEFINE_XARRAY(array);
 
 void multiorder_checks(void)
@@ -218,12 +274,20 @@ void multiorder_checks(void)
        multiorder_iteration(&array);
        multiorder_tagged_iteration(&array);
        multiorder_iteration_race(&array);
+       load_race(&array);
 
        radix_tree_cpu_dead(0);
 }
 
-int __weak main(void)
+int __weak main(int argc, char **argv)
 {
+       int opt;
+
+       while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+               if (opt == 'v')
+                       test_verbose++;
+       }
+
        rcu_register_thread();
        radix_tree_init();
        multiorder_checks();
index 83d5655..2515943 100644 (file)
@@ -113,7 +113,7 @@ static bool supports_filesystem(const char *const filesystem)
 {
        char str[32];
        int len;
-       bool res;
+       bool res = true;
        FILE *const inf = fopen("/proc/filesystems", "r");
 
        /*
@@ -125,14 +125,16 @@ static bool supports_filesystem(const char *const filesystem)
 
        /* filesystem can be null for bind mounts. */
        if (!filesystem)
-               return true;
+               goto out;
 
        len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
        if (len >= sizeof(str))
                /* Ignores too-long filesystem names. */
-               return true;
+               goto out;
 
        res = fgrep(inf, str);
+
+out:
        fclose(inf);
        return res;
 }