arch: riscv: Copy arch specific adaptations from vendor kernel 25/316925/3
author    Michal Wilczynski <m.wilczynski@samsung.com>
Wed, 28 Aug 2024 15:21:57 +0000 (17:21 +0200)
committer Michal Wilczynski <m.wilczynski@samsung.com>
Fri, 30 Aug 2024 16:07:24 +0000 (18:07 +0200)
The vendor kernel port for 6.6 contains a number of board-specific
adaptations in arch/riscv. Copy them as-is from the vendor port [1].
Omit the dts and Makefile changes, as they are not relevant. Without
this commit the DRM and GPU drivers work, but there are intermittent
memory management oopses:

[  825.938962] Unable to handle kernel paging request at virtual address ffffffd864f823d0
[ snip ]
[  826.059411] status: 0000000200000100 badaddr: ffffffd864f823d0 cause: 000000000000000d
[  826.067402] [<ffffffff801d7166>] __kmem_cache_alloc_node+0xd4/0x482
[  826.073741] [<ffffffff8018e32a>] __kmalloc+0x38/0x1de
[  826.078849] [<ffffffff80280738>] ext4_find_extent+0x2fc/0x360
[  826.084656] [<ffffffff80283b60>] ext4_ext_map_blocks+0x72/0x15d2
[  826.090728] [<ffffffff80296114>] ext4_map_blocks+0x14e/0x50c
[  826.096457] [<ffffffff80285fa8>] ext4_convert_unwritten_extents+0xe0/0x1a2
[  826.103391] [<ffffffff802860ba>] ext4_convert_unwritten_io_end_vec+0x50/0xb2
[  826.110511] [<ffffffff802b36ca>] ext4_end_io_rsv_work+0xc8/0x15e
[  826.116580] [<ffffffff8002d7e0>] process_one_work+0x110/0x2d2
[  826.122385] [<ffffffff8002dc48>] worker_thread+0x2a6/0x37c
[  826.127941] [<ffffffff800343d2>] kthread+0xd0/0xec
[  826.132774] [<ffffffff810635ae>] ret_from_fork+0xe/0x1c

[1] - https://github.com/BPI-SINOVOIP/pi-linux/tree/linux-6.6.36-k1

Change-Id: Iecee643e417c850ebca0c07e1d2da9c44903e470
Signed-off-by: Michal Wilczynski <m.wilczynski@samsung.com>
101 files changed:
arch/riscv/Kconfig
arch/riscv/Kconfig.socs
arch/riscv/Makefile
arch/riscv/generic/Image.its.S [new file with mode: 0644]
arch/riscv/generic/Platform [new file with mode: 0644]
arch/riscv/include/asm/asm-prototypes.h
arch/riscv/include/asm/asm.h
arch/riscv/include/asm/atomic.h
arch/riscv/include/asm/barrier.h
arch/riscv/include/asm/cmpxchg.h
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/elf.h
arch/riscv/include/asm/entry-common.h
arch/riscv/include/asm/fence.h
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/hwprobe.h
arch/riscv/include/asm/insn-def.h
arch/riscv/include/asm/io.h
arch/riscv/include/asm/irq_stack.h
arch/riscv/include/asm/mmio.h
arch/riscv/include/asm/mmiowb.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/sbi.h
arch/riscv/include/asm/scs.h [new file with mode: 0644]
arch/riscv/include/asm/simd.h [new file with mode: 0644]
arch/riscv/include/asm/switch_to.h
arch/riscv/include/asm/thread_info.h
arch/riscv/include/asm/vector.h
arch/riscv/include/asm/xor.h [new file with mode: 0644]
arch/riscv/include/uapi/asm/elf.h
arch/riscv/include/uapi/asm/hwprobe.h
arch/riscv/include/uapi/asm/setup.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/asm-offsets.c
arch/riscv/kernel/compat_vdso/Makefile
arch/riscv/kernel/copy-unaligned.S
arch/riscv/kernel/cpu-hotplug.c
arch/riscv/kernel/cpu.c
arch/riscv/kernel/cpu_ops_sbi.c
arch/riscv/kernel/cpu_ops_spinwait.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/fpu.S
arch/riscv/kernel/head.S
arch/riscv/kernel/hibernate-asm.S
arch/riscv/kernel/irq.c
arch/riscv/kernel/kernel_mode_vector.c [new file with mode: 0644]
arch/riscv/kernel/kexec_relocate.S
arch/riscv/kernel/mcount-dyn.S
arch/riscv/kernel/mcount.S
arch/riscv/kernel/module.c
arch/riscv/kernel/probes/ftrace.c
arch/riscv/kernel/probes/rethook_trampoline.S
arch/riscv/kernel/process.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/sbi-ipi.c
arch/riscv/kernel/sbi.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/stacktrace.c
arch/riscv/kernel/suspend.c
arch/riscv/kernel/suspend_entry.S
arch/riscv/kernel/sys_hwprobe.c [new file with mode: 0644]
arch/riscv/kernel/sys_riscv.c
arch/riscv/kernel/traps.c
arch/riscv/kernel/traps_misaligned.c
arch/riscv/kernel/unaligned_access_speed.c [new file with mode: 0644]
arch/riscv/kernel/vdso/Makefile
arch/riscv/kernel/vdso/flush_icache.S
arch/riscv/kernel/vdso/getcpu.S
arch/riscv/kernel/vdso/hwprobe.c
arch/riscv/kernel/vdso/rt_sigreturn.S
arch/riscv/kernel/vdso/sys_hwprobe.S
arch/riscv/kernel/vector.c
arch/riscv/kvm/aia.c
arch/riscv/kvm/aia_device.c
arch/riscv/kvm/main.c
arch/riscv/kvm/tlb.c
arch/riscv/kvm/vcpu_fp.c
arch/riscv/kvm/vcpu_onereg.c
arch/riscv/kvm/vcpu_vector.c
arch/riscv/lib/Makefile
arch/riscv/lib/clear_page.S
arch/riscv/lib/memcpy.S
arch/riscv/lib/memmove.S
arch/riscv/lib/memset.S
arch/riscv/lib/riscv_v_helpers.c [new file with mode: 0644]
arch/riscv/lib/tishift.S
arch/riscv/lib/uaccess.S
arch/riscv/lib/uaccess_vector.S [new file with mode: 0644]
arch/riscv/lib/xor.S [new file with mode: 0644]
arch/riscv/mm/init.c
arch/riscv/mm/pageattr.c
arch/riscv/net/bpf_jit_comp64.c
arch/riscv/purgatory/Makefile
arch/riscv/purgatory/entry.S
drivers/soc/spacemit/jpu/jpu.c
include/linux/cpuhotplug.h

index c785a0200573817ee89024be8316aa818e35a96a..5b08f157733c35288d0ff118192ca97f00f48413 100644 (file)
@@ -48,6 +48,7 @@ config RISCV
        select ARCH_SUPPORTS_HUGETLBFS if MMU
        select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
        select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
+       select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
        select ARCH_USE_MEMTEST
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USES_CFI_TRAPS if CFI_CLANG
@@ -119,7 +120,7 @@ config RISCV
        select HAVE_EBPF_JIT if MMU
        select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_FUNCTION_ERROR_INJECTION
-       select HAVE_GCC_PLUGINS
+       select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO if MMU && 64BIT
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_KPROBES if !XIP_KERNEL
@@ -161,7 +162,8 @@ config RISCV
        select THREAD_INFO_IN_TASK
        select TRACE_IRQFLAGS_SUPPORT
        select UACCESS_MEMCPY if !MMU
-       select ZONE_DMA32 if 64BIT
+       select ZONE_DMA32 if 64BIT && !ARCH_SPACEMIT_K1PRO
+       select ARCH_SUSPEND_POSSIBLE
 
 config CLANG_SUPPORTS_DYNAMIC_FTRACE
        def_bool CC_IS_CLANG
@@ -174,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE
        def_bool CC_IS_GCC
        depends on $(cc-option,-fpatchable-function-entry=8)
 
+config HAVE_SHADOW_CALL_STACK
+       def_bool $(cc-option,-fsanitize=shadow-call-stack)
+       # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
+       depends on $(ld-option,--no-relax-gp)
+
 config ARCH_MMAP_RND_BITS_MIN
        default 18 if 64BIT
        default 8
@@ -220,6 +227,19 @@ config KASAN_SHADOW_OFFSET
        default 0xdfffffff00000000 if 64BIT
        default 0xffffffff if 32BIT
 
+config ARCH_FORCE_MAX_ORDER
+       int "Order of maximal physically contiguous allocations"
+       default "11"
+       help
+         The kernel page allocator limits the size of maximal physically
+         contiguous allocations. The limit is called MAX_ORDER and it
+         defines the maximal power of two of number of pages that can be
+         allocated as a single contiguous block. This option allows
+         overriding the default setting when ability to allocate very
+         large blocks of physically contiguous memory is required.
+
+         Don't change if unsure.
+
 config ARCH_FLATMEM_ENABLE
        def_bool !NUMA
 
@@ -517,6 +537,41 @@ config RISCV_ISA_V_DEFAULT_ENABLE
 
          If you don't know what to do here, say Y.
 
+config RISCV_ISA_V_UCOPY_THRESHOLD
+       int "Threshold size for vectorized user copies"
+       depends on RISCV_ISA_V
+       default 768
+       help
+         Prefer using vectorized copy_to_user()/copy_from_user() when the
+         workload size exceeds this value.
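          For illustration, this threshold is consumed by the new vector
          usercopy helpers (arch/riscv/lib/riscv_v_helpers.c in the file
          list). A minimal sketch of the intended dispatch follows; the
          fallback_scalar_usercopy() name and the wrapper are assumptions,
          not the exact symbols in that file:

              /* Sketch only: take the Vector path for large user copies,
               * otherwise fall back to the scalar routine (assumed name). */
              asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
              asmlinkage int fallback_scalar_usercopy(void *dst, void *src, size_t n);

              static int do_usercopy(void *dst, void *src, size_t n)
              {
                      if (IS_ENABLED(CONFIG_RISCV_ISA_V) && may_use_simd() &&
                          n >= CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD)
                              return enter_vector_usercopy(dst, src, n);
                      return fallback_scalar_usercopy(dst, src, n);
              }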
+
+config RISCV_ISA_V_PREEMPTIVE
+       bool "Run kernel-mode Vector with kernel preemption"
+       depends on PREEMPTION
+       depends on RISCV_ISA_V
+       default y
+       help
+         Usually, in-kernel SIMD routines are run with preemption disabled.
+         Functions which invoke long-running SIMD thus must yield the core's
+         vector unit to prevent blocking other tasks for too long.
+
+         This config allows the kernel to run SIMD without explicitly disabling
+         preemption. Enabling this config will result in higher memory
+         consumption due to the allocation of a per-task kernel Vector context.
+
+config RISCV_ISA_ZAWRS
+       bool "Zawrs extension support for more efficient busy waiting"
+       depends on RISCV_ALTERNATIVE
+       default y
+       help
+         The Zawrs extension defines instructions to be used in polling loops
+         which allow a hart to enter a low-power state or to trap to the
+         hypervisor while waiting on a store to a memory location. Enable the
+         use of these instructions in the kernel when the Zawrs extension is
+         detected at boot.
+
+         If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZBB
        bool
        default y
@@ -634,6 +689,62 @@ config THREAD_SIZE_ORDER
          Specify the Pages of thread stack size (from 4KB to 64KB), which also
          affects irq stack size, which is equal to thread stack size.
 
+config RISCV_MISALIGNED
+       bool
+       select SYSCTL_ARCH_UNALIGN_ALLOW
+       help
+         Embed support for emulating misaligned loads and stores.
+
+choice
+       prompt "Unaligned Accesses Support"
+       default RISCV_PROBE_UNALIGNED_ACCESS
+       help
+         This determines the level of support for unaligned accesses. This
+         information is used by the kernel to perform optimizations. It is also
+         exposed to user space via the hwprobe syscall. The hardware will be
+         probed at boot by default.
+
+config RISCV_PROBE_UNALIGNED_ACCESS
+       bool "Probe for hardware unaligned access support"
+       select RISCV_MISALIGNED
+       help
+         During boot, the kernel will run a series of tests to determine the
+         speed of unaligned accesses. This probing will dynamically determine
+         the speed of unaligned accesses on the underlying system. If unaligned
+         memory accesses trap into the kernel as they are not supported by the
+         system, the kernel will emulate the unaligned accesses to preserve the
+         UABI.
+
+config RISCV_EMULATED_UNALIGNED_ACCESS
+       bool "Emulate unaligned access where system support is missing"
+       select RISCV_MISALIGNED
+       help
+         If unaligned memory accesses trap into the kernel as they are not
+         supported by the system, the kernel will emulate the unaligned
+         accesses to preserve the UABI. When the underlying system does support
+         unaligned accesses, the unaligned accesses are assumed to be slow.
+
+config RISCV_SLOW_UNALIGNED_ACCESS
+       bool "Assume the system supports slow unaligned memory accesses"
+       depends on NONPORTABLE
+       help
+         Assume that the system supports slow unaligned memory accesses. The
+         kernel and userspace programs may not be able to run at all on systems
+         that do not support unaligned memory accesses.
+
+config RISCV_EFFICIENT_UNALIGNED_ACCESS
+       bool "Assume the system supports fast unaligned memory accesses"
+       depends on NONPORTABLE
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS
+       help
+         Assume that the system supports fast unaligned memory accesses. When
+         enabled, this option improves the performance of the kernel on such
+         systems. However, the kernel and userspace programs will run much more
+         slowly, or will not be able to run at all, on systems that do not
+         support efficient unaligned memory accesses.
+
+endchoice
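  As the choice help above notes, the probed unaligned-access speed is
  exposed to user space via the hwprobe syscall. A minimal userspace sketch
  of such a query, assuming recent uapi headers (struct riscv_hwprobe and the
  RISCV_HWPROBE_* macros from asm/hwprobe.h in this series) and the raw
  syscall(2) interface:

      #include <stdio.h>
      #include <unistd.h>
      #include <sys/syscall.h>
      #include <asm/hwprobe.h>        /* struct riscv_hwprobe, RISCV_HWPROBE_* */

      int main(void)
      {
              struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

              /* one key/value pair, no cpuset (query all CPUs), no flags */
              if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                      return 1;

              if ((pair.value & RISCV_HWPROBE_MISALIGNED_MASK) ==
                  RISCV_HWPROBE_MISALIGNED_FAST)
                      printf("unaligned accesses: fast\n");
              else
                      printf("unaligned accesses: slow or emulated\n");
              return 0;
      }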
+
 endmenu # "Platform type"
 
 menu "Kernel features"
@@ -896,6 +1007,18 @@ config PORTABLE
        select MMU
        select OF
 
+config ARCH_SUSPEND_POSSIBLE
+       depends on ARCH_SPACEMIT
+       def_bool y
+
+config IMAGE_LOAD_OFFSET
+       hex "Image load offset from start of RAM when loading the kernel to RAM"
+       default 0x400000 if 32BIT
+       default 0x200000 if 64BIT
+       help
+         This is the offset from the start of RAM. The bootloader will use
+         this offset when loading the kernel image into RAM.
+
 menu "Power management options"
 
 source "kernel/power/Kconfig"
index dbd14f69a968422b8b72653c187c24a15cc35e0c..47aa6589015d3e8109c76a5870610357563c8001 100644 (file)
@@ -22,17 +22,6 @@ config SOC_SIFIVE
        help
          This enables support for SiFive SoC platform hardware.
 
-config ARCH_SPACEMIT
-       bool "SpacemiT SoCs"
-       help
-         This enables support for SpacemiT SoC platform hardware.
-
-config ARCH_SPACEMIT_K1X
-       bool "SpacemiT SoCs"
-       help
-         This enables support for SpacemiT SoC platform hardware,
-         with K1-X chip.
-
 config ARCH_STARFIVE
        def_bool SOC_STARFIVE
 
@@ -122,4 +111,64 @@ config SOC_CANAAN_K210_DTB_SOURCE
 
 endif # ARCH_CANAAN
 
+config ARCH_SPACEMIT
+       bool "Spacemit SoCs"
+       select SIFIVE_PLIC
+       help
+         This enables support for Spacemit SoC platform hardware.
+
+if ARCH_SPACEMIT
+
+choice
+       prompt "Spacemit SoC platform"
+       help
+         Select the Spacemit SoC platform.
+
+       config ARCH_SPACEMIT_K1
+               bool "k1"
+               help
+                 Select the Spacemit K1 platform SoCs.
+
+       config ARCH_SPACEMIT_K2
+               bool "k2"
+               help
+                 Select the Spacemit K2 platform SoCs.
+
+endchoice
+
+if ARCH_SPACEMIT_K1
+
+choice
+       prompt "Spacemit K1 series SoCs"
+       help
+         Select the Spacemit K1 series SoC platform.
+
+       config ARCH_SPACEMIT_K1PRO
+               bool "k1-pro"
+               select DW_APB_TIMER_OF
+               help
+                 This enables support for the Spacemit k1-pro platform hardware.
+
+       config ARCH_SPACEMIT_K1X
+               bool "k1-x"
+               help
+                 This enables support for the Spacemit k1-x platform hardware.
+endchoice
+
+config ARCH_SPACEMIT_K1_FPGA
+       bool "Spacemit K1 series SoC FPGA platform"
+       default n
+       help
+         This enables the FPGA platform for K1 series SoCs.
+
+endif
+
+config BIND_THREAD_TO_AICORES
+       bool "Bind threads to AI cores when using AI instructions"
+       default y
+       help
+         This enables binding threads to AI cores when AI instructions are used.
+
+endif
+
 endmenu # "SoC selection"
index b43a6bb7e4dcb6984edcd45f4adbcec4250aa77b..ebbe02628a276d38bf1b6d8cf54f06c727839ebc 100644 (file)
@@ -54,6 +54,10 @@ endif
 endif
 endif
 
+ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
+       KBUILD_LDFLAGS += --no-relax-gp
+endif
+
 # ISA string setting
 riscv-march-$(CONFIG_ARCH_RV32I)       := rv32ima
 riscv-march-$(CONFIG_ARCH_RV64I)       := rv64ima
@@ -104,7 +108,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
 # unaligned accesses.  While unaligned accesses are explicitly allowed in the
 # RISC-V ISA, they're emulated by machine mode traps on all extant
 # architectures.  It's faster to have GCC emit only aligned accesses.
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+endif
 
 ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
 prepare: stack_protector_prepare
@@ -130,12 +136,6 @@ endif
 libs-y += arch/riscv/lib/
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
-PHONY += vdso_install
-vdso_install:
-       $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
-       $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
-               $(build)=arch/riscv/kernel/compat_vdso compat_$@)
-
 ifeq ($(KBUILD_EXTMOD),)
 ifeq ($(CONFIG_MMU),y)
 prepare: vdso_prepare
@@ -147,6 +147,9 @@ vdso_prepare: prepare0
 endif
 endif
 
+vdso-install-y                 += arch/riscv/kernel/vdso/vdso.so.dbg
+vdso-install-$(CONFIG_COMPAT)  += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so
+
 ifneq ($(CONFIG_XIP_KERNEL),y)
 ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy)
 KBUILD_IMAGE := $(boot)/loader.bin
diff --git a/arch/riscv/generic/Image.its.S b/arch/riscv/generic/Image.its.S
new file mode 100644 (file)
index 0000000..7752570
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/dts-v1/;
+
+/ {
+       description = KERNEL_NAME;
+       #address-cells = <ADDR_CELLS>;
+
+       images {
+               kernel {
+                       description = KERNEL_NAME;
+                       data = /incbin/(IMAGE_BINARY);
+                       type = "kernel";
+                       arch = "riscv";
+                       os = "linux";
+                       compression = IMAGE_COMPRESSION;
+                       load = /bits/ ADDR_BITS <IMAGE_LOAD_ADDRESS>;
+                       entry = /bits/ ADDR_BITS <IMAGE_ENTRY_ADDRESS>;
+                       hash {
+                               algo = IMAGE_CHECK_ALGORITHM;
+                       };
+               };
+       };
+
+       configurations {
+               default = "conf-default";
+
+               conf-default {
+                       description = "Generic Linux kernel";
+                       kernel = "kernel";
+               };
+       };
+};
diff --git a/arch/riscv/generic/Platform b/arch/riscv/generic/Platform
new file mode 100644 (file)
index 0000000..6247acb
--- /dev/null
@@ -0,0 +1,15 @@
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2024 Spacemit
+#
+# This software is licensed under the terms of the GNU General Public
+# License version 2, as published by the Free Software Foundation, and
+# may be copied, distributed, and modified under those terms.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+its-y  := Image.its.S
index 36b955c762ba08e92ca0441ee8fbae9219c1f2fa..cd627ec289f163a630b73dd03dd52a6b28692997 100644 (file)
@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __ashlti3(long long a, int b);
 
+#ifdef CONFIG_RISCV_ISA_V
+
+#ifdef CONFIG_MMU
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+#endif /* CONFIG_MMU  */
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4,
+                const unsigned long *__restrict p5);
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+#endif /* CONFIG_RISCV_ISA_V */
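  The xor_regs_*_() assembly routines declared above are the Vector XOR
  kernels used by the new asm/xor.h (see the file list). A hedged sketch of
  how such a routine is expected to be wrapped, entering kernel-mode Vector
  around the call:

      /* Sketch only: run the Vector XOR kernel inside a kernel-mode Vector
       * region; kernel_vector_begin()/end() come from kernel_mode_vector.c. */
      static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
                               const unsigned long *__restrict p2)
      {
              kernel_vector_begin();
              xor_regs_2_(bytes, p1, p2);
              kernel_vector_end();
      }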
 
 #define DECLARE_DO_ERROR_INFO(name)    asmlinkage void name(struct pt_regs *regs)
 
index bfb4c26f113c4bb1374163b1cd3e8763a542b0cc..b0487b39e6747ae384fb51f310bf784759825240 100644 (file)
 .endm
 #endif /* CONFIG_SMP */
 
+.macro load_per_cpu dst ptr tmp
+       asm_per_cpu \dst \ptr \tmp
+       REG_L \dst, 0(\dst)
+.endm
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+/* gp is used as the shadow call stack pointer instead */
+.macro load_global_pointer
+.endm
+#else
+/* load __global_pointer to gp */
+.macro load_global_pointer
+.option push
+.option norelax
+       la gp, __global_pointer$
+.option pop
+.endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
        /* save all GPs except x1 ~ x5 */
        .macro save_from_x6_to_x31
        REG_S x6,  PT_T1(sp)
index f5dfef6c2153f189288d3a4f3fa7a1d5d1b5021b..0e0522e588ca6e5378589e5c443c37e97cc5fa53 100644 (file)
@@ -17,7 +17,6 @@
 #endif
 
 #include <asm/cmpxchg.h>
-#include <asm/barrier.h>
 
 #define __atomic_acquire_fence()                                       \
        __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
@@ -207,7 +206,7 @@ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int
                "       add      %[rc], %[p], %[a]\n"
                "       sc.w.rl  %[rc], %[rc], %[c]\n"
                "       bnez     %[rc], 0b\n"
-               "       fence    rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                : [a]"r" (a), [u]"r" (u)
@@ -228,7 +227,7 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
                "       add      %[rc], %[p], %[a]\n"
                "       sc.d.rl  %[rc], %[rc], %[c]\n"
                "       bnez     %[rc], 0b\n"
-               "       fence    rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                : [a]"r" (a), [u]"r" (u)
@@ -248,7 +247,7 @@ static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
                "       addi      %[rc], %[p], 1\n"
                "       sc.w.rl   %[rc], %[rc], %[c]\n"
                "       bnez      %[rc], 0b\n"
-               "       fence     rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
@@ -268,7 +267,7 @@ static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
                "       addi      %[rc], %[p], -1\n"
                "       sc.w.rl   %[rc], %[rc], %[c]\n"
                "       bnez      %[rc], 0b\n"
-               "       fence     rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
@@ -288,7 +287,7 @@ static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
                "       bltz     %[rc], 1f\n"
                "       sc.w.rl  %[rc], %[rc], %[c]\n"
                "       bnez     %[rc], 0b\n"
-               "       fence    rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
@@ -310,7 +309,7 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
                "       addi      %[rc], %[p], 1\n"
                "       sc.d.rl   %[rc], %[rc], %[c]\n"
                "       bnez      %[rc], 0b\n"
-               "       fence     rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
@@ -331,7 +330,7 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
                "       addi      %[rc], %[p], -1\n"
                "       sc.d.rl   %[rc], %[rc], %[c]\n"
                "       bnez      %[rc], 0b\n"
-               "       fence     rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
@@ -352,7 +351,7 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
                "       bltz     %[rc], 1f\n"
                "       sc.d.rl  %[rc], %[rc], %[c]\n"
                "       bnez     %[rc], 0b\n"
-               "       fence    rw, rw\n"
+               RISCV_FULL_BARRIER
                "1:\n"
                : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
                :
index 110752594228e23125bd7a16b4c0469cbfcda4f5..e1d9bf1deca6852e962816af066c13f29c734231 100644 (file)
 #define _ASM_RISCV_BARRIER_H
 
 #ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
+#include <asm/fence.h>
 
 #define nop()          __asm__ __volatile__ ("nop")
 #define __nops(n)      ".rept  " #n "\nnop\n.endr\n"
 #define nops(n)                __asm__ __volatile__ (__nops(n))
 
-#define RISCV_FENCE(p, s) \
-       __asm__ __volatile__ ("fence " #p "," #s : : : "memory")
 
 /* These barriers need to enforce ordering on both devices or memory. */
-#define mb()           RISCV_FENCE(iorw,iorw)
-#define rmb()          RISCV_FENCE(ir,ir)
-#define wmb()          RISCV_FENCE(ow,ow)
+#define __mb()         RISCV_FENCE(iorw, iorw)
+#define __rmb()                RISCV_FENCE(ir, ir)
+#define __wmb()                RISCV_FENCE(ow, ow)
 
 /* These barriers do not need to enforce ordering on devices, just memory. */
-#define __smp_mb()     RISCV_FENCE(rw,rw)
-#define __smp_rmb()    RISCV_FENCE(r,r)
-#define __smp_wmb()    RISCV_FENCE(w,w)
-
-#define __smp_store_release(p, v)                                      \
-do {                                                                   \
-       compiletime_assert_atomic_type(*p);                             \
-       RISCV_FENCE(rw,w);                                              \
-       WRITE_ONCE(*p, v);                                              \
-} while (0)
-
-#define __smp_load_acquire(p)                                          \
-({                                                                     \
-       typeof(*p) ___p1 = READ_ONCE(*p);                               \
-       compiletime_assert_atomic_type(*p);                             \
-       RISCV_FENCE(r,rw);                                              \
-       ___p1;                                                          \
-})
+#define __smp_mb()     RISCV_FENCE(rw, rw)
+#define __smp_rmb()    RISCV_FENCE(r, r)
+#define __smp_wmb()    RISCV_FENCE(w, w)
 
 /*
  * This is a very specific barrier: it's currently only used in two places in
@@ -69,7 +54,36 @@ do {                                                                 \
  * instances the scheduler pairs this with an mb(), so nothing is necessary on
  * the new hart.
  */
-#define smp_mb__after_spinlock()       RISCV_FENCE(iorw,iorw)
+#define smp_mb__after_spinlock()       RISCV_FENCE(iorw, iorw)
+
+#define __smp_store_release(p, v)                                      \
+do {                                                                   \
+       compiletime_assert_atomic_type(*p);                             \
+       RISCV_FENCE(rw, w);                                             \
+       WRITE_ONCE(*p, v);                                              \
+} while (0)
+
+#define __smp_load_acquire(p)                                          \
+({                                                                     \
+       typeof(*p) ___p1 = READ_ONCE(*p);                               \
+       compiletime_assert_atomic_type(*p);                             \
+       RISCV_FENCE(r, rw);                                             \
+       ___p1;                                                          \
+})
+
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+#define smp_cond_load_relaxed(ptr, cond_expr) ({                       \
+       typeof(ptr) __PTR = (ptr);                                      \
+       __unqual_scalar_typeof(*ptr) VAL;                               \
+       for (;;) {                                                      \
+               VAL = READ_ONCE(*__PTR);                                \
+               if (cond_expr)                                          \
+                       break;                                          \
+               __cmpwait_relaxed(ptr, VAL);                            \
+       }                                                               \
+       (typeof(*ptr))VAL;                                              \
+})
+#endif
 
 #include <asm-generic/barrier.h>
 
index 2f4726d3cfcc25a805adacc0b0e85c4ff8bcc220..725276dcb99602e8f90542a793f3f1a2d1e55ba4 100644 (file)
@@ -8,8 +8,10 @@
 
 #include <linux/bug.h>
 
-#include <asm/barrier.h>
+#include <asm/alternative-macros.h>
 #include <asm/fence.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
 
 #define __xchg_relaxed(ptr, new, size)                                 \
 ({                                                                     \
                        "       bne  %0, %z3, 1f\n"                     \
                        "       sc.w.rl %1, %z4, %2\n"                  \
                        "       bnez %1, 0b\n"                          \
-                       "       fence rw, rw\n"                         \
+                       RISCV_FULL_BARRIER                              \
                        "1:\n"                                          \
                        : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
                        : "rJ" ((long)__old), "rJ" (__new)              \
                        "       bne %0, %z3, 1f\n"                      \
                        "       sc.d.rl %1, %z4, %2\n"                  \
                        "       bnez %1, 0b\n"                          \
-                       "       fence rw, rw\n"                         \
+                       RISCV_FULL_BARRIER                              \
                        "1:\n"                                          \
                        : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)    \
                        : "rJ" (__old), "rJ" (__new)                    \
        arch_cmpxchg_relaxed((ptr), (o), (n));                          \
 })
 
+#ifdef CONFIG_RISCV_ISA_ZAWRS
+/*
+ * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
+ * @val we expect it to still terminate within a "reasonable" amount of time
+ * for an implementation-specific other reason, a pending, locally-enabled
+ * interrupt, or because it has been configured to raise an illegal
+ * instruction exception.
+ */
+static __always_inline void __cmpwait(volatile void *ptr,
+                                     unsigned long val,
+                                     int size)
+{
+       unsigned long tmp;
+
+       asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
+                            0, RISCV_ISA_EXT_ZAWRS, 1)
+                : : : : no_zawrs);
+
+       switch (size) {
+       case 4:
+               asm volatile(
+               "       lr.w    %0, %1\n"
+               "       xor     %0, %0, %2\n"
+               "       bnez    %0, 1f\n"
+                       ZAWRS_WRS_NTO "\n"
+               "1:"
+               : "=&r" (tmp), "+A" (*(u32 *)ptr)
+               : "r" (val));
+               break;
+#if __riscv_xlen == 64
+       case 8:
+               asm volatile(
+               "       lr.d    %0, %1\n"
+               "       xor     %0, %0, %2\n"
+               "       bnez    %0, 1f\n"
+                       ZAWRS_WRS_NTO "\n"
+               "1:"
+               : "=&r" (tmp), "+A" (*(u64 *)ptr)
+               : "r" (val));
+               break;
+#endif
+       default:
+               BUILD_BUG();
+       }
+
+       return;
+
+no_zawrs:
+       asm volatile(RISCV_PAUSE : : : "memory");
+}
+
+#define __cmpwait_relaxed(ptr, val) \
+       __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+#endif
+
 #endif /* _ASM_RISCV_CMPXCHG_H */
index d0345bd659c94f11f349e6d92b9acd6b178218f6..3b85bba4c7011c2f91ce28ec5f2fcffc6cd57a1b 100644 (file)
@@ -7,7 +7,10 @@
 #define _ASM_CPUFEATURE_H
 
 #include <linux/bitmap.h>
+#include <linux/jump_label.h>
 #include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <asm/errno.h>
 
 /*
  * These are probed via a device_initcall(), via either the SBI or directly
@@ -25,11 +28,118 @@ struct riscv_isainfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
-DECLARE_PER_CPU(long, misaligned_access_speed);
-
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
-void check_unaligned_access(int cpu);
+void riscv_user_isa_enable(void);
+
+#if defined(CONFIG_RISCV_MISALIGNED)
+bool check_unaligned_access_emulated_all_cpus(void);
+void unaligned_emulation_finish(void);
+bool unaligned_ctl_available(void);
+DECLARE_PER_CPU(long, misaligned_access_speed);
+#else
+static inline bool unaligned_ctl_available(void)
+{
+       return false;
+}
+#endif
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
+DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
+static __always_inline bool has_fast_unaligned_accesses(void)
+{
+       return static_branch_likely(&fast_unaligned_access_speed_key);
+}
+#else
+static __always_inline bool has_fast_unaligned_accesses(void)
+{
+       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+               return true;
+       else
+               return false;
+}
+#endif
+unsigned long riscv_get_elf_hwcap(void);
+
+struct riscv_isa_ext_data {
+       const unsigned int id;
+       const char *name;
+       const char *property;
+       const unsigned int *subset_ext_ids;
+       const unsigned int subset_ext_size;
+       int (*validate)(const struct riscv_isa_ext_data *data, const unsigned long *isa_bitmap);
+};
+
+extern const struct riscv_isa_ext_data riscv_isa_ext[];
+extern const size_t riscv_isa_ext_count;
+extern bool riscv_isa_fallback;
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
+static __always_inline bool
+riscv_has_extension_likely(const unsigned long ext)
+{
+       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
+                          "ext must be < RISCV_ISA_EXT_MAX");
+
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               asm goto(
+               ALTERNATIVE("j  %l[l_no]", "nop", 0, %[ext], 1)
+               :
+               : [ext] "i" (ext)
+               :
+               : l_no);
+       } else {
+               if (!__riscv_isa_extension_available(NULL, ext))
+                       goto l_no;
+       }
+
+       return true;
+l_no:
+       return false;
+}
+
+static __always_inline bool
+riscv_has_extension_unlikely(const unsigned long ext)
+{
+       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
+                          "ext must be < RISCV_ISA_EXT_MAX");
+
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               asm goto(
+               ALTERNATIVE("nop", "j   %l[l_yes]", 0, %[ext], 1)
+               :
+               : [ext] "i" (ext)
+               :
+               : l_yes);
+       } else {
+               if (__riscv_isa_extension_available(NULL, ext))
+                       goto l_yes;
+       }
+
+       return false;
+l_yes:
+       return true;
+}
+
+static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
+{
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
+               return true;
+
+       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
+
+static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
+{
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
+               return true;
+
+       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
 
 #endif
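  riscv_has_extension_likely()/_unlikely() move here from hwcap.h; with
  CONFIG_RISCV_ALTERNATIVE the branch is patched at boot via ALTERNATIVE(),
  so the check costs a single nop on the expected path. An illustrative
  caller, with do_zbb_version()/do_generic_version() as placeholders:

      /* Illustrative only: pick an accelerated path when Zbb is present. */
      static void do_op(void)
      {
              if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))
                      do_zbb_version();       /* "j l_no" patched to a nop at boot */
              else
                      do_generic_version();
      }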
index 777cb8299551ca3a33147471f032d319cdbcfb35..4b61a033fd33df2d208f8f3b68aa1e4c93d9b22d 100644 (file)
 #define CSR_SIE                        0x104
 #define CSR_STVEC              0x105
 #define CSR_SCOUNTEREN         0x106
+#define CSR_SENVCFG            0x10a
 #define CSR_SSCRATCH           0x140
 #define CSR_SEPC               0x141
 #define CSR_SCAUSE             0x142
 # define CSR_STATUS    CSR_MSTATUS
 # define CSR_IE                CSR_MIE
 # define CSR_TVEC      CSR_MTVEC
+# define CSR_ENVCFG    CSR_MENVCFG
 # define CSR_SCRATCH   CSR_MSCRATCH
 # define CSR_EPC       CSR_MEPC
 # define CSR_CAUSE     CSR_MCAUSE
 # define CSR_STATUS    CSR_SSTATUS
 # define CSR_IE                CSR_SIE
 # define CSR_TVEC      CSR_STVEC
+# define CSR_ENVCFG    CSR_SENVCFG
 # define CSR_SCRATCH   CSR_SSCRATCH
 # define CSR_EPC       CSR_SEPC
 # define CSR_CAUSE     CSR_SCAUSE
index b3b2dfbdf945efa2adfbc1f022979986ace2b983..898027c8c1735dc6b2b88d77669af098e00b7568 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/auxvec.h>
 #include <asm/byteorder.h>
 #include <asm/cacheinfo.h>
+#include <asm/cpufeature.h>
 #include <asm/hwcap.h>
 
 /*
index 6e4dee49d84b985a5ab1bb59b35ecc39a94ed0de..2293e535f8659af02ef2e52ce1752827c415532e 100644 (file)
@@ -4,8 +4,39 @@
 #define _ASM_RISCV_ENTRY_COMMON_H
 
 #include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+#include <asm/vector.h>
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+                                                 unsigned long ti_work)
+{
+       if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
+               clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
+               /*
+                * We are already called with irq disabled, so go without
+                * keeping track of riscv_v_flags.
+                */
+               riscv_v_vstate_restore(&current->thread.vstate, regs);
+       }
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
 
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
 
+#ifdef CONFIG_RISCV_MISALIGNED
+int handle_misaligned_load(struct pt_regs *regs);
+int handle_misaligned_store(struct pt_regs *regs);
+#else
+static inline int handle_misaligned_load(struct pt_regs *regs)
+{
+       return -1;
+}
+static inline int handle_misaligned_store(struct pt_regs *regs)
+{
+       return -1;
+}
+#endif
+
 #endif /* _ASM_RISCV_ENTRY_COMMON_H */
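  handle_misaligned_load()/store() are wired into the trap path (traps.c and
  traps_misaligned.c in the file list); when emulation is not possible, the
  trap degrades to a SIGBUS. A simplified sketch of the caller side, not the
  exact code in traps.c:

      /* Simplified sketch: emulate the access if possible, otherwise raise
       * SIGBUS/BUS_ADRALN as for an unhandled misaligned trap. */
      static void misaligned_load_trap(struct pt_regs *regs)
      {
              if (handle_misaligned_load(regs))
                      do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
                                    "Oops - load address misaligned");
      }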
index 2b443a3a487f3cdc509b5f80c7c343065ae85ef7..6bcd80325dfc1724aee2e5863ba39881ebea120b 100644 (file)
@@ -1,12 +1,18 @@
 #ifndef _ASM_RISCV_FENCE_H
 #define _ASM_RISCV_FENCE_H
 
+#define RISCV_FENCE_ASM(p, s)          "\tfence " #p "," #s "\n"
+#define RISCV_FENCE(p, s) \
+       ({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); })
+
 #ifdef CONFIG_SMP
-#define RISCV_ACQUIRE_BARRIER          "\tfence r , rw\n"
-#define RISCV_RELEASE_BARRIER          "\tfence rw,  w\n"
+#define RISCV_ACQUIRE_BARRIER          RISCV_FENCE_ASM(r, rw)
+#define RISCV_RELEASE_BARRIER          RISCV_FENCE_ASM(rw, w)
+#define RISCV_FULL_BARRIER             RISCV_FENCE_ASM(rw, rw)
 #else
 #define RISCV_ACQUIRE_BARRIER
 #define RISCV_RELEASE_BARRIER
+#define RISCV_FULL_BARRIER
 #endif
 
 #endif /* _ASM_RISCV_FENCE_H */
index f4157034efa9cb778078dafe768783422ea1feb7..b18b202ca141a0907b988580d4dd49fa39fdbe8b 100644 (file)
@@ -8,25 +8,16 @@
 #ifndef _ASM_RISCV_HWCAP_H
 #define _ASM_RISCV_HWCAP_H
 
-#include <asm/alternative-macros.h>
-#include <asm/errno.h>
-#include <linux/bits.h>
 #include <uapi/asm/hwcap.h>
 
 #define RISCV_ISA_EXT_a                ('a' - 'a')
-#define RISCV_ISA_EXT_b                ('b' - 'a')
 #define RISCV_ISA_EXT_c                ('c' - 'a')
 #define RISCV_ISA_EXT_d                ('d' - 'a')
 #define RISCV_ISA_EXT_f                ('f' - 'a')
 #define RISCV_ISA_EXT_h                ('h' - 'a')
 #define RISCV_ISA_EXT_i                ('i' - 'a')
-#define RISCV_ISA_EXT_j                ('j' - 'a')
-#define RISCV_ISA_EXT_k                ('k' - 'a')
 #define RISCV_ISA_EXT_m                ('m' - 'a')
-#define RISCV_ISA_EXT_p                ('p' - 'a')
 #define RISCV_ISA_EXT_q                ('q' - 'a')
-#define RISCV_ISA_EXT_s                ('s' - 'a')
-#define RISCV_ISA_EXT_u                ('u' - 'a')
 #define RISCV_ISA_EXT_v                ('v' - 'a')
 
 /*
 #define RISCV_ISA_EXT_ZICSR            40
 #define RISCV_ISA_EXT_ZIFENCEI         41
 #define RISCV_ISA_EXT_ZIHPM            42
-
-#define RISCV_ISA_EXT_MAX              64
+#define RISCV_ISA_EXT_SMSTATEEN                43
+#define RISCV_ISA_EXT_ZICOND           44
+#define RISCV_ISA_EXT_ZBC              45
+#define RISCV_ISA_EXT_ZBKB             46
+#define RISCV_ISA_EXT_ZBKC             47
+#define RISCV_ISA_EXT_ZBKX             48
+#define RISCV_ISA_EXT_ZKND             49
+#define RISCV_ISA_EXT_ZKNE             50
+#define RISCV_ISA_EXT_ZKNH             51
+#define RISCV_ISA_EXT_ZKR              52
+#define RISCV_ISA_EXT_ZKSED            53
+#define RISCV_ISA_EXT_ZKSH             54
+#define RISCV_ISA_EXT_ZKT              55
+#define RISCV_ISA_EXT_ZVBB             56
+#define RISCV_ISA_EXT_ZVBC             57
+#define RISCV_ISA_EXT_ZVKB             58
+#define RISCV_ISA_EXT_ZVKG             59
+#define RISCV_ISA_EXT_ZVKNED           60
+#define RISCV_ISA_EXT_ZVKNHA           61
+#define RISCV_ISA_EXT_ZVKNHB           62
+#define RISCV_ISA_EXT_ZVKSED           63
+#define RISCV_ISA_EXT_ZVKSH            64
+#define RISCV_ISA_EXT_ZVKT             65
+#define RISCV_ISA_EXT_ZFH              66
+#define RISCV_ISA_EXT_ZFHMIN           67
+#define RISCV_ISA_EXT_ZIHINTNTL                68
+#define RISCV_ISA_EXT_ZVFH             69
+#define RISCV_ISA_EXT_ZVFHMIN          70
+#define RISCV_ISA_EXT_ZFA              71
+#define RISCV_ISA_EXT_ZTSO             72
+#define RISCV_ISA_EXT_ZACAS            73
+#define RISCV_ISA_EXT_XANDESPMU                74
+#define RISCV_ISA_EXT_ZVE32X           75
+#define RISCV_ISA_EXT_ZVE32F           76
+#define RISCV_ISA_EXT_ZVE64X           77
+#define RISCV_ISA_EXT_ZVE64F           78
+#define RISCV_ISA_EXT_ZVE64D           79
+#define RISCV_ISA_EXT_ZIMOP            80
+#define RISCV_ISA_EXT_ZCA              81
+#define RISCV_ISA_EXT_ZCB              82
+#define RISCV_ISA_EXT_ZCD              83
+#define RISCV_ISA_EXT_ZCF              84
+#define RISCV_ISA_EXT_ZCMOP            85
+#define RISCV_ISA_EXT_ZAWRS            86
+
+#define RISCV_ISA_EXT_XLINUXENVCFG     127
+
+#define RISCV_ISA_EXT_MAX              128
+#define RISCV_ISA_EXT_INVALID          U32_MAX
 
 #ifdef CONFIG_RISCV_M_MODE
 #define RISCV_ISA_EXT_SxAIA            RISCV_ISA_EXT_SMAIA
 #define RISCV_ISA_EXT_SxAIA            RISCV_ISA_EXT_SSAIA
 #endif
 
-#ifndef __ASSEMBLY__
-
-#include <linux/jump_label.h>
-
-unsigned long riscv_get_elf_hwcap(void);
-
-struct riscv_isa_ext_data {
-       const unsigned int id;
-       const char *name;
-       const char *property;
-};
-
-extern const struct riscv_isa_ext_data riscv_isa_ext[];
-extern const size_t riscv_isa_ext_count;
-extern bool riscv_isa_fallback;
-
-unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext) \
-       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
-static __always_inline bool
-riscv_has_extension_likely(const unsigned long ext)
-{
-       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
-                          "ext must be < RISCV_ISA_EXT_MAX");
-
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm goto(
-               ALTERNATIVE("j  %l[l_no]", "nop", 0, %[ext], 1)
-               :
-               : [ext] "i" (ext)
-               :
-               : l_no);
-       } else {
-               if (!__riscv_isa_extension_available(NULL, ext))
-                       goto l_no;
-       }
-
-       return true;
-l_no:
-       return false;
-}
-
-static __always_inline bool
-riscv_has_extension_unlikely(const unsigned long ext)
-{
-       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
-                          "ext must be < RISCV_ISA_EXT_MAX");
-
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm goto(
-               ALTERNATIVE("nop", "j   %l[l_yes]", 0, %[ext], 1)
-               :
-               : [ext] "i" (ext)
-               :
-               : l_yes);
-       } else {
-               if (__riscv_isa_extension_available(NULL, ext))
-                       goto l_yes;
-       }
-
-       return false;
-l_yes:
-       return true;
-}
-
-#endif
-
 #endif /* _ASM_RISCV_HWCAP_H */
index 7cad513538d8d08380eb9a6d4a65b411ddc0b05e..630507dff5ead30a368fcae56489c5ccb988e3e7 100644 (file)
@@ -8,11 +8,35 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 5
+#define RISCV_HWPROBE_MAX_KEY 6
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {
        return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
 }
 
+static inline bool hwprobe_key_is_bitmask(__s64 key)
+{
+       switch (key) {
+       case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+       case RISCV_HWPROBE_KEY_IMA_EXT_0:
+       case RISCV_HWPROBE_KEY_CPUPERF_0:
+               return true;
+       }
+
+       return false;
+}
+
+static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
+                                         struct riscv_hwprobe *other_pair)
+{
+       if (pair->key != other_pair->key)
+               return false;
+
+       if (hwprobe_key_is_bitmask(pair->key))
+               return (pair->value & other_pair->value) == other_pair->value;
+
+       return pair->value == other_pair->value;
+}
+
 #endif
index 6960beb75f32942422de90583e6cbdaadcc78006..cbd51bfdf527ae9ec43d09ea499de5e26c3f22df 100644 (file)
        INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
               RS1(base), SIMM12(4))
 
+#define RISCV_PAUSE    ".4byte 0x100000f"
+#define ZAWRS_WRS_NTO  ".4byte 0x00d00073"
+#define ZAWRS_WRS_STO  ".4byte 0x01d00073"
+
 #endif /* __ASM_INSN_DEF_H */
index 42497d487a174642bee085b3a950ee6211396fd3..1c5c641075d2fd48d2f20fa8993875261ada4eb4 100644 (file)
  * sufficient to ensure this works sanely on controllers that support I/O
  * writes.
  */
-#define __io_pbr()     __asm__ __volatile__ ("fence io,i"  : : : "memory");
-#define __io_par(v)    __asm__ __volatile__ ("fence i,ior" : : : "memory");
-#define __io_pbw()     __asm__ __volatile__ ("fence iow,o" : : : "memory");
-#define __io_paw()     __asm__ __volatile__ ("fence o,io"  : : : "memory");
+#define __io_pbr()     RISCV_FENCE(io, i)
+#define __io_par(v)    RISCV_FENCE(i, ior)
+#define __io_pbw()     RISCV_FENCE(iow, o)
+#define __io_paw()     RISCV_FENCE(o, io)
 
 /*
  * Accesses from a single hart to a single I/O address must be ordered.  This
index e4042d297580080c0c3dfffed0dd711f5f371ca1..6441ded3b0cf2cf894312be28aa9e731e151f2a8 100644 (file)
@@ -12,6 +12,9 @@
 
 DECLARE_PER_CPU(ulong *, irq_stack_ptr);
 
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+                                 void (*func)(struct pt_regs *));
+
 #ifdef CONFIG_VMAP_STACK
 /*
  * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
index 4c58ee7f95ecfa65ae11de96eda76347e747fe56..06cadfd7a237aceec28e4a3db693b1ae57db8e8e 100644 (file)
@@ -12,6 +12,7 @@
 #define _ASM_RISCV_MMIO_H
 
 #include <linux/types.h>
+#include <asm/fence.h>
 #include <asm/mmiowb.h>
 
 /* Generic IO read/write.  These perform native-endian accesses. */
@@ -131,8 +132,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * doesn't define any ordering between the memory space and the I/O space.
  */
 #define __io_br()      do {} while (0)
-#define __io_ar(v)     ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
-#define __io_bw()      ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
+#define __io_ar(v)     RISCV_FENCE(i, ir)
+#define __io_bw()      RISCV_FENCE(w, o)
 #define __io_aw()      mmiowb_set_pending()
 
 #define readb(c)       ({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
index 0b2333e71fdc5d52b06f2b96d88ab7f39ff37d70..52ce4a399d9b2b1ecab80ba7b7525c2e148cee35 100644 (file)
@@ -7,7 +7,7 @@
  * "o,w" is sufficient to ensure that all writes to the device have completed
  * before the write to the spinlock is allowed to commit.
  */
-#define mmiowb()       __asm__ __volatile__ ("fence o,w" : : : "memory");
+#define mmiowb()       RISCV_FENCE(o, w)
 
 #include <linux/smp.h>
 #include <asm-generic/mmiowb.h>
index 719c3041ae1c26cce4c049a9d9c63548852f47c6..d483e2a651e5d07bdded41b6dae1ca36cb016e4e 100644 (file)
@@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud)
 }
 
 #ifdef CONFIG_RISCV_ISA_SVNAPOT
+#include <asm/cpufeature.h>
 
 static __always_inline bool has_svnapot(void)
 {
index 4f6af8c6cfa060380594c6d0e727af6b02d08d70..af6962ccfc2fe627a0be7376cb6b1491657c9734 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/const.h>
 #include <linux/cache.h>
+#include <linux/prctl.h>
 
 #include <vdso/processor.h>
 
 struct task_struct;
 struct pt_regs;
 
+/*
+ * We use a flag to track in-kernel Vector context. Currently the flag has the
+ * following meaning:
+ *
+ *  - bit 0: indicates whether the in-kernel Vector context is active. The
+ *    activation of this state disables preemption. On a non-RT kernel, it
+ *    also disables bh.
+ *  - bit 8: is used for tracking preemptible kernel-mode Vector, when
+ *    RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
+ *    disable preemption if the thread's kernel_vstate.datap is allocated.
+ *    Instead, the kernel sets this bit field. Then the trap entry/exit code
+ *    knows if we are entering/exiting the context that owns preempt_v.
+ *     - 0: the task is not using preempt_v
+ *     - 1: the task is actively using preempt_v. Whether the task owns the
+ *          preempt_v context is decided by the bits in RISCV_V_CTX_DEPTH_MASK.
+ *  - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by the context tracking
+ *    routine when preempt_v starts:
+ *     - 0: the task is actively using, and owns, the preempt_v context.
+ *     - non-zero: the task was using preempt_v, but then took a trap within.
+ *       Thus, the task does not own preempt_v. Any use of Vector will have to
+ *       save preempt_v, if dirty, and fall back to non-preemptible kernel-mode
+ *       Vector.
+ *  - bit 30: the in-kernel preempt_v context is saved, and needs to be
+ *    restored when returning to the context that owns the preempt_v.
+ *  - bit 31: the in-kernel preempt_v context is dirty, as signaled by the
+ *    trap entry code. Any context switch out of the current task needs to save
+ *    it to the task's in-kernel V context. Also, any trap nesting on top of
+ *    preempt_v and requesting to use V needs a save.
+ */
+#define RISCV_V_CTX_DEPTH_MASK         0x00ff0000
+
+#define RISCV_V_CTX_UNIT_DEPTH         0x00010000
+#define RISCV_KERNEL_MODE_V            0x00000001
+#define RISCV_PREEMPT_V                        0x00000100
+#define RISCV_PREEMPT_V_DIRTY          0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE   0x40000000
+
 /* CPU-specific state of a task */
 struct thread_struct {
        /* Callee-saved registers */
@@ -80,8 +118,11 @@ struct thread_struct {
        unsigned long s[12];    /* s[0]: frame pointer */
        struct __riscv_d_ext_state fstate;
        unsigned long bad_cause;
-       unsigned long vstate_ctrl;
+       u32 riscv_v_flags;
+       u32 vstate_ctrl;
        struct __riscv_v_ext_state vstate;
+       unsigned long align_ctl;
+       struct __riscv_v_ext_state kernel_vstate;
 };
 
 /* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -94,6 +135,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 
 #define INIT_THREAD {                                  \
        .sp = sizeof(init_stack) + (long)&init_stack,   \
+       .align_ctl = PR_UNALIGN_NOPRINT,                \
 }
 
 #define task_pt_regs(tsk)                                              \
@@ -123,6 +165,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid);
 
 extern void riscv_fill_hwcap(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+extern struct cpumask ai_core_mask_get(void);
 
 extern unsigned long signal_minsigstksz __ro_after_init;
 
@@ -134,6 +177,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg);
 extern long riscv_v_vstate_ctrl_get_current(void);
 #endif /* CONFIG_RISCV_ISA_V */
 
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
+#define GET_UNALIGN_CTL(tsk, addr)     get_unalign_ctl((tsk), (addr))
+#define SET_UNALIGN_CTL(tsk, val)      set_unalign_ctl((tsk), (val))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PROCESSOR_H */
index b79d0228144f4497ae2ec484b1a8a7e9831ad29a..79ea84cd4f98ac3eb1cc3959f4514736fc725f3d 100644 (file)
@@ -29,6 +29,7 @@ enum sbi_ext_id {
        SBI_EXT_RFENCE = 0x52464E43,
        SBI_EXT_HSM = 0x48534D,
        SBI_EXT_SRST = 0x53525354,
+       SBI_EXT_SUSP = 0x53555350,
        SBI_EXT_PMU = 0x504D55,
 
        /* Experimentals extensions must lie within this range */
@@ -48,6 +49,9 @@ enum sbi_ext_base_fid {
        SBI_EXT_BASE_GET_MVENDORID,
        SBI_EXT_BASE_GET_MARCHID,
        SBI_EXT_BASE_GET_MIMPID,
+#if defined(CONFIG_ARCH_SPACEMIT_K1PRO) || defined(CONFIG_ARCH_SPACEMIT_K1X)
+       SBI_EXT_BASE_FLUSH_CACHE_ALL,
+#endif
 };
 
 enum sbi_ext_time_fid {
@@ -113,6 +117,14 @@ enum sbi_srst_reset_reason {
        SBI_SRST_RESET_REASON_SYS_FAILURE,
 };
 
+enum sbi_ext_susp_fid {
+       SBI_EXT_SUSP_SYSTEM_SUSPEND = 0,
+};
+
+enum sbi_ext_susp_sleep_type {
+       SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0,
+};
+
 enum sbi_ext_pmu_fid {
        SBI_EXT_PMU_NUM_COUNTERS = 0,
        SBI_EXT_PMU_COUNTER_GET_INFO,
@@ -274,6 +286,10 @@ void sbi_shutdown(void);
 void sbi_send_ipi(unsigned int cpu);
 int sbi_remote_fence_i(const struct cpumask *cpu_mask);
 
+#if defined(CONFIG_ARCH_SPACEMIT_K1PRO) || defined(CONFIG_ARCH_SPACEMIT_K1X)
+void sbi_flush_local_dcache_all(void);
+#endif
+
 int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
                                unsigned long start,
                                unsigned long size,
@@ -327,6 +343,8 @@ static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1
 static inline void sbi_init(void) {}
 #endif /* CONFIG_RISCV_SBI */
 
+unsigned long riscv_get_mvendorid(void);
+unsigned long riscv_get_marchid(void);
 unsigned long riscv_cached_mvendorid(unsigned int cpu_id);
 unsigned long riscv_cached_marchid(unsigned int cpu_id);
 unsigned long riscv_cached_mimpid(unsigned int cpu_id);
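  The new SBI_EXT_SUSP IDs above follow the SBI System Suspend extension; the
  suspend path (arch/riscv/kernel/suspend.c in the file list) is expected to
  issue the call roughly as sketched below, through the existing sbi_ecall()
  interface:

      /* Hedged sketch: request suspend-to-RAM through the SBI SUSP extension.
       * resume_addr is the physical resume address; the opaque argument is 0. */
      static int sbi_system_suspend(unsigned long resume_addr)
      {
              struct sbiret ret;

              ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
                              SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM,
                              resume_addr, 0, 0, 0, 0);
              if (ret.error)
                      return sbi_err_map_linux_errno(ret.error);
              return 0;
      }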
diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h
new file mode 100644 (file)
index 0000000..0e45db7
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLY__
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/* Load init_shadow_call_stack to gp. */
+.macro scs_load_init_stack
+       la      gp, init_shadow_call_stack
+       XIP_FIXUP_OFFSET gp
+.endm
+
+/* Load the per-CPU IRQ shadow call stack to gp. */
+.macro scs_load_irq_stack tmp
+       load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp
+.endm
+
+/* Load task_scs_sp(current) to gp. */
+.macro scs_load_current
+       REG_L   gp, TASK_TI_SCS_SP(tp)
+.endm
+
+/* Load task_scs_sp(current) to gp, but only if tp has changed. */
+.macro scs_load_current_if_task_changed prev
+       beq     \prev, tp, _skip_scs
+       scs_load_current
+_skip_scs:
+.endm
+
+/* Save gp to task_scs_sp(current). */
+.macro scs_save_current
+       REG_S   gp, TASK_TI_SCS_SP(tp)
+.endm
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+.macro scs_load_init_stack
+.endm
+.macro scs_load_irq_stack tmp
+.endm
+.macro scs_load_current
+.endm
+.macro scs_load_current_if_task_changed prev
+.endm
+.macro scs_save_current
+.endm
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
new file mode 100644 (file)
index 0000000..54efbf5
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+
+#include <asm/vector.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+/*
+ * may_use_simd - whether it is allowable at this time to issue vector
+ *                instructions or access the vector register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+       /*
+        * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
+        * and is clear whenever preemption is enabled.
+        */
+       if (in_hardirq() || in_nmi())
+               return false;
+
+       /*
+        * Nesting is achieved in preempt_v by spreading the control for
+        * preemptible and non-preemptible kernel-mode Vector into two fields.
+        * Always try to match with preempt_v if a kernel V-context exists. Then,
+        * fall back to checking non-preempt_v if nesting happens, or if the
+        * config is not set.
+        */
+       if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+               if (!riscv_preempt_v_started(current))
+                       return true;
+       }
+       /*
+        * Non-preemptible kernel-mode Vector temporarily disables bh. So we
+        * must not return true on irqs_disabled(). Otherwise we would fail the
+        * lockdep check calling local_bh_enable().
+        */
+       return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
+}
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+static __must_check inline bool may_use_simd(void)
+{
+       return false;
+}
+
+#endif /* ! CONFIG_RISCV_ISA_V */
+
+#endif
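
may_use_simd() is the gate for the kernel-mode Vector helpers this series also introduces (kernel_vector_begin()/kernel_vector_end() in asm/vector.h below). A minimal, hypothetical caller would look roughly like this; xor_buf() is a made-up name and the vector path is elided:

    #include <asm/simd.h>
    #include <asm/vector.h>

    static void xor_buf(unsigned long *dst, const unsigned long *src, int n)
    {
            int i;

            if (may_use_simd()) {
                    kernel_vector_begin();
                    /* a vector-accelerated loop would go here */
                    kernel_vector_end();
                    return;
            }

            /* scalar fallback when vector state must not be touched */
            for (i = 0; i < n; i++)
                    dst[i] ^= src[i];
    }
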
index a727be723c5610f9ce2bc42de6f4dd2987c7536c..f90d8e42f3c7911908ec1f5f19929ab5ba67ff3a 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/jump_label.h>
 #include <linux/sched/task_stack.h>
 #include <asm/vector.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
index d18ce0113ca1f1d74d4009fff9d2081447b2807d..1047a97ddbc8a9164240f21fcdfb99796ec6538b 100644 (file)
@@ -57,8 +57,20 @@ struct thread_info {
        long                    user_sp;        /* User stack pointer */
        int                     cpu;
        unsigned long           syscall_work;   /* SYSCALL_WORK_ flags */
+#ifdef CONFIG_SHADOW_CALL_STACK
+       void                    *scs_base;
+       void                    *scs_sp;
+#endif
 };
 
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS                                                       \
+       .scs_base       = init_shadow_call_stack,                       \
+       .scs_sp         = init_shadow_call_stack,
+#else
+#define INIT_SCS
+#endif
+
 /*
  * macros/functions for gaining access to the thread information structure
  *
@@ -68,6 +80,7 @@ struct thread_info {
 {                                              \
        .flags          = 0,                    \
        .preempt_count  = INIT_PREEMPT_COUNT,   \
+       INIT_SCS                                \
 }
 
 void arch_release_task_struct(struct task_struct *tsk);
@@ -90,12 +103,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define TIF_NOTIFY_SIGNAL      9       /* signal notifications exist */
 #define TIF_UPROBE             10      /* uprobe breakpoint or singlestep */
 #define TIF_32BIT              11      /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE      12 /* restore Vector before returning to user */
 
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_RISCV_V_DEFER_RESTORE     (1 << TIF_RISCV_V_DEFER_RESTORE)
 
 #define _TIF_WORK_MASK \
        (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
index c5ee07b3df071d16ad956fd62e6981403d9bf133..c741e7b29603a444157ac3b73d0e47343ca9d203 100644 (file)
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <asm/ptrace.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/csr.h>
 #include <asm/asm.h>
 
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
+void kernel_vector_begin(void);
+void kernel_vector_end(void);
+void get_cpu_vector_context(void);
+void put_cpu_vector_context(void);
+void riscv_v_thread_free(struct task_struct *tsk);
+void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
+
+static inline u32 riscv_v_flags(void)
+{
+       return READ_ONCE(current->thread.riscv_v_flags);
+}
 
 static __always_inline bool has_vector(void)
 {
-       return riscv_has_extension_unlikely(RISCV_ISA_EXT_v);
+       return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X);
 }
 
 static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
@@ -79,7 +91,7 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src
 {
        asm volatile (
                ".option push\n\t"
-               ".option arch, +v\n\t"
+               ".option arch, +zve32x\n\t"
                "vsetvl  x0, %2, %1\n\t"
                ".option pop\n\t"
                "csrw   " __stringify(CSR_VSTART) ", %0\n\t"
@@ -97,7 +109,7 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
        __vstate_csr_save(save_to);
        asm volatile (
                ".option push\n\t"
-               ".option arch, +v\n\t"
+               ".option arch, +zve32x\n\t"
                "vsetvli        %0, x0, e8, m8, ta, ma\n\t"
                "vse8.v         v0, (%1)\n\t"
                "add            %1, %1, %0\n\t"
@@ -119,7 +131,7 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
        riscv_v_enable();
        asm volatile (
                ".option push\n\t"
-               ".option arch, +v\n\t"
+               ".option arch, +zve32x\n\t"
                "vsetvli        %0, x0, e8, m8, ta, ma\n\t"
                "vle8.v         v0, (%1)\n\t"
                "add            %1, %1, %0\n\t"
@@ -141,7 +153,7 @@ static inline void __riscv_v_vstate_discard(void)
        riscv_v_enable();
        asm volatile (
                ".option push\n\t"
-               ".option arch, +v\n\t"
+               ".option arch, +zve32x\n\t"
                "vsetvli        %0, x0, e8, m8, ta, ma\n\t"
                "vmv.v.i        v0, -1\n\t"
                "vmv.v.i        v8, -1\n\t"
@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
        __riscv_v_vstate_dirty(regs);
 }
 
-static inline void riscv_v_vstate_save(struct task_struct *task,
+static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
                                       struct pt_regs *regs)
 {
        if ((regs->status & SR_VS) == SR_VS_DIRTY) {
-               struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
                __riscv_v_vstate_save(vstate, vstate->datap);
                __riscv_v_vstate_clean(regs);
        }
 }
 
-static inline void riscv_v_vstate_restore(struct task_struct *task,
+static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
                                          struct pt_regs *regs)
 {
        if ((regs->status & SR_VS) != SR_VS_OFF) {
-               struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
                __riscv_v_vstate_restore(vstate, vstate->datap);
                __riscv_v_vstate_clean(regs);
        }
 }
 
+static inline void riscv_v_vstate_set_restore(struct task_struct *task,
+                                             struct pt_regs *regs)
+{
+       if ((regs->status & SR_VS) != SR_VS_OFF) {
+               set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+               riscv_v_vstate_on(regs);
+       }
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+       barrier();
+       task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+       barrier();
+       task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+       return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
+}
+
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk)       do {} while (0)
+#define riscv_preempt_v_set_restore(tsk)       do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
 static inline void __switch_to_vector(struct task_struct *prev,
                                      struct task_struct *next)
 {
        struct pt_regs *regs;
 
-       regs = task_pt_regs(prev);
-       riscv_v_vstate_save(prev, regs);
-       riscv_v_vstate_restore(next, task_pt_regs(next));
+       if (riscv_preempt_v_started(prev)) {
+               if (riscv_preempt_v_dirty(prev)) {
+                       __riscv_v_vstate_save(&prev->thread.kernel_vstate,
+                                             prev->thread.kernel_vstate.datap);
+                       riscv_preempt_v_clear_dirty(prev);
+               }
+       } else {
+               regs = task_pt_regs(prev);
+               riscv_v_vstate_save(&prev->thread.vstate, regs);
+       }
+
+       if (riscv_preempt_v_started(next))
+               riscv_preempt_v_set_restore(next);
+       else
+               riscv_v_vstate_set_restore(next, task_pt_regs(next));
 }
 
 void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -208,12 +273,26 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_vsize (0)
 #define riscv_v_vstate_discard(regs)           do {} while (0)
-#define riscv_v_vstate_save(task, regs)                do {} while (0)
-#define riscv_v_vstate_restore(task, regs)     do {} while (0)
+#define riscv_v_vstate_save(vstate, regs)      do {} while (0)
+#define riscv_v_vstate_restore(vstate, regs)   do {} while (0)
 #define __switch_to_vector(__prev, __next)     do {} while (0)
 #define riscv_v_vstate_off(regs)               do {} while (0)
 #define riscv_v_vstate_on(regs)                        do {} while (0)
+#define riscv_v_thread_free(tsk)               do {} while (0)
+#define riscv_v_setup_ctx_cache()              do {} while (0)
+#define riscv_v_thread_alloc(tsk)              do {} while (0)
 
 #endif /* CONFIG_RISCV_ISA_V */
 
+/*
+ * Return the implementation's vlen value.
+ *
+ * riscv_v_vsize holds the combined size of the 32 vector registers (32 * vlenb
+ * bytes), so rebuild the vlen value in bits from it.
+ */
+static inline int riscv_vector_vlen(void)
+{
+       return riscv_v_vsize / 32 * 8;
+}
+
 #endif /* ! __ASM_RISCV_VECTOR_H */
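
To make riscv_vector_vlen() concrete: riscv_v_vsize is the save-area size for all 32 vector registers, so a hypothetical implementation with VLENB = 16 bytes works out as follows:

    /* Hypothetical VLENB of 16 bytes, i.e. VLEN = 128 bits. */
    unsigned long vlenb = 16;          /* bytes per vector register (CSR VLENB) */
    unsigned long vsize = 32 * vlenb;  /* riscv_v_vsize = 512 bytes             */
    int vlen_bits = vsize / 32 * 8;    /* riscv_vector_vlen() = 128             */
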
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644 (file)
index 0000000..9601186
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2)
+{
+       kernel_vector_begin();
+       xor_regs_2_(bytes, p1, p2);
+       kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3)
+{
+       kernel_vector_begin();
+       xor_regs_3_(bytes, p1, p2, p3);
+       kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4)
+{
+       kernel_vector_begin();
+       xor_regs_4_(bytes, p1, p2, p3, p4);
+       kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4,
+                        const unsigned long *__restrict p5)
+{
+       kernel_vector_begin();
+       xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+       kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+       .name = "rvv",
+       .do_2 = xor_vector_2,
+       .do_3 = xor_vector_3,
+       .do_4 = xor_vector_4,
+       .do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+       do {        \
+               xor_speed(&xor_block_8regs);    \
+               xor_speed(&xor_block_32regs);    \
+               if (has_vector()) { \
+                       xor_speed(&xor_block_rvv);\
+               } \
+       } while (0)
+#endif
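
The "rvv" template is only reachable through the generic XOR calibration: calibrate_xor_blocks() runs XOR_TRY_TEMPLATES, benchmarks each entry via xor_speed(), and xor_blocks() callers (e.g. the MD RAID code) then transparently use whichever template won. A rough consumer sketch, with xor_one_buf() being a made-up helper:

    #include <linux/raid/xor.h>

    /* dst ^= src over len bytes, using the fastest calibrated template. */
    static void xor_one_buf(void *dst, void *src, unsigned int len)
    {
            void *srcs[1] = { src };

            xor_blocks(1, len, dst, srcs);
    }
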
index d696d6610231dd6cacacdeba77e7c96b6184d993..11a71b8533d5759ec724a8359d0ffa2a4f2e976d 100644 (file)
@@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_TLS_DTPREL64   9
 #define R_RISCV_TLS_TPREL32    10
 #define R_RISCV_TLS_TPREL64    11
+#define R_RISCV_IRELATIVE      58
 
 /* Relocation types not used by the dynamic linker */
 #define R_RISCV_BRANCH         16
@@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_ALIGN          43
 #define R_RISCV_RVC_BRANCH     44
 #define R_RISCV_RVC_JUMP       45
-#define R_RISCV_LUI            46
 #define R_RISCV_GPREL_I                47
 #define R_RISCV_GPREL_S                48
 #define R_RISCV_TPREL_I                49
@@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_SET16          55
 #define R_RISCV_SET32          56
 #define R_RISCV_32_PCREL       57
+#define R_RISCV_PLT32          59
+#define R_RISCV_SET_ULEB128    60
+#define R_RISCV_SUB_ULEB128    61
 
 
 #endif /* _UAPI_ASM_RISCV_ELF_H */
index 006bfb48343dd423d5ddd04fb38a73043318e2e1..dda76a05420b1b0cb6fdbd1308521019f25dfdd4 100644 (file)
@@ -10,7 +10,7 @@
 
 /*
  * Interface for probing hardware capabilities from userspace, see
- * Documentation/riscv/hwprobe.rst for more information.
+ * Documentation/arch/riscv/hwprobe.rst for more information.
  */
 struct riscv_hwprobe {
        __s64 key;
@@ -29,6 +29,37 @@ struct riscv_hwprobe {
 #define                RISCV_HWPROBE_EXT_ZBA           (1 << 3)
 #define                RISCV_HWPROBE_EXT_ZBB           (1 << 4)
 #define                RISCV_HWPROBE_EXT_ZBS           (1 << 5)
+#define                RISCV_HWPROBE_EXT_ZICBOZ        (1 << 6)
+#define                RISCV_HWPROBE_EXT_ZBC           (1 << 7)
+#define                RISCV_HWPROBE_EXT_ZBKB          (1 << 8)
+#define                RISCV_HWPROBE_EXT_ZBKC          (1 << 9)
+#define                RISCV_HWPROBE_EXT_ZBKX          (1 << 10)
+#define                RISCV_HWPROBE_EXT_ZKND          (1 << 11)
+#define                RISCV_HWPROBE_EXT_ZKNE          (1 << 12)
+#define                RISCV_HWPROBE_EXT_ZKNH          (1 << 13)
+#define                RISCV_HWPROBE_EXT_ZKSED         (1 << 14)
+#define                RISCV_HWPROBE_EXT_ZKSH          (1 << 15)
+#define                RISCV_HWPROBE_EXT_ZKT           (1 << 16)
+#define                RISCV_HWPROBE_EXT_ZVBB          (1 << 17)
+#define                RISCV_HWPROBE_EXT_ZVBC          (1 << 18)
+#define                RISCV_HWPROBE_EXT_ZVKB          (1 << 19)
+#define                RISCV_HWPROBE_EXT_ZVKG          (1 << 20)
+#define                RISCV_HWPROBE_EXT_ZVKNED        (1 << 21)
+#define                RISCV_HWPROBE_EXT_ZVKNHA        (1 << 22)
+#define                RISCV_HWPROBE_EXT_ZVKNHB        (1 << 23)
+#define                RISCV_HWPROBE_EXT_ZVKSED        (1 << 24)
+#define                RISCV_HWPROBE_EXT_ZVKSH         (1 << 25)
+#define                RISCV_HWPROBE_EXT_ZVKT          (1 << 26)
+#define                RISCV_HWPROBE_EXT_ZFH           (1 << 27)
+#define                RISCV_HWPROBE_EXT_ZFHMIN        (1 << 28)
+#define                RISCV_HWPROBE_EXT_ZIHINTNTL     (1 << 29)
+#define                RISCV_HWPROBE_EXT_ZVFH          (1 << 30)
+#define                RISCV_HWPROBE_EXT_ZVFHMIN       (1ULL << 31)
+#define                RISCV_HWPROBE_EXT_ZFA           (1ULL << 32)
+#define                RISCV_HWPROBE_EXT_ZTSO          (1ULL << 33)
+#define                RISCV_HWPROBE_EXT_ZACAS         (1ULL << 34)
+#define                RISCV_HWPROBE_EXT_ZICOND        (1ULL << 35)
+#define                RISCV_HWPROBE_EXT_ZIHINTPAUSE   (1ULL << 36)
 #define RISCV_HWPROBE_KEY_CPUPERF_0    5
 #define                RISCV_HWPROBE_MISALIGNED_UNKNOWN        (0 << 0)
 #define                RISCV_HWPROBE_MISALIGNED_EMULATED       (1 << 0)
@@ -36,6 +67,10 @@ struct riscv_hwprobe {
 #define                RISCV_HWPROBE_MISALIGNED_FAST           (3 << 0)
 #define                RISCV_HWPROBE_MISALIGNED_UNSUPPORTED    (4 << 0)
 #define                RISCV_HWPROBE_MISALIGNED_MASK           (7 << 0)
+#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE    6
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
+/* Flags */
+#define RISCV_HWPROBE_WHICH_CPUS       (1 << 0)
+
 #endif
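
From userspace, the new extension bits are queried through the riscv_hwprobe(2) syscall. A minimal sketch, assuming the installed uapi headers provide __NR_riscv_hwprobe and <asm/hwprobe.h> (error handling omitted):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>

    int main(void)
    {
            struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

            /* args: pairs, pair_count, cpusetsize, cpus, flags */
            if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                    return 1;

            if (pair.value & RISCV_HWPROBE_EXT_ZBB)
                    printf("Zbb supported\n");
            if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
                    printf("Zicboz supported\n");
            return 0;
    }
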
index 66b13a5228808dcbf79a03bac165788bc439af18..0fd14cb60a0808b9c5c054cfa75e67ffaf9b4da6 100644 (file)
@@ -3,6 +3,6 @@
 #ifndef _UAPI_ASM_RISCV_SETUP_H
 #define _UAPI_ASM_RISCV_SETUP_H
 
-#define COMMAND_LINE_SIZE      1024
+#define COMMAND_LINE_SIZE   1024
 
 #endif /* _UAPI_ASM_RISCV_SETUP_H */
index 03968c06258ceb3069182253efc5bcd4aeda7d02..fd78940204f6fe8a54f63826da29af17771ded5d 100644 (file)
@@ -39,7 +39,6 @@ extra-y += vmlinux.lds
 obj-y  += head.o
 obj-y  += soc.o
 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
-obj-y  += copy-unaligned.o
 obj-y  += cpu.o
 obj-y  += cpufeature.o
 obj-y  += entry.o
@@ -52,6 +51,7 @@ obj-y += setup.o
 obj-y  += signal.o
 obj-y  += syscall_table.o
 obj-y  += sys_riscv.o
+obj-y  += sys_hwprobe.o
 obj-y  += time.o
 obj-y  += traps.o
 obj-y  += riscv_ksyms.o
@@ -61,9 +61,13 @@ obj-y        += patch.o
 obj-y  += probes/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
-obj-$(CONFIG_RISCV_M_MODE)     += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)     += copy-unaligned.o
+
 obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_RISCV_ISA_V)      += vector.o
+obj-$(CONFIG_RISCV_ISA_V)      += kernel_mode_vector.o
 obj-$(CONFIG_SMP)              += smpboot.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_SMP)              += cpu_ops.o
index 9f535d5de33f93627d6d58b6930337fee97f57de..a03129f40c464868e9352436e1273424c1da060b 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/thread_info.h>
 #include <asm/ptrace.h>
 #include <asm/cpu_ops_sbi.h>
+#include <asm/stacktrace.h>
 #include <asm/suspend.h>
 
 void asm_offsets(void);
@@ -38,6 +39,9 @@ void asm_offsets(void)
        OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
        OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
        OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
+#ifdef CONFIG_SHADOW_CALL_STACK
+       OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
+#endif
 
        OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
        OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
@@ -480,4 +484,8 @@ void asm_offsets(void)
        OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
        OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
        OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
+
+       DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
+       OFFSET(STACKFRAME_FP, stackframe, fp);
+       OFFSET(STACKFRAME_RA, stackframe, ra);
 }
index b86e5e2c3aea94e13ebce84db247a2e8ab5934df..62fa393b2eb2ead77a85ce54a9c4c8b32d528f6b 100644 (file)
@@ -76,13 +76,3 @@ quiet_cmd_compat_vdsold = VDSOLD  $@
 # actual build commands
 quiet_cmd_compat_vdsoas = VDSOAS $@
       cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_compat_vdso_install = INSTALL $@
-      cmd_compat_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/compat_vdso/$@
-
-compat_vdso.so: $(obj)/compat_vdso.so.dbg
-       @mkdir -p $(MODLIB)/compat_vdso
-       $(call cmd,compat_vdso_install)
-
-compat_vdso_install: compat_vdso.so
index cfdecfbaad627153ead8cde6fe9ca7a298c1aa40..2b3d9398c113fbaea3f775b1439058e5a5178c7d 100644 (file)
@@ -9,7 +9,7 @@
 /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */
 /* Performs a memcpy without aligning buffers, using word loads and stores. */
 /* Note: The size is truncated to a multiple of 8 * SZREG */
-ENTRY(__riscv_copy_words_unaligned)
+SYM_FUNC_START(__riscv_copy_words_unaligned)
        andi  a4, a2, ~((8*SZREG)-1)
        beqz  a4, 2f
        add   a3, a1, a4
@@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned)
 
 2:
        ret
-END(__riscv_copy_words_unaligned)
+SYM_FUNC_END(__riscv_copy_words_unaligned)
 
 /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */
 /* Performs a memcpy without aligning buffers, using only byte accesses. */
 /* Note: The size is truncated to a multiple of 8 */
-ENTRY(__riscv_copy_bytes_unaligned)
+SYM_FUNC_START(__riscv_copy_bytes_unaligned)
        andi a4, a2, ~(8-1)
        beqz a4, 2f
        add  a3, a1, a4
@@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned)
 
 2:
        ret
-END(__riscv_copy_bytes_unaligned)
+SYM_FUNC_END(__riscv_copy_bytes_unaligned)
index 457a18efcb114880511f9eb342ccb2fb7e377382..7a2848048d69034b2a398719df1c6a3acc8bb05f 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/numa.h>
 #include <asm/smp.h>
+#include <asm/sbi.h>
 
 bool cpu_has_hotplug(unsigned int cpu)
 {
@@ -75,8 +76,14 @@ void __noreturn arch_cpu_idle_dead(void)
 {
        idle_task_exit();
 
+#if defined(CONFIG_ARCH_SPACEMIT_K1PRO) || defined(CONFIG_ARCH_SPACEMIT_K1X)
+       sbi_flush_local_dcache_all();
+#endif
        cpuhp_ap_report_dead();
 
+#if defined(CONFIG_ARCH_SPACEMIT_K1PRO) || defined(CONFIG_ARCH_SPACEMIT_K1X)
+       sbi_flush_local_dcache_all();
+#endif
        cpu_ops[smp_processor_id()]->cpu_stop();
        /* It should never reach here */
        BUG();
index 157ace8b262c20cd1cc78221e062381f4cf21aec..ebef4e101f7b303e96027d1b47837f0f338db936 100644 (file)
@@ -139,6 +139,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
        return -1;
 }
 
+unsigned long __init riscv_get_marchid(void)
+{
+       struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
+
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+       ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
+#elif IS_ENABLED(CONFIG_RISCV_M_MODE)
+       ci->marchid = csr_read(CSR_MARCHID);
+#else
+       ci->marchid = 0;
+#endif
+       return ci->marchid;
+}
+
+unsigned long __init riscv_get_mvendorid(void)
+{
+       struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
+
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+       ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
+#elif IS_ENABLED(CONFIG_RISCV_M_MODE)
+       ci->mvendorid = csr_read(CSR_MVENDORID);
+#else
+       ci->mvendorid = 0;
+#endif
+       return ci->mvendorid;
+}
+
 DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
 unsigned long riscv_cached_mvendorid(unsigned int cpu_id)
@@ -170,12 +198,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu)
        struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
 
 #if IS_ENABLED(CONFIG_RISCV_SBI)
-       ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
-       ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
+       if (!ci->mvendorid)
+               ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
+       if (!ci->marchid)
+               ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
        ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid();
 #elif IS_ENABLED(CONFIG_RISCV_M_MODE)
-       ci->mvendorid = csr_read(CSR_MVENDORID);
-       ci->marchid = csr_read(CSR_MARCHID);
+       if (!ci->mvendorid)
+               ci->mvendorid = csr_read(CSR_MVENDORID);
+       if (!ci->marchid)
+               ci->marchid = csr_read(CSR_MARCHID);
        ci->mimpid = csr_read(CSR_MIMPID);
 #else
        ci->mvendorid = 0;
@@ -272,28 +304,30 @@ static int c_show(struct seq_file *m, void *v)
 {
        unsigned long cpu_id = (unsigned long)v - 1;
        struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
-       struct device_node *node;
-       const char *compat;
+       struct device_node *node = of_get_cpu_node(cpu_id, NULL);
+       const char *compat, *model;
 
        seq_printf(m, "processor\t: %lu\n", cpu_id);
        seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
+
+       if (!of_property_read_string(node, "model", &model))
+               seq_printf(m, "model name\t: %s\n", model);
+
        print_isa(m);
        print_mmu(m);
 
        if (acpi_disabled) {
-               node = of_get_cpu_node(cpu_id, NULL);
-
                if (!of_property_read_string(node, "compatible", &compat) &&
                    strcmp(compat, "riscv"))
                        seq_printf(m, "uarch\t\t: %s\n", compat);
 
-               of_node_put(node);
        }
 
        seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid);
        seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid);
        seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid);
        seq_puts(m, "\n");
+       of_node_put(node);
 
        return 0;
 }
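
With the c_show() change above, /proc/cpuinfo gains a "model name" line whenever the DT cpu node carries a "model" property. Illustrative output only; the values below are made up:

    processor       : 0
    hart            : 0
    model name      : Spacemit(R) X60
    isa             : rv64imafdcv_zicbom_zicboz
    mmu             : sv39
    mvendorid       : 0x710
    marchid         : 0x8000000058000001
    mimpid          : 0x1000000000000000
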
index efa0f0816634c40772005bfe564cda4411957a81..14f53842293a59476438fb89f74e16a3e3d72c08 100644 (file)
@@ -12,6 +12,8 @@
 #include <asm/cpu_ops_sbi.h>
 #include <asm/sbi.h>
 #include <asm/smp.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
 
 extern char secondary_start_sbi[];
 const struct cpu_operations cpu_ops_sbi;
@@ -72,7 +74,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
        /* Make sure tidle is updated */
        smp_mb();
        bdata->task_ptr = tidle;
-       bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+       bdata->stack_ptr = task_pt_regs(tidle);
        /* Make sure boot data is updated */
        smp_mb();
        hsm_data = __pa(bdata);
@@ -108,11 +110,37 @@ static int sbi_cpu_is_stopped(unsigned int cpuid)
 {
        int rc;
        unsigned long hartid = cpuid_to_hartid_map(cpuid);
-
+#ifndef CONFIG_SOC_SPACEMIT_K1X
        rc = sbi_hsm_hart_get_status(hartid);
 
        if (rc == SBI_HSM_STATE_STOPPED)
                return 0;
+#else
+       unsigned long start, end;
+
+       /*
+        * cpu_kill could race with cpu_die and we can
+        * potentially end up declaring this cpu undead
+        * while it is dying. So, try again a few times.
+        */
+       start = jiffies;
+       end = start + msecs_to_jiffies(100);
+       do {
+               rc = sbi_hsm_hart_get_status(hartid);
+               if (rc == SBI_HSM_STATE_STOPPED) {
+                       pr_info("CPU%d killed (polled %d ms)\n", cpuid,
+                               jiffies_to_msecs(jiffies - start));
+                       return 0;
+               }
+
+               usleep_range(100, 1000);
+       } while (time_before(jiffies, end));
+
+       pr_warn("CPU%d may not have shut down cleanly (HSM hart status reports %d)\n",
+               cpuid, rc);
+       rc = -ETIMEDOUT;
+
+#endif
        return rc;
 }
 #endif
index d98d19226b5f5175aca387cf22558bad9df1f2c5..691e0c5366d2bd0b9812fcebe2366d5da3cd2423 100644 (file)
@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
 
        /* Make sure tidle is updated */
        smp_mb();
-       WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
-                  task_stack_page(tidle) + THREAD_SIZE);
+       WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
        WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
 }
 
index e39a905aca2483ae18a1a6fb9c65b2c56578ce72..0721e6c767e557eb34bb22b88ed561e837e0e562 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
+#include <linux/cpu.h>
+#include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/hwcap.h>
-#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/sbi.h>
 #include <asm/vector.h>
 
-#include "copy-unaligned.h"
-
 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
 
-#define MISALIGNED_ACCESS_JIFFIES_LG2 1
-#define MISALIGNED_BUFFER_SIZE 0x4000
-#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
-
 unsigned long elf_hwcap __read_mostly;
 
 /* Host ISA bitmap */
@@ -40,9 +35,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 /* Per-cpu ISA extensions. */
 struct riscv_isainfo hart_isa[NR_CPUS];
 
-/* Performance information */
-DEFINE_PER_CPU(long, misaligned_access_speed);
-
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -69,7 +61,7 @@ EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
  *
  * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
  */
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit)
 {
        const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
 
@@ -80,38 +72,245 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
 }
 EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
 
-static bool riscv_isa_extension_check(int id)
+struct cpumask ai_core_mask_get(void)
 {
-       switch (id) {
-       case RISCV_ISA_EXT_ZICBOM:
-               if (!riscv_cbom_block_size) {
-                       pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n");
-                       return false;
-               } else if (!is_power_of_2(riscv_cbom_block_size)) {
-                       pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n");
-                       return false;
+       struct device_node *node;
+       const char *cpu_ai;
+       struct cpumask cpu_mask;
+       unsigned long hartid;
+       int rc;
+
+       cpumask_clear(&cpu_mask);
+
+       for_each_of_cpu_node(node) {
+               rc = riscv_of_processor_hartid(node, &hartid);
+               if (rc < 0)
+                       continue;
+
+               if (of_property_read_string(node, "cpu-ai", &cpu_ai)) {
+                       continue;
                }
-               return true;
-       case RISCV_ISA_EXT_ZICBOZ:
-               if (!riscv_cboz_block_size) {
-                       pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n");
-                       return false;
-               } else if (!is_power_of_2(riscv_cboz_block_size)) {
-                       pr_err("cboz-block-size present, but is not a power-of-2\n");
-                       return false;
+
+               if (!strcmp(cpu_ai, "true")) {
+                       cpumask_set_cpu(hartid, &cpu_mask);
                }
-               return true;
        }
 
-       return true;
+       return cpu_mask;
+}
+
+static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data,
+                                    const unsigned long *isa_bitmap)
+{
+       if (!riscv_cbom_block_size) {
+               pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n");
+               return -EINVAL;
+       }
+       if (!is_power_of_2(riscv_cbom_block_size)) {
+               pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
+                                    const unsigned long *isa_bitmap)
+{
+       if (!riscv_cboz_block_size) {
+               pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n");
+               return -EINVAL;
+       }
+       if (!is_power_of_2(riscv_cboz_block_size)) {
+               pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) {  \
+       .name = #_name,                                                                 \
+       .property = #_name,                                                             \
+       .id = _id,                                                                      \
+       .subset_ext_ids = _subset_exts,                                                 \
+       .subset_ext_size = _subset_exts_size,                                           \
+       .validate = _validate                                                           \
+}
+
+#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, NULL)
+#define __RISCV_ISA_EXT_DATA_VALIDATE(_name, _id, _validate) \
+                       _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0, _validate)
+
+/* Used to declare pure "lasso" extension (Zk for instance) */
+#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \
+       _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, \
+                           ARRAY_SIZE(_bundled_exts), NULL)
+
+/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */
+#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \
+       _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), NULL)
+#define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
+       _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
+
+static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data,
+                                const unsigned long *isa_bitmap)
+{
+       if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA))
+               return 0;
+
+       return -EPROBE_DEFER;
 }
+static int riscv_ext_zcd_validate(const struct riscv_isa_ext_data *data,
+                                 const unsigned long *isa_bitmap)
+{
+       if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) &&
+           __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d))
+               return 0;
 
-#define __RISCV_ISA_EXT_DATA(_name, _id) {     \
-       .name = #_name,                         \
-       .property = #_name,                     \
-       .id = _id,                              \
+       return -EPROBE_DEFER;
 }
 
+static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data,
+                                 const unsigned long *isa_bitmap)
+{
+       if (IS_ENABLED(CONFIG_64BIT))
+               return -EINVAL;
+
+       if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) &&
+           __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f))
+               return 0;
+
+       return -EPROBE_DEFER;
+}
+
+static const unsigned int riscv_zk_bundled_exts[] = {
+       RISCV_ISA_EXT_ZBKB,
+       RISCV_ISA_EXT_ZBKC,
+       RISCV_ISA_EXT_ZBKX,
+       RISCV_ISA_EXT_ZKND,
+       RISCV_ISA_EXT_ZKNE,
+       RISCV_ISA_EXT_ZKR,
+       RISCV_ISA_EXT_ZKT,
+};
+
+static const unsigned int riscv_zkn_bundled_exts[] = {
+       RISCV_ISA_EXT_ZBKB,
+       RISCV_ISA_EXT_ZBKC,
+       RISCV_ISA_EXT_ZBKX,
+       RISCV_ISA_EXT_ZKND,
+       RISCV_ISA_EXT_ZKNE,
+       RISCV_ISA_EXT_ZKNH,
+};
+
+static const unsigned int riscv_zks_bundled_exts[] = {
+       RISCV_ISA_EXT_ZBKB,
+       RISCV_ISA_EXT_ZBKC,
+       RISCV_ISA_EXT_ZKSED,
+       RISCV_ISA_EXT_ZKSH
+};
+
+#define RISCV_ISA_EXT_ZVKN     \
+       RISCV_ISA_EXT_ZVKNED,   \
+       RISCV_ISA_EXT_ZVKNHB,   \
+       RISCV_ISA_EXT_ZVKB,     \
+       RISCV_ISA_EXT_ZVKT
+
+static const unsigned int riscv_zvkn_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKN
+};
+
+static const unsigned int riscv_zvknc_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKN,
+       RISCV_ISA_EXT_ZVBC
+};
+
+static const unsigned int riscv_zvkng_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKN,
+       RISCV_ISA_EXT_ZVKG
+};
+
+#define RISCV_ISA_EXT_ZVKS     \
+       RISCV_ISA_EXT_ZVKSED,   \
+       RISCV_ISA_EXT_ZVKSH,    \
+       RISCV_ISA_EXT_ZVKB,     \
+       RISCV_ISA_EXT_ZVKT
+
+static const unsigned int riscv_zvks_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKS
+};
+
+static const unsigned int riscv_zvksc_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKS,
+       RISCV_ISA_EXT_ZVBC
+};
+
+static const unsigned int riscv_zvksg_bundled_exts[] = {
+       RISCV_ISA_EXT_ZVKS,
+       RISCV_ISA_EXT_ZVKG
+};
+
+static const unsigned int riscv_zvbb_exts[] = {
+       RISCV_ISA_EXT_ZVKB
+};
+
+#define RISCV_ISA_EXT_ZVE64F_IMPLY_LIST        \
+       RISCV_ISA_EXT_ZVE64X,           \
+       RISCV_ISA_EXT_ZVE32F,           \
+       RISCV_ISA_EXT_ZVE32X
+
+#define RISCV_ISA_EXT_ZVE64D_IMPLY_LIST        \
+       RISCV_ISA_EXT_ZVE64F,           \
+       RISCV_ISA_EXT_ZVE64F_IMPLY_LIST
+
+#define RISCV_ISA_EXT_V_IMPLY_LIST     \
+       RISCV_ISA_EXT_ZVE64D,           \
+       RISCV_ISA_EXT_ZVE64D_IMPLY_LIST
+
+static const unsigned int riscv_zve32f_exts[] = {
+       RISCV_ISA_EXT_ZVE32X
+};
+
+static const unsigned int riscv_zve64f_exts[] = {
+       RISCV_ISA_EXT_ZVE64F_IMPLY_LIST
+};
+
+static const unsigned int riscv_zve64d_exts[] = {
+       RISCV_ISA_EXT_ZVE64D_IMPLY_LIST
+};
+
+static const unsigned int riscv_v_exts[] = {
+       RISCV_ISA_EXT_V_IMPLY_LIST
+};
+
+static const unsigned int riscv_zve64x_exts[] = {
+       RISCV_ISA_EXT_ZVE32X,
+       RISCV_ISA_EXT_ZVE64X
+};
+
+/*
+ * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
+ * privileged ISA, the existence of the CSRs is implied by any extension which
+ * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the
+ * existence of the CSR, and treat it as a subset of those other extensions.
+ */
+static const unsigned int riscv_xlinuxenvcfg_exts[] = {
+       RISCV_ISA_EXT_XLINUXENVCFG
+};
+
+/*
+ * Zc* spec states that:
+ * - C always implies Zca
+ * - C+F implies Zcf (RV32 only)
+ * - C+D implies Zcd
+ *
+ * These extensions will be enabled and then validated depending on the
+ * availability of F/D and, for Zcf, on the kernel being RV32.
+ */
+static const unsigned int riscv_c_exts[] = {
+       RISCV_ISA_EXT_ZCA,
+       RISCV_ISA_EXT_ZCF,
+       RISCV_ISA_EXT_ZCD,
+};
+
 /*
  * The canonical order of ISA extension names in the ISA string is defined in
  * chapter 27 of the unprivileged specification.
@@ -158,24 +357,74 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
        __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f),
        __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d),
        __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q),
-       __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
-       __RISCV_ISA_EXT_DATA(b, RISCV_ISA_EXT_b),
-       __RISCV_ISA_EXT_DATA(k, RISCV_ISA_EXT_k),
-       __RISCV_ISA_EXT_DATA(j, RISCV_ISA_EXT_j),
-       __RISCV_ISA_EXT_DATA(p, RISCV_ISA_EXT_p),
-       __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
+       __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts),
+       __RISCV_ISA_EXT_SUPERSET(v, RISCV_ISA_EXT_v, riscv_v_exts),
        __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
-       __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
-       __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+       __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts,
+                                         riscv_ext_zicbom_validate),
+       __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts,
+                                         riscv_ext_zicboz_validate),
        __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
+       __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
        __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
        __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
+       __RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL),
        __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
        __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
+       __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP),
+       __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+       __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
+       __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
+       __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
+       __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),
+       __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA),
+       __RISCV_ISA_EXT_DATA_VALIDATE(zcb, RISCV_ISA_EXT_ZCB, riscv_ext_zca_depends),
+       __RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate),
+       __RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate),
+       __RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends),
        __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
        __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
+       __RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC),
+       __RISCV_ISA_EXT_DATA(zbkb, RISCV_ISA_EXT_ZBKB),
+       __RISCV_ISA_EXT_DATA(zbkc, RISCV_ISA_EXT_ZBKC),
+       __RISCV_ISA_EXT_DATA(zbkx, RISCV_ISA_EXT_ZBKX),
        __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS),
+       __RISCV_ISA_EXT_BUNDLE(zk, riscv_zk_bundled_exts),
+       __RISCV_ISA_EXT_BUNDLE(zkn, riscv_zkn_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zknd, RISCV_ISA_EXT_ZKND),
+       __RISCV_ISA_EXT_DATA(zkne, RISCV_ISA_EXT_ZKNE),
+       __RISCV_ISA_EXT_DATA(zknh, RISCV_ISA_EXT_ZKNH),
+       __RISCV_ISA_EXT_DATA(zkr, RISCV_ISA_EXT_ZKR),
+       __RISCV_ISA_EXT_BUNDLE(zks, riscv_zks_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zkt, RISCV_ISA_EXT_ZKT),
+       __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED),
+       __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH),
+       __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO),
+       __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts),
+       __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC),
+       __RISCV_ISA_EXT_SUPERSET(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts),
+       __RISCV_ISA_EXT_DATA(zve32x, RISCV_ISA_EXT_ZVE32X),
+       __RISCV_ISA_EXT_SUPERSET(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts),
+       __RISCV_ISA_EXT_SUPERSET(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts),
+       __RISCV_ISA_EXT_SUPERSET(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts),
+       __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH),
+       __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN),
+       __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB),
+       __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG),
+       __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts),
+       __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED),
+       __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA),
+       __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB),
+       __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts),
+       __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED),
+       __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH),
+       __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts),
+       __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT),
        __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
+       __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN),
        __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
        __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
        __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
@@ -186,8 +435,93 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
 
 const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
 
-static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo,
-                                         unsigned long *isa2hwcap, const char *isa)
+static void riscv_isa_set_ext(const struct riscv_isa_ext_data *ext, unsigned long *bitmap)
+{
+       if (ext->id != RISCV_ISA_EXT_INVALID)
+               set_bit(ext->id, bitmap);
+
+       for (int i = 0; i < ext->subset_ext_size; i++) {
+               if (ext->subset_ext_ids[i] != RISCV_ISA_EXT_INVALID)
+                       set_bit(ext->subset_ext_ids[i], bitmap);
+       }
+}
+
+static const struct riscv_isa_ext_data *riscv_get_isa_ext_data(unsigned int ext_id)
+{
+       for (int i = 0; i < riscv_isa_ext_count; i++) {
+               if (riscv_isa_ext[i].id == ext_id)
+                       return &riscv_isa_ext[i];
+       }
+
+       return NULL;
+}
+
+/*
+ * "Resolve" a source ISA bitmap into one that matches kernel configuration as
+ * well as correct extension dependencies. Some extensions depend on specific
+ * kernel configuration to be usable (V needs CONFIG_RISCV_ISA_V, for instance),
+ * and this function validates all the extensions provided in source_isa into
+ * resolved_isa based on each extension's validate() callback.
+ */
+static void __init riscv_resolve_isa(unsigned long *source_isa,
+                                    unsigned long *resolved_isa, unsigned long *this_hwcap,
+                                    unsigned long *isa2hwcap)
+{
+       bool loop;
+       const struct riscv_isa_ext_data *ext;
+       DECLARE_BITMAP(prev_resolved_isa, RISCV_ISA_EXT_MAX);
+       int max_loop_count = riscv_isa_ext_count, ret;
+       unsigned int bit;
+
+       do {
+               loop = false;
+               if (max_loop_count-- < 0) {
+                       pr_err("Failed to reach a stable ISA state\n");
+                       return;
+               }
+               bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX);
+               for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) {
+                       ext = riscv_get_isa_ext_data(bit);
+                       if (!ext)
+                               continue;
+
+                       if (ext->validate) {
+                               ret = ext->validate(ext, resolved_isa);
+                               if (ret == -EPROBE_DEFER) {
+                                       loop = true;
+                                       continue;
+                               } else if (ret) {
+                                       /* Disable the extension entirely */
+                                       clear_bit(ext->id, source_isa);
+                                       continue;
+                               }
+                       }
+
+                       set_bit(ext->id, resolved_isa);
+                       /* No need to keep it in source isa now that it is enabled */
+                       clear_bit(ext->id, source_isa);
+
+                       /* Single letter extensions get set in hwcap */
+                       if (ext->id < RISCV_ISA_EXT_BASE)
+                               *this_hwcap |= isa2hwcap[ext->id];
+               }
+       } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa)));
+}
+
+static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap)
+{
+       for (int i = 0; i < riscv_isa_ext_count; i++) {
+               const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i];
+
+               if ((name_end - name == strlen(ext->name)) &&
+                   !strncasecmp(name, ext->name, name_end - name)) {
+                       riscv_isa_set_ext(ext, bitmap);
+                       break;
+               }
+       }
+}
+
+static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap)
 {
        /*
         * For all possible cpus, we have already validated in
@@ -200,15 +534,16 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
        while (*isa) {
                const char *ext = isa++;
                const char *ext_end = isa;
-               bool ext_long = false, ext_err = false;
+               bool ext_err = false;
 
                switch (*ext) {
                case 's':
                        /*
-                        * Workaround for invalid single-letter 's' & 'u'(QEMU).
+                        * Workaround for invalid single-letter 's' & 'u' (QEMU).
                         * No need to set the bit in riscv_isa as 's' & 'u' are
-                        * not valid ISA extensions. It works until multi-letter
-                        * extension starting with "Su" appears.
+                        * not valid ISA extensions. It works unless the first
+                        * multi-letter extension in the ISA string begins with
+                        * "Su" and is not prefixed with an underscore.
                         */
                        if (ext[-1] != '_' && ext[1] == 'u') {
                                ++isa;
@@ -239,7 +574,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
                         * character itself while eliminating the extensions version number.
                         * A simple re-increment solves this problem.
                         */
-                       ext_long = true;
                        for (; *isa && *isa != '_'; ++isa)
                                if (unlikely(!isalnum(*isa)))
                                        ext_err = true;
@@ -317,29 +651,10 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
                if (*isa == '_')
                        ++isa;
 
-#define SET_ISA_EXT_MAP(name, bit)                                             \
-               do {                                                            \
-                       if ((ext_end - ext == strlen(name)) &&                  \
-                            !strncasecmp(ext, name, strlen(name)) &&           \
-                            riscv_isa_extension_check(bit))                    \
-                               set_bit(bit, isainfo->isa);                     \
-               } while (false)                                                 \
-
                if (unlikely(ext_err))
                        continue;
-               if (!ext_long) {
-                       int nr = tolower(*ext) - 'a';
 
-                       if (riscv_isa_extension_check(nr)) {
-                               *this_hwcap |= isa2hwcap[nr];
-                               set_bit(nr, isainfo->isa);
-                       }
-               } else {
-                       for (int i = 0; i < riscv_isa_ext_count; i++)
-                               SET_ISA_EXT_MAP(riscv_isa_ext[i].name,
-                                               riscv_isa_ext[i].id);
-               }
-#undef SET_ISA_EXT_MAP
+               match_isa_ext(ext, ext_end, bitmap);
        }
 }
 
@@ -351,6 +666,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
        struct acpi_table_header *rhct;
        acpi_status status;
        unsigned int cpu;
+       u64 boot_vendorid;
+       u64 boot_archid;
 
        if (!acpi_disabled) {
                status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
@@ -358,9 +675,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
                        return;
        }
 
+       boot_vendorid = riscv_get_mvendorid();
+       boot_archid = riscv_get_marchid();
+
        for_each_possible_cpu(cpu) {
                struct riscv_isainfo *isainfo = &hart_isa[cpu];
                unsigned long this_hwcap = 0;
+               DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 };
 
                if (acpi_disabled) {
                        node = of_cpu_device_node_get(cpu);
@@ -383,7 +704,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
                        }
                }
 
-               riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa);
+               riscv_parse_isa_string(isa, source_isa);
 
                /*
                 * These ones were as they were part of the base ISA when the
@@ -391,10 +712,10 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
                 * unconditionally where `i` is in riscv,isa on DT systems.
                 */
                if (acpi_disabled) {
-                       set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa);
-                       set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa);
-                       set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa);
-                       set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa);
+                       set_bit(RISCV_ISA_EXT_ZICSR, source_isa);
+                       set_bit(RISCV_ISA_EXT_ZIFENCEI, source_isa);
+                       set_bit(RISCV_ISA_EXT_ZICNTR, source_isa);
+                       set_bit(RISCV_ISA_EXT_ZIHPM, source_isa);
                }
 
                /*
@@ -405,12 +726,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
                 * CPU cores with the ratified spec will contain non-zero
                 * marchid.
                 */
-               if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID &&
-                   riscv_cached_marchid(cpu) == 0x0) {
+               if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) {
                        this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v];
-                       clear_bit(RISCV_ISA_EXT_v, isainfo->isa);
+                       clear_bit(RISCV_ISA_EXT_v, source_isa);
                }
 
+               riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
+
                /*
                 * All "okay" hart should have same isa. Set HWCAP based on
                 * common capabilities of every "okay" hart, in case they don't
@@ -439,6 +761,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
                unsigned long this_hwcap = 0;
                struct device_node *cpu_node;
                struct riscv_isainfo *isainfo = &hart_isa[cpu];
+               DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 };
 
                cpu_node = of_cpu_device_node_get(cpu);
                if (!cpu_node) {
@@ -452,20 +775,17 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
                }
 
                for (int i = 0; i < riscv_isa_ext_count; i++) {
-                       if (of_property_match_string(cpu_node, "riscv,isa-extensions",
-                                                    riscv_isa_ext[i].property) < 0)
-                               continue;
+                       const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i];
 
-                       if (!riscv_isa_extension_check(riscv_isa_ext[i].id))
+                       if (of_property_match_string(cpu_node, "riscv,isa-extensions",
+                                                    ext->property) < 0)
                                continue;
 
-                       /* Only single letter extensions get set in hwcap */
-                       if (strnlen(riscv_isa_ext[i].name, 2) == 1)
-                               this_hwcap |= isa2hwcap[riscv_isa_ext[i].id];
-
-                       set_bit(riscv_isa_ext[i].id, isainfo->isa);
+                       riscv_isa_set_ext(ext, source_isa);
                }
 
+               riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
+
                of_node_put(cpu_node);
 
                /*
@@ -535,8 +855,14 @@ void __init riscv_fill_hwcap(void)
                elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
        }
 
-       if (elf_hwcap & COMPAT_HWCAP_ISA_V) {
+       if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) {
+               /*
+                * This cannot fail when called on the boot hart
+                */
                riscv_v_setup_vsize();
+       }
+
+       if (elf_hwcap & COMPAT_HWCAP_ISA_V) {
                /*
                 * ISA string in device tree might have 'v' flag, but
                 * CONFIG_RISCV_ISA_V is disabled in kernel.
@@ -571,107 +897,12 @@ unsigned long riscv_get_elf_hwcap(void)
        return hwcap;
 }
 
-void check_unaligned_access(int cpu)
+void riscv_user_isa_enable(void)
 {
-       u64 start_cycles, end_cycles;
-       u64 word_cycles;
-       u64 byte_cycles;
-       int ratio;
-       unsigned long start_jiffies, now;
-       struct page *page;
-       void *dst;
-       void *src;
-       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
-
-       /* We are already set since the last check */
-       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-               return;
-
-       page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
-       if (!page) {
-               pr_warn("Can't alloc pages to measure memcpy performance");
-               return;
-       }
-
-       /* Make an unaligned destination buffer. */
-       dst = (void *)((unsigned long)page_address(page) | 0x1);
-       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
-       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
-       src += 2;
-       word_cycles = -1ULL;
-       /* Do a warmup. */
-       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-       preempt_disable();
-       start_jiffies = jiffies;
-       while ((now = jiffies) == start_jiffies)
-               cpu_relax();
-
-       /*
-        * For a fixed amount of time, repeatedly try the function, and take
-        * the best time in cycles as the measurement.
-        */
-       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-               start_cycles = get_cycles64();
-               /* Ensure the CSR read can't reorder WRT to the copy. */
-               mb();
-               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-               /* Ensure the copy ends before the end time is snapped. */
-               mb();
-               end_cycles = get_cycles64();
-               if ((end_cycles - start_cycles) < word_cycles)
-                       word_cycles = end_cycles - start_cycles;
-       }
-
-       byte_cycles = -1ULL;
-       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-       start_jiffies = jiffies;
-       while ((now = jiffies) == start_jiffies)
-               cpu_relax();
-
-       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-               start_cycles = get_cycles64();
-               mb();
-               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-               mb();
-               end_cycles = get_cycles64();
-               if ((end_cycles - start_cycles) < byte_cycles)
-                       byte_cycles = end_cycles - start_cycles;
-       }
-
-       preempt_enable();
-
-       /* Don't divide by zero. */
-       if (!word_cycles || !byte_cycles) {
-               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
-                       cpu);
-
-               goto out;
-       }
-
-       if (word_cycles < byte_cycles)
-               speed = RISCV_HWPROBE_MISALIGNED_FAST;
-
-       ratio = div_u64((byte_cycles * 100), word_cycles);
-       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
-               cpu,
-               ratio / 100,
-               ratio % 100,
-               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
-
-       per_cpu(misaligned_access_speed, cpu) = speed;
-
-out:
-       __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
+       if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
+               csr_set(CSR_ENVCFG, ENVCFG_CBZE);
 }
 
-static int check_unaligned_access_boot_cpu(void)
-{
-       check_unaligned_access(0);
-       return 0;
-}
-
-arch_initcall(check_unaligned_access_boot_cpu);
-
 #ifdef CONFIG_RISCV_ALTERNATIVE
 /*
  * Alternative patch sites consider 48 bits when determining when to patch
index 278d01d2911fd512925c644057654964b9eaef61..9d1a305d55087bb3a6bdc73f8ed8ebe3206775b1 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <asm/asm.h>
 #include <asm/csr.h>
+#include <asm/scs.h>
 #include <asm/unistd.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
@@ -25,9 +26,9 @@ SYM_CODE_START(handle_exception)
         * register will contain 0, and we should continue on the current TP.
         */
        csrrw tp, CSR_SCRATCH, tp
-       bnez tp, _save_context
+       bnez tp, .Lsave_context
 
-_restore_kernel_tpsp:
+.Lrestore_kernel_tpsp:
        csrr tp, CSR_SCRATCH
        REG_S sp, TASK_TI_KERNEL_SP(tp)
 
@@ -39,7 +40,7 @@ _restore_kernel_tpsp:
        REG_L sp, TASK_TI_KERNEL_SP(tp)
 #endif
 
-_save_context:
+.Lsave_context:
        REG_S sp, TASK_TI_USER_SP(tp)
        REG_L sp, TASK_TI_KERNEL_SP(tp)
        addi sp, sp, -(PT_SIZE_ON_STACK)
@@ -77,10 +78,15 @@ _save_context:
        csrw CSR_SCRATCH, x0
 
        /* Load the global pointer */
-.option push
-.option norelax
-       la gp, __global_pointer$
-.option pop
+       load_global_pointer
+
+       /* Load the kernel shadow call stack pointer if coming from userspace */
+       scs_load_current_if_task_changed s5
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       move a0, sp
+       call riscv_v_context_nesting_start
+#endif
        move a0, sp /* pt_regs */
        la ra, ret_from_exception
 
@@ -127,12 +133,19 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
        addi s0, sp, PT_SIZE_ON_STACK
        REG_S s0, TASK_TI_KERNEL_SP(tp)
 
+       /* Save the kernel shadow call stack pointer */
+       scs_save_current
+
        /*
         * Save TP into the scratch register , so we can find the kernel data
         * structures again.
         */
        csrw CSR_SCRATCH, tp
 1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       move a0, sp
+       call riscv_v_context_nesting_end
+#endif
        REG_L a0, PT_STATUS(sp)
        /*
         * The current load reservation is effectively part of the processor's
@@ -220,6 +233,43 @@ SYM_CODE_START(ret_from_fork)
        tail syscall_exit_to_user_mode
 SYM_CODE_END(ret_from_fork)
 
+#ifdef CONFIG_IRQ_STACKS
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ *                       void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using the per-CPU IRQ stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+       /* Create a frame record to save ra and s0 (fp) */
+       addi    sp, sp, -STACKFRAME_SIZE_ON_STACK
+       REG_S   ra, STACKFRAME_RA(sp)
+       REG_S   s0, STACKFRAME_FP(sp)
+       addi    s0, sp, STACKFRAME_SIZE_ON_STACK
+
+       /* Switch to the per-CPU shadow call stack */
+       scs_save_current
+       scs_load_irq_stack t0
+
+       /* Switch to the per-CPU IRQ stack and call the handler */
+       load_per_cpu t0, irq_stack_ptr, t1
+       li      t1, IRQ_STACK_SIZE
+       add     sp, t0, t1
+       jalr    a1
+
+       /* Switch back to the thread shadow call stack */
+       scs_load_current
+
+       /* Switch back to the thread stack and restore ra and s0 */
+       addi    sp, s0, -STACKFRAME_SIZE_ON_STACK
+       REG_L   ra, STACKFRAME_RA(sp)
+       REG_L   s0, STACKFRAME_FP(sp)
+       addi    sp, sp, STACKFRAME_SIZE_ON_STACK
+
+       ret
+SYM_FUNC_END(call_on_irq_stack)
+#endif /* CONFIG_IRQ_STACKS */
+
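For orientation, a minimal C sketch of the calling pattern this helper expects (the same shape the reworked do_softirq_own_stack() takes later in this patch): wrap the work in a void (*)(struct pt_regs *) thunk and only switch stacks when still on the task stack. foo() and do_foo() are illustrative names, not part of this patch; the declarations are assumed to come from <asm/irq_stack.h> as included elsewhere in this series.

static void do_foo(struct pt_regs *regs)
{
	/* placeholder body; runs with sp pointing into the per-CPU IRQ stack */
}

static void foo(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack())
		call_on_irq_stack(regs, do_foo);
	else
		do_foo(regs);
}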
 /*
  * Integer register context switch
  * The callee-saved registers must be saved and restored.
@@ -249,6 +299,8 @@ SYM_FUNC_START(__switch_to)
        REG_S s9,  TASK_THREAD_S9_RA(a3)
        REG_S s10, TASK_THREAD_S10_RA(a3)
        REG_S s11, TASK_THREAD_S11_RA(a3)
+       /* Save the kernel shadow call stack pointer */
+       scs_save_current
        /* Restore context from next->thread */
        REG_L ra,  TASK_THREAD_RA_RA(a4)
        REG_L sp,  TASK_THREAD_SP_RA(a4)
@@ -266,6 +318,8 @@ SYM_FUNC_START(__switch_to)
        REG_L s11, TASK_THREAD_S11_RA(a4)
        /* The offset of thread_info in task_struct is zero. */
        move tp, a1
+       /* Switch to the next shadow call stack */
+       scs_load_current
        ret
 SYM_FUNC_END(__switch_to)
 
@@ -276,7 +330,7 @@ SYM_FUNC_END(__switch_to)
        .section ".rodata"
        .align LGREG
        /* Exception vector table */
-SYM_CODE_START(excp_vect_table)
+SYM_DATA_START_LOCAL(excp_vect_table)
        RISCV_PTR do_trap_insn_misaligned
        ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
        RISCV_PTR do_trap_insn_illegal
@@ -294,12 +348,11 @@ SYM_CODE_START(excp_vect_table)
        RISCV_PTR do_page_fault   /* load page fault */
        RISCV_PTR do_trap_unknown
        RISCV_PTR do_page_fault   /* store page fault */
-excp_vect_table_end:
-SYM_CODE_END(excp_vect_table)
+SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end)
 
 #ifndef CONFIG_MMU
-SYM_CODE_START(__user_rt_sigreturn)
+SYM_DATA_START(__user_rt_sigreturn)
        li a7, __NR_rt_sigreturn
        ecall
-SYM_CODE_END(__user_rt_sigreturn)
+SYM_DATA_END(__user_rt_sigreturn)
 #endif
index dd2205473de78571a5a4a4b68bd4b33302a77b31..2c543f130f9389aa4f1d9fee05f5027d99adaa78 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/csr.h>
 #include <asm/asm-offsets.h>
 
-ENTRY(__fstate_save)
+SYM_FUNC_START(__fstate_save)
        li  a2,  TASK_THREAD_F0
        add a0, a0, a2
        li t1, SR_FS
@@ -60,9 +60,9 @@ ENTRY(__fstate_save)
        sw t0, TASK_THREAD_FCSR_F0(a0)
        csrc CSR_STATUS, t1
        ret
-ENDPROC(__fstate_save)
+SYM_FUNC_END(__fstate_save)
 
-ENTRY(__fstate_restore)
+SYM_FUNC_START(__fstate_restore)
        li  a2,  TASK_THREAD_F0
        add a0, a0, a2
        li t1, SR_FS
@@ -103,4 +103,125 @@ ENTRY(__fstate_restore)
        fscsr t0
        csrc CSR_STATUS, t1
        ret
-ENDPROC(__fstate_restore)
+SYM_FUNC_END(__fstate_restore)
+
+#define get_f32(which) fmv.x.s a0, which; j 2f
+#define put_f32(which) fmv.s.x which, a1; j 2f
+#if __riscv_xlen == 64
+# define get_f64(which) fmv.x.d a0, which; j 2f
+# define put_f64(which) fmv.d.x which, a1; j 2f
+#else
+# define get_f64(which) fsd which, 0(a1); j 2f
+# define put_f64(which) fld which, 0(a1); j 2f
+#endif
+
+.macro fp_access_prologue
+       /*
+        * Compute the jump offset to select the requested FP register, since we
+        * don't have indirect FP register access
+        */
+       sll t0, a0, 3
+       la t2, 1f
+       add t0, t0, t2
+       li t1, SR_FS
+       csrs CSR_STATUS, t1
+       jr t0
+1:
+.endm
+
+.macro fp_access_epilogue
+2:
+       csrc CSR_STATUS, t1
+       ret
+.endm
+
+#define fp_access_body(__access_func) \
+       __access_func(f0); \
+       __access_func(f1); \
+       __access_func(f2); \
+       __access_func(f3); \
+       __access_func(f4); \
+       __access_func(f5); \
+       __access_func(f6); \
+       __access_func(f7); \
+       __access_func(f8); \
+       __access_func(f9); \
+       __access_func(f10); \
+       __access_func(f11); \
+       __access_func(f12); \
+       __access_func(f13); \
+       __access_func(f14); \
+       __access_func(f15); \
+       __access_func(f16); \
+       __access_func(f17); \
+       __access_func(f18); \
+       __access_func(f19); \
+       __access_func(f20); \
+       __access_func(f21); \
+       __access_func(f22); \
+       __access_func(f23); \
+       __access_func(f24); \
+       __access_func(f25); \
+       __access_func(f26); \
+       __access_func(f27); \
+       __access_func(f28); \
+       __access_func(f29); \
+       __access_func(f30); \
+       __access_func(f31)
+
+
+#ifdef CONFIG_RISCV_MISALIGNED
+
+/*
+ * Disable the compressed instruction set to keep a constant offset between FP
+ * load/store/move instructions
+ */
+.option norvc
+/*
+ * put_f32_reg - Set a FP register from a register containing the value
+ * a0 = FP register index to be set
+ * a1 = value to be loaded in the FP register
+ */
+SYM_FUNC_START(put_f32_reg)
+       fp_access_prologue
+       fp_access_body(put_f32)
+       fp_access_epilogue
+SYM_FUNC_END(put_f32_reg)
+
+/*
+ * get_f32_reg - Get a FP register value and return it
+ * a0 = FP register index to be retrieved
+ */
+SYM_FUNC_START(get_f32_reg)
+       fp_access_prologue
+       fp_access_body(get_f32)
+       fp_access_epilogue
+SYM_FUNC_END(get_f32_reg)
+
+/*
+ * put_f64_reg - Set a 64-bit FP register from a value or a pointer.
+ * a0 = FP register index to be set
+ * a1 = value/pointer to be loaded into the FP register (when xlen == 32, the
+ * value is loaded through a pointer).
+ */
+SYM_FUNC_START(put_f64_reg)
+       fp_access_prologue
+       fp_access_body(put_f64)
+       fp_access_epilogue
+SYM_FUNC_END(put_f64_reg)
+
+/*
+ * get_f64_reg - Get a 64-bit FP register value and return it, or store it
+ *              through a pointer.
+ * a0 = FP register index to be retrieved
+ * a1 = If xlen == 32, pointer which should be loaded with the FP register value,
+ *     unused if xlen == 64, in which case the FP register value is returned
+ *     through a0
+ */
+SYM_FUNC_START(get_f64_reg)
+       fp_access_prologue
+       fp_access_body(get_f64)
+       fp_access_epilogue
+SYM_FUNC_END(get_f64_reg)
+
+#endif /* CONFIG_RISCV_MISALIGNED */
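For context, a hedged sketch of how a misaligned-access emulator could call these helpers from C. The prototypes below are inferred from the register conventions documented above (a0 = register index, a1 = value or pointer) and are an assumption, not declarations taken from this patch; emulate_flw() is likewise illustrative.

#include <linux/types.h>

/* Assumed prototypes, mirroring the asm calling convention documented above. */
unsigned long get_f32_reg(unsigned long fp_reg);
void put_f32_reg(unsigned long fp_reg, unsigned long value);

/* Emulate a misaligned "flw fN, 0(addr)" with byte loads (illustrative only). */
static void emulate_flw(unsigned long fp_reg, const u8 *addr)
{
	u32 value = 0;
	int i;

	for (i = 0; i < 4; i++)
		value |= (u32)addr[i] << (8 * i);	/* assemble the little-endian word */

	put_f32_reg(fp_reg, value);	/* fmv.s.x writes the raw bit pattern */
}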
index 0097c145385f6794d9424a2034ff64654c4a62ea..a2e2f0dd3899af4a660fbd36f0bb54a157bfa4b5 100644 (file)
 #include <asm/cpu_ops_sbi.h>
 #include <asm/hwcap.h>
 #include <asm/image.h>
+#include <asm/scs.h>
 #include <asm/xip_fixup.h>
 #include "efi-header.S"
 
 __HEAD
-ENTRY(_start)
+SYM_CODE_START(_start)
        /*
         * Image header expected by Linux boot-loaders. The image header data
         * structure is described in asm/image.h.
@@ -111,10 +112,7 @@ relocate_enable_mmu:
        csrw CSR_TVEC, a0
 
        /* Reload the global pointer */
-.option push
-.option norelax
-       la gp, __global_pointer$
-.option pop
+       load_global_pointer
 
        /*
         * Switch to kernel page tables.  A full fence is necessary in order to
@@ -135,10 +133,7 @@ secondary_start_sbi:
        csrw CSR_IP, zero
 
        /* Load the global pointer */
-       .option push
-       .option norelax
-               la gp, __global_pointer$
-       .option pop
+       load_global_pointer
 
        /*
         * Disable FPU & VECTOR to detect illegal usage of
@@ -160,6 +155,7 @@ secondary_start_sbi:
        XIP_FIXUP_OFFSET a3
        add a3, a3, a1
        REG_L sp, (a3)
+       scs_load_current
 
 .Lsecondary_start_common:
 
@@ -169,12 +165,12 @@ secondary_start_sbi:
        XIP_FIXUP_OFFSET a0
        call relocate_enable_mmu
 #endif
-       call setup_trap_vector
+       call .Lsetup_trap_vector
        tail smp_callin
 #endif /* CONFIG_SMP */
 
 .align 2
-setup_trap_vector:
+.Lsetup_trap_vector:
        /* Set trap vector to exception handler */
        la a0, handle_exception
        csrw CSR_TVEC, a0
@@ -192,9 +188,9 @@ setup_trap_vector:
        wfi
        j .Lsecondary_park
 
-END(_start)
+SYM_CODE_END(_start)
 
-ENTRY(_start_kernel)
+SYM_CODE_START(_start_kernel)
        /* Mask all interrupts */
        csrw CSR_IE, zero
        csrw CSR_IP, zero
@@ -211,7 +207,7 @@ ENTRY(_start_kernel)
         * not implement PMPs, so we set up a quick trap handler to just skip
         * touching the PMPs on any trap.
         */
-       la a0, pmp_done
+       la a0, .Lpmp_done
        csrw CSR_TVEC, a0
 
        li a0, -1
@@ -219,7 +215,7 @@ ENTRY(_start_kernel)
        li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
        csrw CSR_PMPCFG0, a0
 .align 2
-pmp_done:
+.Lpmp_done:
 
        /*
         * The hartid in a0 is expected later on, and we have no firmware
@@ -229,10 +225,7 @@ pmp_done:
 #endif /* CONFIG_RISCV_M_MODE */
 
        /* Load the global pointer */
-.option push
-.option norelax
-       la gp, __global_pointer$
-.option pop
+       load_global_pointer
 
        /*
         * Disable FPU & VECTOR to detect illegal usage of
@@ -283,12 +276,12 @@ pmp_done:
        /* Clear BSS for flat non-ELF images */
        la a3, __bss_start
        la a4, __bss_stop
-       ble a4, a3, clear_bss_done
-clear_bss:
+       ble a4, a3, .Lclear_bss_done
+.Lclear_bss:
        REG_S zero, (a3)
        add a3, a3, RISCV_SZPTR
-       blt a3, a4, clear_bss
-clear_bss_done:
+       blt a3, a4, .Lclear_bss
+.Lclear_bss_done:
 #endif
        la a2, boot_cpu_hartid
        XIP_FIXUP_OFFSET a2
@@ -299,6 +292,7 @@ clear_bss_done:
        la sp, init_thread_union + THREAD_SIZE
        XIP_FIXUP_OFFSET sp
        addi sp, sp, -PT_SIZE_ON_STACK
+       scs_load_init_stack
 #ifdef CONFIG_BUILTIN_DTB
        la a0, __dtb_start
        XIP_FIXUP_OFFSET a0
@@ -312,11 +306,12 @@ clear_bss_done:
        call relocate_enable_mmu
 #endif /* CONFIG_MMU */
 
-       call setup_trap_vector
+       call .Lsetup_trap_vector
        /* Restore C environment */
        la tp, init_task
        la sp, init_thread_union + THREAD_SIZE
        addi sp, sp, -PT_SIZE_ON_STACK
+       scs_load_current
 
 #ifdef CONFIG_KASAN
        call kasan_early_init
@@ -354,10 +349,10 @@ clear_bss_done:
        tail .Lsecondary_start_common
 #endif /* CONFIG_RISCV_BOOT_SPINWAIT */
 
-END(_start_kernel)
+SYM_CODE_END(_start_kernel)
 
 #ifdef CONFIG_RISCV_M_MODE
-ENTRY(reset_regs)
+SYM_CODE_START_LOCAL(reset_regs)
        li      sp, 0
        li      gp, 0
        li      tp, 0
@@ -455,5 +450,5 @@ ENTRY(reset_regs)
 .Lreset_regs_done_vector:
 #endif /* CONFIG_RISCV_ISA_V */
        ret
-END(reset_regs)
+SYM_CODE_END(reset_regs)
 #endif /* CONFIG_RISCV_M_MODE */
index d698dd7df637ba8ad9263e2312f896c268a58b2d..d040dcf4add453dadaa328b85cd462ae43d6d31a 100644 (file)
@@ -21,7 +21,7 @@
  *
  * Always returns 0
  */
-ENTRY(__hibernate_cpu_resume)
+SYM_FUNC_START(__hibernate_cpu_resume)
        /* switch to hibernated image's page table. */
        csrw CSR_SATP, s0
        sfence.vma
@@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume)
        mv      a0, zero
 
        ret
-END(__hibernate_cpu_resume)
+SYM_FUNC_END(__hibernate_cpu_resume)
 
 /*
  * Prepare to restore the image.
@@ -42,7 +42,7 @@ END(__hibernate_cpu_resume)
  * a1: satp of temporary page tables.
  * a2: cpu_resume.
  */
-ENTRY(hibernate_restore_image)
+SYM_FUNC_START(hibernate_restore_image)
        mv      s0, a0
        mv      s1, a1
        mv      s2, a2
@@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image)
        REG_L   a1, relocated_restore_code
 
        jr      a1
-END(hibernate_restore_image)
+SYM_FUNC_END(hibernate_restore_image)
 
 /*
  * The below code will be executed from a 'safe' page.
@@ -58,7 +58,7 @@ END(hibernate_restore_image)
  * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume()
  * to restore the CPU context.
  */
-ENTRY(hibernate_core_restore_code)
+SYM_FUNC_START(hibernate_core_restore_code)
        /* switch to temp page table. */
        csrw satp, s1
        sfence.vma
@@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code)
        bnez    s4, .Lcopy
 
        jr      s2
-END(hibernate_core_restore_code)
+SYM_FUNC_END(hibernate_core_restore_code)
index 9cc0a76692715ea6ff2ec56630f7b60ed0a37f92..2f31a9caa11338aa0dc1f73f91525fda6d3550cb 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
+#include <linux/scs.h>
 #include <linux/seq_file.h>
 #include <asm/sbi.h>
 #include <asm/smp.h>
@@ -34,6 +35,24 @@ EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode);
 #ifdef CONFIG_IRQ_STACKS
 #include <asm/irq_stack.h>
 
+DECLARE_PER_CPU(ulong *, irq_shadow_call_stack_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(ulong *, irq_shadow_call_stack_ptr);
+#endif
+
+static void init_irq_scs(void)
+{
+       int cpu;
+
+       if (!scs_is_enabled())
+               return;
+
+       for_each_possible_cpu(cpu)
+               per_cpu(irq_shadow_call_stack_ptr, cpu) =
+                       scs_alloc(cpu_to_node(cpu));
+}
+
 DEFINE_PER_CPU(ulong *, irq_stack_ptr);
 
 #ifdef CONFIG_VMAP_STACK
@@ -60,41 +79,23 @@ static void init_irq_stacks(void)
 }
 #endif /* CONFIG_VMAP_STACK */
 
-#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+static void ___do_softirq(struct pt_regs *regs)
+{
+       __do_softirq();
+}
+
 void do_softirq_own_stack(void)
 {
-#ifdef CONFIG_IRQ_STACKS
-       if (on_thread_stack()) {
-               ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
-                                       + IRQ_STACK_SIZE/sizeof(ulong);
-               __asm__ __volatile(
-               "addi   sp, sp, -"RISCV_SZPTR  "\n"
-               REG_S"  ra, (sp)                \n"
-               "addi   sp, sp, -"RISCV_SZPTR  "\n"
-               REG_S"  s0, (sp)                \n"
-               "addi   s0, sp, 2*"RISCV_SZPTR "\n"
-               "move   sp, %[sp]               \n"
-               "call   __do_softirq            \n"
-               "addi   sp, s0, -2*"RISCV_SZPTR"\n"
-               REG_L"  s0, (sp)                \n"
-               "addi   sp, sp, "RISCV_SZPTR   "\n"
-               REG_L"  ra, (sp)                \n"
-               "addi   sp, sp, "RISCV_SZPTR   "\n"
-               :
-               : [sp] "r" (sp)
-               : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-#ifndef CONFIG_FRAME_POINTER
-                 "s0",
-#endif
-                 "memory");
-       } else
-#endif
+       if (on_thread_stack())
+               call_on_irq_stack(NULL, ___do_softirq);
+       else
                __do_softirq();
 }
 #endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */
 
 #else
+static void init_irq_scs(void) {}
 static void init_irq_stacks(void) {}
 #endif /* CONFIG_IRQ_STACKS */
 
@@ -106,6 +107,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 
 void __init init_IRQ(void)
 {
+       init_irq_scs();
        init_irq_stacks();
        irqchip_init();
        if (!handle_arch_irq)
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
new file mode 100644 (file)
index 0000000..6afe80c
--- /dev/null
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/simd.h>
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+#include <asm/asm-prototypes.h>
+#endif
+
+static inline void riscv_v_flags_set(u32 flags)
+{
+       WRITE_ONCE(current->thread.riscv_v_flags, flags);
+}
+
+static inline void riscv_v_start(u32 flags)
+{
+       int orig;
+
+       orig = riscv_v_flags();
+       BUG_ON((orig & flags) != 0);
+       riscv_v_flags_set(orig | flags);
+       barrier();
+}
+
+static inline void riscv_v_stop(u32 flags)
+{
+       int orig;
+
+       barrier();
+       orig = riscv_v_flags();
+       BUG_ON((orig & flags) == 0);
+       riscv_v_flags_set(orig & ~flags);
+}
+
+/*
+ * Claim ownership of the CPU vector context for use by the calling context.
+ *
+ * The caller may freely manipulate the vector context metadata until
+ * put_cpu_vector_context() is called.
+ */
+void get_cpu_vector_context(void)
+{
+       /*
+        * Disable softirqs so that softirqs cannot nest get_cpu_vector_context()
+        * while the kernel is actively using Vector.
+        */
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_disable();
+       else
+               preempt_disable();
+
+       riscv_v_start(RISCV_KERNEL_MODE_V);
+}
+
+/*
+ * Release the CPU vector context.
+ *
+ * Must be called from a context in which get_cpu_vector_context() was
+ * previously called, with no call to put_cpu_vector_context() in the
+ * meantime.
+ */
+void put_cpu_vector_context(void)
+{
+       riscv_v_stop(RISCV_KERNEL_MODE_V);
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_bh_enable();
+       else
+               preempt_enable();
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static __always_inline u32 *riscv_v_flags_ptr(void)
+{
+       return &current->thread.riscv_v_flags;
+}
+
+static inline void riscv_preempt_v_set_dirty(void)
+{
+       *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+       *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_v_ctx_depth_inc(void)
+{
+       *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline void riscv_v_ctx_depth_dec(void)
+{
+       *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline u32 riscv_v_ctx_get_depth(void)
+{
+       return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
+}
+
+static int riscv_v_stop_kernel_context(void)
+{
+       if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current))
+               return 1;
+
+       riscv_preempt_v_clear_dirty(current);
+       riscv_v_stop(RISCV_PREEMPT_V);
+       return 0;
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+       struct __riscv_v_ext_state *kvstate, *uvstate;
+
+       kvstate = &current->thread.kernel_vstate;
+       if (!kvstate->datap)
+               return -ENOENT;
+
+       if (riscv_preempt_v_started(current)) {
+               WARN_ON(riscv_v_ctx_get_depth() == 0);
+               *is_nested = true;
+               get_cpu_vector_context();
+               if (riscv_preempt_v_dirty(current)) {
+                       __riscv_v_vstate_save(kvstate, kvstate->datap);
+                       riscv_preempt_v_clear_dirty(current);
+               }
+               riscv_preempt_v_set_restore(current);
+               return 0;
+       }
+
+       /* Transfer the ownership of V from user to kernel, then save */
+       riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
+       if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+               uvstate = &current->thread.vstate;
+               __riscv_v_vstate_save(uvstate, uvstate->datap);
+       }
+       riscv_preempt_v_clear_dirty(current);
+       return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+       int depth;
+
+       if (!riscv_preempt_v_started(current))
+               return;
+
+       depth = riscv_v_ctx_get_depth();
+       if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
+               riscv_preempt_v_set_dirty();
+
+       riscv_v_ctx_depth_inc();
+}
+
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+       struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+       u32 depth;
+
+       WARN_ON(!irqs_disabled());
+
+       if (!riscv_preempt_v_started(current))
+               return;
+
+       riscv_v_ctx_depth_dec();
+       depth = riscv_v_ctx_get_depth();
+       if (depth == 0) {
+               if (riscv_preempt_v_restore(current)) {
+                       __riscv_v_vstate_restore(vstate, vstate->datap);
+                       __riscv_v_vstate_clean(regs);
+                       riscv_preempt_v_reset_flags();
+               }
+       }
+}
+#else
+#define riscv_v_start_kernel_context(nested)   (-ENOENT)
+#define riscv_v_stop_kernel_context()          (-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+/*
+ * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the vector registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_vector_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the vector registers until kernel_vector_end() is
+ * called.
+ */
+void kernel_vector_begin(void)
+{
+       bool nested = false;
+
+       if (WARN_ON(!has_vector()))
+               return;
+
+       BUG_ON(!may_use_simd());
+
+       if (riscv_v_start_kernel_context(&nested)) {
+               get_cpu_vector_context();
+               riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+       }
+
+       if (!nested)
+               riscv_v_vstate_set_restore(current, task_pt_regs(current));
+
+       riscv_v_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_begin);
+
+/*
+ * kernel_vector_end(): give the CPU vector registers back to the current task
+ *
+ * Must be called from a context in which kernel_vector_begin() was previously
+ * called, with no call to kernel_vector_end() in the meantime.
+ *
+ * The caller must not use the vector registers after this function is called,
+ * unless kernel_vector_begin() is called again in the meantime.
+ */
+void kernel_vector_end(void)
+{
+       if (WARN_ON(!has_vector()))
+               return;
+
+       riscv_v_disable();
+
+       if (riscv_v_stop_kernel_context())
+               put_cpu_vector_context();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_end);
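A minimal usage sketch for the API introduced by this file: gate on has_vector() and may_use_simd(), then bracket any in-kernel V usage with kernel_vector_begin()/kernel_vector_end(). kernel_xor_words() is an illustrative caller, and the plain loop stands in for real vector instructions.

#include <asm/simd.h>
#include <asm/vector.h>

static void kernel_xor_words(unsigned long *dst, const unsigned long *src,
			     unsigned int words)
{
	unsigned int i;

	if (has_vector() && may_use_simd()) {
		kernel_vector_begin();
		for (i = 0; i < words; i++)	/* a real user would issue V instructions here */
			dst[i] ^= src[i];
		kernel_vector_end();
		return;
	}

	for (i = 0; i < words; i++)		/* scalar fallback */
		dst[i] ^= src[i];
}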
index 059c5e216ae75958750474cc4e49ecc886213b73..de0a4b35d01efcc6b904fadb29eb8ae25da3df7c 100644 (file)
@@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate)
         * s1: (const) Phys address to jump to after relocation
         * s2: (const) Phys address of the FDT image
         * s3: (const) The hartid of the current hart
-        * s4: Pointer to the destination address for the relocation
-        * s5: (const) Number of words per page
-        * s6: (const) 1, used for subtraction
-        * s7: (const) kernel_map.va_pa_offset, used when switching MMU off
-        * s8: (const) Physical address of the main loop
-        * s9: (debug) indirection page counter
-        * s10: (debug) entry counter
-        * s11: (debug) copied words counter
+        * s4: (const) kernel_map.va_pa_offset, used when switching MMU off
+        * s5: Pointer to the destination address for the relocation
+        * s6: (const) Physical address of the main loop
         */
        mv      s0, a0
        mv      s1, a1
        mv      s2, a2
        mv      s3, a3
-       mv      s4, zero
-       li      s5, (PAGE_SIZE / RISCV_SZPTR)
-       li      s6, 1
-       mv      s7, a4
-       mv      s8, zero
-       mv      s9, zero
-       mv      s10, zero
-       mv      s11, zero
+       mv      s4, a4
+       mv      s5, zero
+       mv      s6, zero
 
        /* Disable / cleanup interrupts */
        csrw    CSR_SIE, zero
@@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate)
         * the start of the loop below so that we jump there in
         * any case.
         */
-       la      s8, 1f
-       sub     s8, s8, s7
-       csrw    CSR_STVEC, s8
+       la      s6, 1f
+       sub     s6, s6, s4
+       csrw    CSR_STVEC, s6
+
+       /*
+        * With the C extension, the code above adds up to 42 bytes and the next
+        * .align directive would pad it with zeros up to 44 bytes.
+        * So manually put a nop here to avoid zero padding.
+        */
+       nop
 
        /* Process entries in a loop */
 .align 2
 1:
-       addi    s10, s10, 1
        REG_L   t0, 0(s0)               /* t0 = *image->entry */
        addi    s0, s0, RISCV_SZPTR     /* image->entry++ */
 
        /* IND_DESTINATION entry ? -> save destination address */
        andi    t1, t0, 0x1
        beqz    t1, 2f
-       andi    s4, t0, ~0x1
+       andi    s5, t0, ~0x1
        j       1b
 
 2:
@@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate)
        andi    t1, t0, 0x2
        beqz    t1, 2f
        andi    s0, t0, ~0x2
-       addi    s9, s9, 1
        csrw    CSR_SATP, zero
-       jalr    zero, s8, 0
+       jr      s6
 
 2:
        /* IND_DONE entry ? -> jump to done label */
@@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate)
        andi    t1, t0, 0x8
        beqz    t1, 1b          /* Unknown entry type, ignore it */
        andi    t0, t0, ~0x8
-       mv      t3, s5          /* i = num words per page */
+       li      t3, (PAGE_SIZE / RISCV_SZPTR)   /* i = num words per page */
 3:     /* copy loop */
        REG_L   t1, (t0)        /* t1 = *src_ptr */
-       REG_S   t1, (s4)        /* *dst_ptr = *src_ptr */
+       REG_S   t1, (s5)        /* *dst_ptr = *src_ptr */
        addi    t0, t0, RISCV_SZPTR /* stc_ptr++ */
-       addi    s4, s4, RISCV_SZPTR /* dst_ptr++ */
-       sub     t3, t3, s6      /* i-- */
-       addi    s11, s11, 1     /* c++ */
+       addi    s5, s5, RISCV_SZPTR /* dst_ptr++ */
+       addi    t3, t3, -0x1    /* i-- */
        beqz    t3, 1b          /* copy done ? */
        j       3b
 
@@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate)
         */
        fence.i
 
-       jalr    zero, a2, 0
+       jr      a2
 
 SYM_CODE_END(riscv_kexec_relocate)
 riscv_kexec_relocate_end:
index 669b8697aa38a5ed792ead53d73ce3057c62ab51..79dc81223238232a453aac10d6a5b87a06df0725 100644 (file)
@@ -3,12 +3,12 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/csr.h>
 #include <asm/unistd.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
-#include <asm-generic/export.h>
 #include <asm/ftrace.h>
 
        .text
@@ -82,7 +82,7 @@
        .endm
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
        SAVE_ABI
 
        addi    a0, t0, -FENTRY_RA_OFFSET
@@ -91,8 +91,7 @@ ENTRY(ftrace_caller)
        mv      a1, ra
        mv      a3, sp
 
-ftrace_call:
-       .global ftrace_call
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -102,16 +101,15 @@ ftrace_call:
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        mv      a2, s0
 #endif
-ftrace_graph_call:
-       .global ftrace_graph_call
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
        call    ftrace_stub
 #endif
        RESTORE_ABI
        jr t0
-ENDPROC(ftrace_caller)
+SYM_FUNC_END(ftrace_caller)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-ENTRY(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_regs_caller)
        SAVE_ALL
 
        addi    a0, t0, -FENTRY_RA_OFFSET
@@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller)
        mv      a1, ra
        mv      a3, sp
 
-ftrace_regs_call:
-       .global ftrace_regs_call
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -131,12 +128,11 @@ ftrace_regs_call:
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        mv      a2, s0
 #endif
-ftrace_graph_regs_call:
-       .global ftrace_graph_regs_call
+SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
        call    ftrace_stub
 #endif
 
        RESTORE_ALL
        jr t0
-ENDPROC(ftrace_regs_caller)
+SYM_FUNC_END(ftrace_regs_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
index 8818a8fa9ff3af7b3427e2456f1650c5c6cb7168..d7ec69ac6910c6ea0dadcbf7757511cc2f5f2914 100644 (file)
@@ -4,12 +4,12 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/cfi_types.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/csr.h>
 #include <asm/unistd.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
-#include <asm-generic/export.h>
 #include <asm/ftrace.h>
 
        .text
@@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph)
        ret
 SYM_FUNC_END(ftrace_stub_graph)
 
-ENTRY(return_to_handler)
+SYM_FUNC_START(return_to_handler)
 /*
  * On implementing the frame point test, the ideal way is to compare the
  * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return.
@@ -76,25 +76,25 @@ ENTRY(return_to_handler)
        mv      a2, a0
        RESTORE_RET_ABI_STATE
        jalr    a2
-ENDPROC(return_to_handler)
+SYM_FUNC_END(return_to_handler)
 #endif
 
 #ifndef CONFIG_DYNAMIC_FTRACE
-ENTRY(MCOUNT_NAME)
+SYM_FUNC_START(MCOUNT_NAME)
        la      t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        la      t0, ftrace_graph_return
        REG_L   t1, 0(t0)
-       bne     t1, t4, do_ftrace_graph_caller
+       bne     t1, t4, .Ldo_ftrace_graph_caller
 
        la      t3, ftrace_graph_entry
        REG_L   t2, 0(t3)
        la      t6, ftrace_graph_entry_stub
-       bne     t2, t6, do_ftrace_graph_caller
+       bne     t2, t6, .Ldo_ftrace_graph_caller
 #endif
        la      t3, ftrace_trace_function
        REG_L   t5, 0(t3)
-       bne     t5, t4, do_trace
+       bne     t5, t4, .Ldo_trace
        ret
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME)
  * A pseudo representation for the function graph tracer:
  * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller)
  */
-do_ftrace_graph_caller:
+.Ldo_ftrace_graph_caller:
        addi    a0, s0, -SZREG
        mv      a1, ra
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
@@ -118,7 +118,7 @@ do_ftrace_graph_caller:
  * A pseudo representation for the function tracer:
  * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller)
  */
-do_trace:
+.Ldo_trace:
        REG_L   a1, -SZREG(s0)
        mv      a0, ra
 
@@ -126,6 +126,6 @@ do_trace:
        jalr    t5
        RESTORE_ABI_STATE
        ret
-ENDPROC(MCOUNT_NAME)
+SYM_FUNC_END(MCOUNT_NAME)
 #endif
 EXPORT_SYMBOL(MCOUNT_NAME)
index df4f6fec5d1740c2fc3724437aea6712e927180c..b5cc3659b8034408fcd398159d5b4616aa5a9607 100644 (file)
@@ -7,6 +7,9 @@
 #include <linux/elf.h>
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 
+struct used_bucket {
+       struct list_head head;
+       struct hlist_head *bucket;
+};
+
+struct relocation_head {
+       struct hlist_node node;
+       struct list_head *rel_entry;
+       void *location;
+};
+
+struct relocation_entry {
+       struct list_head head;
+       Elf_Addr value;
+       unsigned int type;
+};
+
+struct relocation_handlers {
+       int (*reloc_handler)(struct module *me, void *location, Elf_Addr v);
+       int (*accumulate_handler)(struct module *me, void *location,
+                                 long buffer);
+};
+
 /*
  * The auipc+jalr instruction pair can reach any PC-relative offset
  * in the range [-2^31 - 2^11, 2^31 - 2^11)
@@ -27,68 +53,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
 #endif
 }
 
-static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
+static int riscv_insn_rmw(void *location, u32 keep, u32 set)
+{
+       __le16 *parcel = location;
+       u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
+
+       insn &= keep;
+       insn |= set;
+
+       parcel[0] = cpu_to_le16(insn);
+       parcel[1] = cpu_to_le16(insn >> 16);
+       return 0;
+}
+
+static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set)
+{
+       __le16 *parcel = location;
+       u16 insn = le16_to_cpu(*parcel);
+
+       insn &= keep;
+       insn |= set;
+
+       *parcel = cpu_to_le16(insn);
+       return 0;
+}
+
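The helpers above deliberately go through two __le16 parcels instead of a single u32 store: with the C extension enabled, a 32-bit instruction may start on a 2-byte boundary, and instruction parcels are little-endian in memory. A standalone user-space sketch of the same read-modify-write idea (illustrative only; the kernel helper additionally converts with le16_to_cpu()/cpu_to_le16()):

#include <stdint.h>
#include <string.h>

/* Keep the bits selected by 'keep', OR in 'set', at a possibly 2-byte-aligned
 * instruction address. memcpy() avoids a misaligned 32-bit access. */
static void insn_rmw(void *loc, uint32_t keep, uint32_t set)
{
	uint16_t parcel[2];
	uint32_t insn;

	memcpy(parcel, loc, sizeof(parcel));
	insn = (uint32_t)parcel[0] | (uint32_t)parcel[1] << 16;
	insn = (insn & keep) | set;
	parcel[0] = (uint16_t)insn;
	parcel[1] = (uint16_t)(insn >> 16);
	memcpy(loc, parcel, sizeof(parcel));
}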
+static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v)
 {
        if (v != (u32)v) {
                pr_err("%s: value %016llx out of range for 32-bit field\n",
                       me->name, (long long)v);
                return -EINVAL;
        }
-       *location = v;
+       *(u32 *)location = v;
        return 0;
 }
 
-static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v)
 {
        *(u64 *)location = v;
        return 0;
 }
 
-static int apply_r_riscv_branch_rela(struct module *me, u32 *location,
+static int apply_r_riscv_branch_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 imm12 = (offset & 0x1000) << (31 - 12);
        u32 imm11 = (offset & 0x800) >> (11 - 7);
        u32 imm10_5 = (offset & 0x7e0) << (30 - 10);
        u32 imm4_1 = (offset & 0x1e) << (11 - 4);
 
-       *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1;
-       return 0;
+       return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1);
 }
 
-static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
+static int apply_r_riscv_jal_rela(struct module *me, void *location,
                                  Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 imm20 = (offset & 0x100000) << (31 - 20);
        u32 imm19_12 = (offset & 0xff000);
        u32 imm11 = (offset & 0x800) << (20 - 11);
        u32 imm10_1 = (offset & 0x7fe) << (30 - 10);
 
-       *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1);
 }
 
-static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location,
+static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location,
                                         Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u16 imm8 = (offset & 0x100) << (12 - 8);
        u16 imm7_6 = (offset & 0xc0) >> (6 - 5);
        u16 imm5 = (offset & 0x20) >> (5 - 2);
        u16 imm4_3 = (offset & 0x18) << (12 - 5);
        u16 imm2_1 = (offset & 0x6) << (12 - 10);
 
-       *(u16 *)location = (*(u16 *)location & 0xe383) |
-                   imm8 | imm7_6 | imm5 | imm4_3 | imm2_1;
-       return 0;
+       return riscv_insn_rvc_rmw(location, 0xe383,
+                       imm8 | imm7_6 | imm5 | imm4_3 | imm2_1);
 }
 
-static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
+static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u16 imm11 = (offset & 0x800) << (12 - 11);
        u16 imm10 = (offset & 0x400) >> (10 - 8);
        u16 imm9_8 = (offset & 0x300) << (12 - 11);
@@ -98,16 +146,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
        u16 imm4 = (offset & 0x10) << (12 - 5);
        u16 imm3_1 = (offset & 0xe) << (12 - 10);
 
-       *(u16 *)location = (*(u16 *)location & 0xe003) |
-                   imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1;
-       return 0;
+       return riscv_insn_rvc_rmw(location, 0xe003,
+                       imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1);
 }
 
-static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location,
                                         Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
-       s32 hi20;
+       ptrdiff_t offset = (void *)v - location;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
                pr_err(
@@ -116,23 +162,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = (offset + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000);
 }
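A short worked example of the '+ 0x800' rounding used here (and again in the CALL/CALL_PLT handlers below): the upper immediate is rounded so that the sign-extended 12-bit low part filled into the companion instruction lands exactly on the target. The value is illustrative.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int32_t offset = 0x12345abc;			/* illustrative PC-relative offset */
	int32_t hi20 = (offset + 0x800) & 0xfffff000;	/* 0x12346000 */
	int32_t lo12 = offset - hi20;			/* -0x544, encoded as 0xabc in 12 bits */

	assert(hi20 + lo12 == offset);	/* auipc adds hi20, the lo12 instruction adds lo12 */
	return 0;
}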
 
-static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location,
                                           Elf_Addr v)
 {
        /*
         * v is the lo12 value to fill. It is calculated before calling this
         * handler.
         */
-       *location = (*location & 0xfffff) | ((v & 0xfff) << 20);
-       return 0;
+       return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20);
 }
 
-static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location,
                                           Elf_Addr v)
 {
        /*
@@ -142,15 +185,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location,
        u32 imm11_5 = (v & 0xfe0) << (31 - 11);
        u32 imm4_0 = (v & 0x1f) << (11 - 4);
 
-       *location = (*location & 0x1fff07f) | imm11_5 | imm4_0;
-       return 0;
+       return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0);
 }
 
-static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_hi20_rela(struct module *me, void *location,
                                   Elf_Addr v)
 {
-       s32 hi20;
-
        if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) {
                pr_err(
                  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
@@ -158,22 +198,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = ((s32)v + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000);
 }
 
-static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location,
+static int apply_r_riscv_lo12_i_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
        /* Skip medlow checking because of filtering by HI20 already */
        s32 hi20 = ((s32)v + 0x800) & 0xfffff000;
        s32 lo12 = ((s32)v - hi20);
-       *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20);
-       return 0;
+
+       return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20);
 }
 
-static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
+static int apply_r_riscv_lo12_s_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
        /* Skip medlow checking because of filtering by HI20 already */
@@ -181,20 +219,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
        s32 lo12 = ((s32)v - hi20);
        u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11);
        u32 imm4_0 = (lo12 & 0x1f) << (11 - 4);
-       *location = (*location & 0x1fff07f) | imm11_5 | imm4_0;
-       return 0;
+
+       return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0);
 }
 
-static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_got_hi20_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
-       s32 hi20;
+       ptrdiff_t offset = (void *)v - location;
 
        /* Always emit the got entry */
        if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
-               offset = module_emit_got_entry(me, v);
-               offset = (void *)offset - (void *)location;
+               offset = (void *)module_emit_got_entry(me, v) - location;
        } else {
                pr_err(
                  "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n",
@@ -202,22 +238,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = (offset + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000);
 }
 
-static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
+static int apply_r_riscv_call_plt_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 hi20, lo12;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
                /* Only emit the plt entry if offset over 32-bit range */
                if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
-                       offset = module_emit_plt_entry(me, v);
-                       offset = (void *)offset - (void *)location;
+                       offset = (void *)module_emit_plt_entry(me, v) - location;
                } else {
                        pr_err(
                          "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
@@ -228,15 +261,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 
        hi20 = (offset + 0x800) & 0xfffff000;
        lo12 = (offset - hi20) & 0xfff;
-       *location = (*location & 0xfff) | hi20;
-       *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20);
-       return 0;
+       riscv_insn_rmw(location, 0xfff, hi20);
+       return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20);
 }
 
-static int apply_r_riscv_call_rela(struct module *me, u32 *location,
+static int apply_r_riscv_call_rela(struct module *me, void *location,
                                   Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 hi20, lo12;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
@@ -248,18 +280,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
 
        hi20 = (offset + 0x800) & 0xfffff000;
        lo12 = (offset - hi20) & 0xfff;
-       *location = (*location & 0xfff) | hi20;
-       *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20);
-       return 0;
+       riscv_insn_rmw(location, 0xfff, hi20);
+       return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20);
 }
 
-static int apply_r_riscv_relax_rela(struct module *me, u32 *location,
+static int apply_r_riscv_relax_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        return 0;
 }
 
-static int apply_r_riscv_align_rela(struct module *me, u32 *location,
+static int apply_r_riscv_align_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        pr_err(
@@ -268,91 +299,509 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location,
        return -EINVAL;
 }
 
-static int apply_r_riscv_add16_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location += (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_add16_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u16 *)location += (u16)v;
        return 0;
 }
 
-static int apply_r_riscv_add32_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add32_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u32 *)location += (u32)v;
        return 0;
 }
 
-static int apply_r_riscv_add64_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add64_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u64 *)location += (u64)v;
        return 0;
 }
 
-static int apply_r_riscv_sub16_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location -= (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_sub16_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u16 *)location -= (u16)v;
        return 0;
 }
 
-static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub32_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u32 *)location -= (u32)v;
        return 0;
 }
 
-static int apply_r_riscv_sub64_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub64_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u64 *)location -= (u64)v;
        return 0;
 }
 
-static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
-                               Elf_Addr v) = {
-       [R_RISCV_32]                    = apply_r_riscv_32_rela,
-       [R_RISCV_64]                    = apply_r_riscv_64_rela,
-       [R_RISCV_BRANCH]                = apply_r_riscv_branch_rela,
-       [R_RISCV_JAL]                   = apply_r_riscv_jal_rela,
-       [R_RISCV_RVC_BRANCH]            = apply_r_riscv_rvc_branch_rela,
-       [R_RISCV_RVC_JUMP]              = apply_r_riscv_rvc_jump_rela,
-       [R_RISCV_PCREL_HI20]            = apply_r_riscv_pcrel_hi20_rela,
-       [R_RISCV_PCREL_LO12_I]          = apply_r_riscv_pcrel_lo12_i_rela,
-       [R_RISCV_PCREL_LO12_S]          = apply_r_riscv_pcrel_lo12_s_rela,
-       [R_RISCV_HI20]                  = apply_r_riscv_hi20_rela,
-       [R_RISCV_LO12_I]                = apply_r_riscv_lo12_i_rela,
-       [R_RISCV_LO12_S]                = apply_r_riscv_lo12_s_rela,
-       [R_RISCV_GOT_HI20]              = apply_r_riscv_got_hi20_rela,
-       [R_RISCV_CALL_PLT]              = apply_r_riscv_call_plt_rela,
-       [R_RISCV_CALL]                  = apply_r_riscv_call_rela,
-       [R_RISCV_RELAX]                 = apply_r_riscv_relax_rela,
-       [R_RISCV_ALIGN]                 = apply_r_riscv_align_rela,
-       [R_RISCV_ADD16]                 = apply_r_riscv_add16_rela,
-       [R_RISCV_ADD32]                 = apply_r_riscv_add32_rela,
-       [R_RISCV_ADD64]                 = apply_r_riscv_add64_rela,
-       [R_RISCV_SUB16]                 = apply_r_riscv_sub16_rela,
-       [R_RISCV_SUB32]                 = apply_r_riscv_sub32_rela,
-       [R_RISCV_SUB64]                 = apply_r_riscv_sub64_rela,
+static int dynamic_linking_not_supported(struct module *me, void *location,
+                                        Elf_Addr v)
+{
+       pr_err("%s: Dynamic linking not supported in kernel modules PC = %p\n",
+              me->name, location);
+       return -EINVAL;
+}
+
+static int tls_not_supported(struct module *me, void *location, Elf_Addr v)
+{
+       pr_err("%s: Thread local storage not supported in kernel modules PC = %p\n",
+              me->name, location);
+       return -EINVAL;
+}
+
+static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v)
+{
+       u8 *byte = location;
+       u8 value = v;
+
+       *byte = (*byte - (value & 0x3f)) & 0x3f;
+       return 0;
+}
+
+static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v)
+{
+       u8 *byte = location;
+       u8 value = v;
+
+       *byte = (*byte & 0xc0) | (value & 0x3f);
+       return 0;
+}
+
+static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location = (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_set16_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       *(u16 *)location = (u16)v;
+       return 0;
+}
+
+static int apply_r_riscv_set32_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       *(u32 *)location = (u32)v;
+       return 0;
+}
+
+static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location,
+                                      Elf_Addr v)
+{
+       *(u32 *)location = v - (uintptr_t)location;
+       return 0;
+}
+
+static int apply_r_riscv_plt32_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       ptrdiff_t offset = (void *)v - location;
+
+       if (!riscv_insn_valid_32bit_offset(offset)) {
+               /* Only emit the plt entry if offset over 32-bit range */
+               if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
+                       offset = (void *)module_emit_plt_entry(me, v) - location;
+               } else {
+                       pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+                              me->name, (long long)v, location);
+                       return -EINVAL;
+               }
+       }
+
+       *(u32 *)location = (u32)offset;
+       return 0;
+}
+
+static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v)
+{
+       *(long *)location = v;
+       return 0;
+}
+
+static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v)
+{
+       *(long *)location -= v;
+       return 0;
+}
+
+static int apply_6_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       u8 *byte = location;
+       u8 value = buffer;
+
+       if (buffer > 0x3f) {
+               pr_err("%s: value %ld out of range for 6-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+
+       *byte = (*byte & 0xc0) | (value & 0x3f);
+       return 0;
+}
+
+static int apply_8_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U8_MAX) {
+               pr_err("%s: value %ld out of range for 8-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u8 *)location = (u8)buffer;
+       return 0;
+}
+
+static int apply_16_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U16_MAX) {
+               pr_err("%s: value %ld out of range for 16-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u16 *)location = (u16)buffer;
+       return 0;
+}
+
+static int apply_32_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U32_MAX) {
+               pr_err("%s: value %ld out of range for 32-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u32 *)location = (u32)buffer;
+       return 0;
+}
+
+static int apply_64_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       *(u64 *)location = (u64)buffer;
+       return 0;
+}
+
+static int apply_uleb128_accumulation(struct module *me, void *location, long buffer)
+{
+       /*
+        * ULEB128 is a variable length encoding. Encode the buffer into
+        * the ULEB128 data format.
+        */
+       u8 *p = location;
+
+       while (buffer != 0) {
+               u8 value = buffer & 0x7f;
+
+               buffer >>= 7;
+               value |= (!!buffer) << 7;
+
+               *p++ = value;
+       }
+       return 0;
+}
+
+/*
+ * Relocations defined in the riscv-elf-psabi-doc.
+ * This handles static linking only.
+ */
+static const struct relocation_handlers reloc_handlers[] = {
+       [R_RISCV_32]            = { .reloc_handler = apply_r_riscv_32_rela },
+       [R_RISCV_64]            = { .reloc_handler = apply_r_riscv_64_rela },
+       [R_RISCV_RELATIVE]      = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_COPY]          = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_JUMP_SLOT]     = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPMOD32]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPMOD64]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPREL32]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPREL64]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_TPREL32]   = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_TPREL64]   = { .reloc_handler = dynamic_linking_not_supported },
+       /* 12-15 undefined */
+       [R_RISCV_BRANCH]        = { .reloc_handler = apply_r_riscv_branch_rela },
+       [R_RISCV_JAL]           = { .reloc_handler = apply_r_riscv_jal_rela },
+       [R_RISCV_CALL]          = { .reloc_handler = apply_r_riscv_call_rela },
+       [R_RISCV_CALL_PLT]      = { .reloc_handler = apply_r_riscv_call_plt_rela },
+       [R_RISCV_GOT_HI20]      = { .reloc_handler = apply_r_riscv_got_hi20_rela },
+       [R_RISCV_TLS_GOT_HI20]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TLS_GD_HI20]   = { .reloc_handler = tls_not_supported },
+       [R_RISCV_PCREL_HI20]    = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela },
+       [R_RISCV_PCREL_LO12_I]  = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela },
+       [R_RISCV_PCREL_LO12_S]  = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela },
+       [R_RISCV_HI20]          = { .reloc_handler = apply_r_riscv_hi20_rela },
+       [R_RISCV_LO12_I]        = { .reloc_handler = apply_r_riscv_lo12_i_rela },
+       [R_RISCV_LO12_S]        = { .reloc_handler = apply_r_riscv_lo12_s_rela },
+       [R_RISCV_TPREL_HI20]    = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_LO12_I]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_LO12_S]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_ADD]     = { .reloc_handler = tls_not_supported },
+       [R_RISCV_ADD8]          = { .reloc_handler = apply_r_riscv_add8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_ADD16]         = { .reloc_handler = apply_r_riscv_add16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_ADD32]         = { .reloc_handler = apply_r_riscv_add32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_ADD64]         = { .reloc_handler = apply_r_riscv_add64_rela,
+                                   .accumulate_handler = apply_64_bit_accumulation },
+       [R_RISCV_SUB8]          = { .reloc_handler = apply_r_riscv_sub8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_SUB16]         = { .reloc_handler = apply_r_riscv_sub16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_SUB32]         = { .reloc_handler = apply_r_riscv_sub32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_SUB64]         = { .reloc_handler = apply_r_riscv_sub64_rela,
+                                   .accumulate_handler = apply_64_bit_accumulation },
+       /* 41-42 reserved for future standard use */
+       [R_RISCV_ALIGN]         = { .reloc_handler = apply_r_riscv_align_rela },
+       [R_RISCV_RVC_BRANCH]    = { .reloc_handler = apply_r_riscv_rvc_branch_rela },
+       [R_RISCV_RVC_JUMP]      = { .reloc_handler = apply_r_riscv_rvc_jump_rela },
+       /* 46-50 reserved for future standard use */
+       [R_RISCV_RELAX]         = { .reloc_handler = apply_r_riscv_relax_rela },
+       [R_RISCV_SUB6]          = { .reloc_handler = apply_r_riscv_sub6_rela,
+                                   .accumulate_handler = apply_6_bit_accumulation },
+       [R_RISCV_SET6]          = { .reloc_handler = apply_r_riscv_set6_rela,
+                                   .accumulate_handler = apply_6_bit_accumulation },
+       [R_RISCV_SET8]          = { .reloc_handler = apply_r_riscv_set8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_SET16]         = { .reloc_handler = apply_r_riscv_set16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_SET32]         = { .reloc_handler = apply_r_riscv_set32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_32_PCREL]      = { .reloc_handler = apply_r_riscv_32_pcrel_rela },
+       [R_RISCV_IRELATIVE]     = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_PLT32]         = { .reloc_handler = apply_r_riscv_plt32_rela },
+       [R_RISCV_SET_ULEB128]   = { .reloc_handler = apply_r_riscv_set_uleb128,
+                                   .accumulate_handler = apply_uleb128_accumulation },
+       [R_RISCV_SUB_ULEB128]   = { .reloc_handler = apply_r_riscv_sub_uleb128,
+                                   .accumulate_handler = apply_uleb128_accumulation },
+       /* 62-191 reserved for future standard use */
+       /* 192-255 nonstandard ABI extensions  */
 };
 
+static void
+process_accumulated_relocations(struct module *me,
+                               struct hlist_head **relocation_hashtable,
+                               struct list_head *used_buckets_list)
+{
+       /*
+        * Only ADD/SUB/SET/ULEB128 should end up here.
+        *
+        * Each bucket may have more than one relocation location. All
+        * relocations for a location are stored in a list in a bucket.
+        *
+        * Relocations are applied to a temp variable before being stored to the
+        * provided location to check for overflow. This also allows ULEB128 to
+        * properly decide how many entries are needed before storing to
+        * location. The final value is stored into location using the handler
+        * for the last relocation to an address.
+        *
+        * Three layers of indexing:
+        *      - Each of the buckets in use
+        *      - Groups of relocations in each bucket by location address
+        *      - Each relocation entry for a location address
+        */
+       struct used_bucket *bucket_iter;
+       struct used_bucket *bucket_iter_tmp;
+       struct relocation_head *rel_head_iter;
+       struct hlist_node *rel_head_iter_tmp;
+       struct relocation_entry *rel_entry_iter;
+       struct relocation_entry *rel_entry_iter_tmp;
+       int curr_type;
+       void *location;
+       long buffer;
+
+       list_for_each_entry_safe(bucket_iter, bucket_iter_tmp,
+                                used_buckets_list, head) {
+               hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp,
+                                         bucket_iter->bucket, node) {
+                       buffer = 0;
+                       location = rel_head_iter->location;
+                       list_for_each_entry_safe(rel_entry_iter,
+                                                rel_entry_iter_tmp,
+                                                rel_head_iter->rel_entry,
+                                                head) {
+                               curr_type = rel_entry_iter->type;
+                               reloc_handlers[curr_type].reloc_handler(
+                                       me, &buffer, rel_entry_iter->value);
+                               kfree(rel_entry_iter);
+                       }
+                       reloc_handlers[curr_type].accumulate_handler(
+                               me, location, buffer);
+                       kfree(rel_head_iter);
+               }
+               kfree(bucket_iter);
+       }
+
+       kfree(*relocation_hashtable);
+}
+
+static int add_relocation_to_accumulate(struct module *me, int type,
+                                       void *location,
+                                       unsigned int hashtable_bits, Elf_Addr v,
+                                       struct hlist_head *relocation_hashtable,
+                                       struct list_head *used_buckets_list)
+{
+       struct relocation_entry *entry;
+       struct relocation_head *rel_head;
+       struct hlist_head *current_head;
+       struct used_bucket *bucket;
+       unsigned long hash;
+       bool found = false;
+       struct relocation_head *rel_head_iter;
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+       if (!entry)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&entry->head);
+       entry->type = type;
+       entry->value = v;
+
+       hash = hash_min((uintptr_t)location, hashtable_bits);
+
+       current_head = &relocation_hashtable[hash];
+
+       /*
+        * Search for the relocation_head for the relocations that happen at the
+        * provided location
+        */
+       hlist_for_each_entry(rel_head_iter, current_head, node) {
+               if (rel_head_iter->location == location) {
+                       found = true;
+                       rel_head = rel_head_iter;
+                       break;
+               }
+       }
+
+       /*
+        * If there have not yet been any relocations at the provided location,
+        * create a relocation_head for that location and populate it with this
+        * relocation_entry.
+        */
+       if (!found) {
+               rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL);
+
+               if (!rel_head) {
+                       kfree(entry);
+                       return -ENOMEM;
+               }
+
+               rel_head->rel_entry =
+                       kmalloc(sizeof(struct list_head), GFP_KERNEL);
+
+               if (!rel_head->rel_entry) {
+                       kfree(entry);
+                       kfree(rel_head);
+                       return -ENOMEM;
+               }
+
+               INIT_LIST_HEAD(rel_head->rel_entry);
+               rel_head->location = location;
+               INIT_HLIST_NODE(&rel_head->node);
+               if (!current_head->first) {
+                       bucket =
+                               kmalloc(sizeof(struct used_bucket), GFP_KERNEL);
+
+                       if (!bucket) {
+                               kfree(entry);
+                               kfree(rel_head->rel_entry);
+                               kfree(rel_head);
+                               return -ENOMEM;
+                       }
+
+                       INIT_LIST_HEAD(&bucket->head);
+                       bucket->bucket = current_head;
+                       list_add(&bucket->head, used_buckets_list);
+               }
+               hlist_add_head(&rel_head->node, current_head);
+       }
+
+       /* Add relocation to head of discovered rel_head */
+       list_add_tail(&entry->head, rel_head->rel_entry);
+
+       return 0;
+}
+
+static unsigned int
+initialize_relocation_hashtable(unsigned int num_relocations,
+                               struct hlist_head **relocation_hashtable)
+{
+       /* Can safely assume that bits is not greater than sizeof(long) */
+       unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
+       /*
+        * When hashtable_size == 1, hashtable_bits == 0.
+        * This is valid because the hashing algorithm returns 0 in this case.
+        */
+       unsigned int hashtable_bits = ilog2(hashtable_size);
+
+       /*
+        * Double size of hashtable if num_relocations * 1.25 is greater than
+        * hashtable_size.
+        */
+       int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size));
+
+       hashtable_bits += should_double_size;
+
+       hashtable_size <<= should_double_size;
+
+       *relocation_hashtable = kmalloc_array(hashtable_size,
+                                             sizeof(**relocation_hashtable),
+                                             GFP_KERNEL);
+       if (!*relocation_hashtable)
+               return 0;
+
+       __hash_init(*relocation_hashtable, hashtable_size);
+
+       return hashtable_bits;
+}
+
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                       unsigned int symindex, unsigned int relsec,
                       struct module *me)
 {
        Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr;
-       int (*handler)(struct module *me, u32 *location, Elf_Addr v);
+       int (*handler)(struct module *me, void *location, Elf_Addr v);
        Elf_Sym *sym;
-       u32 *location;
+       void *location;
        unsigned int i, type;
+       unsigned int j_idx = 0;
        Elf_Addr v;
        int res;
+       unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
+       struct hlist_head *relocation_hashtable;
+       struct list_head used_buckets_list;
+       unsigned int hashtable_bits;
+
+       hashtable_bits = initialize_relocation_hashtable(num_relocations,
+                                                        &relocation_hashtable);
+
+       if (!relocation_hashtable)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&used_buckets_list);
 
        pr_debug("Applying relocate section %u to %u\n", relsec,
               sechdrs[relsec].sh_info);
 
-       for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+       for (i = 0; i < num_relocations; i++) {
                /* This is where to make the change */
                location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
                        + rel[i].r_offset;
@@ -370,8 +819,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 
                type = ELF_RISCV_R_TYPE(rel[i].r_info);
 
-               if (type < ARRAY_SIZE(reloc_handlers_rela))
-                       handler = reloc_handlers_rela[type];
+               if (type < ARRAY_SIZE(reloc_handlers))
+                       handler = reloc_handlers[type].reloc_handler;
                else
                        handler = NULL;
 
@@ -384,9 +833,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                v = sym->st_value + rel[i].r_addend;
 
                if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) {
-                       unsigned int j;
+                       unsigned int j = j_idx;
+                       bool found = false;
 
-                       for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
+                       do {
                                unsigned long hi20_loc =
                                        sechdrs[sechdrs[relsec].sh_info].sh_addr
                                        + rel[j].r_offset;
@@ -415,23 +865,42 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                                        hi20 = (offset + 0x800) & 0xfffff000;
                                        lo12 = offset - hi20;
                                        v = lo12;
+                                       found = true;
 
                                        break;
                                }
-                       }
-                       if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
+
+                               j++;
+                               if (j > sechdrs[relsec].sh_size / sizeof(*rel))
+                                       j = 0;
+
+                       } while (j_idx != j);
+
+                       if (!found) {
                                pr_err(
                                  "%s: Can not find HI20 relocation information\n",
                                  me->name);
                                return -EINVAL;
                        }
+
+                       /* Record the previous j-loop end index */
+                       j_idx = j;
                }
 
-               res = handler(me, location, v);
+               if (reloc_handlers[type].accumulate_handler)
+                       res = add_relocation_to_accumulate(me, type, location,
+                                                          hashtable_bits, v,
+                                                          relocation_hashtable,
+                                                          &used_buckets_list);
+               else
+                       res = handler(me, location, v);
                if (res)
                        return res;
        }
 
+       process_accumulated_relocations(me, &relocation_hashtable,
+                                       &used_buckets_list);
+
        return 0;
 }
 
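For illustration, a minimal stand-alone C sketch of the accumulate-then-commit scheme used above for the ADD/SUB/SET/ULEB128 relocations: paired relocations are first folded into a plain buffer, and only the final value is encoded into the target location (the kernel handlers additionally range-check the narrower 6/8/16/32-bit accumulations before committing). The helper and variable names below are illustrative only and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Standard ULEB128 encoding, analogous to apply_uleb128_accumulation()
 * above (written as do/while here so that a zero value still emits one byte).
 */
static void encode_uleb128(uint8_t *p, unsigned long value)
{
	do {
		uint8_t byte = value & 0x7f;

		value >>= 7;
		if (value)
			byte |= 0x80;		/* more bytes follow */
		*p++ = byte;
	} while (value);
}

int main(void)
{
	long buffer = 0;			/* accumulation buffer, as in the patch */
	uint8_t location[8] = { 0 };		/* stand-in for the relocation target */

	buffer = 300;				/* R_RISCV_SET_ULEB128: buffer = v  */
	buffer -= 44;				/* R_RISCV_SUB_ULEB128: buffer -= v */

	encode_uleb128(location, buffer);	/* commit: 256 encodes as 0x80 0x02 */
	printf("%#x %#x\n", location[0], location[1]);
	return 0;
}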
index 7142ec42e889f996fd2d0b0eb32a54fba1a89128..a69dfa610aa85724e01858cc320a55bd46298bce 100644 (file)
@@ -11,6 +11,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
        struct kprobe_ctlblk *kcb;
        int bit;
 
+       if (unlikely(kprobe_ftrace_disabled))
+               return;
+
        bit = ftrace_test_recursion_trylock(ip, parent_ip);
        if (bit < 0)
                return;
index 21bac92a170a9bd43be973f213cb8be3329175ef..f2cd83d9b0f004e0fe6ae64db9425d343a394bf9 100644 (file)
@@ -75,7 +75,7 @@
        REG_L x31, PT_T6(sp)
        .endm
 
-ENTRY(arch_rethook_trampoline)
+SYM_CODE_START(arch_rethook_trampoline)
        addi sp, sp, -(PT_SIZE_ON_STACK)
        save_all_base_regs
 
@@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline)
        addi sp, sp, PT_SIZE_ON_STACK
 
        ret
-ENDPROC(arch_rethook_trampoline)
+SYM_CODE_END(arch_rethook_trampoline)
index 83e223318822ae5952d7684e50d98cc055cac068..51042f48da173c9c215848d4b9118b92dc4cc100 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/thread_info.h>
 #include <asm/cpuidle.h>
 #include <asm/vector.h>
+#include <asm/cpufeature.h>
 
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
@@ -39,6 +40,23 @@ void arch_cpu_idle(void)
        cpu_do_idle();
 }
 
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+       if (!unaligned_ctl_available())
+               return -EINVAL;
+
+       tsk->thread.align_ctl = val;
+       return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+       if (!unaligned_ctl_available())
+               return -EINVAL;
+
+       return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
+}
+
 void __show_regs(struct pt_regs *regs)
 {
        show_regs_print_info(KERN_DEFAULT);
@@ -151,6 +169,7 @@ void flush_thread(void)
        riscv_v_vstate_off(task_pt_regs(current));
        kfree(current->thread.vstate.datap);
        memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+       clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
 #endif
 }
 
@@ -158,7 +177,7 @@ void arch_release_task_struct(struct task_struct *tsk)
 {
        /* Free the vector context of datap. */
        if (has_vector())
-               kfree(tsk->thread.vstate.datap);
+               riscv_v_thread_free(tsk);
 }
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -167,6 +186,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
        *dst = *src;
        /* clear entire V context, including datap for a new task */
        memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+       memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
+       clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
 
        return 0;
 }
@@ -200,7 +221,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
                childregs->a0 = 0; /* Return value of fork() */
                p->thread.s[0] = 0;
        }
+       p->thread.riscv_v_flags = 0;
+       if (has_vector())
+               riscv_v_thread_alloc(p);
        p->thread.ra = (unsigned long)ret_from_fork;
        p->thread.sp = (unsigned long)childregs; /* kernel sp */
        return 0;
 }
+
+void __init arch_task_cache_init(void)
+{
+       riscv_v_setup_ctx_cache();
+}
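The set_unalign_ctl()/get_unalign_ctl() hooks added above back the generic PR_SET_UNALIGN/PR_GET_UNALIGN prctls, assuming the usual SET_UNALIGN_CTL/GET_UNALIGN_CTL wiring elsewhere in the series. A hedged user-space sketch:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	unsigned int ctl = 0;

	/*
	 * Ask for SIGBUS on misaligned accesses; this fails with EINVAL when
	 * the unaligned-access control is not available on this kernel.
	 */
	if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0)
		perror("PR_SET_UNALIGN");

	if (prctl(PR_GET_UNALIGN, &ctl) == 0)
		printf("unalign ctl: %u\n", ctl);

	return 0;
}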
index 2afe460de16a62ba21cf3c7db4a96c5ec8f7d3a5..e8515aa9d80bf82fd6ff2598664b9fe18a6b1de3 100644 (file)
@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
         * Ensure the vector registers have been saved to the memory before
         * copying them to membuf.
         */
-       if (target == current)
-               riscv_v_vstate_save(current, task_pt_regs(current));
+       if (target == current) {
+               get_cpu_vector_context();
+               riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+               put_cpu_vector_context();
+       }
 
        ptrace_vstate.vstart = vstate->vstart;
        ptrace_vstate.vl = vstate->vl;
index a4559695ce623a6f43db179443a62af40872ebee..fbf264af7974b8ee1e26c60502b7596a0f801469 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <asm/sbi.h>
+#include <asm/csr.h>
 
 static int sbi_ipi_virq;
 
@@ -33,6 +34,15 @@ static int sbi_ipi_starting_cpu(unsigned int cpu)
        return 0;
 }
 
+#ifdef CONFIG_ARCH_SPACEMIT
+static int sbi_clint_ipi_starting_cpu(unsigned int cpu)
+{
+       csr_set(CSR_IE, IE_SIE);
+
+       return 0;
+}
+#endif
+
 void __init sbi_ipi_init(void)
 {
        int virq;
@@ -71,6 +81,11 @@ void __init sbi_ipi_init(void)
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                          "irqchip/sbi-ipi:starting",
                          sbi_ipi_starting_cpu, NULL);
+#ifdef CONFIG_ARCH_SPACEMIT
+       cpuhp_setup_state(CPUHP_AP_CLINT_IPI_RISCV_STARTING,
+                         "irqchip/sbi-clint-ipi:starting",
+                         sbi_clint_ipi_starting_cpu, NULL);
+#endif
 
        riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false);
        pr_info("providing IPIs using SBI IPI extension\n");
index 5a62ed1da45332c85820fdfdd7e90046b1ae3380..f5f344d0c5ebf2ddd0932e1d59b7926b443d197f 100644 (file)
@@ -460,6 +460,15 @@ int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
 }
 EXPORT_SYMBOL(sbi_remote_hfence_vvma);
 
+#if defined(CONFIG_ARCH_SPACEMIT_K1PRO) || defined(CONFIG_ARCH_SPACEMIT_K1X)
+void sbi_flush_local_dcache_all(void)
+{
+       sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_FLUSH_CACHE_ALL, 0,
+                       0, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(sbi_flush_local_dcache_all);
+#endif
+
 /**
  * sbi_remote_hfence_vvma_asid() - Execute HFENCE.VVMA instructions on given
  * remote harts for current guest virtual address range belonging to a specific
index e600aab116a4093a5a45285f3e14e32e917f54ba..58dfba33a2652f4593f569e9ce017698870e43a4 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/acpi.h>
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/early_ioremap.h>
 #include <asm/pgtable.h>
 
 #include "head.h"
 
-#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI)
-struct screen_info screen_info __section(".data") = {
-       .orig_video_lines       = 30,
-       .orig_video_cols        = 80,
-       .orig_video_mode        = 0,
-       .orig_video_ega_bx      = 0,
-       .orig_video_isVGA       = 1,
-       .orig_video_points      = 8
-};
+#if defined(CONFIG_EFI)
+struct screen_info screen_info __section(".data");
 #endif
 
 /*
@@ -314,10 +308,13 @@ void __init setup_arch(char **cmdline_p)
        riscv_fill_hwcap();
        init_rt_signal_env();
        apply_boot_alternatives();
+
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
            riscv_isa_extension_available(NULL, ZICBOM))
                riscv_noncoherent_supported();
        riscv_set_dma_cache_alignment();
+
+       riscv_user_isa_enable();
 }
 
 static int __init topology_init(void)
index 21a4d0e111bc5f151f9ef4a6205e7da17a87ec10..5d69f4db9e8f38012bd4f583353ccbb3ce520465 100644 (file)
@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
        /* datap is designed to be 16 byte aligned for better performance */
        WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
 
-       riscv_v_vstate_save(current, regs);
+       get_cpu_vector_context();
+       riscv_v_vstate_save(&current->thread.vstate, regs);
+       put_cpu_vector_context();
+
        /* Copy everything of vstate but datap. */
        err = __copy_to_user(&state->v_state, &current->thread.vstate,
                             offsetof(struct __riscv_v_ext_state, datap));
@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
        if (unlikely(err))
                return err;
 
-       riscv_v_vstate_restore(current, regs);
+       riscv_v_vstate_set_restore(current, regs);
 
        return err;
 }
@@ -384,30 +387,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
        sigset_t *oldset = sigmask_to_save();
        int ret;
 
-       /* Are we from a system call? */
-       if (regs->cause == EXC_SYSCALL) {
-               /* Avoid additional syscall restarting via ret_from_exception */
-               regs->cause = -1UL;
-               /* If so, check system call restarting.. */
-               switch (regs->a0) {
-               case -ERESTART_RESTARTBLOCK:
-               case -ERESTARTNOHAND:
-                       regs->a0 = -EINTR;
-                       break;
-
-               case -ERESTARTSYS:
-                       if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
-                               regs->a0 = -EINTR;
-                               break;
-                       }
-                       fallthrough;
-               case -ERESTARTNOINTR:
-                        regs->a0 = regs->orig_a0;
-                       regs->epc -= 0x4;
-                       break;
-               }
-       }
-
        rseq_signal_deliver(ksig, regs);
 
        /* Set up the stack frame */
@@ -421,35 +400,66 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 void arch_do_signal_or_restart(struct pt_regs *regs)
 {
+       unsigned long continue_addr = 0, restart_addr = 0;
+       int retval = 0;
        struct ksignal ksig;
+       bool syscall = (regs->cause == EXC_SYSCALL);
 
-       if (get_signal(&ksig)) {
-               /* Actually deliver the signal */
-               handle_signal(&ksig, regs);
-               return;
-       }
+       /* If we were from a system call, check for system call restarting */
+       if (syscall) {
+               continue_addr = regs->epc;
+               restart_addr = continue_addr - 4;
+               retval = regs->a0;
 
-       /* Did we come from a system call? */
-       if (regs->cause == EXC_SYSCALL) {
                /* Avoid additional syscall restarting via ret_from_exception */
                regs->cause = -1UL;
 
-               /* Restart the system call - no handlers present */
-               switch (regs->a0) {
+               /*
+                * Prepare for system call restart. We do this here so that a
+                * debugger will see the already changed PC.
+                */
+               switch (retval) {
                case -ERESTARTNOHAND:
                case -ERESTARTSYS:
                case -ERESTARTNOINTR:
-                        regs->a0 = regs->orig_a0;
-                       regs->epc -= 0x4;
-                       break;
                case -ERESTART_RESTARTBLOCK:
-                        regs->a0 = regs->orig_a0;
-                       regs->a7 = __NR_restart_syscall;
-                       regs->epc -= 0x4;
+                       regs->a0 = regs->orig_a0;
+                       regs->epc = restart_addr;
                        break;
                }
        }
 
+       /*
+        * Get the signal to deliver. When running under ptrace, at this point
+        * the debugger may change all of our registers.
+        */
+       if (get_signal(&ksig)) {
+               /*
+                * Depending on the signal settings, we may need to revert the
+                * decision to restart the system call, but skip this if a
+                * debugger has chosen to restart at a different PC.
+                */
+               if (regs->epc == restart_addr &&
+                   (retval == -ERESTARTNOHAND ||
+                    retval == -ERESTART_RESTARTBLOCK ||
+                    (retval == -ERESTARTSYS &&
+                     !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
+                       regs->a0 = -EINTR;
+                       regs->epc = continue_addr;
+               }
+
+               /* Actually deliver the signal */
+               handle_signal(&ksig, regs);
+               return;
+       }
+
+       /*
+        * Handle restarting a different system call. As above, if a debugger
+        * has chosen to restart at a different PC, ignore the restart.
+        */
+       if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK)
+               regs->a7 = __NR_restart_syscall;
+
        /*
         * If there is no signal to deliver, we just put the saved
         * sigmask back.
index 1b8da4e40a4d6e979293e4734fb7d4b2e7407723..fb6ab7f8bfbd474bb2b199fd61ed5284342bab8d 100644 (file)
@@ -25,8 +25,9 @@
 #include <linux/of.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/mm.h>
+
+#include <asm/cacheflush.h>
 #include <asm/cpu_ops.h>
-#include <asm/cpufeature.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
@@ -245,18 +246,20 @@ asmlinkage __visible void smp_callin(void)
        riscv_ipi_enable();
 
        numa_add_cpu(curr_cpuid);
-       set_cpu_online(curr_cpuid, 1);
-       check_unaligned_access(curr_cpuid);
+       set_cpu_online(curr_cpuid, true);
 
        if (has_vector()) {
                if (riscv_v_setup_vsize())
                        elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
        }
 
+       riscv_user_isa_enable();
+
        /*
         * Remote TLB flushes are ignored while the CPU is offline, so emit
         * a local TLB flush right now just in case.
         */
+       local_flush_icache_all();
        local_flush_tlb_all();
        complete(&cpu_running);
        /*
index 64a9c093aef93a1b9ec4a99bab001a1f9932e776..528ec7cc9a62255127573a31bd88cadb5dcc3d3d 100644 (file)
 
 extern asmlinkage void ret_from_exception(void);
 
+static inline int fp_is_valid(unsigned long fp, unsigned long sp)
+{
+       unsigned long low, high;
+
+       low = sp + sizeof(struct stackframe);
+       high = ALIGN(sp, THREAD_SIZE);
+
+       return !(fp < low || fp > high || fp & 0x07);
+}
+
 void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
                             bool (*fn)(void *, unsigned long), void *arg)
 {
@@ -41,21 +51,19 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
        }
 
        for (;;) {
-               unsigned long low, high;
                struct stackframe *frame;
 
                if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
                        break;
 
-               /* Validate frame pointer */
-               low = sp + sizeof(struct stackframe);
-               high = ALIGN(sp, THREAD_SIZE);
-               if (unlikely(fp < low || fp > high || fp & 0x7))
+               if (unlikely(!fp_is_valid(fp, sp)))
                        break;
+
                /* Unwind stack frame */
                frame = (struct stackframe *)fp - 1;
                sp = fp;
-               if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
+               if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) {
+                       /* We hit a function where ra is not saved on the stack */
                        fp = frame->ra;
                        pc = regs->ra;
                } else {
index 3c89b8ec69c49cce4809986f51784fcbfff53630..c410783f6f69b4922cdbda2936f0675f8290aed6 100644 (file)
@@ -4,8 +4,11 @@
  * Copyright (c) 2022 Ventana Micro Systems Inc.
  */
 
+#define pr_fmt(fmt) "suspend: " fmt
 #include <linux/ftrace.h>
+#include <linux/suspend.h>
 #include <asm/csr.h>
+#include <asm/sbi.h>
 #include <asm/suspend.h>
 
 void suspend_save_csrs(struct suspend_context *context)
@@ -85,3 +88,43 @@ int cpu_suspend(unsigned long arg,
 
        return rc;
 }
+
+#ifdef CONFIG_RISCV_SBI
+static int sbi_system_suspend(unsigned long sleep_type,
+                             unsigned long resume_addr,
+                             unsigned long opaque)
+{
+       struct sbiret ret;
+
+       ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
+                       sleep_type, resume_addr, opaque, 0, 0, 0);
+       if (ret.error)
+               return sbi_err_map_linux_errno(ret.error);
+
+       return ret.value;
+}
+
+static int sbi_system_suspend_enter(suspend_state_t state)
+{
+       return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend);
+}
+
+static const struct platform_suspend_ops sbi_system_suspend_ops = {
+       .valid = suspend_valid_only_mem,
+       .enter = sbi_system_suspend_enter,
+};
+
+static int __init sbi_system_suspend_init(void)
+{
+       if (sbi_spec_version >= sbi_mk_version(1, 0) &&
+           sbi_probe_extension(SBI_EXT_SUSP) > 0) {
+               pr_info("SBI SUSP extension detected\n");
+               if (IS_ENABLED(CONFIG_SUSPEND))
+                       suspend_set_ops(&sbi_system_suspend_ops);
+       }
+
+       return 0;
+}
+
+arch_initcall(sbi_system_suspend_init);
+#endif /* CONFIG_RISCV_SBI */
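Once the SBI SUSP ops above are registered, suspend-to-RAM goes through the standard Linux suspend path; for example, a privileged process can request it by writing "mem" to /sys/power/state. A short sketch (nothing RISC-V specific in it):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/power/state", "w");

	if (!f) {
		perror("/sys/power/state");
		return 1;
	}
	/* Ends up in sbi_system_suspend_enter() when the SUSP extension is present. */
	fputs("mem", f);
	return fclose(f) ? 1 : 0;
}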
index f7960c7c5f9e25081424f3edbe526896c1692303..2d54f309c14059ad901f80448df2ff257f047388 100644 (file)
@@ -16,7 +16,7 @@
        .altmacro
        .option norelax
 
-ENTRY(__cpu_suspend_enter)
+SYM_FUNC_START(__cpu_suspend_enter)
        /* Save registers (except A0 and T0-T6) */
        REG_S   ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
        REG_S   sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
@@ -57,14 +57,11 @@ ENTRY(__cpu_suspend_enter)
 
        /* Return to C code */
        ret
-END(__cpu_suspend_enter)
+SYM_FUNC_END(__cpu_suspend_enter)
 
 SYM_TYPED_FUNC_START(__cpu_resume_enter)
        /* Load the global pointer */
-       .option push
-       .option norelax
-               la gp, __global_pointer$
-       .option pop
+       load_global_pointer
 
 #ifdef CONFIG_MMU
        /* Save A0 and A1 */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
new file mode 100644 (file)
index 0000000..b3c9a4e
--- /dev/null
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The hwprobe interface, for allowing userspace to probe to see which features
+ * are supported by the hardware.  See Documentation/arch/riscv/hwprobe.rst for
+ * more details.
+ */
+#include <linux/syscalls.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+#include <asm/sbi.h>
+#include <asm/switch_to.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/vector.h>
+#include <vdso/vsyscall.h>
+
+
+static void hwprobe_arch_id(struct riscv_hwprobe *pair,
+                           const struct cpumask *cpus)
+{
+       u64 id = -1ULL;
+       bool first = true;
+       int cpu;
+
+       for_each_cpu(cpu, cpus) {
+               u64 cpu_id;
+
+               switch (pair->key) {
+               case RISCV_HWPROBE_KEY_MVENDORID:
+                       cpu_id = riscv_cached_mvendorid(cpu);
+                       break;
+               case RISCV_HWPROBE_KEY_MIMPID:
+                       cpu_id = riscv_cached_mimpid(cpu);
+                       break;
+               case RISCV_HWPROBE_KEY_MARCHID:
+                       cpu_id = riscv_cached_marchid(cpu);
+                       break;
+               }
+
+               if (first) {
+                       id = cpu_id;
+                       first = false;
+               }
+
+               /*
+                * If there's a mismatch for the given set, return -1 in the
+                * value.
+                */
+               if (id != cpu_id) {
+                       id = -1ULL;
+                       break;
+               }
+       }
+
+       pair->value = id;
+}
+
+static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
+                            const struct cpumask *cpus)
+{
+       int cpu;
+       u64 missing = 0;
+
+       pair->value = 0;
+       if (has_fpu())
+               pair->value |= RISCV_HWPROBE_IMA_FD;
+
+       if (riscv_isa_extension_available(NULL, c))
+               pair->value |= RISCV_HWPROBE_IMA_C;
+
+       if (has_vector() && riscv_isa_extension_available(NULL, v))
+               pair->value |= RISCV_HWPROBE_IMA_V;
+
+       /*
+        * Loop through and record extensions that 1) anyone has, and 2) anyone
+        * doesn't have.
+        */
+       for_each_cpu(cpu, cpus) {
+               struct riscv_isainfo *isainfo = &hart_isa[cpu];
+
+#define EXT_KEY(ext)                                                                   \
+       do {                                                                            \
+               if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \
+                       pair->value |= RISCV_HWPROBE_EXT_##ext;                         \
+               else                                                                    \
+                       missing |= RISCV_HWPROBE_EXT_##ext;                             \
+       } while (false)
+
+               /*
+                * Only use EXT_KEY() for extensions which can be exposed to userspace,
+                * regardless of the kernel's configuration, as no other checks, besides
+                * presence in the hart_isa bitmap, are made.
+                */
+               EXT_KEY(ZBA);
+               EXT_KEY(ZBB);
+               EXT_KEY(ZBS);
+               EXT_KEY(ZICBOZ);
+               EXT_KEY(ZBC);
+
+               EXT_KEY(ZBKB);
+               EXT_KEY(ZBKC);
+               EXT_KEY(ZBKX);
+               EXT_KEY(ZKND);
+               EXT_KEY(ZKNE);
+               EXT_KEY(ZKNH);
+               EXT_KEY(ZKSED);
+               EXT_KEY(ZKSH);
+               EXT_KEY(ZKT);
+               EXT_KEY(ZIHINTNTL);
+               EXT_KEY(ZTSO);
+               EXT_KEY(ZACAS);
+               EXT_KEY(ZICOND);
+               EXT_KEY(ZIHINTPAUSE);
+
+               /*
+                * All the following extensions must depend on the kernel
+                * support of V.
+                */
+               if (has_vector()) {
+                       EXT_KEY(ZVBB);
+                       EXT_KEY(ZVBC);
+                       EXT_KEY(ZVKB);
+                       EXT_KEY(ZVKG);
+                       EXT_KEY(ZVKNED);
+                       EXT_KEY(ZVKNHA);
+                       EXT_KEY(ZVKNHB);
+                       EXT_KEY(ZVKSED);
+                       EXT_KEY(ZVKSH);
+                       EXT_KEY(ZVKT);
+                       EXT_KEY(ZVFH);
+                       EXT_KEY(ZVFHMIN);
+               }
+
+               if (has_fpu()) {
+                       EXT_KEY(ZFH);
+                       EXT_KEY(ZFHMIN);
+                       EXT_KEY(ZFA);
+               }
+#undef EXT_KEY
+       }
+
+       /* Now turn off reporting features if any CPU is missing it. */
+       pair->value &= ~missing;
+}
+
+static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+{
+       struct riscv_hwprobe pair;
+
+       hwprobe_isa_ext0(&pair, cpus);
+       return (pair.value & ext);
+}
+
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+       int cpu;
+       u64 perf = -1ULL;
+
+       for_each_cpu(cpu, cpus) {
+               int this_perf = per_cpu(misaligned_access_speed, cpu);
+
+               if (perf == -1ULL)
+                       perf = this_perf;
+
+               if (perf != this_perf) {
+                       perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+                       break;
+               }
+       }
+
+       if (perf == -1ULL)
+               return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+       return perf;
+}
+#else
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+       if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
+               return RISCV_HWPROBE_MISALIGNED_FAST;
+
+       if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available())
+               return RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+       return RISCV_HWPROBE_MISALIGNED_SLOW;
+}
+#endif
+
+static void hwprobe_one_pair(struct riscv_hwprobe *pair,
+                            const struct cpumask *cpus)
+{
+       switch (pair->key) {
+       case RISCV_HWPROBE_KEY_MVENDORID:
+       case RISCV_HWPROBE_KEY_MARCHID:
+       case RISCV_HWPROBE_KEY_MIMPID:
+               hwprobe_arch_id(pair, cpus);
+               break;
+       /*
+        * The kernel already assumes that the base single-letter ISA
+        * extensions are supported on all harts, and only supports the
+        * IMA base, so just cheat a bit here and tell that to
+        * userspace.
+        */
+       case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+               pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA;
+               break;
+
+       case RISCV_HWPROBE_KEY_IMA_EXT_0:
+               hwprobe_isa_ext0(pair, cpus);
+               break;
+
+       case RISCV_HWPROBE_KEY_CPUPERF_0:
+               pair->value = hwprobe_misaligned(cpus);
+               break;
+
+       case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
+               pair->value = 0;
+               if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
+                       pair->value = riscv_cboz_block_size;
+               break;
+
+       /*
+        * For forward compatibility, unknown keys don't fail the whole
+        * call, but get their element key set to -1 and value set to 0
+        * indicating they're unrecognized.
+        */
+       default:
+               pair->key = -1;
+               pair->value = 0;
+               break;
+       }
+}
+
+static int hwprobe_get_values(struct riscv_hwprobe __user *pairs,
+                             size_t pair_count, size_t cpusetsize,
+                             unsigned long __user *cpus_user,
+                             unsigned int flags)
+{
+       size_t out;
+       int ret;
+       cpumask_t cpus;
+
+       /* Check the reserved flags. */
+       if (flags != 0)
+               return -EINVAL;
+
+       /*
+        * The interface supports taking in a CPU mask, and returns values that
+        * are consistent across that mask. Allow userspace to specify NULL and
+        * 0 as a shortcut to all online CPUs.
+        */
+       cpumask_clear(&cpus);
+       if (!cpusetsize && !cpus_user) {
+               cpumask_copy(&cpus, cpu_online_mask);
+       } else {
+               if (cpusetsize > cpumask_size())
+                       cpusetsize = cpumask_size();
+
+               ret = copy_from_user(&cpus, cpus_user, cpusetsize);
+               if (ret)
+                       return -EFAULT;
+
+               /*
+                * Userspace must provide at least one online CPU, without that
+                * there's no way to define what is supported.
+                */
+               cpumask_and(&cpus, &cpus, cpu_online_mask);
+               if (cpumask_empty(&cpus))
+                       return -EINVAL;
+       }
+
+       for (out = 0; out < pair_count; out++, pairs++) {
+               struct riscv_hwprobe pair;
+
+               if (get_user(pair.key, &pairs->key))
+                       return -EFAULT;
+
+               pair.value = 0;
+               hwprobe_one_pair(&pair, &cpus);
+               ret = put_user(pair.key, &pairs->key);
+               if (ret == 0)
+                       ret = put_user(pair.value, &pairs->value);
+
+               if (ret)
+                       return -EFAULT;
+       }
+
+       return 0;
+}
+
+static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
+                           size_t pair_count, size_t cpusetsize,
+                           unsigned long __user *cpus_user,
+                           unsigned int flags)
+{
+       cpumask_t cpus, one_cpu;
+       bool clear_all = false;
+       size_t i;
+       int ret;
+
+       if (flags != RISCV_HWPROBE_WHICH_CPUS)
+               return -EINVAL;
+
+       if (!cpusetsize || !cpus_user)
+               return -EINVAL;
+
+       if (cpusetsize > cpumask_size())
+               cpusetsize = cpumask_size();
+
+       ret = copy_from_user(&cpus, cpus_user, cpusetsize);
+       if (ret)
+               return -EFAULT;
+
+       if (cpumask_empty(&cpus))
+               cpumask_copy(&cpus, cpu_online_mask);
+
+       cpumask_and(&cpus, &cpus, cpu_online_mask);
+
+       cpumask_clear(&one_cpu);
+
+       for (i = 0; i < pair_count; i++) {
+               struct riscv_hwprobe pair, tmp;
+               int cpu;
+
+               ret = copy_from_user(&pair, &pairs[i], sizeof(pair));
+               if (ret)
+                       return -EFAULT;
+
+               if (!riscv_hwprobe_key_is_valid(pair.key)) {
+                       clear_all = true;
+                       pair = (struct riscv_hwprobe){ .key = -1, };
+                       ret = copy_to_user(&pairs[i], &pair, sizeof(pair));
+                       if (ret)
+                               return -EFAULT;
+               }
+
+               if (clear_all)
+                       continue;
+
+               tmp = (struct riscv_hwprobe){ .key = pair.key, };
+
+               for_each_cpu(cpu, &cpus) {
+                       cpumask_set_cpu(cpu, &one_cpu);
+
+                       hwprobe_one_pair(&tmp, &one_cpu);
+
+                       if (!riscv_hwprobe_pair_cmp(&tmp, &pair))
+                               cpumask_clear_cpu(cpu, &cpus);
+
+                       cpumask_clear_cpu(cpu, &one_cpu);
+               }
+       }
+
+       if (clear_all)
+               cpumask_clear(&cpus);
+
+       ret = copy_to_user(cpus_user, &cpus, cpusetsize);
+       if (ret)
+               return -EFAULT;
+
+       return 0;
+}
+
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+                           size_t pair_count, size_t cpusetsize,
+                           unsigned long __user *cpus_user,
+                           unsigned int flags)
+{
+       if (flags & RISCV_HWPROBE_WHICH_CPUS)
+               return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
+                                       cpus_user, flags);
+
+       return hwprobe_get_values(pairs, pair_count, cpusetsize,
+                                 cpus_user, flags);
+}
+
+#ifdef CONFIG_MMU
+
+static int __init init_hwprobe_vdso_data(void)
+{
+       struct vdso_data *vd = __arch_get_k_vdso_data();
+       struct arch_vdso_data *avd = &vd->arch_data;
+       u64 id_bitsmash = 0;
+       struct riscv_hwprobe pair;
+       int key;
+
+       /*
+        * Initialize vDSO data with the answers for the "all CPUs" case, to
+        * save a syscall in the common case.
+        */
+       for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) {
+               pair.key = key;
+               hwprobe_one_pair(&pair, cpu_online_mask);
+
+               WARN_ON_ONCE(pair.key < 0);
+
+               avd->all_cpu_hwprobe_values[key] = pair.value;
+               /*
+                * Smash together the vendor, arch, and impl IDs to see if
+                * they're all 0 or any negative.
+                */
+               if (key <= RISCV_HWPROBE_KEY_MIMPID)
+                       id_bitsmash |= pair.value;
+       }
+
+       /*
+        * If the arch, vendor, and implementation ID are all the same across
+        * all harts, then assume all CPUs are the same, and allow the vDSO to
+        * answer queries for arbitrary masks. However if all values are 0 (not
+        * populated) or any value returns -1 (varies across CPUs), then the
+        * vDSO should defer to the kernel for exotic cpu masks.
+        */
+       avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
+       return 0;
+}
+
+arch_initcall_sync(init_hwprobe_vdso_data);
+
+#endif /* CONFIG_MMU */
+
+SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
+               size_t, pair_count, size_t, cpusetsize, unsigned long __user *,
+               cpus, unsigned int, flags)
+{
+       return do_riscv_hwprobe(pairs, pair_count, cpusetsize,
+                               cpus, flags);
+}
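A hedged user-space sketch of calling the syscall added above, assuming a toolchain whose uapi headers already provide <asm/hwprobe.h> and __NR_riscv_hwprobe. Passing cpusetsize == 0 and cpus == NULL asks about all online CPUs, as handled in hwprobe_get_values():

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pairs[] = {
		{ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR },
		{ .key = RISCV_HWPROBE_KEY_IMA_EXT_0 },
		{ .key = RISCV_HWPROBE_KEY_CPUPERF_0 },
	};
	long ret;

	/* pair_count = 3, cpusetsize = 0, cpus = NULL, flags = 0 */
	ret = syscall(__NR_riscv_hwprobe, pairs, 3, 0, NULL, 0);
	if (ret) {
		perror("riscv_hwprobe");
		return 1;
	}

	for (int i = 0; i < 3; i++)
		printf("key %lld -> value %llx\n",
		       (long long)pairs[i].key,
		       (unsigned long long)pairs[i].value);

	return 0;
}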
index 473159b5f303ab9c7f784bb388814446b13b61f2..64155323cc923a26b22abdd9357869c894bd0e17 100644 (file)
@@ -7,15 +7,6 @@
 
 #include <linux/syscalls.h>
 #include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/hwprobe.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
-#include <asm/switch_to.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-#include <asm-generic/mman-common.h>
-#include <vdso/vsyscall.h>
 
 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
                           unsigned long prot, unsigned long flags,
@@ -77,265 +68,6 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
        return 0;
 }
 
-/*
- * The hwprobe interface, for allowing userspace to probe to see which features
- * are supported by the hardware.  See Documentation/riscv/hwprobe.rst for more
- * details.
- */
-static void hwprobe_arch_id(struct riscv_hwprobe *pair,
-                           const struct cpumask *cpus)
-{
-       u64 id = -1ULL;
-       bool first = true;
-       int cpu;
-
-       for_each_cpu(cpu, cpus) {
-               u64 cpu_id;
-
-               switch (pair->key) {
-               case RISCV_HWPROBE_KEY_MVENDORID:
-                       cpu_id = riscv_cached_mvendorid(cpu);
-                       break;
-               case RISCV_HWPROBE_KEY_MIMPID:
-                       cpu_id = riscv_cached_mimpid(cpu);
-                       break;
-               case RISCV_HWPROBE_KEY_MARCHID:
-                       cpu_id = riscv_cached_marchid(cpu);
-                       break;
-               }
-
-               if (first) {
-                       id = cpu_id;
-                       first = false;
-               }
-
-               /*
-                * If there's a mismatch for the given set, return -1 in the
-                * value.
-                */
-               if (id != cpu_id) {
-                       id = -1ULL;
-                       break;
-               }
-       }
-
-       pair->value = id;
-}
-
-static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
-                            const struct cpumask *cpus)
-{
-       int cpu;
-       u64 missing = 0;
-
-       pair->value = 0;
-       if (has_fpu())
-               pair->value |= RISCV_HWPROBE_IMA_FD;
-
-       if (riscv_isa_extension_available(NULL, c))
-               pair->value |= RISCV_HWPROBE_IMA_C;
-
-       if (has_vector())
-               pair->value |= RISCV_HWPROBE_IMA_V;
-
-       /*
-        * Loop through and record extensions that 1) anyone has, and 2) anyone
-        * doesn't have.
-        */
-       for_each_cpu(cpu, cpus) {
-               struct riscv_isainfo *isainfo = &hart_isa[cpu];
-
-               if (riscv_isa_extension_available(isainfo->isa, ZBA))
-                       pair->value |= RISCV_HWPROBE_EXT_ZBA;
-               else
-                       missing |= RISCV_HWPROBE_EXT_ZBA;
-
-               if (riscv_isa_extension_available(isainfo->isa, ZBB))
-                       pair->value |= RISCV_HWPROBE_EXT_ZBB;
-               else
-                       missing |= RISCV_HWPROBE_EXT_ZBB;
-
-               if (riscv_isa_extension_available(isainfo->isa, ZBS))
-                       pair->value |= RISCV_HWPROBE_EXT_ZBS;
-               else
-                       missing |= RISCV_HWPROBE_EXT_ZBS;
-       }
-
-       /* Now turn off reporting features if any CPU is missing it. */
-       pair->value &= ~missing;
-}
-
-static u64 hwprobe_misaligned(const struct cpumask *cpus)
-{
-       int cpu;
-       u64 perf = -1ULL;
-
-       for_each_cpu(cpu, cpus) {
-               int this_perf = per_cpu(misaligned_access_speed, cpu);
-
-               if (perf == -1ULL)
-                       perf = this_perf;
-
-               if (perf != this_perf) {
-                       perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
-                       break;
-               }
-       }
-
-       if (perf == -1ULL)
-               return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
-
-       return perf;
-}
-
-static void hwprobe_one_pair(struct riscv_hwprobe *pair,
-                            const struct cpumask *cpus)
-{
-       switch (pair->key) {
-       case RISCV_HWPROBE_KEY_MVENDORID:
-       case RISCV_HWPROBE_KEY_MARCHID:
-       case RISCV_HWPROBE_KEY_MIMPID:
-               hwprobe_arch_id(pair, cpus);
-               break;
-       /*
-        * The kernel already assumes that the base single-letter ISA
-        * extensions are supported on all harts, and only supports the
-        * IMA base, so just cheat a bit here and tell that to
-        * userspace.
-        */
-       case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
-               pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA;
-               break;
-
-       case RISCV_HWPROBE_KEY_IMA_EXT_0:
-               hwprobe_isa_ext0(pair, cpus);
-               break;
-
-       case RISCV_HWPROBE_KEY_CPUPERF_0:
-               pair->value = hwprobe_misaligned(cpus);
-               break;
-
-       /*
-        * For forward compatibility, unknown keys don't fail the whole
-        * call, but get their element key set to -1 and value set to 0
-        * indicating they're unrecognized.
-        */
-       default:
-               pair->key = -1;
-               pair->value = 0;
-               break;
-       }
-}
-
-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
-                           size_t pair_count, size_t cpu_count,
-                           unsigned long __user *cpus_user,
-                           unsigned int flags)
-{
-       size_t out;
-       int ret;
-       cpumask_t cpus;
-
-       /* Check the reserved flags. */
-       if (flags != 0)
-               return -EINVAL;
-
-       /*
-        * The interface supports taking in a CPU mask, and returns values that
-        * are consistent across that mask. Allow userspace to specify NULL and
-        * 0 as a shortcut to all online CPUs.
-        */
-       cpumask_clear(&cpus);
-       if (!cpu_count && !cpus_user) {
-               cpumask_copy(&cpus, cpu_online_mask);
-       } else {
-               if (cpu_count > cpumask_size())
-                       cpu_count = cpumask_size();
-
-               ret = copy_from_user(&cpus, cpus_user, cpu_count);
-               if (ret)
-                       return -EFAULT;
-
-               /*
-                * Userspace must provide at least one online CPU, without that
-                * there's no way to define what is supported.
-                */
-               cpumask_and(&cpus, &cpus, cpu_online_mask);
-               if (cpumask_empty(&cpus))
-                       return -EINVAL;
-       }
-
-       for (out = 0; out < pair_count; out++, pairs++) {
-               struct riscv_hwprobe pair;
-
-               if (get_user(pair.key, &pairs->key))
-                       return -EFAULT;
-
-               pair.value = 0;
-               hwprobe_one_pair(&pair, &cpus);
-               ret = put_user(pair.key, &pairs->key);
-               if (ret == 0)
-                       ret = put_user(pair.value, &pairs->value);
-
-               if (ret)
-                       return -EFAULT;
-       }
-
-       return 0;
-}
-
-#ifdef CONFIG_MMU
-
-static int __init init_hwprobe_vdso_data(void)
-{
-       struct vdso_data *vd = __arch_get_k_vdso_data();
-       struct arch_vdso_data *avd = &vd->arch_data;
-       u64 id_bitsmash = 0;
-       struct riscv_hwprobe pair;
-       int key;
-
-       /*
-        * Initialize vDSO data with the answers for the "all CPUs" case, to
-        * save a syscall in the common case.
-        */
-       for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) {
-               pair.key = key;
-               hwprobe_one_pair(&pair, cpu_online_mask);
-
-               WARN_ON_ONCE(pair.key < 0);
-
-               avd->all_cpu_hwprobe_values[key] = pair.value;
-               /*
-                * Smash together the vendor, arch, and impl IDs to see if
-                * they're all 0 or any negative.
-                */
-               if (key <= RISCV_HWPROBE_KEY_MIMPID)
-                       id_bitsmash |= pair.value;
-       }
-
-       /*
-        * If the arch, vendor, and implementation ID are all the same across
-        * all harts, then assume all CPUs are the same, and allow the vDSO to
-        * answer queries for arbitrary masks. However if all values are 0 (not
-        * populated) or any value returns -1 (varies across CPUs), then the
-        * vDSO should defer to the kernel for exotic cpu masks.
-        */
-       avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
-       return 0;
-}
-
-arch_initcall_sync(init_hwprobe_vdso_data);
-
-#endif /* CONFIG_MMU */
-
-SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
-               size_t, pair_count, size_t, cpu_count, unsigned long __user *,
-               cpus, unsigned int, flags)
-{
-       return do_riscv_hwprobe(pairs, pair_count, cpu_count,
-                               cpus, flags);
-}
-
 /* Not defined using SYSCALL_DEFINE0 to avoid error injection */
 asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused)
 {
index 67d0073fb624deef3945d23cb6dbd447a9125efd..9a5f781f5238325bc2b0a82c2b4e94ac1c568ca2 100644 (file)
@@ -151,10 +151,29 @@ DO_ERROR_INFO(do_trap_insn_misaligned,
 DO_ERROR_INFO(do_trap_insn_fault,
        SIGSEGV, SEGV_ACCERR, "instruction access fault");
 
+#ifdef CONFIG_BIND_THREAD_TO_AICORES
+#include <linux/cpumask.h>
+#define AI_OPCODE_MASK0  0xFE0000FF
+#define AI_OPCODE_MATCH0 0xE200002B
+#define AI_OPCODE_MASK1  0xFE0000FF
+#define AI_OPCODE_MATCH1 0xE600002B
+#endif
+
 asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs)
 {
        bool handled;
 
+#ifdef CONFIG_BIND_THREAD_TO_AICORES
+       if ((regs->epc & AI_OPCODE_MASK0) == AI_OPCODE_MATCH0 ||
+           (regs->epc & AI_OPCODE_MASK1) == AI_OPCODE_MATCH1) {
+               struct cpumask mask;
+               pid_t pid = current->pid;
+               mask = ai_core_mask_get();
+               sched_setaffinity(pid, &mask);
+               return;
+       }
+#endif
+
        if (user_mode(regs)) {
                irqentry_enter_from_user_mode(regs);
 
@@ -181,14 +200,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
 
 DO_ERROR_INFO(do_trap_load_fault,
        SIGSEGV, SEGV_ACCERR, "load access fault");
-#ifndef CONFIG_RISCV_M_MODE
-DO_ERROR_INFO(do_trap_load_misaligned,
-       SIGBUS, BUS_ADRALN, "Oops - load address misaligned");
-DO_ERROR_INFO(do_trap_store_misaligned,
-       SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned");
-#else
-int handle_misaligned_load(struct pt_regs *regs);
-int handle_misaligned_store(struct pt_regs *regs);
 
 asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
 {
@@ -231,7 +242,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs
                irqentry_nmi_exit(regs, state);
        }
 }
-#endif
 DO_ERROR_INFO(do_trap_store_fault,
        SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault");
 DO_ERROR_INFO(do_trap_ecall_s,
@@ -360,34 +370,10 @@ static void noinstr handle_riscv_irq(struct pt_regs *regs)
 asmlinkage void noinstr do_irq(struct pt_regs *regs)
 {
        irqentry_state_t state = irqentry_enter(regs);
-#ifdef CONFIG_IRQ_STACKS
-       if (on_thread_stack()) {
-               ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
-                                       + IRQ_STACK_SIZE/sizeof(ulong);
-               __asm__ __volatile(
-               "addi   sp, sp, -"RISCV_SZPTR  "\n"
-               REG_S"  ra, (sp)                \n"
-               "addi   sp, sp, -"RISCV_SZPTR  "\n"
-               REG_S"  s0, (sp)                \n"
-               "addi   s0, sp, 2*"RISCV_SZPTR "\n"
-               "move   sp, %[sp]               \n"
-               "move   a0, %[regs]             \n"
-               "call   handle_riscv_irq        \n"
-               "addi   sp, s0, -2*"RISCV_SZPTR"\n"
-               REG_L"  s0, (sp)                \n"
-               "addi   sp, sp, "RISCV_SZPTR   "\n"
-               REG_L"  ra, (sp)                \n"
-               "addi   sp, sp, "RISCV_SZPTR   "\n"
-               :
-               : [sp] "r" (sp), [regs] "r" (regs)
-               : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
-                 "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-#ifndef CONFIG_FRAME_POINTER
-                 "s0",
-#endif
-                 "memory");
-       } else
-#endif
+
+       if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack())
+               call_on_irq_stack(regs, handle_riscv_irq);
+       else
                handle_riscv_irq(regs);
 
        irqentry_exit(regs, state);
index 5348d842c74533e44366a2c9efff55d1247c0ebe..b62d5a2f4541e754cc7ea322e38d42b4dab65b90 100644 (file)
@@ -6,12 +6,16 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/perf_event.h>
 #include <linux/irq.h>
 #include <linux/stringify.h>
 
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
+#include <asm/entry-common.h>
+#include <asm/hwprobe.h>
+#include <asm/cpufeature.h>
 
 #define INSN_MATCH_LB                  0x3
 #define INSN_MASK_LB                   0x707f
 #define PRECISION_S 0
 #define PRECISION_D 1
 
-#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn)                 \
-static inline type load_##type(const type *addr)                       \
-{                                                                      \
-       type val;                                                       \
-       asm (#insn " %0, %1"                                            \
-       : "=&r" (val) : "m" (*addr));                                   \
-       return val;                                                     \
-}
+#ifdef CONFIG_FPU
+
+#define FP_GET_RD(insn)                (insn >> 7 & 0x1F)
+
+extern void put_f32_reg(unsigned long fp_reg, unsigned long value);
+
+static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
+                     unsigned long val)
+{
+       unsigned long fp_reg = FP_GET_RD(insn);
+
+       put_f32_reg(fp_reg, val);
+       regs->status |= SR_FS_DIRTY;
 
-#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn)                        \
-static inline void store_##type(type *addr, type val)                  \
-{                                                                      \
-       asm volatile (#insn " %0, %1\n"                                 \
-       : : "r" (val), "m" (*addr));                                    \
+       return 0;
 }
 
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw)
-#if defined(CONFIG_64BIT)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld)
+extern void put_f64_reg(unsigned long fp_reg, unsigned long value);
+
+static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
+{
+       unsigned long fp_reg = FP_GET_RD(insn);
+       unsigned long value;
+
+#if __riscv_xlen == 32
+       value = (unsigned long) &val;
 #else
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw)
+       value = val;
+#endif
+       put_f64_reg(fp_reg, value);
+       regs->status |= SR_FS_DIRTY;
 
-static inline u64 load_u64(const u64 *addr)
+       return 0;
+}
+
+#if __riscv_xlen == 32
+extern void get_f64_reg(unsigned long fp_reg, u64 *value);
+
+static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                     struct pt_regs *regs)
 {
-       return load_u32((u32 *)addr)
-               + ((u64)load_u32((u32 *)addr + 1) << 32);
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       u64 val;
+
+       get_f64_reg(fp_reg, &val);
+       regs->status |= SR_FS_DIRTY;
+
+       return val;
 }
+#else
+
+extern unsigned long get_f64_reg(unsigned long fp_reg);
 
-static inline void store_u64(u64 *addr, u64 val)
+static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
 {
-       store_u32((u32 *)addr, val);
-       store_u32((u32 *)addr + 1, val >> 32);
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       unsigned long val;
+
+       val = get_f64_reg(fp_reg);
+       regs->status |= SR_FS_DIRTY;
+
+       return val;
 }
+
 #endif
 
-static inline ulong get_insn(ulong mepc)
+extern unsigned long get_f32_reg(unsigned long fp_reg);
+
+static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
 {
-       register ulong __mepc asm ("a2") = mepc;
-       ulong val, rvc_mask = 3, tmp;
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       unsigned long val;
 
-       asm ("and %[tmp], %[addr], 2\n"
-               "bnez %[tmp], 1f\n"
-#if defined(CONFIG_64BIT)
-               __stringify(LWU) " %[insn], (%[addr])\n"
-#else
-               __stringify(LW) " %[insn], (%[addr])\n"
-#endif
-               "and %[tmp], %[insn], %[rvc_mask]\n"
-               "beq %[tmp], %[rvc_mask], 2f\n"
-               "sll %[insn], %[insn], %[xlen_minus_16]\n"
-               "srl %[insn], %[insn], %[xlen_minus_16]\n"
-               "j 2f\n"
-               "1:\n"
-               "lhu %[insn], (%[addr])\n"
-               "and %[tmp], %[insn], %[rvc_mask]\n"
-               "bne %[tmp], %[rvc_mask], 2f\n"
-               "lhu %[tmp], 2(%[addr])\n"
-               "sll %[tmp], %[tmp], 16\n"
-               "add %[insn], %[insn], %[tmp]\n"
-               "2:"
-       : [insn] "=&r" (val), [tmp] "=&r" (tmp)
-       : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask),
-         [xlen_minus_16] "i" (XLEN_MINUS_16));
+       val = get_f32_reg(fp_reg);
+       regs->status |= SR_FS_DIRTY;
 
        return val;
 }
 
+#else /* CONFIG_FPU */
+static void set_f32_rd(unsigned long insn, struct pt_regs *regs,
+                      unsigned long val) {}
+
+static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {}
+
+static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
+{
+       return 0;
+}
+
+static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
+{
+       return 0;
+}
+
+#endif
+
+#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs))
+#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs))
+#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs))
+
+#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs))
+#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
+#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
+
+#define __read_insn(regs, insn, insn_addr, type)       \
+({                                                     \
+       int __ret;                                      \
+                                                       \
+       if (user_mode(regs)) {                          \
+               __ret = __get_user(insn, (type __user *) insn_addr); \
+       } else {                                        \
+               insn = *(type *)insn_addr;              \
+               __ret = 0;                              \
+       }                                               \
+                                                       \
+       __ret;                                          \
+})
+
+static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
+{
+       ulong insn = 0;
+
+       if (epc & 0x2) {
+               ulong tmp = 0;
+
+               if (__read_insn(regs, insn, epc, u16))
+                       return -EFAULT;
+               /* __get_user() uses a regular "lw", which sign-extends the
+                * loaded value; clear the upper bits here so the "or" with
+                * the upper 16-bit half below works as intended.
+                */
+               insn &= GENMASK(15, 0);
+               if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) {
+                       *r_insn = insn;
+                       return 0;
+               }
+               epc += sizeof(u16);
+               if (__read_insn(regs, tmp, epc, u16))
+                       return -EFAULT;
+               *r_insn = (tmp << 16) | insn;
+
+               return 0;
+       } else {
+               if (__read_insn(regs, insn, epc, u32))
+                       return -EFAULT;
+               if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) {
+                       *r_insn = insn;
+                       return 0;
+               }
+               insn &= GENMASK(15, 0);
+               *r_insn = insn;
+
+               return 0;
+       }
+}
+
 union reg_data {
        u8 data_bytes[8];
        ulong data_ulong;
        u64 data_u64;
 };
 
+static bool unaligned_ctl __read_mostly;
+
+/* sysctl hooks */
+int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
+
 int handle_misaligned_load(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
-       unsigned long insn = get_insn(epc);
-       unsigned long addr = csr_read(mtval);
-       int i, fp = 0, shift = 0, len = 0;
+       unsigned long insn;
+       unsigned long addr = regs->badaddr;
+       int fp = 0, shift = 0, len = 0;
+
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+       *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+#endif
+
+       if (!unaligned_enabled)
+               return -1;
+
+       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+               return -1;
+
+       if (get_insn(regs, epc, &insn))
+               return -1;
 
        regs->epc = 0;
 
@@ -305,13 +412,23 @@ int handle_misaligned_load(struct pt_regs *regs)
                return -1;
        }
 
+       if (!IS_ENABLED(CONFIG_FPU) && fp)
+               return -EOPNOTSUPP;
+
        val.data_u64 = 0;
-       for (i = 0; i < len; i++)
-               val.data_bytes[i] = load_u8((void *)(addr + i));
+       if (user_mode(regs)) {
+               if (raw_copy_from_user(&val, (u8 __user *)addr, len))
+                       return -1;
+       } else {
+               memcpy(&val, (u8 *)addr, len);
+       }
 
-       if (fp)
-               return -1;
-       SET_RD(insn, regs, val.data_ulong << shift >> shift);
+       if (!fp)
+               SET_RD(insn, regs, val.data_ulong << shift >> shift);
+       else if (len == 8)
+               set_f64_rd(insn, regs, val.data_u64);
+       else
+               set_f32_rd(insn, regs, val.data_ulong);
 
        regs->epc = epc + INSN_LEN(insn);
 
@@ -322,9 +439,20 @@ int handle_misaligned_store(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
-       unsigned long insn = get_insn(epc);
-       unsigned long addr = csr_read(mtval);
-       int i, len = 0;
+       unsigned long insn;
+       unsigned long addr = regs->badaddr;
+       int len = 0, fp = 0;
+
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+
+       if (!unaligned_enabled)
+               return -1;
+
+       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+               return -1;
+
+       if (get_insn(regs, epc, &insn))
+               return -1;
 
        regs->epc = 0;
 
@@ -336,6 +464,14 @@ int handle_misaligned_store(struct pt_regs *regs)
        } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
                len = 8;
 #endif
+       } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2(insn, regs);
+       } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2(insn, regs);
        } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
                len = 2;
 #if defined(CONFIG_64BIT)
@@ -352,15 +488,89 @@ int handle_misaligned_store(struct pt_regs *regs)
        } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) {
                len = 4;
                val.data_ulong = GET_RS2C(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2S(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2C(insn, regs);
+#if !defined(CONFIG_64BIT)
+       } else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2S(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2C(insn, regs);
+#endif
        } else {
                regs->epc = epc;
                return -1;
        }
 
-       for (i = 0; i < len; i++)
-               store_u8((void *)(addr + i), val.data_bytes[i]);
+       if (!IS_ENABLED(CONFIG_FPU) && fp)
+               return -EOPNOTSUPP;
+
+       if (user_mode(regs)) {
+               if (raw_copy_to_user((u8 __user *)addr, &val, len))
+                       return -1;
+       } else {
+               memcpy((u8 *)addr, &val, len);
+       }
 
        regs->epc = epc + INSN_LEN(insn);
 
        return 0;
 }
+
+static bool check_unaligned_access_emulated(int cpu)
+{
+       long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+       unsigned long tmp_var, tmp_val;
+       bool misaligned_emu_detected;
+
+       *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+       __asm__ __volatile__ (
+               "       "REG_L" %[tmp], 1(%[ptr])\n"
+               : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
+
+       misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED);
+       /*
+        * If unaligned_ctl is already set, it means we detected at boot time
+        * that all CPUs use emulated misaligned accesses. If that changes when
+        * hotplugging a new CPU, this is something we don't handle.
+        */
+       if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
+               pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
+               while (true)
+                       cpu_relax();
+       }
+
+       return misaligned_emu_detected;
+}
+
+bool check_unaligned_access_emulated_all_cpus(void)
+{
+       int cpu;
+
+       /*
+        * We can only support PR_UNALIGN controls if all CPUs have misaligned
+        * accesses emulated since tasks requesting such control can run on any
+        * CPU.
+        */
+       for_each_online_cpu(cpu)
+               if (!check_unaligned_access_emulated(cpu))
+                       return false;
+
+       unaligned_ctl = true;
+       return true;
+}
+
+bool unaligned_ctl_available(void)
+{
+       return unaligned_ctl;
+}
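
The reworked get_insn() above fetches the trapping instruction in 16-bit parcels and derives its length from the low opcode bits. A standalone sketch of that length rule (constants are spelled out rather than taken from the kernel headers so the snippet builds on its own):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define INSN_LENGTH_MASK 0x3u	/* mirrors __INSN_LENGTH_MASK */
#define INSN_LENGTH_32   0x3u	/* mirrors __INSN_LENGTH_32 */

/* Length in bytes of the instruction that starts with this 16-bit parcel. */
static size_t insn_len(uint16_t first_parcel)
{
	return (first_parcel & INSN_LENGTH_MASK) == INSN_LENGTH_32 ? 4 : 2;
}

int main(void)
{
	assert(insn_len(0x852a) == 2);	/* c.mv a0, a0 - compressed */
	assert(insn_len(0x0533) == 4);	/* low parcel of "add a0, a0, a1" */
	return 0;
}
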
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
new file mode 100644 (file)
index 0000000..151489d
--- /dev/null
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+
+#include "copy-unaligned.h"
+
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+static cpumask_t fast_misaligned_access;
+static int check_unaligned_access(void *param)
+{
+       int cpu = smp_processor_id();
+       u64 start_cycles, end_cycles;
+       u64 word_cycles;
+       u64 byte_cycles;
+       int ratio;
+       unsigned long start_jiffies, now;
+       struct page *page = param;
+       void *dst;
+       void *src;
+       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
+
+       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+               return 0;
+
+       /* Make an unaligned destination buffer. */
+       dst = (void *)((unsigned long)page_address(page) | 0x1);
+       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+       src += 2;
+       word_cycles = -1ULL;
+       /* Do a warmup. */
+       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+       preempt_disable();
+       start_jiffies = jiffies;
+       while ((now = jiffies) == start_jiffies)
+               cpu_relax();
+
+       /*
+        * For a fixed amount of time, repeatedly try the function, and take
+        * the best time in cycles as the measurement.
+        */
+       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+               start_cycles = get_cycles64();
+               /* Ensure the CSR read can't reorder WRT to the copy. */
+               mb();
+               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+               /* Ensure the copy ends before the end time is snapped. */
+               mb();
+               end_cycles = get_cycles64();
+               if ((end_cycles - start_cycles) < word_cycles)
+                       word_cycles = end_cycles - start_cycles;
+       }
+
+       byte_cycles = -1ULL;
+       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+       start_jiffies = jiffies;
+       while ((now = jiffies) == start_jiffies)
+               cpu_relax();
+
+       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+               start_cycles = get_cycles64();
+               mb();
+               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+               mb();
+               end_cycles = get_cycles64();
+               if ((end_cycles - start_cycles) < byte_cycles)
+                       byte_cycles = end_cycles - start_cycles;
+       }
+
+       preempt_enable();
+
+       /* Don't divide by zero. */
+       if (!word_cycles || !byte_cycles) {
+               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+                       cpu);
+
+               return 0;
+       }
+
+       if (word_cycles < byte_cycles)
+               speed = RISCV_HWPROBE_MISALIGNED_FAST;
+
+       ratio = div_u64((byte_cycles * 100), word_cycles);
+       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+               cpu,
+               ratio / 100,
+               ratio % 100,
+               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
+
+       per_cpu(misaligned_access_speed, cpu) = speed;
+
+       /*
+        * Set the value of fast_misaligned_access of a CPU. These operations
+        * are atomic to avoid race conditions.
+        */
+       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+               cpumask_set_cpu(cpu, &fast_misaligned_access);
+       else
+               cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
+       return 0;
+}
+
+static void check_unaligned_access_nonboot_cpu(void *param)
+{
+       unsigned int cpu = smp_processor_id();
+       struct page **pages = param;
+
+       if (smp_processor_id() != 0)
+               check_unaligned_access(pages[cpu]);
+}
+
+DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+       if (cpumask_weight(mask) == weight)
+               static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
+       else
+               static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+       /*
+        * Same as set_unaligned_access_static_branches, except excludes the
+        * given CPU from the result. When a CPU is hotplugged into an offline
+        * state, this function is called before the CPU is set to offline in
+        * the cpumask, and thus the CPU needs to be explicitly excluded.
+        */
+
+       cpumask_t fast_except_me;
+
+       cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+       cpumask_clear_cpu(cpu, &fast_except_me);
+
+       modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+       /*
+        * This will be called after check_unaligned_access_all_cpus so the
+        * result of unaligned access speed for all CPUs will be available.
+        *
+        * To avoid the number of online cpus changing between reading
+        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+        * held before calling this function.
+        */
+
+       cpumask_t fast_and_online;
+
+       cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+       modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+       cpus_read_lock();
+       set_unaligned_access_static_branches();
+       cpus_read_unlock();
+
+       return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
+static int riscv_online_cpu(unsigned int cpu)
+{
+       static struct page *buf;
+
+       /* We are already set since the last check */
+       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+               goto exit;
+
+       buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+       if (!buf) {
+               pr_warn("Allocation failure, not measuring misaligned performance\n");
+               return -ENOMEM;
+       }
+
+       check_unaligned_access(buf);
+       __free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+       set_unaligned_access_static_branches();
+
+       return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+       set_unaligned_access_static_branches_except_cpu(cpu);
+
+       return 0;
+}
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+       unsigned int cpu;
+       unsigned int cpu_count = num_possible_cpus();
+       struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
+                                    GFP_KERNEL);
+
+       if (!bufs) {
+               pr_warn("Allocation failure, not measuring misaligned performance\n");
+               return 0;
+       }
+
+       /*
+        * Allocate separate buffers for each CPU so there's no fighting over
+        * cache lines.
+        */
+       for_each_cpu(cpu, cpu_online_mask) {
+               bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+               if (!bufs[cpu]) {
+                       pr_warn("Allocation failure, not measuring misaligned performance\n");
+                       goto out;
+               }
+       }
+
+       /* Check everybody except 0, who stays behind to tend jiffies. */
+       on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+       /* Check core 0. */
+       smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+       /*
+        * Explain why CPU0 looks more efficient at unaligned accesses than
+        * the non-boot cores: the non-boot cores are measured concurrently,
+        * while CPU0 is measured on its own.
+        */
+       pr_info("The most representative byte-to-unaligned-word access ratio is the one reported for CPU0\n");
+       pr_info("CPU0's unaligned accesses look more efficient than the non-boot cores' because it does not compete for system bandwidth during its measurement\n");
+       pr_info("Non-boot CPUs are measured simultaneously, while CPU0 is measured separately\n");
+
+       /*
+        * Setup hotplug callbacks for any new CPUs that come online or go
+        * offline.
+        */
+       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+                                 riscv_online_cpu, riscv_offline_cpu);
+
+out:
+       for_each_cpu(cpu, cpu_online_mask) {
+               if (bufs[cpu])
+                       __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+       }
+
+       kfree(bufs);
+       return 0;
+}
+
+static int check_unaligned_access_all_cpus(void)
+{
+       bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+
+       if (!all_cpus_emulated)
+               return check_unaligned_access_speed_all_cpus();
+
+       return 0;
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_all_cpus(void)
+{
+       check_unaligned_access_emulated_all_cpus();
+
+       return 0;
+}
+#endif
+
+arch_initcall(check_unaligned_access_all_cpus);
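
The probe above times a word-wise unaligned copy against a byte-wise copy and keeps the best sample of each. A rough userspace analogue of the same idea (clock_gettime() instead of get_cycles64(), no per-CPU buffers; the misaligned word accesses rely on the platform tolerating them, which is exactly the property being probed):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define BUF_SZ  (16 * 1024)
#define COPY_SZ (BUF_SZ / 2 - 0x80)
#define SAMPLES 64

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Word-wise copy through deliberately misaligned pointers. */
static void copy_words(unsigned char *dst, const unsigned char *src, size_t n)
{
	for (size_t i = 0; i + sizeof(unsigned long) <= n; i += sizeof(unsigned long))
		*(unsigned long *)(dst + i) = *(const unsigned long *)(src + i);
}

static void copy_bytes(unsigned char *dst, const unsigned char *src, size_t n)
{
	for (size_t i = 0; i < n; i++)
		dst[i] = src[i];
}

int main(void)
{
	unsigned char *buf = malloc(BUF_SZ + 16);
	uint64_t best_word = UINT64_MAX, best_byte = UINT64_MAX;

	if (!buf)
		return 1;

	/* dst off by 1, src off by 3, as in the kernel's buffer setup. */
	unsigned char *dst = buf + 1;
	unsigned char *src = buf + BUF_SZ / 2 + 3;

	for (int i = 0; i < SAMPLES; i++) {
		uint64_t t0 = now_ns();
		copy_words(dst, src, COPY_SZ);
		uint64_t t1 = now_ns();
		copy_bytes(dst, src, COPY_SZ);
		uint64_t t2 = now_ns();

		if (t1 - t0 < best_word)
			best_word = t1 - t0;
		if (t2 - t1 < best_byte)
			best_byte = t2 - t1;
	}

	printf("unaligned word copy looks %s (best word %llu ns vs best byte %llu ns)\n",
	       best_word < best_byte ? "fast" : "slow",
	       (unsigned long long)best_word, (unsigned long long)best_byte);
	free(buf);
	return 0;
}
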
index 6b1dba11bf6dcde2baea248ce1395f5f334ca04f..9b517fe1b8a8ecfddfae487dc9e829cc622334f2 100644 (file)
@@ -36,7 +36,7 @@ CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY
 endif
 
 # Disable -pg to prevent insert call site
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
 
 # Disable profiling and instrumentation for VDSO code
 GCOV_PROFILE := n
@@ -73,13 +73,3 @@ quiet_cmd_vdsold = VDSOLD  $@
       cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
                    $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
                    rm $@.tmp
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso.so: $(obj)/vdso.so.dbg
-       @mkdir -p $(MODLIB)/vdso
-       $(call cmd,vdso_install)
-
-vdso_install: vdso.so
index 82f97d67c23e9bdde94b0d2f655f52d32c8fd6d1..8f884227e8bca7fd3634217e71d4ee4ed122559a 100644 (file)
@@ -8,7 +8,7 @@
 
        .text
 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
-ENTRY(__vdso_flush_icache)
+SYM_FUNC_START(__vdso_flush_icache)
        .cfi_startproc
 #ifdef CONFIG_SMP
        li a7, __NR_riscv_flush_icache
@@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache)
 #endif
        ret
        .cfi_endproc
-ENDPROC(__vdso_flush_icache)
+SYM_FUNC_END(__vdso_flush_icache)
index bb0c05e2ffbae3d6aa3609fc5b5de84630aaa37d..9c1bd531907f2fefda1d0778191073ca6b70df1a 100644 (file)
@@ -8,11 +8,11 @@
 
        .text
 /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
-ENTRY(__vdso_getcpu)
+SYM_FUNC_START(__vdso_getcpu)
        .cfi_startproc
        /* For now, just do the syscall. */
        li a7, __NR_getcpu
        ecall
        ret
        .cfi_endproc
-ENDPROC(__vdso_getcpu)
+SYM_FUNC_END(__vdso_getcpu)
index cadf725ef798370bbe248f80dcf207c7fd439e7b..1e926e4b5881b6b2c44ec8438870809539f773c5 100644 (file)
@@ -3,26 +3,22 @@
  * Copyright 2023 Rivos, Inc
  */
 
+#include <linux/string.h>
 #include <linux/types.h>
 #include <vdso/datapage.h>
 #include <vdso/helpers.h>
 
 extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-                        size_t cpu_count, unsigned long *cpus,
+                        size_t cpusetsize, unsigned long *cpus,
                         unsigned int flags);
 
-/* Add a prototype to avoid -Wmissing-prototypes warning. */
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-                        size_t cpu_count, unsigned long *cpus,
-                        unsigned int flags);
-
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-                        size_t cpu_count, unsigned long *cpus,
-                        unsigned int flags)
+static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
+                                size_t cpusetsize, unsigned long *cpus,
+                                unsigned int flags)
 {
        const struct vdso_data *vd = __arch_get_vdso_data();
        const struct arch_vdso_data *avd = &vd->arch_data;
-       bool all_cpus = !cpu_count && !cpus;
+       bool all_cpus = !cpusetsize && !cpus;
        struct riscv_hwprobe *p = pairs;
        struct riscv_hwprobe *end = pairs + pair_count;
 
@@ -33,7 +29,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
         * masks.
         */
        if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
-               return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags);
+               return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
 
        /* This is something we can handle, fill out the pairs. */
        while (p < end) {
@@ -50,3 +46,71 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
 
        return 0;
 }
+
+static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
+                              size_t cpusetsize, unsigned long *cpus,
+                              unsigned int flags)
+{
+       const struct vdso_data *vd = __arch_get_vdso_data();
+       const struct arch_vdso_data *avd = &vd->arch_data;
+       struct riscv_hwprobe *p = pairs;
+       struct riscv_hwprobe *end = pairs + pair_count;
+       unsigned char *c = (unsigned char *)cpus;
+       bool empty_cpus = true;
+       bool clear_all = false;
+       int i;
+
+       if (!cpusetsize || !cpus)
+               return -EINVAL;
+
+       for (i = 0; i < cpusetsize; i++) {
+               if (c[i]) {
+                       empty_cpus = false;
+                       break;
+               }
+       }
+
+       if (empty_cpus || flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
+               return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
+
+       while (p < end) {
+               if (riscv_hwprobe_key_is_valid(p->key)) {
+                       struct riscv_hwprobe t = {
+                               .key = p->key,
+                               .value = avd->all_cpu_hwprobe_values[p->key],
+                       };
+
+                       if (!riscv_hwprobe_pair_cmp(&t, p))
+                               clear_all = true;
+               } else {
+                       clear_all = true;
+                       p->key = -1;
+                       p->value = 0;
+               }
+               p++;
+       }
+
+       if (clear_all) {
+               for (i = 0; i < cpusetsize; i++)
+                       c[i] = 0;
+       }
+
+       return 0;
+}
+
+/* Add a prototype to avoid -Wmissing-prototypes warning. */
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+                        size_t cpusetsize, unsigned long *cpus,
+                        unsigned int flags);
+
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+                        size_t cpusetsize, unsigned long *cpus,
+                        unsigned int flags)
+{
+       if (flags & RISCV_HWPROBE_WHICH_CPUS)
+               return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize,
+                                          cpus, flags);
+
+       return riscv_vdso_get_values(pairs, pair_count, cpusetsize,
+                                    cpus, flags);
+}
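
The new riscv_vdso_get_cpus() path answers RISCV_HWPROBE_WHICH_CPUS queries without entering the kernel when the system is homogeneous. A userspace sketch of that kind of query, issued here through the raw syscall for simplicity (assumes the uapi <asm/hwprobe.h> from this tree, which defines the flag): ask which CPUs report fast misaligned accesses.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key   = RISCV_HWPROBE_KEY_CPUPERF_0,
		.value = RISCV_HWPROBE_MISALIGNED_FAST,
	};
	long nr = sysconf(_SC_NPROCESSORS_CONF);
	cpu_set_t cpus;

	/* Start from all configured CPUs; the kernel clears non-matching ones. */
	CPU_ZERO(&cpus);
	for (long i = 0; i < nr && i < CPU_SETSIZE; i++)
		CPU_SET(i, &cpus);

	if (syscall(__NR_riscv_hwprobe, &pair, 1, sizeof(cpus), &cpus,
		    RISCV_HWPROBE_WHICH_CPUS))
		return 1;

	printf("%d of %ld CPUs report fast unaligned accesses\n",
	       CPU_COUNT(&cpus), nr);
	return 0;
}
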
index 10438c7c626acc8034fa22d6765422fbc7b67f0b..3dc022aa8931ad3b3798f4cc492ce795dd7b7bf5 100644 (file)
@@ -7,10 +7,10 @@
 #include <asm/unistd.h>
 
        .text
-ENTRY(__vdso_rt_sigreturn)
+SYM_FUNC_START(__vdso_rt_sigreturn)
        .cfi_startproc
        .cfi_signal_frame
        li a7, __NR_rt_sigreturn
        ecall
        .cfi_endproc
-ENDPROC(__vdso_rt_sigreturn)
+SYM_FUNC_END(__vdso_rt_sigreturn)
index 4e704146c77a092e481b8b532c19b11e3efa82e4..77e57f8305216c466f51979c91899754b4a7b382 100644 (file)
@@ -5,11 +5,11 @@
 #include <asm/unistd.h>
 
 .text
-ENTRY(riscv_hwprobe)
+SYM_FUNC_START(riscv_hwprobe)
        .cfi_startproc
        li a7, __NR_riscv_hwprobe
        ecall
        ret
 
        .cfi_endproc
-ENDPROC(riscv_hwprobe)
+SYM_FUNC_END(riscv_hwprobe)
index 8d92fb6c522cc27ca3015550bcfa3afddb6c4909..682b3feee45114694f29f2479bb7c75ce54e7e56 100644 (file)
 #include <asm/bug.h>
 
 static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
 
 unsigned long riscv_v_vsize __read_mostly;
 EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
        return 0;
 }
 
+void __init riscv_v_setup_ctx_cache(void)
+{
+       if (!has_vector())
+               return;
+
+       riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
+                                                        riscv_v_vsize, 16, SLAB_PANIC,
+                                                        0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+                                                 riscv_v_vsize, 16,
+                                                 SLAB_PANIC, NULL);
+#endif
+}
+
 static bool insn_is_vector(u32 insn_buf)
 {
        u32 opcode = insn_buf & __INSN_OPCODE_MASK;
@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
        return false;
 }
 
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+                                struct __riscv_v_ext_state *ctx)
 {
        void *datap;
 
-       datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+       datap = kmem_cache_zalloc(cache, GFP_KERNEL);
        if (!datap)
                return -ENOMEM;
 
-       current->thread.vstate.datap = datap;
-       memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
-                                                   datap));
+       ctx->datap = datap;
+       memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
        return 0;
 }
 
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
+void riscv_v_thread_free(struct task_struct *tsk)
+{
+       if (tsk->thread.vstate.datap)
+               kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+       if (tsk->thread.kernel_vstate.datap)
+               kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
+}
+
 #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
 #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
 #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
        ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
        if (inherit)
                ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
-       tsk->thread.vstate_ctrl = ctrl;
+       tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
+       tsk->thread.vstate_ctrl |= ctrl;
 }
 
 bool riscv_v_vstate_ctrl_user_allowed(void)
@@ -136,8 +173,11 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
        u32 __user *epc = (u32 __user *)regs->epc;
        u32 insn = (u32)regs->badaddr;
 
+       if (!has_vector())
+               return false;
+
        /* Do not handle if V is not supported, or disabled */
-       if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
+       if (!riscv_v_vstate_ctrl_user_allowed())
                return false;
 
        /* If V has been enabled then it is not the first-use trap */
@@ -162,12 +202,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
         * context where VS has been off. So, try to allocate the user's V
         * context and resume execution.
         */
-       if (riscv_v_thread_zalloc()) {
+       if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
                force_sig(SIGBUS);
                return true;
        }
        riscv_v_vstate_on(regs);
-       riscv_v_vstate_restore(current, regs);
+       riscv_v_vstate_set_restore(current, regs);
        return true;
 }
 
@@ -255,7 +295,6 @@ static struct ctl_table riscv_v_default_vstate_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dobool,
        },
-       { }
 };
 
 static int __init riscv_v_sysctl_init(void)
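
The vstate_ctrl handling touched above backs the PR_RISCV_V_SET_CONTROL/PR_RISCV_V_GET_CONTROL prctl interface. A small userspace sketch of how a task drives it (assumes a <linux/prctl.h> that carries the PR_RISCV_V_* constants):

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Enable V for this task and let children inherit the setting. */
	if (prctl(PR_RISCV_V_SET_CONTROL,
		  PR_RISCV_V_VSTATE_CTRL_ON | PR_RISCV_V_VSTATE_CTRL_INHERIT))
		perror("PR_RISCV_V_SET_CONTROL");

	printf("vstate control: %d\n", prctl(PR_RISCV_V_GET_CONTROL));
	return 0;
}
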
index 74bb27440527b3bafa1418fc75337162e9bc3589..a944294f6f23a70335070dc877588321429da0de 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_aia_imsic.h>
 
 struct aia_hgei_control {
index 0eb689351b7d04128ba93122209f71b830b33efe..5cd407c6a8e4f82389ad3d0a63081d6dcfec7688 100644 (file)
@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
 
 static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
 {
-       u32 hart, group = 0;
+       u32 hart = 0, group = 0;
 
-       hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
-               GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+       if (aia->nr_hart_bits)
+               hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+                      GENMASK_ULL(aia->nr_hart_bits - 1, 0);
        if (aia->nr_group_bits)
                group = (addr >> aia->nr_group_shift) &
                        GENMASK_ULL(aia->nr_group_bits - 1, 0);
index 48ae0d4b3932457f642760b01c831bf84f5fb3bc..225a435d9c9a9c25b8cf24f4501a2e9e3bb94d1b 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/sbi.h>
 
 long kvm_arch_dev_ioctl(struct file *filp,
index 44bc324aeeb08d824804fd1138e4ab2b0d5e2d8e..23c0e82b5103cdd950b2da266258260292c0cea5 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/kvm_host.h>
 #include <asm/cacheflush.h>
 #include <asm/csr.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/insn-def.h>
 
 #define has_svinval()  riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
index 08ba48a395aa2a232bd00755f02355e6770d0ee7..030904d82b583e1ce3f4e44cdabe4e61e708e616 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 
 #ifdef CONFIG_FPU
 void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
index b7e0e03c69b1e50c7fa937ca513129259678aad3..9e7e755163a9ea8407da35f1b9c7eb916f151699 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/uaccess.h>
 #include <linux/kvm_host.h>
 #include <asm/cacheflush.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_vcpu_vector.h>
 #include <asm/vector.h>
 
@@ -614,9 +614,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
        switch (reg_subtype) {
        case KVM_REG_RISCV_ISA_SINGLE:
                return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
-       case KVM_REG_RISCV_SBI_MULTI_EN:
+       case KVM_REG_RISCV_ISA_MULTI_EN:
                return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
-       case KVM_REG_RISCV_SBI_MULTI_DIS:
+       case KVM_REG_RISCV_ISA_MULTI_DIS:
                return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
        default:
                return -ENOENT;
index b430cbb695214da2a500d46c58c7113faa04237c..b339a2682f252bb8c0ac6d3803a8eab46e1e1443 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_vcpu_vector.h>
 #include <asm/vector.h>
 
index 26cb2502ecf8969b76a47e874f6be9e90219887b..a09aab75331f0f82d2aac031230d149dc51a8565 100644 (file)
@@ -6,8 +6,13 @@ lib-y                  += memmove.o
 lib-y                  += strcmp.o
 lib-y                  += strlen.o
 lib-y                  += strncmp.o
-lib-$(CONFIG_MMU)      += uaccess.o
+ifeq ($(CONFIG_MMU), y)
+lib-y                          += uaccess.o
+lib-$(CONFIG_RISCV_ISA_V)      += uaccess_vector.o
+endif
 lib-$(CONFIG_64BIT)    += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
+lib-$(CONFIG_RISCV_ISA_V)      += xor.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V)      += riscv_v_helpers.o
index d7a256eb53f404feaf8ebc750453968bc5352188..44adc2f1729e767457fe1ee0ea77dd7bacd832f6 100644 (file)
@@ -4,9 +4,9 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/alternative-macros.h>
-#include <asm-generic/export.h>
 #include <asm/hwcap.h>
 #include <asm/insn-def.h>
 #include <asm/page.h>
index 1a40d01a95439e1592b673ad76e5f5b964bbbed3..44e009ec5fef683a3290d57f31239661a5352f9e 100644 (file)
@@ -7,8 +7,7 @@
 #include <asm/asm.h>
 
 /* void *memcpy(void *, const void *, size_t) */
-ENTRY(__memcpy)
-WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
        move t6, a0  /* Preserve return value */
 
        /* Defer to byte-oriented copy for small sizes */
@@ -105,6 +104,7 @@ WEAK(memcpy)
        bltu a1, a3, 5b
 6:
        ret
-END(__memcpy)
+SYM_FUNC_END(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
 SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)
 SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)
index 838ff2022fe32d8e16769ef0e5962d9c294b0ed3..cb3e2e7ef0baa248d906717523a6f848f591eaf9 100644 (file)
@@ -7,7 +7,6 @@
 #include <asm/asm.h>
 
 SYM_FUNC_START(__memmove)
-SYM_FUNC_START_WEAK(memmove)
        /*
         * Returns
         *   a0 - dest
@@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove)
         */
 
        /* Return if nothing to do */
-       beq a0, a1, return_from_memmove
-       beqz a2, return_from_memmove
+       beq a0, a1, .Lreturn_from_memmove
+       beqz a2, .Lreturn_from_memmove
 
        /*
         * Register Uses
@@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove)
         * small enough not to bother.
         */
        andi t0, a2, -(2 * SZREG)
-       beqz t0, byte_copy
+       beqz t0, .Lbyte_copy
 
        /*
         * Now solve for t5 and t6.
@@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove)
         */
        xor  t0, a0, a1
        andi t1, t0, (SZREG - 1)
-       beqz t1, coaligned_copy
+       beqz t1, .Lcoaligned_copy
        /* Fall through to misaligned fixup copy */
 
-misaligned_fixup_copy:
-       bltu a1, a0, misaligned_fixup_copy_reverse
+.Lmisaligned_fixup_copy:
+       bltu a1, a0, .Lmisaligned_fixup_copy_reverse
 
-misaligned_fixup_copy_forward:
-       jal  t0, byte_copy_until_aligned_forward
+.Lmisaligned_fixup_copy_forward:
+       jal  t0, .Lbyte_copy_until_aligned_forward
 
        andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
@@ -153,10 +152,10 @@ misaligned_fixup_copy_forward:
        mv    t3, t6 /* Fix the dest pointer in case the loop was broken */
 
        add  a1, t3, a5 /* Restore the src pointer */
-       j byte_copy_forward /* Copy any remaining bytes */
+       j .Lbyte_copy_forward /* Copy any remaining bytes */
 
-misaligned_fixup_copy_reverse:
-       jal  t0, byte_copy_until_aligned_reverse
+.Lmisaligned_fixup_copy_reverse:
+       jal  t0, .Lbyte_copy_until_aligned_reverse
 
        andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
@@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse:
        mv    t4, t5 /* Fix the dest pointer in case the loop was broken */
 
        add  a4, t4, a5 /* Restore the src pointer */
-       j byte_copy_reverse /* Copy any remaining bytes */
+       j .Lbyte_copy_reverse /* Copy any remaining bytes */
 
 /*
  * Simple copy loops for SZREG co-aligned memory locations.
  * These also make calls to do byte copies for any unaligned
  * data at their terminations.
  */
-coaligned_copy:
-       bltu a1, a0, coaligned_copy_reverse
+.Lcoaligned_copy:
+       bltu a1, a0, .Lcoaligned_copy_reverse
 
-coaligned_copy_forward:
-       jal t0, byte_copy_until_aligned_forward
+.Lcoaligned_copy_forward:
+       jal t0, .Lbyte_copy_until_aligned_forward
 
        1:
        REG_L t1, ( 0 * SZREG)(a1)
@@ -235,10 +234,10 @@ coaligned_copy_forward:
        REG_S t1, (-1 * SZREG)(t3)
        bne   t3, t6, 1b
 
-       j byte_copy_forward /* Copy any remaining bytes */
+       j .Lbyte_copy_forward /* Copy any remaining bytes */
 
-coaligned_copy_reverse:
-       jal t0, byte_copy_until_aligned_reverse
+.Lcoaligned_copy_reverse:
+       jal t0, .Lbyte_copy_until_aligned_reverse
 
        1:
        REG_L t1, (-1 * SZREG)(a4)
@@ -247,7 +246,7 @@ coaligned_copy_reverse:
        REG_S t1, ( 0 * SZREG)(t4)
        bne   t4, t5, 1b
 
-       j byte_copy_reverse /* Copy any remaining bytes */
+       j .Lbyte_copy_reverse /* Copy any remaining bytes */
 
 /*
  * These are basically sub-functions within the function.  They
@@ -258,7 +257,7 @@ coaligned_copy_reverse:
  * up from where they were left and we avoid code duplication
  * without any overhead except the call in and return jumps.
  */
-byte_copy_until_aligned_forward:
+.Lbyte_copy_until_aligned_forward:
        beq  t3, t5, 2f
        1:
        lb   t1,  0(a1)
@@ -269,7 +268,7 @@ byte_copy_until_aligned_forward:
        2:
        jalr zero, 0x0(t0) /* Return to multibyte copy loop */
 
-byte_copy_until_aligned_reverse:
+.Lbyte_copy_until_aligned_reverse:
        beq  t4, t6, 2f
        1:
        lb   t1, -1(a4)
@@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse:
  * These will byte copy until they reach the end of data to copy.
  * At that point, they will call to return from memmove.
  */
-byte_copy:
-       bltu a1, a0, byte_copy_reverse
+.Lbyte_copy:
+       bltu a1, a0, .Lbyte_copy_reverse
 
-byte_copy_forward:
+.Lbyte_copy_forward:
        beq  t3, t4, 2f
        1:
        lb   t1,  0(a1)
@@ -299,7 +298,7 @@ byte_copy_forward:
        2:
        ret
 
-byte_copy_reverse:
+.Lbyte_copy_reverse:
        beq  t4, t3, 2f
        1:
        lb   t1, -1(a4)
@@ -309,10 +308,10 @@ byte_copy_reverse:
        bne  t4, t3, 1b
        2:
 
-return_from_memmove:
+.Lreturn_from_memmove:
        ret
 
-SYM_FUNC_END(memmove)
 SYM_FUNC_END(__memmove)
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
 SYM_FUNC_ALIAS(__pi_memmove, __memmove)
 SYM_FUNC_ALIAS(__pi___memmove, __memmove)
index 34c5360c6705c56466cf9890f1a7261e6383a5e3..35f358e70bdb6bf79bda0f366e34c98c6ef5bbc3 100644 (file)
@@ -8,8 +8,7 @@
 #include <asm/asm.h>
 
 /* void *memset(void *, int, size_t) */
-ENTRY(__memset)
-WEAK(memset)
+SYM_FUNC_START(__memset)
        move t0, a0  /* Preserve return value */
 
        /* Defer to byte-oriented fill for small sizes */
@@ -110,4 +109,5 @@ WEAK(memset)
        bltu t0, a3, 5b
 6:
        ret
-END(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
new file mode 100644 (file)
index 0000000..be38a93
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#include <asm/vector.h>
+#include <asm/simd.h>
+
+#ifdef CONFIG_MMU
+#include <asm/asm-prototypes.h>
+#endif
+
+#ifdef CONFIG_MMU
+size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
+int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int fallback_scalar_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+{
+       size_t remain, copied;
+
+       /* skip has_vector() check because it has been done by the asm  */
+       if (!may_use_simd())
+               goto fallback;
+
+       kernel_vector_begin();
+       remain = __asm_vector_usercopy(dst, src, n);
+       kernel_vector_end();
+
+       if (remain) {
+               copied = n - remain;
+               dst += copied;
+               src += copied;
+               n = remain;
+               goto fallback;
+       }
+
+       return remain;
+
+fallback:
+       return fallback_scalar_usercopy(dst, src, n);
+}
+#endif
index ef90075c4b0a9c153c02cdfc0d0fbdc98e151134..c8294bf72c064439919cc4895c6a51180f7ddec6 100644 (file)
@@ -4,7 +4,7 @@
  */
 
 #include <linux/linkage.h>
-#include <asm-generic/export.h>
+#include <linux/export.h>
 
 SYM_FUNC_START(__lshrti3)
        beqz    a2, .L1
index 09b47ebacf2e8743bc1cf77be0693b4499748afb..bbe143bb32a00f75e5adfa2e21e418492c952758 100644 (file)
@@ -1,8 +1,10 @@
 #include <linux/linkage.h>
-#include <asm-generic/export.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-extable.h>
 #include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
 
        .macro fixup op reg addr lbl
 100:
        _asm_extable    100b, \lbl
        .endm
 
-ENTRY(__asm_copy_to_user)
-ENTRY(__asm_copy_from_user)
+SYM_FUNC_START(__asm_copy_to_user)
+#ifdef CONFIG_RISCV_ISA_V
+       ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_ZVE32X, CONFIG_RISCV_ISA_V)
+       REG_L   t0, riscv_v_usercopy_threshold
+       bltu    a2, t0, fallback_scalar_usercopy
+       tail enter_vector_usercopy
+#endif
+SYM_FUNC_START(fallback_scalar_usercopy)
 
        /* Enable access to user memory */
        li t6, SR_SUM
@@ -181,13 +189,14 @@ ENTRY(__asm_copy_from_user)
        csrc CSR_STATUS, t6
        sub a0, t5, a0
        ret
-ENDPROC(__asm_copy_to_user)
-ENDPROC(__asm_copy_from_user)
+SYM_FUNC_END(__asm_copy_to_user)
+SYM_FUNC_END(fallback_scalar_usercopy)
 EXPORT_SYMBOL(__asm_copy_to_user)
+SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
 EXPORT_SYMBOL(__asm_copy_from_user)
 
 
-ENTRY(__clear_user)
+SYM_FUNC_START(__clear_user)
 
        /* Enable access to user memory */
        li t6, SR_SUM
@@ -233,5 +242,5 @@ ENTRY(__clear_user)
        csrc CSR_STATUS, t6
        sub a0, a3, a0
        ret
-ENDPROC(__clear_user)
+SYM_FUNC_END(__clear_user)
 EXPORT_SYMBOL(__clear_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
new file mode 100644 (file)
index 0000000..51ab558
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+#include <asm/asm-extable.h>
+#include <asm/csr.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+       .macro fixup op reg addr lbl
+100:
+       \op \reg, \addr
+       _asm_extable    100b, \lbl
+       .endm
+
+SYM_FUNC_START(__asm_vector_usercopy)
+       /* Enable access to user memory */
+       li      t6, SR_SUM
+       csrs    CSR_STATUS, t6
+
+loop:
+       vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+       fixup vle8.v vData, (pSrc), 10f
+       sub iNum, iNum, iVL
+       add pSrc, pSrc, iVL
+       fixup vse8.v vData, (pDst), 11f
+       add pDst, pDst, iVL
+       bnez iNum, loop
+
+       /* The exception fixup for the vector load shares the normal exit path */
+10:
+       /* Disable access to user memory */
+       csrc    CSR_STATUS, t6
+       mv      a0, iNum
+       ret
+
+       /* Exception fixup code for vector store. */
+11:
+       /* Undo the subtraction after vle8.v */
+       add     iNum, iNum, iVL
+       /* Make sure the scalar fallback skips the already-processed bytes */
+       csrr    t2, CSR_VSTART
+       sub     iNum, iNum, t2
+       j       10b
+SYM_FUNC_END(__asm_vector_usercopy)
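
The store-fault fixup at label 11 is the subtle part: iNum was already decremented by iVL before the faulting vse8.v, and CSR_VSTART holds the index of the first element that was not stored (with e8 elements, one element is one byte). A small C model of that arithmetic, with assumed names for illustration:

    /*
     * Model of the "11:" fixup above: recover the number of bytes still
     * outstanding after a fault in the vector store. inum_after_sub is iNum
     * after "sub iNum, iNum, iVL", ivl is the vector length of the faulting
     * iteration, vstart is CSR_VSTART (elements already stored).
     */
    #include <stdio.h>

    static unsigned long bytes_left_after_store_fault(unsigned long inum_after_sub,
                                                      unsigned long ivl,
                                                      unsigned long vstart)
    {
            unsigned long left = inum_after_sub + ivl;  /* undo the subtraction */

            return left - vstart;                       /* skip stored elements */
    }

    int main(void)
    {
            /* 4096 bytes pending, VL = 256, fault after 64 elements were stored */
            printf("%lu\n", bytes_left_after_store_fault(4096 - 256, 256, 64)); /* 4032 */
            return 0;
    }
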
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644 (file)
index 0000000..b28f243
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(xor_regs_2_)
+       vsetvli a3, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a3
+       vxor.vv v16, v0, v8
+       add a2, a2, a3
+       vse8.v v16, (a1)
+       add a1, a1, a3
+       bnez a0, xor_regs_2_
+       ret
+SYM_FUNC_END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+SYM_FUNC_START(xor_regs_3_)
+       vsetvli a4, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a4
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a4
+       vxor.vv v16, v0, v16
+       add a3, a3, a4
+       vse8.v v16, (a1)
+       add a1, a1, a4
+       bnez a0, xor_regs_3_
+       ret
+SYM_FUNC_END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+SYM_FUNC_START(xor_regs_4_)
+       vsetvli a5, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a5
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a5
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a5
+       vxor.vv v16, v0, v24
+       add a4, a4, a5
+       vse8.v v16, (a1)
+       add a1, a1, a5
+       bnez a0, xor_regs_4_
+       ret
+SYM_FUNC_END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+SYM_FUNC_START(xor_regs_5_)
+       vsetvli a6, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a6
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a6
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a6
+       vxor.vv v0, v0, v24
+       vle8.v v8, (a5)
+       add a4, a4, a6
+       vxor.vv v16, v0, v8
+       add a5, a5, a6
+       vse8.v v16, (a1)
+       add a1, a1, a6
+       bnez a0, xor_regs_5_
+       ret
+SYM_FUNC_END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
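
For reference, the vector routines above compute the same result as a byte-wise XOR of the source buffers into the first one; the assembly simply does it one vsetvli-sized chunk at a time. A scalar C sketch of the 2- and 3-source cases (not the kernel's xor_block templates, just an equivalence reference):

    /* Scalar reference for what xor_regs_2_ / xor_regs_3_ compute. */
    #include <stddef.h>

    static void xor_2_scalar(size_t bytes, unsigned char *p1, const unsigned char *p2)
    {
            for (size_t i = 0; i < bytes; i++)
                    p1[i] ^= p2[i];
    }

    static void xor_3_scalar(size_t bytes, unsigned char *p1,
                             const unsigned char *p2, const unsigned char *p3)
    {
            for (size_t i = 0; i < bytes; i++)
                    p1[i] ^= p2[i] ^ p3[i];
    }

    int main(void)
    {
            unsigned char a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
            unsigned char b[8] = {8, 7, 6, 5, 4, 3, 2, 1};

            xor_2_scalar(sizeof(a), a, b);
            xor_3_scalar(sizeof(a), a, b, b);   /* b ^ b == 0, so a is unchanged */
            return 0;
    }
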
index ec02ea86aa39fa3be6ec9bde87784cc583cda3c2..2dcb33006e2656a19958c3249ca50d4a233075ff 100644 (file)
@@ -234,25 +234,31 @@ static void __init setup_bootmem(void)
                kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
 
        /*
-        * memblock allocator is not aware of the fact that last 4K bytes of
-        * the addressable memory can not be mapped because of IS_ERR_VALUE
-        * macro. Make sure that last 4k bytes are not usable by memblock
-        * if end of dram is equal to maximum addressable memory.  For 64-bit
-        * kernel, this problem can't happen here as the end of the virtual
-        * address space is occupied by the kernel mapping then this check must
-        * be done as soon as the kernel mapping base address is determined.
+        * Reserve physical address space that would be mapped to virtual
+        * addresses greater than (void *)(-PAGE_SIZE) because:
+        *  - This memory would overlap with ERR_PTR
+        *  - This memory belongs to high memory, which is not supported
+        *
+        * This is not applicable to the 64-bit kernel, because virtual
+        * addresses after (void *)(-PAGE_SIZE) are not linearly mapped:
+        * they are occupied by the kernel mapping. It is also unrealistic
+        * for high memory to exist on 64-bit platforms.
         */
        if (!IS_ENABLED(CONFIG_64BIT)) {
-               max_mapped_addr = __pa(~(ulong)0);
-               if (max_mapped_addr == (phys_ram_end - 1))
-                       memblock_set_current_limit(max_mapped_addr - 4096);
+               max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
+               memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
        }
 
        min_low_pfn = PFN_UP(phys_ram_base);
        max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
        high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
 
+       #ifdef CONFIG_SOC_SPACEMIT_K1X
+       /* The 2 GB-4 GB physical range is an I/O area on spacemit-k1x; it is reserved during early_init_fdt_scan_reserved_mem() */
+       dma32_phys_limit = min(2UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
+       #else
        dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
+       #endif
        set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
 
        reserve_initrd_mem();
@@ -273,7 +279,12 @@ static void __init setup_bootmem(void)
        if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
                memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
+#ifdef CONFIG_ZONE_DMA32
        dma_contiguous_reserve(dma32_phys_limit);
+#else
+       dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
+#endif
+
        if (IS_ENABLED(CONFIG_64BIT))
                hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 }
@@ -667,16 +678,19 @@ void __init create_pgd_mapping(pgd_t *pgdp,
 static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
                                      phys_addr_t size)
 {
-       if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
-               return PGDIR_SIZE;
+       if (debug_pagealloc_enabled())
+               return PAGE_SIZE;
 
-       if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+       if (pgtable_l5_enabled &&
+           !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
                return P4D_SIZE;
 
-       if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+       if (pgtable_l4_enabled &&
+           !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
                return PUD_SIZE;
 
-       if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
+       if (IS_ENABLED(CONFIG_64BIT) &&
+           !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
                return PMD_SIZE;
 
        return PAGE_SIZE;
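
The rewritten best_map_size() now also respects the active paging mode: a P4D or PUD block mapping is only attempted when the corresponding table level actually exists, and everything collapses to 4K pages under DEBUG_PAGEALLOC. The selection itself is a largest-aligned-block search, sketched below with illustrative Sv39-style sizes (placeholders, not the kernel's P4D/PUD/PMD constants or pgtable_lN_enabled flags):

    /* Sketch of the block-size selection performed by best_map_size(). */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SZ (4096ULL)
    #define PMD_SZ  (2ULL << 20)   /* 2 MiB */
    #define PUD_SZ  (1ULL << 30)   /* 1 GiB */

    static bool aligned_and_fits(uint64_t pa, uint64_t va, uint64_t size, uint64_t blk)
    {
            return !(pa & (blk - 1)) && !(va & (blk - 1)) && size >= blk;
    }

    static uint64_t pick_map_size(uint64_t pa, uint64_t va, uint64_t size,
                                  bool pud_level_enabled)
    {
            if (pud_level_enabled && aligned_and_fits(pa, va, size, PUD_SZ))
                    return PUD_SZ;
            if (aligned_and_fits(pa, va, size, PMD_SZ))
                    return PMD_SZ;
            return PAGE_SZ;
    }

    int main(void)
    {
            /* 2M-aligned pa/va with 16M to map -> a 2M block mapping is chosen */
            printf("%llu\n", (unsigned long long)
                   pick_map_size(0x80200000ULL, 0xffffffd800200000ULL,
                                 16ULL << 20, true));
            return 0;
    }
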
index 01398fee5cf8281b5c6df39d3f6adb113a7436f0..f61b2f8291e357cc7180b98efcbfd43f84477db8 100644 (file)
@@ -387,17 +387,33 @@ int set_direct_map_default_noflush(struct page *page)
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
+static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data)
+{
+       int enable = *(int *)data;
+
+       unsigned long val = pte_val(ptep_get(pte));
+
+       if (enable)
+               val |= _PAGE_PRESENT;
+       else
+               val &= ~_PAGE_PRESENT;
+
+       set_pte(pte, __pte(val));
+
+       return 0;
+}
+
 void __kernel_map_pages(struct page *page, int numpages, int enable)
 {
        if (!debug_pagealloc_enabled())
                return;
 
-       if (enable)
-               __set_memory((unsigned long)page_address(page), numpages,
-                            __pgprot(_PAGE_PRESENT), __pgprot(0));
-       else
-               __set_memory((unsigned long)page_address(page), numpages,
-                            __pgprot(0), __pgprot(_PAGE_PRESENT));
+       unsigned long start = (unsigned long)page_address(page);
+       unsigned long size = PAGE_SIZE * numpages;
+
+       apply_to_existing_page_range(&init_mm, start, size, debug_pagealloc_set_page, &enable);
+
+       flush_tlb_kernel_range(start, start + size);
 }
 #endif
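
The new DEBUG_PAGEALLOC path walks only PTEs that already exist and flips their present bit in place, rather than going through __set_memory(). The per-PTE callback pattern can be modeled in userspace as below; the "page table" is just an array and the present bit value is made up for the sketch:

    /*
     * Userspace model of debug_pagealloc_set_page(): a callback that sets or
     * clears a "present" bit on each entry of a walked range, the way
     * apply_to_existing_page_range() invokes it per PTE.
     */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FAKE_PAGE_PRESENT 0x1ULL   /* illustrative bit, not _PAGE_PRESENT */

    static int set_present_cb(uint64_t *pte, void *data)
    {
            int enable = *(int *)data;

            if (enable)
                    *pte |= FAKE_PAGE_PRESENT;
            else
                    *pte &= ~FAKE_PAGE_PRESENT;
            return 0;
    }

    /* stand-in for apply_to_existing_page_range(): call the callback per entry */
    static void walk_range(uint64_t *ptes, size_t n, int enable)
    {
            for (size_t i = 0; i < n; i++)
                    set_present_cb(&ptes[i], &enable);
    }

    int main(void)
    {
            uint64_t ptes[4] = { 0x3, 0x3, 0x0, 0x0 };

            walk_range(ptes, 2, 0);                         /* "unmap" two pages */
            printf("%llx\n", (unsigned long long)ptes[0]);  /* prints 2 */
            return 0;
    }
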
 
index b3990874e481814ef54543f1a8c40aa012129f6e..2f041b5cea970ea9e0c1d50dc4400643f0b85a69 100644 (file)
@@ -516,33 +516,33 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
                break;
        /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
        case BPF_ADD | BPF_FETCH:
-               emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
-                    rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
+               emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
+                    rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
                if (!is64)
                        emit_zext_32(rs, ctx);
                break;
        case BPF_AND | BPF_FETCH:
-               emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
-                    rv_amoand_w(rs, rs, rd, 0, 0), ctx);
+               emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
+                    rv_amoand_w(rs, rs, rd, 1, 1), ctx);
                if (!is64)
                        emit_zext_32(rs, ctx);
                break;
        case BPF_OR | BPF_FETCH:
-               emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
-                    rv_amoor_w(rs, rs, rd, 0, 0), ctx);
+               emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
+                    rv_amoor_w(rs, rs, rd, 1, 1), ctx);
                if (!is64)
                        emit_zext_32(rs, ctx);
                break;
        case BPF_XOR | BPF_FETCH:
-               emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
-                    rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
+               emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
+                    rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
                if (!is64)
                        emit_zext_32(rs, ctx);
                break;
        /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
        case BPF_XCHG:
-               emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
-                    rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
+               emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
+                    rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
                if (!is64)
                        emit_zext_32(rs, ctx);
                break;
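
The only functional change in this hunk is in the last two arguments of the rv_amo* emitters, which are the aq and rl bits of the AMO encoding: passing 1,1 makes the fetch and exchange atomics acquire-release ordered instead of fully relaxed. In C terms, the JIT now asks for something like the builtin below (a hedged illustration; GCC/Clang on RISC-V typically lower an acquire-release fetch-add to an .aqrl AMO):

    /*
     * Illustration of the ordering now emitted for BPF_ADD | BPF_FETCH:
     * an acquire-release fetch-and-add, typically "amoadd.w.aqrl" on RISC-V,
     * i.e. the AMO encoded with aq=1, rl=1.
     */
    #include <stdint.h>

    int32_t fetch_add_acq_rel(int32_t *addr, int32_t val)
    {
            return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
    }

The same aq/rl change applies to the AND/OR/XOR fetch variants and to BPF_XCHG in this hunk.
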
index 9e6476719abbbb4e9206cd89292be7f25261a5b6..6a3c16bd5ca3bb7fbd357e48f6b19cd5e7a360b0 100644 (file)
@@ -81,6 +81,10 @@ ifdef CONFIG_CFI_CLANG
 PURGATORY_CFLAGS_REMOVE                += $(CC_FLAGS_CFI)
 endif
 
+ifdef CONFIG_SHADOW_CALL_STACK
+PURGATORY_CFLAGS_REMOVE                += $(CC_FLAGS_SCS)
+endif
+
 CFLAGS_REMOVE_purgatory.o      += $(PURGATORY_CFLAGS_REMOVE)
 CFLAGS_purgatory.o             += $(PURGATORY_CFLAGS)
 
index 0194f4554130ae6b89cf5db97a069a65bc2c6fc1..5bcf3af903daa2f9fb2aaf1e57d79121bfcda988 100644 (file)
@@ -7,15 +7,11 @@
  * Author: Li Zhengyu (lizhengyu3@huawei.com)
  *
  */
-
-.macro size, sym:req
-       .size \sym, . - \sym
-.endm
+#include <linux/linkage.h>
 
 .text
 
-.globl purgatory_start
-purgatory_start:
+SYM_CODE_START(purgatory_start)
 
        lla     sp, .Lstack
        mv      s0, a0  /* The hartid of the current hart */
@@ -28,8 +24,7 @@ purgatory_start:
        mv      a1, s1
        ld      a2, riscv_kernel_entry
        jr      a2
-
-size purgatory_start
+SYM_CODE_END(purgatory_start)
 
 .align 4
        .rept   256
@@ -39,9 +34,6 @@ size purgatory_start
 
 .data
 
-.globl riscv_kernel_entry
-riscv_kernel_entry:
-       .quad   0
-size riscv_kernel_entry
+SYM_DATA(riscv_kernel_entry, .quad 0)
 
 .end
index 93dc44d8244965b96f0d77db77b7cf6a2b439a54..4f0c7ef7c9059e3e917f1f24aaa5e0ff0b976ac6 100644 (file)
@@ -268,7 +268,7 @@ static void jpu_sreset_assert(struct jpu_device *jdev)
 }
 
 
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
 static int jpu_aclk_enable(struct jpu_device *jdev)
 {
        if (IS_ERR_OR_NULL(jdev->aclk)) {
@@ -915,7 +915,7 @@ static long jpu_ioctl(struct file *filp, u_int cmd, u_long arg)
        struct dma_buf_attachment *attach;
        struct sg_table *sg_table;
        jpu_dma_buf_info pInfo;
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
        u32 clkgate;
 #endif
        int ret = 0;
@@ -1058,7 +1058,7 @@ static long jpu_ioctl(struct file *filp, u_int cmd, u_long arg)
 #endif
                break;
        case JDI_IOCTL_SET_CLOCK_GATE:;
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
                ret = down_interruptible(&jdev->s_jpu_sem);
                if (ret) {
                        return -EAGAIN;
@@ -1172,7 +1172,7 @@ static long jpu_ioctl(struct file *filp, u_int cmd, u_long arg)
                     (int)inst_info.inst_idx, inst_info.inst_open_count);
                break;
        case JDI_IOCTL_RESET:
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
 
                ret = down_interruptible(&jdev->s_jpu_sem);
                if (ret) {
@@ -1260,7 +1260,7 @@ static int jpu_release(struct inode *inode, struct file *filp)
                        vfree((const void *)jdev->s_instance_pool.base);
                        jdev->s_instance_pool.base = 0;
                }
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
                jpu_clk_disable(jdev);
                pm_runtime_put_sync(jdev->jdev);
 
@@ -1569,7 +1569,7 @@ static int jpu_probe(struct platform_device *pdev)
                dev_err(jdev->jdev, "irq not be registered\n");
                return err;
        }
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
 
        jdev->aclk = devm_clk_get(&pdev->dev, "aclk");
        if (IS_ERR_OR_NULL(jdev->aclk)) {
@@ -1723,7 +1723,7 @@ static int jpu_remove(struct platform_device *pdev)
                unregister_chrdev_region(jdev->s_jpu_major, 1);
                jdev->s_jpu_major = 0;
        }
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
        jpu_clk_disable(jdev);
        pm_runtime_put_sync(jdev->jdev);
        pm_runtime_disable(&pdev->dev);
@@ -1738,7 +1738,7 @@ static int jpu_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 static int jpu_suspend(struct platform_device *pdev, pm_message_t state)
 {
-#ifndef CONFIG_SOC_SPACEMIT_K1_FPGA
+#ifndef CONFIG_ARCH_SPACEMIT_K1_FPGA
        struct jpu_device *jdev = platform_get_drvdata(pdev);
        jpu_clk_disable(jdev);
 #endif
index 624d4a38c358a08f2ca417523058bc1c6a319a8d..bf27dced6308a65c8dce28ef00f4c385f115e371 100644 (file)
@@ -169,6 +169,9 @@ enum cpuhp_state {
        CPUHP_AP_PERF_ARM_ACPI_STARTING,
        CPUHP_AP_PERF_ARM_STARTING,
        CPUHP_AP_PERF_RISCV_STARTING,
+#ifdef CONFIG_ARCH_SPACEMIT
+       CPUHP_AP_CLINT_IPI_RISCV_STARTING,
+#endif
        CPUHP_AP_ARM_L2X0_STARTING,
        CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
        CPUHP_AP_ARM_ARCH_TIMER_STARTING,