Merge tag 'loongarch-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
author		Linus Torvalds <torvalds@linux-foundation.org>
		Wed, 12 Oct 2022 17:35:20 +0000 (10:35 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Wed, 12 Oct 2022 17:35:20 +0000 (10:35 -0700)
Pull LoongArch updates from Huacai Chen:

 - Use EXPLICIT_RELOCS (ABIv2.0)

 - Use generic BUG() handler

 - Refactor TLB/Cache operations

 - Add qspinlock support

 - Add perf events support

 - Add kexec/kdump support

 - Add BPF JIT support

 - Add ACPI-based laptop driver

 - Update the default config file

* tag 'loongarch-6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (25 commits)
  LoongArch: Update Loongson-3 default config file
  LoongArch: Add ACPI-based generic laptop driver
  LoongArch: Add BPF JIT support
  LoongArch: Add some instruction opcodes and formats
  LoongArch: Move {signed,unsigned}_imm_check() to inst.h
  LoongArch: Add kdump support
  LoongArch: Add kexec support
  LoongArch: Use generic BUG() handler
  LoongArch: Add SysRq-x (TLB Dump) support
  LoongArch: Add perf events support
  LoongArch: Add qspinlock support
  LoongArch: Use TLB for ioremap()
  LoongArch: Support access filter to /dev/mem interface
  LoongArch: Refactor cache probe and flush methods
  LoongArch: mm: Refactor TLB exception handlers
  LoongArch: Support R_LARCH_GOT_PC_{LO12,HI20} in modules
  LoongArch: Support PC-relative relocations in modules
  LoongArch: Define ELF relocation types added in ABIv2.0
  LoongArch: Adjust symbol addressing for AS_HAS_EXPLICIT_RELOCS
  LoongArch: Add Kconfig option AS_HAS_EXPLICIT_RELOCS
  ...

61 files changed:
arch/loongarch/Kbuild
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/Kbuild
arch/loongarch/include/asm/bootinfo.h
arch/loongarch/include/asm/bug.h
arch/loongarch/include/asm/cacheflush.h
arch/loongarch/include/asm/cacheops.h
arch/loongarch/include/asm/cmpxchg.h
arch/loongarch/include/asm/cpu-features.h
arch/loongarch/include/asm/cpu-info.h
arch/loongarch/include/asm/elf.h
arch/loongarch/include/asm/fixmap.h
arch/loongarch/include/asm/inst.h
arch/loongarch/include/asm/io.h
arch/loongarch/include/asm/kexec.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/module.h
arch/loongarch/include/asm/module.lds.h
arch/loongarch/include/asm/percpu.h
arch/loongarch/include/asm/perf_event.h
arch/loongarch/include/asm/pgtable-bits.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/spinlock.h [new file with mode: 0644]
arch/loongarch/include/asm/spinlock_types.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/bpf_perf_event.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/perf_regs.h [new file with mode: 0644]
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/cacheinfo.c
arch/loongarch/kernel/cpu-probe.c
arch/loongarch/kernel/crash_dump.c [new file with mode: 0644]
arch/loongarch/kernel/head.S
arch/loongarch/kernel/machine_kexec.c [new file with mode: 0644]
arch/loongarch/kernel/mem.c
arch/loongarch/kernel/module-sections.c
arch/loongarch/kernel/module.c
arch/loongarch/kernel/perf_event.c [new file with mode: 0644]
arch/loongarch/kernel/perf_regs.c [new file with mode: 0644]
arch/loongarch/kernel/relocate_kernel.S [new file with mode: 0644]
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/smp.c
arch/loongarch/kernel/sysrq.c [new file with mode: 0644]
arch/loongarch/kernel/topology.c
arch/loongarch/kernel/traps.c
arch/loongarch/kernel/vmlinux.lds.S
arch/loongarch/mm/cache.c
arch/loongarch/mm/init.c
arch/loongarch/mm/mmap.c
arch/loongarch/mm/tlb.c
arch/loongarch/mm/tlbex.S
arch/loongarch/net/Makefile [new file with mode: 0644]
arch/loongarch/net/bpf_jit.c [new file with mode: 0644]
arch/loongarch/net/bpf_jit.h [new file with mode: 0644]
arch/loongarch/pci/acpi.c
arch/loongarch/pci/pci.c
drivers/platform/Kconfig
drivers/platform/Makefile
drivers/platform/loongarch/Kconfig [new file with mode: 0644]
drivers/platform/loongarch/Makefile [new file with mode: 0644]
drivers/platform/loongarch/loongson-laptop.c [new file with mode: 0644]

diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index ab5373d..b01f5cd 100644
@@ -1,5 +1,6 @@
 obj-y += kernel/
 obj-y += mm/
+obj-y += net/
 obj-y += vdso/
 
 # for cleaning
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e83789b..903096b 100644
@@ -50,6 +50,7 @@ config LOONGARCH
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF
        select ARCH_USE_QUEUED_RWLOCKS
+       select ARCH_USE_QUEUED_SPINLOCKS
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WANTS_NO_INSTR
@@ -61,6 +62,7 @@ config LOONGARCH
        select GENERIC_CPU_AUTOPROBE
        select GENERIC_ENTRY
        select GENERIC_GETTIMEOFDAY
+       select GENERIC_IOREMAP if !ARCH_IOREMAP
        select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
@@ -69,6 +71,7 @@ config LOONGARCH
        select GENERIC_LIB_CMPDI2
        select GENERIC_LIB_LSHRDI3
        select GENERIC_LIB_UCMPDI2
+       select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
@@ -83,6 +86,7 @@ config LOONGARCH
        select HAVE_CONTEXT_TRACKING_USER
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_DMA_CONTIGUOUS
+       select HAVE_EBPF_JIT
        select HAVE_EXIT_THREAD
        select HAVE_FAST_GUP
        select HAVE_GENERIC_VDSO
@@ -93,6 +97,8 @@ config LOONGARCH
        select HAVE_NMI
        select HAVE_PCI
        select HAVE_PERF_EVENTS
+       select HAVE_PERF_REGS
+       select HAVE_PERF_USER_STACK_DUMP
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RSEQ
        select HAVE_SETUP_PER_CPU_AREA if NUMA
@@ -136,6 +142,14 @@ config CPU_HAS_PREFETCH
        bool
        default y
 
+config GENERIC_BUG
+       def_bool y
+       depends on BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+       def_bool y
+       depends on GENERIC_BUG
+
 config GENERIC_CALIBRATE_DELAY
        def_bool y
 
@@ -157,7 +171,7 @@ config STACKTRACE_SUPPORT
        bool
        default y
 
-# MACH_LOONGSON32 and MACH_LOONGSON64 are delibrately carried over from the
+# MACH_LOONGSON32 and MACH_LOONGSON64 are deliberately carried over from the
 # MIPS Loongson code, to preserve Loongson-specific code paths in drivers that
 # are shared between architectures, and specifically expecting the symbols.
 config MACH_LOONGSON32
@@ -166,6 +180,9 @@ config MACH_LOONGSON32
 config MACH_LOONGSON64
        def_bool 64BIT
 
+config FIX_EARLYCON_MEM
+       def_bool y
+
 config PAGE_SIZE_4KB
        bool
 
@@ -194,6 +211,9 @@ config SCHED_OMIT_FRAME_POINTER
        bool
        default y
 
+config AS_HAS_EXPLICIT_RELOCS
+       def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x))
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -399,6 +419,46 @@ config ARCH_FORCE_MAX_ORDER
          The page size is not necessarily 4KB.  Keep this in mind
          when choosing a value for this option.
 
+config ARCH_IOREMAP
+       bool "Enable LoongArch DMW-based ioremap()"
+       help
+         We use generic TLB-based ioremap() by default since it has page
+         protection support. However, you can enable LoongArch DMW-based
+         ioremap() for better performance.
+
+config KEXEC
+       bool "Kexec system call"
+       select KEXEC_CORE
+       help
+         kexec is a system call that implements the ability to shutdown your
+         current kernel, and to start another kernel.  It is like a reboot
+         but it is independent of the system firmware.   And like a reboot
+         you can start any kernel with it, not just Linux.
+
+         The name comes from the similarity to the exec system call.
+
+config CRASH_DUMP
+       bool "Build kdump crash kernel"
+       help
+         Generate crash dump after being started by kexec. This should
+         be normally only set in special crash dump kernels which are
+         loaded in the main kernel with kexec-tools into a specially
+         reserved region and then later executed after a crash by
+         kdump/kexec.
+
+         For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+       hex "Physical address where the kernel is loaded"
+       default "0x90000000a0000000"
+       depends on CRASH_DUMP
+       help
+         This gives the XKPRANGE address where the kernel is loaded.
+         If you plan to use kernel for capturing the crash dump change
+         this value to start of the reserved region (the "X" value as
+         specified in the "crashkernel=YM@XM" command line boot parameter
+         passed to the panic-ed kernel).
+
 config SECCOMP
        bool "Enable seccomp to safely compute untrusted bytecode"
        depends on PROC_FS
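A worked example of the PHYSICAL_START help text above (the concrete numbers are illustrative only, not taken from this series): booting the production kernel with "crashkernel=512M@2560M" reserves 512 MiB of physical memory starting at 0xa0000000 (2560 MiB). The capture kernel is then configured with

    CONFIG_CRASH_DUMP=y
    CONFIG_PHYSICAL_START=0x90000000a0000000

i.e. the reserved physical start plus the 0x9000000000000000 cached direct-mapped window base, which is also how the default value above is formed. The Makefile change below feeds CONFIG_PHYSICAL_START into load-y and hence VMLINUX_LOAD_ADDRESS.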
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index d592b9d..f4cb54d 100644
@@ -43,15 +43,37 @@ endif
 
 cflags-y                       += -G0 -pipe -msoft-float
 LDFLAGS_vmlinux                        += -G0 -static -n -nostdlib
+
+# When the assembler supports explicit relocation hint, we must use it.
+# GCC may have -mexplicit-relocs off by default if it was built with an old
+# assembler, so we force it via an option.
+#
+# When the assembler does not support explicit relocation hint, we can't use
+# it.  Disable it if the compiler supports it.
+#
+# If you've seen "unknown reloc hint" message building the kernel and you are
+# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the
+# combination of a "new" assembler and "old" compiler is not supported.  Either
+# upgrade the compiler or downgrade the assembler.
+ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
+cflags-y                       += -mexplicit-relocs
+KBUILD_CFLAGS_KERNEL           += -mdirect-extern-access
+else
+cflags-y                       += $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL           += -Wa,-mla-global-with-pcrel
 KBUILD_CFLAGS_KERNEL           += -Wa,-mla-global-with-pcrel
 KBUILD_AFLAGS_MODULE           += -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE           += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
+endif
 
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
+ifndef CONFIG_PHYSICAL_START
 load-y         = 0x9000000000200000
+else
+load-y         = $(CONFIG_PHYSICAL_START)
+endif
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 3712552..3540e9c 100644
@@ -4,6 +4,7 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
 CONFIG_PREEMPT=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -45,6 +46,7 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_KEXEC=y
 CONFIG_PAGE_SIZE_16KB=y
 CONFIG_HZ_250=y
 CONFIG_ACPI=y
@@ -55,6 +57,7 @@ CONFIG_ACPI_DOCK=y
 CONFIG_ACPI_IPMI=m
 CONFIG_ACPI_PCI_SLOT=y
 CONFIG_ACPI_HOTPLUG_MEMORY=y
+CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
@@ -65,6 +68,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_PARTITION_ADVANCED=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
 CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
@@ -82,8 +87,11 @@ CONFIG_ZSMALLOC=m
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
+CONFIG_TLS=m
+CONFIG_TLS_DEVICE=y
 CONFIG_XFRM_USER=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -95,6 +103,7 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_IP_MROUTE=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
@@ -102,6 +111,7 @@ CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BBR=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_INET6_ESP=m
 CONFIG_IPV6_MROUTE=y
 CONFIG_NETWORK_PHY_TIMESTAMPING=y
 CONFIG_NETFILTER=y
@@ -112,10 +122,11 @@ CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -200,7 +211,6 @@ CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_LOG_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -254,10 +264,14 @@ CONFIG_BPFILTER=y
 CONFIG_IP_SCTP=m
 CONFIG_RDS=y
 CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_VLAN_8021Q_MVRP=y
+CONFIG_LLC2=m
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_PRIO=m
@@ -282,9 +296,33 @@ CONFIG_VSOCKETS=m
 CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=y
 CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_STREAM_PARSER=y
 CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HS=y
 CONFIG_BT_HCIBTUSB=m
-# CONFIG_BT_HCIBTUSB_BCM is not set
+CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y
+CONFIG_BT_HCIBTUSB_MTK=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_ATH3K=y
+CONFIG_BT_HCIUART_INTEL=y
+CONFIG_BT_HCIUART_AG6XX=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBPA10X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_BT_MRVL=m
+CONFIG_BT_ATH3K=m
+CONFIG_BT_VIRTIO=m
 CONFIG_CFG80211=m
 CONFIG_CFG80211_WEXT=y
 CONFIG_MAC80211=m
@@ -329,7 +367,6 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -486,6 +523,7 @@ CONFIG_PPP_FILTER=y
 CONFIG_PPP_MPPE=m
 CONFIG_PPP_MULTILINK=y
 CONFIG_PPPOE=m
+CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
@@ -505,7 +543,6 @@ CONFIG_ATH9K_HTC=m
 CONFIG_IWLWIFI=m
 CONFIG_IWLDVM=m
 CONFIG_IWLMVM=m
-CONFIG_IWLWIFI_BCAST_FILTERING=y
 CONFIG_HOSTAP=m
 CONFIG_MT7601U=m
 CONFIG_RT2X00=m
@@ -521,6 +558,14 @@ CONFIG_RTL8821AE=m
 CONFIG_RTL8192CU=m
 # CONFIG_RTLWIFI_DEBUG is not set
 CONFIG_RTL8XXXU=m
+CONFIG_RTW88=m
+CONFIG_RTW88_8822BE=m
+CONFIG_RTW88_8822CE=m
+CONFIG_RTW88_8723DE=m
+CONFIG_RTW88_8821CE=m
+CONFIG_RTW89=m
+CONFIG_RTW89_8852AE=m
+CONFIG_RTW89_8852CE=m
 CONFIG_ZD1211RW=m
 CONFIG_USB_NET_RNDIS_WLAN=m
 CONFIG_INPUT_MOUSEDEV=y
@@ -651,6 +696,11 @@ CONFIG_USB_SERIAL_FTDI_SIO=m
 CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_SERIAL_OPTION=m
 CONFIG_USB_GADGET=y
+CONFIG_TYPEC=m
+CONFIG_TYPEC_TCPM=m
+CONFIG_TYPEC_TCPCI=m
+CONFIG_TYPEC_UCSI=m
+CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
@@ -688,7 +738,6 @@ CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
 CONFIG_R8188EU=m
-# CONFIG_88EU_AP_MODE is not set
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -772,14 +821,12 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index f2bcfcb..77ad8e6 100644
@@ -1,12 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += dma-contiguous.h
 generic-y += export.h
+generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
-generic-y += qrwlock_types.h
-generic-y += spinlock.h
-generic-y += spinlock_types.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index 8e5881b..ed0910e 100644
@@ -40,4 +40,9 @@ extern unsigned long fw_arg0, fw_arg1, fw_arg2;
 extern struct loongson_board_info b_info;
 extern struct loongson_system_configuration loongson_sysconf;
 
+static inline bool io_master(int cpu)
+{
+       return test_bit(cpu, &loongson_sysconf.cores_io_master);
+}
+
 #endif /* _ASM_BOOTINFO_H */
diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
index bda4910..d4ca3ba 100644
@@ -2,21 +2,59 @@
 #ifndef __ASM_BUG_H
 #define __ASM_BUG_H
 
-#include <linux/compiler.h>
+#include <asm/break.h>
+#include <linux/stringify.h>
+
+#ifndef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line)
+#else
+#define __BUGVERBOSE_LOCATION(file, line)                      \
+               .pushsection .rodata.str, "aMS", @progbits, 1;  \
+       10002:  .string file;                                   \
+               .popsection;                                    \
+                                                               \
+               .long 10002b - .;                               \
+               .short line;
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#endif
 
-#ifdef CONFIG_BUG
+#ifndef CONFIG_GENERIC_BUG
+#define __BUG_ENTRY(flags)
+#else
+#define __BUG_ENTRY(flags)                                     \
+               .pushsection __bug_table, "aw";                 \
+               .align 2;                                       \
+       10000:  .long 10001f - .;                               \
+               _BUGVERBOSE_LOCATION(__FILE__, __LINE__)        \
+               .short flags;                                   \
+               .popsection;                                    \
+       10001:
+#endif
 
-#include <asm/break.h>
+#define ASM_BUG_FLAGS(flags)                                   \
+       __BUG_ENTRY(flags)                                      \
+       break           BRK_BUG
 
-static inline void __noreturn BUG(void)
-{
-       __asm__ __volatile__("break %0" : : "i" (BRK_BUG));
-       unreachable();
-}
+#define ASM_BUG()      ASM_BUG_FLAGS(0)
 
-#define HAVE_ARCH_BUG
+#define __BUG_FLAGS(flags)                                     \
+       asm_inline volatile (__stringify(ASM_BUG_FLAGS(flags)));
 
-#endif
+#define __WARN_FLAGS(flags)                                    \
+do {                                                           \
+       instrumentation_begin();                                \
+       __BUG_FLAGS(BUGFLAG_WARNING|(flags));                   \
+       instrumentation_end();                                  \
+} while (0)
+
+#define BUG()                                                  \
+do {                                                           \
+       instrumentation_begin();                                \
+       __BUG_FLAGS(0);                                         \
+       unreachable();                                          \
+} while (0)
+
+#define HAVE_ARCH_BUG
 
 #include <asm-generic/bug.h>
 
diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 6709001..0681788 100644
@@ -6,10 +6,33 @@
 #define _ASM_CACHEFLUSH_H
 
 #include <linux/mm.h>
-#include <asm/cpu-features.h>
+#include <asm/cpu-info.h>
 #include <asm/cacheops.h>
 
-extern void local_flush_icache_range(unsigned long start, unsigned long end);
+static inline bool cache_present(struct cache_desc *cdesc)
+{
+       return cdesc->flags & CACHE_PRESENT;
+}
+
+static inline bool cache_private(struct cache_desc *cdesc)
+{
+       return cdesc->flags & CACHE_PRIVATE;
+}
+
+static inline bool cache_inclusive(struct cache_desc *cdesc)
+{
+       return cdesc->flags & CACHE_INCLUSIVE;
+}
+
+static inline unsigned int cpu_last_level_cache_line_size(void)
+{
+       int cache_present = boot_cpu_data.cache_leaves_present;
+
+       return boot_cpu_data.cache_leaves[cache_present - 1].linesz;
+}
+
+asmlinkage void __flush_cache_all(void);
+void local_flush_icache_range(unsigned long start, unsigned long end);
 
 #define flush_icache_range     local_flush_icache_range
 #define flush_icache_user_range        local_flush_icache_range
@@ -35,44 +58,30 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end);
        :                                                               \
        : "i" (op), "ZC" (*(unsigned char *)(addr)))
 
-static inline void flush_icache_line_indexed(unsigned long addr)
-{
-       cache_op(Index_Invalidate_I, addr);
-}
-
-static inline void flush_dcache_line_indexed(unsigned long addr)
-{
-       cache_op(Index_Writeback_Inv_D, addr);
-}
-
-static inline void flush_vcache_line_indexed(unsigned long addr)
-{
-       cache_op(Index_Writeback_Inv_V, addr);
-}
-
-static inline void flush_scache_line_indexed(unsigned long addr)
-{
-       cache_op(Index_Writeback_Inv_S, addr);
-}
-
-static inline void flush_icache_line(unsigned long addr)
-{
-       cache_op(Hit_Invalidate_I, addr);
-}
-
-static inline void flush_dcache_line(unsigned long addr)
-{
-       cache_op(Hit_Writeback_Inv_D, addr);
-}
-
-static inline void flush_vcache_line(unsigned long addr)
-{
-       cache_op(Hit_Writeback_Inv_V, addr);
-}
-
-static inline void flush_scache_line(unsigned long addr)
+static inline void flush_cache_line(int leaf, unsigned long addr)
 {
-       cache_op(Hit_Writeback_Inv_S, addr);
+       switch (leaf) {
+       case Cache_LEAF0:
+               cache_op(Index_Writeback_Inv_LEAF0, addr);
+               break;
+       case Cache_LEAF1:
+               cache_op(Index_Writeback_Inv_LEAF1, addr);
+               break;
+       case Cache_LEAF2:
+               cache_op(Index_Writeback_Inv_LEAF2, addr);
+               break;
+       case Cache_LEAF3:
+               cache_op(Index_Writeback_Inv_LEAF3, addr);
+               break;
+       case Cache_LEAF4:
+               cache_op(Index_Writeback_Inv_LEAF4, addr);
+               break;
+       case Cache_LEAF5:
+               cache_op(Index_Writeback_Inv_LEAF5, addr);
+               break;
+       default:
+               break;
+       }
 }
 
 #include <asm-generic/cacheflush.h>
diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h
index dc280ef..0f4a86f 100644
@@ -8,16 +8,18 @@
 #define __ASM_CACHEOPS_H
 
 /*
- * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * Most cache ops are split into a 3 bit field identifying the cache, and a 2
  * bit field identifying the cache operation.
  */
-#define CacheOp_Cache                  0x03
-#define CacheOp_Op                     0x1c
+#define CacheOp_Cache                  0x07
+#define CacheOp_Op                     0x18
 
-#define Cache_I                                0x00
-#define Cache_D                                0x01
-#define Cache_V                                0x02
-#define Cache_S                                0x03
+#define Cache_LEAF0                    0x00
+#define Cache_LEAF1                    0x01
+#define Cache_LEAF2                    0x02
+#define Cache_LEAF3                    0x03
+#define Cache_LEAF4                    0x04
+#define Cache_LEAF5                    0x05
 
 #define Index_Invalidate               0x08
 #define Index_Writeback_Inv            0x08
 #define Hit_Writeback_Inv              0x10
 #define CacheOp_User_Defined           0x18
 
-#define Index_Invalidate_I             (Cache_I | Index_Invalidate)
-#define Index_Writeback_Inv_D          (Cache_D | Index_Writeback_Inv)
-#define Index_Writeback_Inv_V          (Cache_V | Index_Writeback_Inv)
-#define Index_Writeback_Inv_S          (Cache_S | Index_Writeback_Inv)
-#define Hit_Invalidate_I               (Cache_I | Hit_Invalidate)
-#define Hit_Writeback_Inv_D            (Cache_D | Hit_Writeback_Inv)
-#define Hit_Writeback_Inv_V            (Cache_V | Hit_Writeback_Inv)
-#define Hit_Writeback_Inv_S            (Cache_S | Hit_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF0      (Cache_LEAF0 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF1      (Cache_LEAF1 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF2      (Cache_LEAF2 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF3      (Cache_LEAF3 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF4      (Cache_LEAF4 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF5      (Cache_LEAF5 | Index_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF0                (Cache_LEAF0 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF1                (Cache_LEAF1 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF2                (Cache_LEAF2 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF3                (Cache_LEAF3 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF4                (Cache_LEAF4 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF5                (Cache_LEAF5 | Hit_Writeback_Inv)
 
 #endif /* __ASM_CACHEOPS_H */
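To make the new encoding concrete, here is a small stand-alone C illustration (not from the patch) of how the 3-bit cache-leaf field and the 2-bit operation field combine in the constants above:

    #include <stdio.h>

    #define CacheOp_Cache           0x07    /* 3-bit field: which cache leaf      */
    #define CacheOp_Op              0x18    /* 2-bit field: which cache operation */
    #define Cache_LEAF1             0x01
    #define Index_Writeback_Inv     0x08

    int main(void)
    {
            /* Index_Writeback_Inv_LEAF1 = 0x08 | 0x01 = 0x09 */
            unsigned int op = Index_Writeback_Inv | Cache_LEAF1;

            printf("op=0x%02x leaf=%u kind=0x%02x\n",
                   op, op & CacheOp_Cache, op & CacheOp_Op);
            return 0;
    }

This prints "op=0x09 leaf=1 kind=0x08", i.e. a writeback-invalidate by index on cache leaf 1.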
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index ae19e33..ecfa6cf 100644
@@ -61,8 +61,8 @@ static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val,
        return (old32 & mask) >> shift;
 }
 
-static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
-                                  int size)
+static __always_inline unsigned long
+__xchg(volatile void *ptr, unsigned long x, int size)
 {
        switch (size) {
        case 1:
@@ -159,8 +159,8 @@ static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
        return (old32 & mask) >> shift;
 }
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-                                     unsigned long new, unsigned int size)
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size)
 {
        switch (size) {
        case 1:
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index a8d87c4..b079742 100644
 #define cpu_has_loongarch32            (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT)
 #define cpu_has_loongarch64            (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT)
 
-#define cpu_icache_line_size()         cpu_data[0].icache.linesz
-#define cpu_dcache_line_size()         cpu_data[0].dcache.linesz
-#define cpu_vcache_line_size()         cpu_data[0].vcache.linesz
-#define cpu_scache_line_size()         cpu_data[0].scache.linesz
-
 #ifdef CONFIG_32BIT
 # define cpu_has_64bits                        (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT)
 # define cpu_vabits                    31
diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h
index b6c4f96..cd73a6f 100644
 
 #include <asm/loongarch.h>
 
+/* cache_desc->flags */
+enum {
+       CACHE_PRESENT   = (1 << 0),
+       CACHE_PRIVATE   = (1 << 1),     /* core private cache */
+       CACHE_INCLUSIVE = (1 << 2),     /* include the inner level caches */
+};
+
 /*
  * Descriptor for a cache
  */
 struct cache_desc {
-       unsigned int waysize;   /* Bytes per way */
+       unsigned char type;
+       unsigned char level;
        unsigned short sets;    /* Number of lines per set */
        unsigned char ways;     /* Number of ways */
        unsigned char linesz;   /* Size of line in bytes */
-       unsigned char waybit;   /* Bits to select in a cache set */
        unsigned char flags;    /* Flags describing cache properties */
 };
 
+#define CACHE_LEVEL_MAX                3
+#define CACHE_LEAVES_MAX       6
+
 struct cpuinfo_loongarch {
        u64                     asid_cache;
        unsigned long           asid_mask;
@@ -40,11 +50,8 @@ struct cpuinfo_loongarch {
        int                     tlbsizemtlb;
        int                     tlbsizestlbsets;
        int                     tlbsizestlbways;
-       struct cache_desc       icache; /* Primary I-cache */
-       struct cache_desc       dcache; /* Primary D or combined I/D cache */
-       struct cache_desc       vcache; /* Victim cache, between pcache and scache */
-       struct cache_desc       scache; /* Secondary cache */
-       struct cache_desc       tcache; /* Tertiary/split secondary cache */
+       int                     cache_leaves_present; /* number of cache_leaves[] elements */
+       struct cache_desc       cache_leaves[CACHE_LEAVES_MAX];
        int                     core;   /* physical core number in package */
        int                     package;/* physical package number */
        int                     vabits; /* Virtual Address size in bits */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 5f3ff47..7af0ceb 100644
 #define R_LARCH_SUB64                          56
 #define R_LARCH_GNU_VTINHERIT                  57
 #define R_LARCH_GNU_VTENTRY                    58
+#define R_LARCH_B16                            64
+#define R_LARCH_B21                            65
+#define R_LARCH_B26                            66
+#define R_LARCH_ABS_HI20                       67
+#define R_LARCH_ABS_LO12                       68
+#define R_LARCH_ABS64_LO20                     69
+#define R_LARCH_ABS64_HI12                     70
+#define R_LARCH_PCALA_HI20                     71
+#define R_LARCH_PCALA_LO12                     72
+#define R_LARCH_PCALA64_LO20                   73
+#define R_LARCH_PCALA64_HI12                   74
+#define R_LARCH_GOT_PC_HI20                    75
+#define R_LARCH_GOT_PC_LO12                    76
+#define R_LARCH_GOT64_PC_LO20                  77
+#define R_LARCH_GOT64_PC_HI12                  78
+#define R_LARCH_GOT_HI20                       79
+#define R_LARCH_GOT_LO12                       80
+#define R_LARCH_GOT64_LO20                     81
+#define R_LARCH_GOT64_HI12                     82
+#define R_LARCH_TLS_LE_HI20                    83
+#define R_LARCH_TLS_LE_LO12                    84
+#define R_LARCH_TLS_LE64_LO20                  85
+#define R_LARCH_TLS_LE64_HI12                  86
+#define R_LARCH_TLS_IE_PC_HI20                 87
+#define R_LARCH_TLS_IE_PC_LO12                 88
+#define R_LARCH_TLS_IE64_PC_LO20               89
+#define R_LARCH_TLS_IE64_PC_HI12               90
+#define R_LARCH_TLS_IE_HI20                    91
+#define R_LARCH_TLS_IE_LO12                    92
+#define R_LARCH_TLS_IE64_LO20                  93
+#define R_LARCH_TLS_IE64_HI12                  94
+#define R_LARCH_TLS_LD_PC_HI20                 95
+#define R_LARCH_TLS_LD_HI20                    96
+#define R_LARCH_TLS_GD_PC_HI20                 97
+#define R_LARCH_TLS_GD_HI20                    98
+#define R_LARCH_32_PCREL                       99
+#define R_LARCH_RELAX                          100
 
 #ifndef ELF_ARCH
 
diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h
index b3541df..d2e55ae 100644
 
 #define NR_FIX_BTMAPS 64
 
+enum fixed_addresses {
+       FIX_HOLE,
+       FIX_EARLYCON_MEM_BASE,
+       __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXMAP_PAGE_IO PAGE_KERNEL_SUC
+
+extern void __set_fixmap(enum fixed_addresses idx,
+                        phys_addr_t phys, pgprot_t flags);
+
+#include <asm-generic/fixmap.h>
+
 #endif
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 7b07cbb..fce1843 100644
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <asm/asm.h>
 
+#define INSN_BREAK             0x002a0000
+
 #define ADDR_IMMMASK_LU52ID    0xFFF0000000000000
 #define ADDR_IMMMASK_LU32ID    0x000FFFFF00000000
 #define ADDR_IMMMASK_ADDU16ID  0x00000000FFFF0000
 
 #define ADDR_IMM(addr, INSN)   ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN)
 
+enum reg0i26_op {
+       b_op            = 0x14,
+       bl_op           = 0x15,
+};
+
 enum reg1i20_op {
        lu12iw_op       = 0x0a,
        lu32id_op       = 0x0b,
+       pcaddu12i_op    = 0x0e,
+       pcaddu18i_op    = 0x0f,
 };
 
 enum reg1i21_op {
@@ -28,10 +37,34 @@ enum reg1i21_op {
        bnez_op         = 0x11,
 };
 
+enum reg2_op {
+       revb2h_op       = 0x0c,
+       revb4h_op       = 0x0d,
+       revb2w_op       = 0x0e,
+       revbd_op        = 0x0f,
+       revh2w_op       = 0x10,
+       revhd_op        = 0x11,
+};
+
+enum reg2i5_op {
+       slliw_op        = 0x81,
+       srliw_op        = 0x89,
+       sraiw_op        = 0x91,
+};
+
+enum reg2i6_op {
+       sllid_op        = 0x41,
+       srlid_op        = 0x45,
+       sraid_op        = 0x49,
+};
+
 enum reg2i12_op {
        addiw_op        = 0x0a,
        addid_op        = 0x0b,
        lu52id_op       = 0x0c,
+       andi_op         = 0x0d,
+       ori_op          = 0x0e,
+       xori_op         = 0x0f,
        ldb_op          = 0xa0,
        ldh_op          = 0xa1,
        ldw_op          = 0xa2,
@@ -40,6 +73,20 @@ enum reg2i12_op {
        sth_op          = 0xa5,
        stw_op          = 0xa6,
        std_op          = 0xa7,
+       ldbu_op         = 0xa8,
+       ldhu_op         = 0xa9,
+       ldwu_op         = 0xaa,
+};
+
+enum reg2i14_op {
+       llw_op          = 0x20,
+       scw_op          = 0x21,
+       lld_op          = 0x22,
+       scd_op          = 0x23,
+       ldptrw_op       = 0x24,
+       stptrw_op       = 0x25,
+       ldptrd_op       = 0x26,
+       stptrd_op       = 0x27,
 };
 
 enum reg2i16_op {
@@ -52,6 +99,71 @@ enum reg2i16_op {
        bgeu_op         = 0x1b,
 };
 
+enum reg2bstrd_op {
+       bstrinsd_op     = 0x2,
+       bstrpickd_op    = 0x3,
+};
+
+enum reg3_op {
+       addw_op         = 0x20,
+       addd_op         = 0x21,
+       subw_op         = 0x22,
+       subd_op         = 0x23,
+       nor_op          = 0x28,
+       and_op          = 0x29,
+       or_op           = 0x2a,
+       xor_op          = 0x2b,
+       orn_op          = 0x2c,
+       andn_op         = 0x2d,
+       sllw_op         = 0x2e,
+       srlw_op         = 0x2f,
+       sraw_op         = 0x30,
+       slld_op         = 0x31,
+       srld_op         = 0x32,
+       srad_op         = 0x33,
+       mulw_op         = 0x38,
+       mulhw_op        = 0x39,
+       mulhwu_op       = 0x3a,
+       muld_op         = 0x3b,
+       mulhd_op        = 0x3c,
+       mulhdu_op       = 0x3d,
+       divw_op         = 0x40,
+       modw_op         = 0x41,
+       divwu_op        = 0x42,
+       modwu_op        = 0x43,
+       divd_op         = 0x44,
+       modd_op         = 0x45,
+       divdu_op        = 0x46,
+       moddu_op        = 0x47,
+       ldxb_op         = 0x7000,
+       ldxh_op         = 0x7008,
+       ldxw_op         = 0x7010,
+       ldxd_op         = 0x7018,
+       stxb_op         = 0x7020,
+       stxh_op         = 0x7028,
+       stxw_op         = 0x7030,
+       stxd_op         = 0x7038,
+       ldxbu_op        = 0x7040,
+       ldxhu_op        = 0x7048,
+       ldxwu_op        = 0x7050,
+       amswapw_op      = 0x70c0,
+       amswapd_op      = 0x70c1,
+       amaddw_op       = 0x70c2,
+       amaddd_op       = 0x70c3,
+       amandw_op       = 0x70c4,
+       amandd_op       = 0x70c5,
+       amorw_op        = 0x70c6,
+       amord_op        = 0x70c7,
+       amxorw_op       = 0x70c8,
+       amxord_op       = 0x70c9,
+};
+
+enum reg3sa2_op {
+       alslw_op        = 0x02,
+       alslwu_op       = 0x03,
+       alsld_op        = 0x16,
+};
+
 struct reg0i26_format {
        unsigned int immediate_h : 10;
        unsigned int immediate_l : 16;
@@ -71,6 +183,26 @@ struct reg1i21_format {
        unsigned int opcode : 6;
 };
 
+struct reg2_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int opcode : 22;
+};
+
+struct reg2i5_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int immediate : 5;
+       unsigned int opcode : 17;
+};
+
+struct reg2i6_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int immediate : 6;
+       unsigned int opcode : 16;
+};
+
 struct reg2i12_format {
        unsigned int rd : 5;
        unsigned int rj : 5;
@@ -78,6 +210,13 @@ struct reg2i12_format {
        unsigned int opcode : 10;
 };
 
+struct reg2i14_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int immediate : 14;
+       unsigned int opcode : 8;
+};
+
 struct reg2i16_format {
        unsigned int rd : 5;
        unsigned int rj : 5;
@@ -85,13 +224,43 @@ struct reg2i16_format {
        unsigned int opcode : 6;
 };
 
+struct reg2bstrd_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int lsbd : 6;
+       unsigned int msbd : 6;
+       unsigned int opcode : 10;
+};
+
+struct reg3_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int rk : 5;
+       unsigned int opcode : 17;
+};
+
+struct reg3sa2_format {
+       unsigned int rd : 5;
+       unsigned int rj : 5;
+       unsigned int rk : 5;
+       unsigned int immediate : 2;
+       unsigned int opcode : 15;
+};
+
 union loongarch_instruction {
        unsigned int word;
-       struct reg0i26_format reg0i26_format;
-       struct reg1i20_format reg1i20_format;
-       struct reg1i21_format reg1i21_format;
-       struct reg2i12_format reg2i12_format;
-       struct reg2i16_format reg2i16_format;
+       struct reg0i26_format   reg0i26_format;
+       struct reg1i20_format   reg1i20_format;
+       struct reg1i21_format   reg1i21_format;
+       struct reg2_format      reg2_format;
+       struct reg2i5_format    reg2i5_format;
+       struct reg2i6_format    reg2i6_format;
+       struct reg2i12_format   reg2i12_format;
+       struct reg2i14_format   reg2i14_format;
+       struct reg2i16_format   reg2i16_format;
+       struct reg2bstrd_format reg2bstrd_format;
+       struct reg3_format      reg3_format;
+       struct reg3sa2_format   reg3sa2_format;
 };
 
 #define LOONGARCH_INSN_SIZE    sizeof(union loongarch_instruction)
@@ -166,4 +335,235 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
 
+static inline bool signed_imm_check(long val, unsigned int bit)
+{
+       return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
+}
+
+static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
+{
+       return val < (1UL << bit);
+}
+
+#define DEF_EMIT_REG0I26_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              int offset)                              \
+{                                                                      \
+       unsigned int immediate_l, immediate_h;                          \
+                                                                       \
+       immediate_l = offset & 0xffff;                                  \
+       offset >>= 16;                                                  \
+       immediate_h = offset & 0x3ff;                                   \
+                                                                       \
+       insn->reg0i26_format.opcode = OP;                               \
+       insn->reg0i26_format.immediate_l = immediate_l;                 \
+       insn->reg0i26_format.immediate_h = immediate_h;                 \
+}
+
+DEF_EMIT_REG0I26_FORMAT(b, b_op)
+
+#define DEF_EMIT_REG1I20_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd, int imm)          \
+{                                                                      \
+       insn->reg1i20_format.opcode = OP;                               \
+       insn->reg1i20_format.immediate = imm;                           \
+       insn->reg1i20_format.rd = rd;                                   \
+}
+
+DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op)
+DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op)
+DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op)
+
+#define DEF_EMIT_REG2_FORMAT(NAME, OP)                                 \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj)                   \
+{                                                                      \
+       insn->reg2_format.opcode = OP;                                  \
+       insn->reg2_format.rd = rd;                                      \
+       insn->reg2_format.rj = rj;                                      \
+}
+
+DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op)
+DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op)
+DEF_EMIT_REG2_FORMAT(revbd, revbd_op)
+
+#define DEF_EMIT_REG2I5_FORMAT(NAME, OP)                               \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              int imm)                                 \
+{                                                                      \
+       insn->reg2i5_format.opcode = OP;                                \
+       insn->reg2i5_format.immediate = imm;                            \
+       insn->reg2i5_format.rd = rd;                                    \
+       insn->reg2i5_format.rj = rj;                                    \
+}
+
+DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op)
+DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op)
+DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op)
+
+#define DEF_EMIT_REG2I6_FORMAT(NAME, OP)                               \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              int imm)                                 \
+{                                                                      \
+       insn->reg2i6_format.opcode = OP;                                \
+       insn->reg2i6_format.immediate = imm;                            \
+       insn->reg2i6_format.rd = rd;                                    \
+       insn->reg2i6_format.rj = rj;                                    \
+}
+
+DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op)
+DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op)
+DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op)
+
+#define DEF_EMIT_REG2I12_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              int imm)                                 \
+{                                                                      \
+       insn->reg2i12_format.opcode = OP;                               \
+       insn->reg2i12_format.immediate = imm;                           \
+       insn->reg2i12_format.rd = rd;                                   \
+       insn->reg2i12_format.rj = rj;                                   \
+}
+
+DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op)
+DEF_EMIT_REG2I12_FORMAT(addid, addid_op)
+DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op)
+DEF_EMIT_REG2I12_FORMAT(andi, andi_op)
+DEF_EMIT_REG2I12_FORMAT(ori, ori_op)
+DEF_EMIT_REG2I12_FORMAT(xori, xori_op)
+DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op)
+DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op)
+DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op)
+DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op)
+DEF_EMIT_REG2I12_FORMAT(stb, stb_op)
+DEF_EMIT_REG2I12_FORMAT(sth, sth_op)
+DEF_EMIT_REG2I12_FORMAT(stw, stw_op)
+DEF_EMIT_REG2I12_FORMAT(std, std_op)
+
+#define DEF_EMIT_REG2I14_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              int imm)                                 \
+{                                                                      \
+       insn->reg2i14_format.opcode = OP;                               \
+       insn->reg2i14_format.immediate = imm;                           \
+       insn->reg2i14_format.rd = rd;                                   \
+       insn->reg2i14_format.rj = rj;                                   \
+}
+
+DEF_EMIT_REG2I14_FORMAT(llw, llw_op)
+DEF_EMIT_REG2I14_FORMAT(scw, scw_op)
+DEF_EMIT_REG2I14_FORMAT(lld, lld_op)
+DEF_EMIT_REG2I14_FORMAT(scd, scd_op)
+DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op)
+DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op)
+DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op)
+DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op)
+
+#define DEF_EMIT_REG2I16_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rj,                   \
+                              enum loongarch_gpr rd,                   \
+                              int offset)                              \
+{                                                                      \
+       insn->reg2i16_format.opcode = OP;                               \
+       insn->reg2i16_format.immediate = offset;                        \
+       insn->reg2i16_format.rj = rj;                                   \
+       insn->reg2i16_format.rd = rd;                                   \
+}
+
+DEF_EMIT_REG2I16_FORMAT(beq, beq_op)
+DEF_EMIT_REG2I16_FORMAT(bne, bne_op)
+DEF_EMIT_REG2I16_FORMAT(blt, blt_op)
+DEF_EMIT_REG2I16_FORMAT(bge, bge_op)
+DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op)
+DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op)
+DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op)
+
+#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP)                            \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              int msbd,                                \
+                              int lsbd)                                \
+{                                                                      \
+       insn->reg2bstrd_format.opcode = OP;                             \
+       insn->reg2bstrd_format.msbd = msbd;                             \
+       insn->reg2bstrd_format.lsbd = lsbd;                             \
+       insn->reg2bstrd_format.rj = rj;                                 \
+       insn->reg2bstrd_format.rd = rd;                                 \
+}
+
+DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op)
+
+#define DEF_EMIT_REG3_FORMAT(NAME, OP)                                 \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              enum loongarch_gpr rk)                   \
+{                                                                      \
+       insn->reg3_format.opcode = OP;                                  \
+       insn->reg3_format.rd = rd;                                      \
+       insn->reg3_format.rj = rj;                                      \
+       insn->reg3_format.rk = rk;                                      \
+}
+
+DEF_EMIT_REG3_FORMAT(addd, addd_op)
+DEF_EMIT_REG3_FORMAT(subd, subd_op)
+DEF_EMIT_REG3_FORMAT(muld, muld_op)
+DEF_EMIT_REG3_FORMAT(divdu, divdu_op)
+DEF_EMIT_REG3_FORMAT(moddu, moddu_op)
+DEF_EMIT_REG3_FORMAT(and, and_op)
+DEF_EMIT_REG3_FORMAT(or, or_op)
+DEF_EMIT_REG3_FORMAT(xor, xor_op)
+DEF_EMIT_REG3_FORMAT(sllw, sllw_op)
+DEF_EMIT_REG3_FORMAT(slld, slld_op)
+DEF_EMIT_REG3_FORMAT(srlw, srlw_op)
+DEF_EMIT_REG3_FORMAT(srld, srld_op)
+DEF_EMIT_REG3_FORMAT(sraw, sraw_op)
+DEF_EMIT_REG3_FORMAT(srad, srad_op)
+DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op)
+DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op)
+DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op)
+DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op)
+DEF_EMIT_REG3_FORMAT(stxb, stxb_op)
+DEF_EMIT_REG3_FORMAT(stxh, stxh_op)
+DEF_EMIT_REG3_FORMAT(stxw, stxw_op)
+DEF_EMIT_REG3_FORMAT(stxd, stxd_op)
+DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op)
+DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op)
+DEF_EMIT_REG3_FORMAT(amandw, amandw_op)
+DEF_EMIT_REG3_FORMAT(amandd, amandd_op)
+DEF_EMIT_REG3_FORMAT(amorw, amorw_op)
+DEF_EMIT_REG3_FORMAT(amord, amord_op)
+DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op)
+DEF_EMIT_REG3_FORMAT(amxord, amxord_op)
+DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op)
+DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op)
+
+#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP)                              \
+static inline void emit_##NAME(union loongarch_instruction *insn,      \
+                              enum loongarch_gpr rd,                   \
+                              enum loongarch_gpr rj,                   \
+                              enum loongarch_gpr rk,                   \
+                              int imm)                                 \
+{                                                                      \
+       insn->reg3sa2_format.opcode = OP;                               \
+       insn->reg3sa2_format.immediate = imm;                           \
+       insn->reg3sa2_format.rd = rd;                                   \
+       insn->reg3sa2_format.rj = rj;                                   \
+       insn->reg3sa2_format.rk = rk;                                   \
+}
+
+DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
+
 #endif /* _ASM_INST_H */
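As a usage sketch for the emit helpers and immediate range checks above (a hypothetical caller, not code from this series; the real in-tree user is the BPF JIT added by this pull), encoding addi.d rd, rj, imm could look like:

    /* Hypothetical helper; assumes <linux/errno.h> and <asm/inst.h>. */
    static int build_addid(union loongarch_instruction *insn,
                           enum loongarch_gpr rd, enum loongarch_gpr rj, long imm)
    {
            /* addi.d takes a signed 12-bit immediate; refuse anything wider. */
            if (!signed_imm_check(imm, 12))
                    return -EINVAL;

            emit_addid(insn, rd, rj, imm);
            return 0;
    }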
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 999944e..402a7d9 100644
@@ -27,71 +27,38 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
 #define early_memremap early_ioremap
 #define early_memunmap early_iounmap
 
+#ifdef CONFIG_ARCH_IOREMAP
+
 static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
                                         unsigned long prot_val)
 {
-       if (prot_val == _CACHE_CC)
+       if (prot_val & _CACHE_CC)
                return (void __iomem *)(unsigned long)(CACHE_BASE + offset);
        else
                return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset);
 }
 
-/*
- * ioremap -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- */
-#define ioremap(offset, size)                                  \
-       ioremap_prot((offset), (size), _CACHE_SUC)
+#define ioremap(offset, size)          \
+       ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC))
 
-/*
- * ioremap_wc - map bus memory into CPU space
- * @offset:     bus address of the memory
- * @size:       size of the resource to map
- *
- * ioremap_wc performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * but accelerated by means of write-combining feature. It is specifically
- * useful for PCIe prefetchable windows, which may vastly improve a
- * communications performance. If it was determined on boot stage, what
- * CPU CCA doesn't support WUC, the method shall fall-back to the
- * _CACHE_SUC option (see cpu_probe() method).
- */
-#define ioremap_wc(offset, size)                               \
-       ioremap_prot((offset), (size), _CACHE_WUC)
+#define iounmap(addr)                  ((void)(addr))
+
+#endif
 
 /*
- * ioremap_cache -  map bus memory into CPU space
- * @offset:        bus address of the memory
- * @size:          size of the resource to map
+ * On LoongArch, ioremap() has two variants, ioremap_wc() and ioremap_cache().
+ * They map bus memory into CPU space, the mapped memory is marked uncachable
+ * (_CACHE_SUC), uncachable but accelerated by write-combine (_CACHE_WUC) and
+ * cachable (_CACHE_CC) respectively for CPU access.
  *
- * ioremap_cache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked cachable by
- * the CPU.  Also enables full write-combining.         Useful for some
- * memory-like regions on I/O busses.
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
  */
-#define ioremap_cache(offset, size)                            \
-       ioremap_prot((offset), (size), _CACHE_CC)
+#define ioremap_wc(offset, size)       \
+       ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC))
 
-static inline void iounmap(const volatile void __iomem *addr)
-{
-}
+#define ioremap_cache(offset, size)    \
+       ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
 
 #define mmiowb() asm volatile ("dbar 0" ::: "memory")
 
@@ -107,4 +74,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 
 #include <asm-generic/io.h>
 
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
 #endif /* _ASM_IO_H */
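A minimal driver-side sketch of the mapping flavours distinguished above (generic kernel API usage, not code from this patch; the physical addresses are made up):

    #include <linux/io.h>
    #include <linux/errno.h>

    static void __iomem *regs, *fb;

    static int demo_map(void)
    {
            regs = ioremap(0x1fe00000, 0x1000);       /* SUC: uncached, strongly ordered */
            fb   = ioremap_wc(0x40000000, 0x800000);  /* WUC: uncached + write-combining */
            if (!regs || !fb)
                    return -ENOMEM;

            writel(0x1, regs + 0x10);                 /* poke a made-up control register */
            return 0;
    }

With CONFIG_ARCH_IOREMAP=y these calls resolve through the DMW-based ioremap_prot() above; otherwise they go through the generic TLB-based ioremap selected by GENERIC_IOREMAP in the Kconfig hunk.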
diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
new file mode 100644
index 0000000..cf95cd3
--- /dev/null
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kexec.h for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_KEXEC_H
+#define _ASM_KEXEC_H
+
+#include <asm/stacktrace.h>
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+ /* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Reserve a page for the control code buffer */
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+                                   struct pt_regs *oldregs)
+{
+       if (oldregs)
+               memcpy(newregs, oldregs, sizeof(*newregs));
+       else
+               prepare_frametrace(newregs);
+}
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+       unsigned long efi_boot;
+       unsigned long cmdline_ptr;
+       unsigned long systable_ptr;
+};
+
+typedef void (*do_kexec_t)(unsigned long efi_boot,
+                          unsigned long cmdline_ptr,
+                          unsigned long systable_ptr,
+                          unsigned long start_addr,
+                          unsigned long first_ind_entry);
+
+struct kimage;
+extern const unsigned char relocate_new_kernel[];
+extern const size_t relocate_new_kernel_size;
+extern void kexec_reboot(void);
+
+#ifdef CONFIG_SMP
+extern atomic_t kexec_ready_to_reboot;
+extern const unsigned char kexec_smp_wait[];
+#endif
+
+#endif /* !_ASM_KEXEC_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 3ba4f7e..7f8d57a 100644
@@ -187,36 +187,15 @@ static inline u32 read_cpucfg(u32 reg)
 #define  CPUCFG16_L3_DINCL             BIT(16)
 
 #define LOONGARCH_CPUCFG17             0x11
-#define  CPUCFG17_L1I_WAYS_M           GENMASK(15, 0)
-#define  CPUCFG17_L1I_SETS_M           GENMASK(23, 16)
-#define  CPUCFG17_L1I_SIZE_M           GENMASK(30, 24)
-#define  CPUCFG17_L1I_WAYS             0
-#define  CPUCFG17_L1I_SETS             16
-#define  CPUCFG17_L1I_SIZE             24
-
 #define LOONGARCH_CPUCFG18             0x12
-#define  CPUCFG18_L1D_WAYS_M           GENMASK(15, 0)
-#define  CPUCFG18_L1D_SETS_M           GENMASK(23, 16)
-#define  CPUCFG18_L1D_SIZE_M           GENMASK(30, 24)
-#define  CPUCFG18_L1D_WAYS             0
-#define  CPUCFG18_L1D_SETS             16
-#define  CPUCFG18_L1D_SIZE             24
-
 #define LOONGARCH_CPUCFG19             0x13
-#define  CPUCFG19_L2_WAYS_M            GENMASK(15, 0)
-#define  CPUCFG19_L2_SETS_M            GENMASK(23, 16)
-#define  CPUCFG19_L2_SIZE_M            GENMASK(30, 24)
-#define  CPUCFG19_L2_WAYS              0
-#define  CPUCFG19_L2_SETS              16
-#define  CPUCFG19_L2_SIZE              24
-
 #define LOONGARCH_CPUCFG20             0x14
-#define  CPUCFG20_L3_WAYS_M            GENMASK(15, 0)
-#define  CPUCFG20_L3_SETS_M            GENMASK(23, 16)
-#define  CPUCFG20_L3_SIZE_M            GENMASK(30, 24)
-#define  CPUCFG20_L3_WAYS              0
-#define  CPUCFG20_L3_SETS              16
-#define  CPUCFG20_L3_SIZE              24
+#define  CPUCFG_CACHE_WAYS_M           GENMASK(15, 0)
+#define  CPUCFG_CACHE_SETS_M           GENMASK(23, 16)
+#define  CPUCFG_CACHE_LSIZE_M          GENMASK(30, 24)
+#define  CPUCFG_CACHE_WAYS             0
+#define  CPUCFG_CACHE_SETS             16
+#define  CPUCFG_CACHE_LSIZE            24
 
 #define LOONGARCH_CPUCFG48             0x30
 #define  CPUCFG48_MCSR_LCK             BIT(0)
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 9f6718d..b29b19a 100644 (file)
@@ -17,10 +17,15 @@ struct mod_section {
 };
 
 struct mod_arch_specific {
+       struct mod_section got;
        struct mod_section plt;
        struct mod_section plt_idx;
 };
 
+struct got_entry {
+       Elf_Addr symbol_addr;
+};
+
 struct plt_entry {
        u32 inst_lu12iw;
        u32 inst_lu32id;
@@ -29,10 +34,16 @@ struct plt_entry {
 };
 
 struct plt_idx_entry {
-       unsigned long symbol_addr;
+       Elf_Addr symbol_addr;
 };
 
-Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val);
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val);
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val);
+
+static inline struct got_entry emit_got_entry(Elf_Addr val)
+{
+       return (struct got_entry) { val };
+}
 
 static inline struct plt_entry emit_plt_entry(unsigned long val)
 {
@@ -77,4 +88,16 @@ static inline struct plt_entry *get_plt_entry(unsigned long val,
        return plt + plt_idx;
 }
 
+static inline struct got_entry *get_got_entry(Elf_Addr val,
+                                             const struct mod_section *sec)
+{
+       struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr;
+       int i;
+
+       for (i = 0; i < sec->num_entries; i++)
+               if (got[i].symbol_addr == val)
+                       return &got[i];
+       return NULL;
+}
+
 #endif /* _ASM_MODULE_H */
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index 31c1c0d..a3d1bc0 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
 SECTIONS {
        . = ALIGN(4);
+       .got : { BYTE(0) }
        .plt : { BYTE(0) }
        .plt.idx : { BYTE(0) }
 }
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 0bd6b01..ad8d884 100644 (file)
@@ -8,6 +8,15 @@
 #include <asm/cmpxchg.h>
 #include <asm/loongarch.h>
 
+/*
+ * The "address" (in fact, offset from $r21) of a per-CPU variable is close to
+ * the loading address of the main kernel image, but far from where the modules are
+ * loaded. Tell the compiler this fact when using explicit relocs.
+ */
+#if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS)
+#define PER_CPU_ATTRIBUTES    __attribute__((model("extreme")))
+#endif
+
 /* Use r21 for fast access */
 register unsigned long __my_cpu_offset __asm__("$r21");
 
diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h
index dcb3b17..2a35a0b 100644 (file)
@@ -6,5 +6,7 @@
 
 #ifndef __LOONGARCH_PERF_EVENT_H__
 #define __LOONGARCH_PERF_EVENT_H__
-/* Nothing to show here; the file is required by linux/perf_event.h. */
+
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+
 #endif /* __LOONGARCH_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 9ca147a..3d1e0a6 100644 (file)
                                 _PAGE_GLOBAL | _PAGE_KERN |  _CACHE_SUC)
 #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
                                 _PAGE_GLOBAL | _PAGE_KERN |  _CACHE_WUC)
+
 #ifndef __ASSEMBLY__
 
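+/* Page protection used by generic ioremap(): Strongly-ordered UnCached */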
+#define _PAGE_IOREMAP          pgprot_val(PAGE_KERNEL_SUC)
+
 #define pgprot_noncached pgprot_noncached
 
 static inline pgprot_t pgprot_noncached(pgprot_t _prot)
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index 6d7d2a3..ca373f8 100644 (file)
@@ -13,7 +13,9 @@
 
 extern unsigned long eentry;
 extern unsigned long tlbrentry;
+extern void tlb_init(int cpu);
 extern void cpu_cache_init(void);
+extern void cache_error_setup(void);
 extern void per_cpu_trap_init(int cpu);
 extern void set_handler(unsigned long offset, void *addr, unsigned long len);
 extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len);
diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h
new file mode 100644 (file)
index 0000000..7cb3476
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_SPINLOCK_H
+#define _ASM_SPINLOCK_H
+
+#include <asm/processor.h>
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+
+#endif /* _ASM_SPINLOCK_H */
diff --git a/arch/loongarch/include/asm/spinlock_types.h b/arch/loongarch/include/asm/spinlock_types.h
new file mode 100644 (file)
index 0000000..7458d03
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_SPINLOCK_TYPES_H
+#define _ASM_SPINLOCK_TYPES_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+
+#endif
diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h
new file mode 100644 (file)
index 0000000..eb6e2fd
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <linux/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/uapi/asm/perf_regs.h b/arch/loongarch/include/uapi/asm/perf_regs.h
new file mode 100644 (file)
index 0000000..29d69c0
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_LOONGARCH_PERF_REGS_H
+#define _ASM_LOONGARCH_PERF_REGS_H
+
+enum perf_event_loongarch_regs {
+       PERF_REG_LOONGARCH_PC,
+       PERF_REG_LOONGARCH_R1,
+       PERF_REG_LOONGARCH_R2,
+       PERF_REG_LOONGARCH_R3,
+       PERF_REG_LOONGARCH_R4,
+       PERF_REG_LOONGARCH_R5,
+       PERF_REG_LOONGARCH_R6,
+       PERF_REG_LOONGARCH_R7,
+       PERF_REG_LOONGARCH_R8,
+       PERF_REG_LOONGARCH_R9,
+       PERF_REG_LOONGARCH_R10,
+       PERF_REG_LOONGARCH_R11,
+       PERF_REG_LOONGARCH_R12,
+       PERF_REG_LOONGARCH_R13,
+       PERF_REG_LOONGARCH_R14,
+       PERF_REG_LOONGARCH_R15,
+       PERF_REG_LOONGARCH_R16,
+       PERF_REG_LOONGARCH_R17,
+       PERF_REG_LOONGARCH_R18,
+       PERF_REG_LOONGARCH_R19,
+       PERF_REG_LOONGARCH_R20,
+       PERF_REG_LOONGARCH_R21,
+       PERF_REG_LOONGARCH_R22,
+       PERF_REG_LOONGARCH_R23,
+       PERF_REG_LOONGARCH_R24,
+       PERF_REG_LOONGARCH_R25,
+       PERF_REG_LOONGARCH_R26,
+       PERF_REG_LOONGARCH_R27,
+       PERF_REG_LOONGARCH_R28,
+       PERF_REG_LOONGARCH_R29,
+       PERF_REG_LOONGARCH_R30,
+       PERF_REG_LOONGARCH_R31,
+       PERF_REG_LOONGARCH_MAX,
+};
+#endif /* _ASM_LOONGARCH_PERF_REGS_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 6c33b5c..42be564 100644 (file)
@@ -23,7 +23,14 @@ obj-$(CONFIG_SMP)            += smp.o
 
 obj-$(CONFIG_NUMA)             += numa.o
 
+obj-$(CONFIG_MAGIC_SYSRQ)      += sysrq.o
+
+obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
+
 obj-$(CONFIG_UNWINDER_GUESS)   += unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 
+obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
+
 CPPFLAGS_vmlinux.lds           := $(KBUILD_CFLAGS)
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
index 4662b06..c7988f7 100644 (file)
@@ -5,73 +5,34 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/cacheinfo.h>
+#include <linux/topology.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu-info.h>
 
-/* Populates leaf and increments to next leaf */
-#define populate_cache(cache, leaf, c_level, c_type)           \
-do {                                                           \
-       leaf->type = c_type;                                    \
-       leaf->level = c_level;                                  \
-       leaf->coherency_line_size = c->cache.linesz;            \
-       leaf->number_of_sets = c->cache.sets;                   \
-       leaf->ways_of_associativity = c->cache.ways;            \
-       leaf->size = c->cache.linesz * c->cache.sets *          \
-               c->cache.ways;                                  \
-       if (leaf->level > 2)                                    \
-               leaf->size *= nodes_per_package;                \
-       leaf++;                                                 \
-} while (0)
-
 int init_cache_level(unsigned int cpu)
 {
-       struct cpuinfo_loongarch *c = &current_cpu_data;
+       int cache_present = current_cpu_data.cache_leaves_present;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-       int levels = 0, leaves = 0;
-
-       /*
-        * If Dcache is not set, we assume the cache structures
-        * are not properly initialized.
-        */
-       if (c->dcache.waysize)
-               levels += 1;
-       else
-               return -ENOENT;
-
-
-       leaves += (c->icache.waysize) ? 2 : 1;
-
-       if (c->vcache.waysize) {
-               levels++;
-               leaves++;
-       }
 
-       if (c->scache.waysize) {
-               levels++;
-               leaves++;
-       }
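+       /* cache_leaves[] is filled in level order, so the last entry has the deepest level */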
+       this_cpu_ci->num_levels =
+               current_cpu_data.cache_leaves[cache_present - 1].level;
+       this_cpu_ci->num_leaves = cache_present;
 
-       if (c->tcache.waysize) {
-               levels++;
-               leaves++;
-       }
-
-       this_cpu_ci->num_levels = levels;
-       this_cpu_ci->num_leaves = leaves;
        return 0;
 }
 
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
                                           struct cacheinfo *sib_leaf)
 {
-       return !((this_leaf->level == 1) || (this_leaf->level == 2));
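+       /* Two leaves are shared only if neither is marked CACHE_PRIVATE */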
+       return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE)
+               && !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE));
 }
 
 static void cache_cpumap_setup(unsigned int cpu)
 {
-       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-       struct cacheinfo *this_leaf, *sib_leaf;
        unsigned int index;
+       struct cacheinfo *this_leaf, *sib_leaf;
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
        for (index = 0; index < this_cpu_ci->num_leaves; index++) {
                unsigned int i;
@@ -85,8 +46,10 @@ static void cache_cpumap_setup(unsigned int cpu)
                for_each_online_cpu(i) {
                        struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 
-                       if (i == cpu || !sib_cpu_ci->info_list)
-                               continue;/* skip if itself or no cacheinfo */
+                       if (i == cpu || !sib_cpu_ci->info_list ||
+                               (cpu_to_node(i) != cpu_to_node(cpu)))
+                               continue;
+
                        sib_leaf = sib_cpu_ci->info_list + index;
                        if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
                                cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
@@ -98,31 +61,24 @@ static void cache_cpumap_setup(unsigned int cpu)
 
 int populate_cache_leaves(unsigned int cpu)
 {
-       int level = 1, nodes_per_package = 1;
-       struct cpuinfo_loongarch *c = &current_cpu_data;
+       int i, cache_present = current_cpu_data.cache_leaves_present;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-
-       if (loongson_sysconf.nr_nodes > 1)
-               nodes_per_package = loongson_sysconf.cores_per_package
-                                       / loongson_sysconf.cores_per_node;
-
-       if (c->icache.waysize) {
-               populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
-               populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
-       } else {
-               populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+       struct cache_desc *cd, *cdesc = current_cpu_data.cache_leaves;
+
+       for (i = 0; i < cache_present; i++) {
+               cd = cdesc + i;
+
+               this_leaf->type = cd->type;
+               this_leaf->level = cd->level;
+               this_leaf->coherency_line_size = cd->linesz;
+               this_leaf->number_of_sets = cd->sets;
+               this_leaf->ways_of_associativity = cd->ways;
+               this_leaf->size = cd->linesz * cd->sets * cd->ways;
+               this_leaf->priv = &cd->flags;
+               this_leaf++;
        }
 
-       if (c->vcache.waysize)
-               populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
-       if (c->scache.waysize)
-               populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
-       if (c->tcache.waysize)
-               populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
        cache_cpumap_setup(cpu);
        this_cpu_ci->cpu_map_populated = true;
 
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 529ab8f..255a098 100644 (file)
@@ -187,7 +187,9 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
        uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]);
        uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]);
 
-       __cpu_full_name[cpu] = cpu_full_name;
+       if (!__cpu_full_name[cpu])
+               __cpu_full_name[cpu] = cpu_full_name;
+
        *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
        *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
 
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644 (file)
index 0000000..e559307
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+                        size_t csize, unsigned long offset)
+{
+       void *vaddr;
+
+       if (!csize)
+               return 0;
+
+       vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+       if (!vaddr)
+               return -ENOMEM;
+
+       csize = copy_to_iter(vaddr + offset, csize, iter);
+
+       memunmap(vaddr);
+
+       return csize;
+}
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 7e57ae8..9742577 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/addrspace.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/bug.h>
 #include <asm/regdef.h>
 #include <asm/loongarch.h>
 #include <asm/stackframe.h>
 
 _head:
        .word   MZ_MAGIC                /* "MZ", MS-DOS header */
-       .org    0x3c                    /* 0x04 ~ 0x3b reserved */
+       .org    0x8
+       .dword  kernel_entry            /* Kernel entry point */
+       .dword  _end - _text            /* Kernel image effective size */
+       .quad   0                       /* Kernel image load offset from start of RAM */
+       .org    0x3c                    /* 0x20 ~ 0x3b reserved */
        .long   pe_header - _head       /* Offset to the PE header */
 
 pe_header:
@@ -57,19 +62,19 @@ SYM_CODE_START(kernel_entry)                        # kernel entry point
        li.w            t0, 0x00                # FPE=0, SXE=0, ASXE=0, BTE=0
        csrwr           t0, LOONGARCH_CSR_EUEN
 
-       la              t0, __bss_start         # clear .bss
+       la.pcrel        t0, __bss_start         # clear .bss
        st.d            zero, t0, 0
-       la              t1, __bss_stop - LONGSIZE
+       la.pcrel        t1, __bss_stop - LONGSIZE
 1:
        addi.d          t0, t0, LONGSIZE
        st.d            zero, t0, 0
        bne             t0, t1, 1b
 
-       la              t0, fw_arg0
+       la.pcrel        t0, fw_arg0
        st.d            a0, t0, 0               # firmware arguments
-       la              t0, fw_arg1
+       la.pcrel        t0, fw_arg1
        st.d            a1, t0, 0
-       la              t0, fw_arg2
+       la.pcrel        t0, fw_arg2
        st.d            a2, t0, 0
 
        /* KSave3 used for percpu base, initialized as 0 */
@@ -77,7 +82,7 @@ SYM_CODE_START(kernel_entry)                  # kernel entry point
        /* GPR21 used for percpu base (runtime), initialized as 0 */
        move            u0, zero
 
-       la              tp, init_thread_union
+       la.pcrel        tp, init_thread_union
        /* Set the SP after an empty pt_regs.  */
        PTR_LI          sp, (_THREAD_SIZE - 32 - PT_SIZE)
        PTR_ADD         sp, sp, tp
@@ -85,6 +90,7 @@ SYM_CODE_START(kernel_entry)                  # kernel entry point
        PTR_ADDI        sp, sp, -4 * SZREG      # init stack pointer
 
        bl              start_kernel
+       ASM_BUG()
 
 SYM_CODE_END(kernel_entry)
 
@@ -116,6 +122,8 @@ SYM_CODE_START(smpboot_entry)
        ld.d            tp, t0, CPU_BOOT_TINFO
 
        bl              start_secondary
+       ASM_BUG()
+
 SYM_CODE_END(smpboot_entry)
 
 #endif /* CONFIG_SMP */
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
new file mode 100644 (file)
index 0000000..2dcb9e0
--- /dev/null
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * machine_kexec.c for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* 0x100000 ~ 0x200000 is safe */
+#define KEXEC_CONTROL_CODE     TO_CACHE(0x100000UL)
+#define KEXEC_CMDLINE_ADDR     TO_CACHE(0x108000UL)
+
+static unsigned long reboot_code_buffer;
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void (*relocated_kexec_smp_wait)(void *);
+atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+#endif
+
+static unsigned long efi_boot;
+static unsigned long cmdline_ptr;
+static unsigned long systable_ptr;
+static unsigned long start_addr;
+static unsigned long first_ind_entry;
+
+static void kexec_image_info(const struct kimage *kimage)
+{
+       unsigned long i;
+
+       pr_debug("kexec kimage info:\n");
+       pr_debug("\ttype:        %d\n", kimage->type);
+       pr_debug("\tstart:       %lx\n", kimage->start);
+       pr_debug("\thead:        %lx\n", kimage->head);
+       pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
+
+       for (i = 0; i < kimage->nr_segments; i++) {
+               pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
+                       kimage->segment[i].mem,
+                       kimage->segment[i].mem + kimage->segment[i].memsz);
+               pr_debug("\t\t0x%lx bytes, %lu pages\n",
+                       (unsigned long)kimage->segment[i].memsz,
+                       (unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
+       }
+}
+
+int machine_kexec_prepare(struct kimage *kimage)
+{
+       int i;
+       char *bootloader = "kexec";
+       void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR;
+
+       kexec_image_info(kimage);
+
+       kimage->arch.efi_boot = fw_arg0;
+       kimage->arch.systable_ptr = fw_arg2;
+
+       /* Find the command line */
+       for (i = 0; i < kimage->nr_segments; i++) {
+               if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) {
+                       if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE))
+                               kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr;
+                       break;
+               }
+       }
+
+       if (!kimage->arch.cmdline_ptr) {
+               pr_err("Command line not included in the provided image\n");
+               return -EINVAL;
+       }
+
+       /* kexec/kdump need a safe page to save reboot_code_buffer */
+       kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE);
+
+       reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page);
+       memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
+
+#ifdef CONFIG_SMP
+       /* All secondary cpus may now jump to the kexec_smp_wait loop */
+       relocated_kexec_smp_wait = reboot_code_buffer + (void *)(kexec_smp_wait - relocate_new_kernel);
+#endif
+
+       return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+}
+
+void kexec_reboot(void)
+{
+       do_kexec_t do_kexec = NULL;
+
+       /*
+        * We know we were online, and there will be no incoming IPIs at
+        * this point. Mark online again before rebooting so that the crash
+        * analysis tool will see us correctly.
+        */
+       set_cpu_online(smp_processor_id(), true);
+
+       /* Ensure remote CPUs observe that we're online before rebooting. */
+       smp_mb__after_atomic();
+
+       /*
+        * Make sure we get correct instructions written by the
+        * machine_kexec_prepare() CPU.
+        */
+       __asm__ __volatile__ ("\tibar 0\n"::);
+
+#ifdef CONFIG_SMP
+       /* All secondary cpus go to kexec_smp_wait */
+       if (smp_processor_id() > 0) {
+               relocated_kexec_smp_wait(NULL);
+               unreachable();
+       }
+#endif
+
+       do_kexec = (void *)reboot_code_buffer;
+       do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry);
+
+       unreachable();
+}
+
+#ifdef CONFIG_SMP
+static void kexec_shutdown_secondary(void *regs)
+{
+       int cpu = smp_processor_id();
+
+       if (!cpu_online(cpu))
+               return;
+
+       /* We won't be sent IPIs any more. */
+       set_cpu_online(cpu, false);
+
+       local_irq_disable();
+       while (!atomic_read(&kexec_ready_to_reboot))
+               cpu_relax();
+
+       kexec_reboot();
+}
+
+static void crash_shutdown_secondary(void *passed_regs)
+{
+       int cpu = smp_processor_id();
+       struct pt_regs *regs = passed_regs;
+
+       /*
+        * If we are passed registers, use those. Otherwise get the
+        * regs from the last interrupt, which should be correct, as
+        * we are in an interrupt. But if the regs are not there,
+        * pull them from the top of the stack. They are probably
+        * wrong, but we need something to keep from crashing again.
+        */
+       if (!regs)
+               regs = get_irq_regs();
+       if (!regs)
+               regs = task_pt_regs(current);
+
+       if (!cpu_online(cpu))
+               return;
+
+       /* We won't be sent IPIs any more. */
+       set_cpu_online(cpu, false);
+
+       local_irq_disable();
+       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+               crash_save_cpu(regs, cpu);
+       cpumask_set_cpu(cpu, &cpus_in_crash);
+
+       while (!atomic_read(&kexec_ready_to_reboot))
+               cpu_relax();
+
+       kexec_reboot();
+}
+
+void crash_smp_send_stop(void)
+{
+       unsigned int ncpus;
+       unsigned long timeout;
+       static int cpus_stopped;
+
+       /*
+        * This function can be called twice in panic path, but obviously
+        * we should execute this only once.
+        */
+       if (cpus_stopped)
+               return;
+
+       cpus_stopped = 1;
+
+        /* Excluding the panic cpu */
+       ncpus = num_online_cpus() - 1;
+
+       smp_call_function(crash_shutdown_secondary, NULL, 0);
+       smp_wmb();
+
+       /*
+        * The crash CPU sends an IPI and then waits for the other CPUs
+        * to respond, allowing them up to 10 seconds.
+        */
+       timeout = MSEC_PER_SEC * 10;
+       pr_emerg("Sending IPI to other cpus...\n");
+       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+               mdelay(1);
+               cpu_relax();
+       }
+}
+#endif /* defined(CONFIG_SMP) */
+
+void machine_shutdown(void)
+{
+       int cpu;
+
+       /* All CPUs go to reboot_code_buffer */
+       for_each_possible_cpu(cpu)
+               if (!cpu_online(cpu))
+                       cpu_device_up(get_cpu_device(cpu));
+
+#ifdef CONFIG_SMP
+       smp_call_function(kexec_shutdown_secondary, NULL, 0);
+#endif
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+       int crashing_cpu;
+
+       local_irq_disable();
+
+       crashing_cpu = smp_processor_id();
+       crash_save_cpu(regs, crashing_cpu);
+
+#ifdef CONFIG_SMP
+       crash_smp_send_stop();
+#endif
+       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+       pr_info("Starting crashdump kernel...\n");
+}
+
+void machine_kexec(struct kimage *image)
+{
+       unsigned long entry, *ptr;
+       struct kimage_arch *internal = &image->arch;
+
+       efi_boot = internal->efi_boot;
+       cmdline_ptr = internal->cmdline_ptr;
+       systable_ptr = internal->systable_ptr;
+
+       start_addr = (unsigned long)phys_to_virt(image->start);
+
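+       /*
+        * For a normal kexec the relocated code copies segments via the
+        * indirection list; a crash (kdump) image is already in place.
+        */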
+       first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ?
+               (unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0;
+
+       /*
+        * The generic kexec code builds a page list with physical
+        * addresses. They are directly accessible through XKPRANGE,
+        * hence the phys_to_virt() call.
+        */
+       for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+            ptr = (entry & IND_INDIRECTION) ?
+              phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+               if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
+                   *ptr & IND_DESTINATION)
+                       *ptr = (unsigned long) phys_to_virt(*ptr);
+       }
+
+       /* Mark offline before disabling local irq. */
+       set_cpu_online(smp_processor_id(), false);
+
+       /* We do not want to be bothered. */
+       local_irq_disable();
+
+       pr_notice("EFI boot flag 0x%lx\n", efi_boot);
+       pr_notice("Command line at 0x%lx\n", cmdline_ptr);
+       pr_notice("System table at 0x%lx\n", systable_ptr);
+       pr_notice("We will call new kernel at 0x%lx\n", start_addr);
+       pr_notice("Bye ...\n");
+
+       /* Make reboot code buffer available to the boot CPU. */
+       flush_cache_all();
+
+#ifdef CONFIG_SMP
+       atomic_set(&kexec_ready_to_reboot, 1);
+#endif
+
+       kexec_reboot();
+}
diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
index 7423361..4a4107a 100644 (file)
@@ -58,7 +58,4 @@ void __init memblock_init(void)
        /* Reserve the kernel text/data/bss */
        memblock_reserve(__pa_symbol(&_text),
                         __pa_symbol(&_end) - __pa_symbol(&_text));
-
-       /* Reserve the initrd */
-       reserve_initrd_mem();
 }
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index 6d49828..d296a70 100644 (file)
@@ -7,7 +7,33 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val)
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val)
+{
+       struct mod_section *got_sec = &mod->arch.got;
+       int i = got_sec->num_entries;
+       struct got_entry *got = get_got_entry(val, got_sec);
+
+       if (got)
+               return (Elf_Addr)got;
+
+       /* There is no GOT entry for val yet, create a new one. */
+       got = (struct got_entry *)got_sec->shdr->sh_addr;
+       got[i] = emit_got_entry(val);
+
+       got_sec->num_entries++;
+       if (got_sec->num_entries > got_sec->max_entries) {
+               /*
+                * This may happen when the module contains a GOT_HI20 without
+                * a paired GOT_LO12. Such a module is broken, reject it.
+                */
+               pr_err("%s: module contains bad GOT relocation\n", mod->name);
+               return 0;
+       }
+
+       return (Elf_Addr)&got[i];
+}
+
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val)
 {
        int nr;
        struct mod_section *plt_sec = &mod->arch.plt;
@@ -50,15 +76,25 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx)
        return false;
 }
 
-static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
+static void count_max_entries(Elf_Rela *relas, int num,
+                             unsigned int *plts, unsigned int *gots)
 {
        unsigned int i, type;
 
        for (i = 0; i < num; i++) {
                type = ELF_R_TYPE(relas[i].r_info);
-               if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
+               switch (type) {
+               case R_LARCH_SOP_PUSH_PLT_PCREL:
+               case R_LARCH_B26:
                        if (!duplicate_rela(relas, i))
                                (*plts)++;
+                       break;
+               case R_LARCH_GOT_PC_HI20:
+                       if (!duplicate_rela(relas, i))
+                               (*gots)++;
+                       break;
+               default:
+                       break; /* Do nothing. */
                }
        }
 }
@@ -66,18 +102,24 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                              char *secstrings, struct module *mod)
 {
-       unsigned int i, num_plts = 0;
+       unsigned int i, num_plts = 0, num_gots = 0;
 
        /*
         * Find the empty .plt sections.
         */
        for (i = 0; i < ehdr->e_shnum; i++) {
-               if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+               if (!strcmp(secstrings + sechdrs[i].sh_name, ".got"))
+                       mod->arch.got.shdr = sechdrs + i;
+               else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
                        mod->arch.plt.shdr = sechdrs + i;
                else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
                        mod->arch.plt_idx.shdr = sechdrs + i;
        }
 
+       if (!mod->arch.got.shdr) {
+               pr_err("%s: module GOT section(s) missing\n", mod->name);
+               return -ENOEXEC;
+       }
        if (!mod->arch.plt.shdr) {
                pr_err("%s: module PLT section(s) missing\n", mod->name);
                return -ENOEXEC;
@@ -100,9 +142,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                if (!(dst_sec->sh_flags & SHF_EXECINSTR))
                        continue;
 
-               count_max_entries(relas, num_rela, &num_plts);
+               count_max_entries(relas, num_rela, &num_plts, &num_gots);
        }
 
+       mod->arch.got.shdr->sh_type = SHT_NOBITS;
+       mod->arch.got.shdr->sh_flags = SHF_ALLOC;
+       mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES;
+       mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry);
+       mod->arch.got.num_entries = 0;
+       mod->arch.got.max_entries = num_gots;
+
        mod->arch.plt.shdr->sh_type = SHT_NOBITS;
        mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
        mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES;
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 638427f..097595b 100644 (file)
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-static inline bool signed_imm_check(long val, unsigned int bit)
-{
-       return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
-}
-
-static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
-{
-       return val < (1UL << bit);
-}
-
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
        if (*rela_stack_top >= RELA_STACK_DEPTH)
@@ -281,6 +271,96 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
        }
 }
 
+static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
+                       s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+       ptrdiff_t offset = (void *)v - (void *)location;
+       union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
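+       /* B26 reaches +/-128MB; route farther targets through a PLT entry */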
+       if (offset >= SZ_128M)
+               v = module_emit_plt_entry(mod, v);
+
+       if (offset < -SZ_128M)
+               v = module_emit_plt_entry(mod, v);
+
+       offset = (void *)v - (void *)location;
+
+       if (offset & 3) {
+               pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n",
+                               mod->name, (long long)offset, type);
+               return -ENOEXEC;
+       }
+
+       if (!signed_imm_check(offset, 28)) {
+               pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n",
+                               mod->name, (long long)offset, type);
+               return -ENOEXEC;
+       }
+
+       offset >>= 2;
+       insn->reg0i26_format.immediate_l = offset & 0xffff;
+       insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff;
+
+       return 0;
+}
+
+static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v,
+                       s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+       union loongarch_instruction *insn = (union loongarch_instruction *)location;
+       /* Use s32 so the value is deliberately sign-extended. */
+       s32 offset_hi20 = (void *)((v + 0x800) & ~0xfff) -
+                         (void *)((Elf_Addr)location & ~0xfff);
+       Elf_Addr anchor = (((Elf_Addr)location) & ~0xfff) + offset_hi20;
+       ptrdiff_t offset_rem = (void *)v - (void *)anchor;
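+       /*
+        * The +0x800 bias rounds offset_hi20 so that adding the
+        * sign-extended LO12 value from the paired relocation gives v.
+        */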
+
+       switch (type) {
+       case R_LARCH_PCALA_LO12:
+               insn->reg2i12_format.immediate = v & 0xfff;
+               break;
+       case R_LARCH_PCALA_HI20:
+               v = offset_hi20 >> 12;
+               insn->reg1i20_format.immediate = v & 0xfffff;
+               break;
+       case R_LARCH_PCALA64_LO20:
+               v = offset_rem >> 32;
+               insn->reg1i20_format.immediate = v & 0xfffff;
+               break;
+       case R_LARCH_PCALA64_HI12:
+               v = offset_rem >> 52;
+               insn->reg2i12_format.immediate = v & 0xfff;
+               break;
+       default:
+               pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v,
+                       s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+       Elf_Addr got = module_emit_got_entry(mod, v);
+
+       if (!got)
+               return -EINVAL;
+
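+       /*
+        * GOT_PC_{HI20,LO12} are applied like PCALA_{HI20,LO12}, except
+        * that they are resolved against the GOT entry, not the symbol.
+        */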
+       switch (type) {
+       case R_LARCH_GOT_PC_LO12:
+               type = R_LARCH_PCALA_LO12;
+               break;
+       case R_LARCH_GOT_PC_HI20:
+               type = R_LARCH_PCALA_HI20;
+               break;
+       default:
+               pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+               return -EINVAL;
+       }
+
+       return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type);
+}
+
 /*
  * reloc_handlers_rela() - Apply a particular relocation to a module
  * @mod: the module to apply the reloc to
@@ -296,7 +376,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,
 
 /* The handlers for known reloc types */
 static reloc_rela_handler reloc_rela_handlers[] = {
-       [R_LARCH_NONE ... R_LARCH_SUB64]                     = apply_r_larch_error,
+       [R_LARCH_NONE ... R_LARCH_RELAX]                     = apply_r_larch_error,
 
        [R_LARCH_NONE]                                       = apply_r_larch_none,
        [R_LARCH_32]                                         = apply_r_larch_32,
@@ -310,6 +390,9 @@ static reloc_rela_handler reloc_rela_handlers[] = {
        [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE]            = apply_r_larch_sop,
        [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
        [R_LARCH_ADD32 ... R_LARCH_SUB64]                    = apply_r_larch_add_sub,
+       [R_LARCH_B26]                                        = apply_r_larch_b26,
+       [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12]          = apply_r_larch_pcala,
+       [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12]          = apply_r_larch_got_pc,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
new file mode 100644 (file)
index 0000000..707bd32
--- /dev/null
@@ -0,0 +1,887 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux performance counter support for LoongArch.
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2010 MIPS Technologies, Inc.
+ * Copyright (C) 2011 Cavium Networks, Inc.
+ * Author: Deng-Cheng Zhu
+ */
+
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+       unsigned long err;
+       unsigned long __user *user_frame_tail;
+       struct stack_frame buftail;
+
+       user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));
+
+       /* Also check accessibility of one struct frame_tail beyond */
+       if (!access_ok(user_frame_tail, sizeof(buftail)))
+               return 0;
+
+       pagefault_disable();
+       err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
+       pagefault_enable();
+
+       if (err || (unsigned long)user_frame_tail >= buftail.fp)
+               return 0;
+
+       perf_callchain_store(entry, buftail.ra);
+
+       return buftail.fp;
+}
+
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+                        struct pt_regs *regs)
+{
+       unsigned long fp;
+
+       if (perf_guest_state()) {
+               /* We don't support guest os callchain now */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->csr_era);
+
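+       /* $r22 ($fp) is the frame pointer; user frames must stay 16-byte aligned */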
+       fp = regs->regs[22];
+
+       while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
+               fp = user_backtrace(entry, fp);
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+                          struct pt_regs *regs)
+{
+       struct unwind_state state;
+       unsigned long addr;
+
+       for (unwind_start(&state, current, regs);
+             !unwind_done(&state); unwind_next_frame(&state)) {
+               addr = unwind_get_return_address(&state);
+               if (!addr || perf_callchain_store(entry, addr))
+                       return;
+       }
+}
+
+#define LOONGARCH_MAX_HWEVENTS 32
+
+struct cpu_hw_events {
+       /* Array of events on this cpu. */
+       struct perf_event       *events[LOONGARCH_MAX_HWEVENTS];
+
+       /*
+        * Set the bit (indexed by the counter number) when the counter
+        * is used for an event.
+        */
+       unsigned long           used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];
+
+       /*
+        * Software copy of the control register for each performance counter.
+        */
+       unsigned int            saved_ctrl[LOONGARCH_MAX_HWEVENTS];
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+       .saved_ctrl = {0},
+};
+
+/* The description of LoongArch performance events. */
+struct loongarch_perf_event {
+       unsigned int event_id;
+};
+
+static struct loongarch_perf_event raw_event;
+static DEFINE_MUTEX(raw_event_mutex);
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define HW_OP_UNSUPPORTED              0xffffffff
+#define CACHE_OP_UNSUPPORTED           0xffffffff
+
+#define PERF_MAP_ALL_UNSUPPORTED                                       \
+       [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}
+
+#define PERF_CACHE_MAP_ALL_UNSUPPORTED                                 \
+[0 ... C(MAX) - 1] = {                                                 \
+       [0 ... C(OP_MAX) - 1] = {                                       \
+               [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},     \
+       },                                                              \
+}
+
+struct loongarch_pmu {
+       u64             max_period;
+       u64             valid_count;
+       u64             overflow;
+       const char      *name;
+       unsigned int    num_counters;
+       u64             (*read_counter)(unsigned int idx);
+       void            (*write_counter)(unsigned int idx, u64 val);
+       const struct loongarch_perf_event *(*map_raw_event)(u64 config);
+       const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
+       const struct loongarch_perf_event (*cache_event_map)
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+static struct loongarch_pmu loongarch_pmu;
+
+#define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT)
+
+#define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 |    \
+                                       CSR_PERFCTRL_PLV1 |     \
+                                       CSR_PERFCTRL_PLV2 |     \
+                                       CSR_PERFCTRL_PLV3 |     \
+                                       CSR_PERFCTRL_IE)
+
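+/* PLV filter and IE bits (bits 16-20) accepted from an event's config_base */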
+#define M_PERFCTL_CONFIG_MASK          0x1f0000
+
+static void pause_local_counters(void);
+static void resume_local_counters(void);
+
+static u64 loongarch_pmu_read_counter(unsigned int idx)
+{
+       u64 val = -1;
+
+       switch (idx) {
+       case 0:
+               val = read_csr_perfcntr0();
+               break;
+       case 1:
+               val = read_csr_perfcntr1();
+               break;
+       case 2:
+               val = read_csr_perfcntr2();
+               break;
+       case 3:
+               val = read_csr_perfcntr3();
+               break;
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
+       }
+
+       return val;
+}
+
+static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
+{
+       switch (idx) {
+       case 0:
+               write_csr_perfcntr0(val);
+               return;
+       case 1:
+               write_csr_perfcntr1(val);
+               return;
+       case 2:
+               write_csr_perfcntr2(val);
+               return;
+       case 3:
+               write_csr_perfcntr3(val);
+               return;
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return;
+       }
+}
+
+static unsigned int loongarch_pmu_read_control(unsigned int idx)
+{
+       unsigned int val = -1;
+
+       switch (idx) {
+       case 0:
+               val = read_csr_perfctrl0();
+               break;
+       case 1:
+               val = read_csr_perfctrl1();
+               break;
+       case 2:
+               val = read_csr_perfctrl2();
+               break;
+       case 3:
+               val = read_csr_perfctrl3();
+               break;
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return 0;
+       }
+
+       return val;
+}
+
+static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
+{
+       switch (idx) {
+       case 0:
+               write_csr_perfctrl0(val);
+               return;
+       case 1:
+               write_csr_perfctrl1(val);
+               return;
+       case 2:
+               write_csr_perfctrl2(val);
+               return;
+       case 3:
+               write_csr_perfctrl3(val);
+               return;
+       default:
+               WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+               return;
+       }
+}
+
+static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+       int i;
+
+       for (i = 0; i < loongarch_pmu.num_counters; i++) {
+               if (!test_and_set_bit(i, cpuc->used_mask))
+                       return i;
+       }
+
+       return -EAGAIN;
+}
+
+static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
+{
+       unsigned int cpu;
+       struct perf_event *event = container_of(evt, struct perf_event, hw);
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+       /* Make sure interrupt enabled. */
+       cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+               (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;
+
+       cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();
+
+       /*
+        * We do not actually let the counter run. Leave it until start().
+        */
+       pr_debug("Enabling perf counter for CPU%d\n", cpu);
+}
+
+static void loongarch_pmu_disable_event(int idx)
+{
+       unsigned long flags;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+       local_irq_save(flags);
+       cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
+               ~M_PERFCTL_COUNT_EVENT_WHENEVER;
+       loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
+       local_irq_restore(flags);
+}
+
+static int loongarch_pmu_event_set_period(struct perf_event *event,
+                                   struct hw_perf_event *hwc,
+                                   int idx)
+{
+       int ret = 0;
+       u64 left = local64_read(&hwc->period_left);
+       u64 period = hwc->sample_period;
+
+       if (unlikely((left + period) & (1ULL << 63))) {
+               /* left underflowed by more than period. */
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       } else if (unlikely((left + period) <= period)) {
+               /* left underflowed by less than period. */
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (left > loongarch_pmu.max_period) {
+               left = loongarch_pmu.max_period;
+               local64_set(&hwc->period_left, left);
+       }
+
+       local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);
+
+       loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
+
+static void loongarch_pmu_event_update(struct perf_event *event,
+                                struct hw_perf_event *hwc,
+                                int idx)
+{
+       u64 delta;
+       u64 prev_raw_count, new_raw_count;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = loongarch_pmu.read_counter(idx);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                               new_raw_count) != prev_raw_count)
+               goto again;
+
+       delta = new_raw_count - prev_raw_count;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+}
+
+static void loongarch_pmu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+
+       /* Set the period for the event. */
+       loongarch_pmu_event_set_period(event, hwc, hwc->idx);
+
+       /* Enable the event. */
+       loongarch_pmu_enable_event(hwc, hwc->idx);
+}
+
+static void loongarch_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               /* We are working on a local event. */
+               loongarch_pmu_disable_event(hwc->idx);
+               barrier();
+               loongarch_pmu_event_update(event, hwc, hwc->idx);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
+}
+
+static int loongarch_pmu_add(struct perf_event *event, int flags)
+{
+       int idx, err = 0;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+
+       perf_pmu_disable(event->pmu);
+
+       /* To look for a free counter for this event. */
+       idx = loongarch_pmu_alloc_counter(cpuc, hwc);
+       if (idx < 0) {
+               err = idx;
+               goto out;
+       }
+
+       /*
+        * If there is an event in the counter we are going to use then
+        * make sure it is disabled.
+        */
+       event->hw.idx = idx;
+       loongarch_pmu_disable_event(idx);
+       cpuc->events[idx] = event;
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               loongarch_pmu_start(event, PERF_EF_RELOAD);
+
+       /* Propagate our changes to the userspace mapping. */
+       perf_event_update_userpage(event);
+
+out:
+       perf_pmu_enable(event->pmu);
+       return err;
+}
+
+static void loongarch_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+       loongarch_pmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       clear_bit(idx, cpuc->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
+static void loongarch_pmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* Don't read disabled counters! */
+       if (hwc->idx < 0)
+               return;
+
+       loongarch_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static void loongarch_pmu_enable(struct pmu *pmu)
+{
+       resume_local_counters();
+}
+
+static void loongarch_pmu_disable(struct pmu *pmu)
+{
+       pause_local_counters();
+}
+
+static DEFINE_MUTEX(pmu_reserve_mutex);
+static atomic_t active_events = ATOMIC_INIT(0);
+
+static int get_pmc_irq(void)
+{
+       struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+
+       if (d)
+               return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
+
+       return -EINVAL;
+}
+
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+       if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
+               on_each_cpu(reset_counters, NULL, 1);
+               free_irq(get_pmc_irq(), &loongarch_pmu);
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+}
+
+static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
+                       struct perf_sample_data *data, struct pt_regs *regs)
+{
+       struct perf_event *event = cpuc->events[idx];
+       struct hw_perf_event *hwc = &event->hw;
+
+       loongarch_pmu_event_update(event, hwc, idx);
+       data->period = event->hw.last_period;
+       if (!loongarch_pmu_event_set_period(event, hwc, idx))
+               return;
+
+       if (perf_event_overflow(event, data, regs))
+               loongarch_pmu_disable_event(idx);
+}
+
+static irqreturn_t pmu_handle_irq(int irq, void *dev)
+{
+       int n;
+       int handled = IRQ_NONE;
+       uint64_t counter;
+       struct pt_regs *regs;
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * First we pause the local counters, so that when we are locked
+        * here, the counters are all paused. When it gets locked due to
+        * perf_disable(), the timer interrupt handler will be delayed.
+        *
+        * See also loongarch_pmu_start().
+        */
+       pause_local_counters();
+
+       regs = get_irq_regs();
+
+       perf_sample_data_init(&data, 0, 0);
+
+       for (n = 0; n < loongarch_pmu.num_counters; n++) {
+               if (test_bit(n, cpuc->used_mask)) {
+                       counter = loongarch_pmu.read_counter(n);
+                       if (counter & loongarch_pmu.overflow) {
+                               handle_associated_event(cpuc, n, &data, regs);
+                               handled = IRQ_HANDLED;
+                       }
+               }
+       }
+
+       resume_local_counters();
+
+       /*
+        * Do all the work for the pending perf events. We can do this
+        * in here because the performance counter interrupt is a regular
+        * interrupt, not NMI.
+        */
+       if (handled == IRQ_HANDLED)
+               irq_work_run();
+
+       return handled;
+}
+
+static int loongarch_pmu_event_init(struct perf_event *event)
+{
+       int r, irq;
+       unsigned long flags;
+
+       /* does not support taken branch sampling */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               /* Init it to avoid false validate_group */
+               event->hw.event_base = 0xffffffff;
+               return -ENOENT;
+       }
+
+       if (event->cpu >= 0 && !cpu_online(event->cpu))
+               return -ENODEV;
+
+       irq = get_pmc_irq();
+       flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
+       if (!atomic_inc_not_zero(&active_events)) {
+               mutex_lock(&pmu_reserve_mutex);
+               if (atomic_read(&active_events) == 0) {
+                       r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
+                       if (r < 0) {
+                               mutex_unlock(&pmu_reserve_mutex);
+                               pr_warn("PMU IRQ request failed\n");
+                               return -ENODEV;
+                       }
+               }
+               atomic_inc(&active_events);
+               mutex_unlock(&pmu_reserve_mutex);
+       }
+
+       return __hw_perf_event_init(event);
+}
+
+static struct pmu pmu = {
+       .pmu_enable     = loongarch_pmu_enable,
+       .pmu_disable    = loongarch_pmu_disable,
+       .event_init     = loongarch_pmu_event_init,
+       .add            = loongarch_pmu_add,
+       .del            = loongarch_pmu_del,
+       .start          = loongarch_pmu_start,
+       .stop           = loongarch_pmu_stop,
+       .read           = loongarch_pmu_read,
+};
+
+static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
+{
+       return (pev->event_id & 0xff);
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
+{
+       const struct loongarch_perf_event *pev;
+
+       pev = &(*loongarch_pmu.general_event_map)[idx];
+
+       if (pev->event_id == HW_OP_UNSUPPORTED)
+               return ERR_PTR(-ENOENT);
+
+       return pev;
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
+{
+       unsigned int cache_type, cache_op, cache_result;
+       const struct loongarch_perf_event *pev;
+
+       cache_type = (config >> 0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_op = (config >> 8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return ERR_PTR(-EINVAL);
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return ERR_PTR(-EINVAL);
+
+       pev = &((*loongarch_pmu.cache_event_map)
+                                       [cache_type]
+                                       [cache_op]
+                                       [cache_result]);
+
+       if (pev->event_id == CACHE_OP_UNSUPPORTED)
+               return ERR_PTR(-ENOENT);
+
+       return pev;
+}
+
+static int validate_group(struct perf_event *event)
+{
+       struct cpu_hw_events fake_cpuc;
+       struct perf_event *sibling, *leader = event->group_leader;
+
+       memset(&fake_cpuc, 0, sizeof(fake_cpuc));
+
+       if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
+               return -EINVAL;
+
+       for_each_sibling_event(sibling, leader) {
+               if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
+                       return -EINVAL;
+       }
+
+       if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+static void reset_counters(void *arg)
+{
+       int n;
+       int counters = loongarch_pmu.num_counters;
+
+       for (n = 0; n < counters; n++) {
+               loongarch_pmu_write_control(n, 0);
+               loongarch_pmu.write_counter(n, 0);
+       }
+}
+
+static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
+       PERF_MAP_ALL_UNSUPPORTED,
+       [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 },
+       [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 },
+       [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 },
+       [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 },
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 },
+       [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 },
+};
+
+static const struct loongarch_perf_event loongson_cache_map
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+PERF_CACHE_MAP_ALL_UNSUPPORTED,
+[C(L1D)] = {
+       /*
+        * Like some other architectures (e.g. ARM), the performance
+        * counters don't differentiate between read and write
+        * accesses/misses, so this isn't strictly correct, but it's the
+        * best we can do. Writes and reads get combined.
+        */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x8 },
+               [C(RESULT_MISS)]        = { 0x9 },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { 0x8 },
+               [C(RESULT_MISS)]        = { 0x9 },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { 0xaa },
+               [C(RESULT_MISS)]        = { 0xa9 },
+       },
+},
+[C(L1I)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x6 },
+               [C(RESULT_MISS)]        = { 0x7 },
+       },
+},
+[C(LL)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0xc },
+               [C(RESULT_MISS)]        = { 0xd },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { 0xc },
+               [C(RESULT_MISS)]        = { 0xd },
+       },
+},
+[C(ITLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_MISS)]    = { 0x3b },
+       },
+},
+[C(DTLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { 0x4 },
+               [C(RESULT_MISS)]        = { 0x3c },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { 0x4 },
+               [C(RESULT_MISS)]        = { 0x3c },
+       },
+},
+[C(BPU)] = {
+       /* Using the same code for *HW_BRANCH* */
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]  = { 0x02 },
+               [C(RESULT_MISS)]    = { 0x03 },
+       },
+},
+};
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+       int err;
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_event_attr *attr = &event->attr;
+       const struct loongarch_perf_event *pev;
+
+       /* Returning LoongArch event descriptor for generic perf event. */
+       if (PERF_TYPE_HARDWARE == event->attr.type) {
+               if (event->attr.config >= PERF_COUNT_HW_MAX)
+                       return -EINVAL;
+               pev = loongarch_pmu_map_general_event(event->attr.config);
+       } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
+               pev = loongarch_pmu_map_cache_event(event->attr.config);
+       } else if (PERF_TYPE_RAW == event->attr.type) {
+               /* We are working on the global raw event. */
+               mutex_lock(&raw_event_mutex);
+               pev = loongarch_pmu.map_raw_event(event->attr.config);
+       } else {
+               /* The event type is not (yet) supported. */
+               return -EOPNOTSUPP;
+       }
+
+       if (IS_ERR(pev)) {
+               if (PERF_TYPE_RAW == event->attr.type)
+                       mutex_unlock(&raw_event_mutex);
+               return PTR_ERR(pev);
+       }
+
+       /*
+        * We allow max flexibility on how each individual counter shared
+        * by the single CPU operates (the mode exclusion and the range).
+        */
+       hwc->config_base = CSR_PERFCTRL_IE;
+
+       hwc->event_base = loongarch_pmu_perf_event_encode(pev);
+       if (PERF_TYPE_RAW == event->attr.type)
+               mutex_unlock(&raw_event_mutex);
+
+       if (!attr->exclude_user) {
+               hwc->config_base |= CSR_PERFCTRL_PLV3;
+               hwc->config_base |= CSR_PERFCTRL_PLV2;
+       }
+       if (!attr->exclude_kernel) {
+               hwc->config_base |= CSR_PERFCTRL_PLV0;
+       }
+       if (!attr->exclude_hv) {
+               hwc->config_base |= CSR_PERFCTRL_PLV1;
+       }
+
+       hwc->config_base &= M_PERFCTL_CONFIG_MASK;
+       /*
+        * The event can belong to another cpu. We do not assign a local
+        * counter for it for now.
+        */
+       hwc->idx = -1;
+       hwc->config = 0;
+
+       if (!hwc->sample_period) {
+               hwc->sample_period  = loongarch_pmu.max_period;
+               hwc->last_period    = hwc->sample_period;
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+
+       err = 0;
+       if (event->group_leader != event)
+               err = validate_group(event);
+
+       event->destroy = hw_perf_event_destroy;
+
+       if (err)
+               event->destroy(event);
+
+       return err;
+}
+
+static void pause_local_counters(void)
+{
+       unsigned long flags;
+       int ctr = loongarch_pmu.num_counters;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       local_irq_save(flags);
+       do {
+               ctr--;
+               cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
+               loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
+                                        ~M_PERFCTL_COUNT_EVENT_WHENEVER);
+       } while (ctr > 0);
+       local_irq_restore(flags);
+}
+
+static void resume_local_counters(void)
+{
+       int ctr = loongarch_pmu.num_counters;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       do {
+               ctr--;
+               loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
+       } while (ctr > 0);
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
+{
+       raw_event.event_id = config & 0xff;
+
+       return &raw_event;
+}
+
+static int __init init_hw_perf_events(void)
+{
+       int counters;
+
+       if (!cpu_has_pmp)
+               return -ENODEV;
+
+       pr_info("Performance counters: ");
+       counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;
+
+       loongarch_pmu.num_counters = counters;
+       loongarch_pmu.max_period = (1ULL << 63) - 1;
+       loongarch_pmu.valid_count = (1ULL << 63) - 1;
+       loongarch_pmu.overflow = 1ULL << 63;
+       loongarch_pmu.name = "loongarch/loongson64";
+       loongarch_pmu.read_counter = loongarch_pmu_read_counter;
+       loongarch_pmu.write_counter = loongarch_pmu_write_counter;
+       loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
+       loongarch_pmu.general_event_map = &loongson_event_map;
+       loongarch_pmu.cache_event_map = &loongson_cache_map;
+
+       on_each_cpu(reset_counters, NULL, 1);
+
+       pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
+                       loongarch_pmu.name, counters, 64);
+
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
+       return 0;
+}
+early_initcall(init_hw_perf_events);
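
For illustration only (not part of the series): the raw-event path above keeps just the low 8 bits of attr.config, and the PMU is registered as the PERF_TYPE_RAW "cpu" PMU, so a plain perf_event_open() user can drive it. The sketch below uses only the generic perf syscall ABI; raw id 0x01 is assumed to select the instructions event, matching loongson_event_map.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01;		/* low 8 bits select the hardware event */
	attr.disabled = 1;
	attr.exclude_kernel = 1;	/* keeps CSR_PERFCTRL_PLV0 clear in config_base */

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("raw event 0x01: %lld\n", count);
	close(fd);
	return 0;
}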
diff --git a/arch/loongarch/kernel/perf_regs.c b/arch/loongarch/kernel/perf_regs.c
new file mode 100644 (file)
index 0000000..263ac4a
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_32BIT
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+       return PERF_SAMPLE_REGS_ABI_32;
+}
+#else /* Must be CONFIG_64BIT */
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+       if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS))
+               return PERF_SAMPLE_REGS_ABI_32;
+       else
+               return PERF_SAMPLE_REGS_ABI_64;
+}
+#endif /* CONFIG_32BIT */
+
+int perf_reg_validate(u64 mask)
+{
+       if (!mask)
+               return -EINVAL;
+       if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1))
+               return -EINVAL;
+       return 0;
+}
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+       if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX))
+               return 0;
+
+       if ((u32)idx == PERF_REG_LOONGARCH_PC)
+               return regs->csr_era;
+
+       return regs->regs[idx];
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+                       struct pt_regs *regs)
+{
+       regs_user->regs = task_pt_regs(current);
+       regs_user->abi = perf_reg_abi(current);
+}
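
A hedged usage note (not from the patch): perf_reg_value() above resolves sampled register indices against struct pt_regs, with PERF_REG_LOONGARCH_PC mapped to csr_era. A profiler would request those registers roughly as below, assuming the PERF_REG_LOONGARCH_* indices from the new uapi perf_regs.h (r3 is $sp on LoongArch).

#include <linux/perf_event.h>
#include <asm/perf_regs.h>

/* Ask for the PC and stack pointer with every sample. */
static void request_user_regs(struct perf_event_attr *attr)
{
	attr->sample_type     |= PERF_SAMPLE_REGS_USER;
	attr->sample_regs_user = (1ULL << PERF_REG_LOONGARCH_PC) |
				 (1ULL << PERF_REG_LOONGARCH_R3);
}

perf_reg_validate() rejects an empty mask or one with bits at or above PERF_REG_LOONGARCH_MAX.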
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
new file mode 100644 (file)
index 0000000..d132525
--- /dev/null
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * relocate_kernel.S for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+#include <asm/loongarch.h>
+#include <asm/stackframe.h>
+#include <asm/addrspace.h>
+
+SYM_CODE_START(relocate_new_kernel)
+       /*
+        * a0: EFI boot flag for the new kernel
+        * a1: Command line pointer for the new kernel
+        * a2: System table pointer for the new kernel
+        * a3: Start address to jump to after relocation
+        * a4: Pointer to the current indirection page entry
+        */
+       move            s0, a4
+
+       /*
+        * In case of a kdump/crash kernel, the indirection page is not
+        * populated as the kernel is directly copied to a reserved location
+        */
+       beqz            s0, done
+
+process_entry:
+       PTR_L           s1, s0, 0
+       PTR_ADDI        s0, s0, SZREG
+
+       /* destination page */
+       andi            s2, s1, IND_DESTINATION
+       beqz            s2, 1f
+       li.w            t0, ~0x1
+       and             s3, s1, t0      /* store destination addr in s3 */
+       b               process_entry
+
+1:
+       /* indirection page, update s0  */
+       andi            s2, s1, IND_INDIRECTION
+       beqz            s2, 1f
+       li.w            t0, ~0x2
+       and             s0, s1, t0
+       b               process_entry
+
+1:
+       /* done page */
+       andi            s2, s1, IND_DONE
+       beqz            s2, 1f
+       b               done
+
+1:
+       /* source page */
+       andi            s2, s1, IND_SOURCE
+       beqz            s2, process_entry
+       li.w            t0, ~0x8
+       and             s1, s1, t0
+       li.w            s5, (1 << _PAGE_SHIFT) / SZREG
+
+copy_word:
+       /* copy page word by word */
+       REG_L           s4, s1, 0
+       REG_S           s4, s3, 0
+       PTR_ADDI        s3, s3, SZREG
+       PTR_ADDI        s1, s1, SZREG
+       LONG_ADDI       s5, s5, -1
+       beqz            s5, process_entry
+       b               copy_word
+       b               process_entry
+
+done:
+       ibar            0
+       dbar            0
+
+       /*
+        * Jump to the new kernel,
+        * make sure the values of a0, a1, a2 and a3 are not changed.
+        */
+       jr              a3
+SYM_CODE_END(relocate_new_kernel)
+
+#ifdef CONFIG_SMP
+/*
+ * Other CPUs should wait until code is relocated and
+ * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
+ */
+SYM_CODE_START(kexec_smp_wait)
+1:     li.w            t0, 0x100                       /* wait for init loop */
+2:     addi.w          t0, t0, -1                      /* limit mailbox access */
+       bnez            t0, 2b
+       li.w            t1, LOONGARCH_IOCSR_MBUF0
+       iocsrrd.w       s0, t1                          /* check PC as an indicator */
+       beqz            s0, 1b
+       iocsrrd.d       s0, t1                          /* get PC via mailbox */
+
+       li.d            t0, CACHE_BASE
+       or              s0, s0, t0                      /* s0 = TO_CACHE(s0) */
+       jr              s0                              /* jump to initial PC */
+SYM_CODE_END(kexec_smp_wait)
+#endif
+
+relocate_new_kernel_end:
+
+SYM_DATA_START(relocate_new_kernel_size)
+       PTR             relocate_new_kernel_end - relocate_new_kernel
+SYM_DATA_END(relocate_new_kernel_size)
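
Background on the format walked above: each indirection entry is a page-aligned physical address carrying one of the IND_* flags from <linux/kexec.h> in its low bits (IND_DESTINATION 0x1, IND_INDIRECTION 0x2, IND_DONE 0x4, IND_SOURCE 0x8), which is why the assembly masks with ~0x1, ~0x2 and ~0x8. An illustrative C rendering of the same walk (a sketch, not code from the series):

#include <linux/kexec.h>
#include <linux/string.h>
#include <asm/page.h>

static void walk_indirection_pages(unsigned long *entry)
{
	unsigned long dest = 0;

	if (!entry)	/* kdump: the image was already copied to its reserved region */
		return;

	for (;;) {
		unsigned long val = *entry++;

		if (val & IND_DESTINATION) {
			dest = val & PAGE_MASK;		/* where the next source page goes */
		} else if (val & IND_INDIRECTION) {
			entry = (unsigned long *)(val & PAGE_MASK);
		} else if (val & IND_DONE) {
			break;
		} else if (val & IND_SOURCE) {
			memcpy((void *)dest, (void *)(val & PAGE_MASK), PAGE_SIZE);
			dest += PAGE_SIZE;	/* destinations advance page by page, as s3 does above */
		}
	}
}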
index 5b49c78..1eb63fa 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/pfn.h>
@@ -185,8 +187,70 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+static void __init arch_reserve_vmcore(void)
+{
+#ifdef CONFIG_PROC_VMCORE
+       u64 i;
+       phys_addr_t start, end;
+
+       if (!is_kdump_kernel())
+               return;
+
+       if (!elfcorehdr_size) {
+               for_each_mem_range(i, &start, &end) {
+                       if (elfcorehdr_addr >= start && elfcorehdr_addr < end) {
+                               /*
+                                * Reserve from the elf core header to the end of
+                                * the memory segment, which should all be kdump
+                                * reserved memory.
+                                */
+                               elfcorehdr_size = end - elfcorehdr_addr;
+                               break;
+                       }
+               }
+       }
+
+       if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+               pr_warn("elfcorehdr is overlapped\n");
+               return;
+       }
+
+       memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+       pr_info("Reserving %llu KiB of memory at 0x%llx for elfcorehdr\n",
+               elfcorehdr_size >> 10, elfcorehdr_addr);
+#endif
+}
+
+static void __init arch_parse_crashkernel(void)
+{
+#ifdef CONFIG_KEXEC
+       int ret;
+       unsigned long long start;
+       unsigned long long total_mem;
+       unsigned long long crash_base, crash_size;
+
+       total_mem = memblock_phys_mem_size();
+       ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
+       if (ret < 0 || crash_size <= 0)
+               return;
+
+       start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size);
+       if (start != crash_base) {
+               pr_warn("Invalid memory region reserved for crash kernel\n");
+               return;
+       }
+
+       crashk_res.start = crash_base;
+       crashk_res.end   = crash_base + crash_size - 1;
+#endif
+}
+
 void __init platform_init(void)
 {
+       arch_reserve_vmcore();
+       arch_parse_crashkernel();
+
 #ifdef CONFIG_ACPI_TABLE_UPGRADE
        acpi_table_upgrade();
 #endif
@@ -289,6 +353,15 @@ static void __init resource_init(void)
                request_resource(res, &data_resource);
                request_resource(res, &bss_resource);
        }
+
+#ifdef CONFIG_KEXEC
+       if (crashk_res.start < crashk_res.end) {
+               insert_resource(&iomem_resource, &crashk_res);
+               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+                       (unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20),
+                       (unsigned long)(crashk_res.start  >> 20));
+       }
+#endif
 }
 
 static int __init reserve_memblock_reserved_regions(void)
@@ -348,10 +421,11 @@ void __init setup_arch(char **cmdline_p)
        init_environ();
        efi_init();
        memblock_init();
+       pagetable_init();
        parse_early_param();
+       reserve_initrd_mem();
 
        platform_init();
-       pagetable_init();
        arch_mem_init(cmdline_p);
 
        resource_init();
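
The region reserved by arch_parse_crashkernel() above comes from the generic crashkernel= boot parameter. An illustrative command-line fragment (not taken from the patch) would be:

	crashkernel=512M@2560M

parse_crashkernel() turns that into crash_size and crash_base, memblock_phys_alloc_range() pins exactly that range, and resource_init() then inserts crashk_res into iomem_resource so the reservation shows up in /proc/iomem.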
index b5fab30..781a4d4 100644 (file)
@@ -240,11 +240,6 @@ void loongson3_smp_finish(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static bool io_master(int cpu)
-{
-       return test_bit(cpu, &loongson_sysconf.cores_io_master);
-}
-
 int loongson3_cpu_disable(void)
 {
        unsigned long flags;
diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c
new file mode 100644 (file)
index 0000000..366baef
--- /dev/null
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LoongArch specific sysrq operations.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/sysrq.h>
+#include <linux/workqueue.h>
+
+#include <asm/cpu-features.h>
+#include <asm/tlb.h>
+
+/*
+ * Dump TLB entries on all CPUs.
+ */
+
+static DEFINE_SPINLOCK(show_lock);
+
+static void sysrq_tlbdump_single(void *dummy)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&show_lock, flags);
+
+       pr_info("CPU%d:\n", smp_processor_id());
+       dump_tlb_regs();
+       pr_info("\n");
+       dump_tlb_all();
+       pr_info("\n");
+
+       spin_unlock_irqrestore(&show_lock, flags);
+}
+
+#ifdef CONFIG_SMP
+static void sysrq_tlbdump_othercpus(struct work_struct *dummy)
+{
+       smp_call_function(sysrq_tlbdump_single, NULL, 0);
+}
+
+static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus);
+#endif
+
+static void sysrq_handle_tlbdump(int key)
+{
+       sysrq_tlbdump_single(NULL);
+#ifdef CONFIG_SMP
+       schedule_work(&sysrq_tlbdump);
+#endif
+}
+
+static struct sysrq_key_op sysrq_tlbdump_op = {
+       .handler        = sysrq_handle_tlbdump,
+       .help_msg       = "show-tlbs(x)",
+       .action_msg     = "Show TLB entries",
+       .enable_mask    = SYSRQ_ENABLE_DUMP,
+};
+
+static int __init loongarch_sysrq_init(void)
+{
+       return register_sysrq_key('x', &sysrq_tlbdump_op);
+}
+arch_initcall(loongarch_sysrq_init);
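
The handler above is bound to the 'x' key, so besides the keyboard chord the dump can be requested through /proc/sysrq-trigger once the sysrq mask permits dump operations. A minimal illustrative trigger (not part of the patch):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sysrq-trigger", "w");

	if (!f)
		return 1;
	fputc('x', f);		/* runs sysrq_handle_tlbdump() on this CPU, then the others */
	fclose(f);
	return 0;
}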
index ab1a75c..caa7cd8 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/node.h>
 #include <linux/nodemask.h>
 #include <linux/percpu.h>
+#include <asm/bootinfo.h>
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
@@ -40,7 +41,7 @@ static int __init topology_init(void)
        for_each_present_cpu(i) {
                struct cpu *c = &per_cpu(cpu_devices, i);
 
-               c->hotpluggable = !!i;
+               c->hotpluggable = !io_master(i);
                ret = register_cpu(c, i);
                if (ret < 0)
                        pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret);
index 5010e95..1a4dce8 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/entry-common.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/extable.h>
 #include <linux/mm.h>
@@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
        oops_exit();
 
+       if (regs && kexec_should_crash(current))
+               crash_kexec(regs);
+
        if (in_interrupt())
                panic("Fatal exception in interrupt");
 
@@ -374,6 +378,29 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
        irqentry_exit(regs, state);
 }
 
+#ifdef CONFIG_GENERIC_BUG
+int is_valid_bugaddr(unsigned long addr)
+{
+       return 1;
+}
+#endif /* CONFIG_GENERIC_BUG */
+
+static void bug_handler(struct pt_regs *regs)
+{
+       switch (report_bug(regs->csr_era, regs)) {
+       case BUG_TRAP_TYPE_BUG:
+       case BUG_TRAP_TYPE_NONE:
+               die_if_kernel("Oops - BUG", regs);
+               force_sig(SIGTRAP);
+               break;
+
+       case BUG_TRAP_TYPE_WARN:
+               /* Skip the BUG instruction and continue */
+               regs->csr_era += LOONGARCH_INSN_SIZE;
+               break;
+       }
+}
+
 asmlinkage void noinstr do_bp(struct pt_regs *regs)
 {
        bool user = user_mode(regs);
@@ -427,8 +454,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 
        switch (bcode) {
        case BRK_BUG:
-               die_if_kernel("Kernel bug detected", regs);
-               force_sig(SIGTRAP);
+               bug_handler(regs);
                break;
        case BRK_DIVZERO:
                die_if_kernel("Break instruction in kernel code", regs);
@@ -620,9 +646,6 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp)
        irqentry_exit(regs, state);
 }
 
-extern void tlb_init(int cpu);
-extern void cache_error_setup(void);
-
 unsigned long eentry;
 unsigned long tlbrentry;
 
index e5890be..b3309a5 100644 (file)
@@ -55,6 +55,10 @@ SECTIONS
 
        EXCEPTION_TABLE(16)
 
+       .got : ALIGN(16) { *(.got) }
+       .plt : ALIGN(16) { *(.plt) }
+       .got.plt : ALIGN(16) { *(.got.plt) }
+
        . = ALIGN(PECOFF_SEGMENT_ALIGN);
        __init_begin = .;
        __inittext_begin = .;
index e8c68dc..72685a4 100644 (file)
@@ -6,8 +6,8 @@
  * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2007 MIPS Technologies, Inc.
  */
+#include <linux/cacheinfo.h>
 #include <linux/export.h>
-#include <linux/fcntl.h>
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
 
+#include <asm/bootinfo.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
-#include <asm/dma.h>
 #include <asm/loongarch.h>
+#include <asm/numa.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 
+void cache_error_setup(void)
+{
+       extern char __weak except_vec_cex;
+       set_merr_handler(0x0, &except_vec_cex, 0x80);
+}
+
 /*
  * LoongArch maintains ICache/DCache coherency by hardware;
  * we just need "ibar" to avoid instruction hazard here.
@@ -34,109 +41,121 @@ void local_flush_icache_range(unsigned long start, unsigned long end)
 }
 EXPORT_SYMBOL(local_flush_icache_range);
 
-void cache_error_setup(void)
-{
-       extern char __weak except_vec_cex;
-       set_merr_handler(0x0, &except_vec_cex, 0x80);
-}
-
-static unsigned long icache_size __read_mostly;
-static unsigned long dcache_size __read_mostly;
-static unsigned long vcache_size __read_mostly;
-static unsigned long scache_size __read_mostly;
-
-static char *way_string[] = { NULL, "direct mapped", "2-way",
-       "3-way", "4-way", "5-way", "6-way", "7-way", "8-way",
-       "9-way", "10-way", "11-way", "12-way",
-       "13-way", "14-way", "15-way", "16-way",
-};
-
-static void probe_pcache(void)
+static void flush_cache_leaf(unsigned int leaf)
 {
-       struct cpuinfo_loongarch *c = &current_cpu_data;
-       unsigned int lsize, sets, ways;
-       unsigned int config;
-
-       config = read_cpucfg(LOONGARCH_CPUCFG17);
-       lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE);
-       sets  = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS);
-       ways  = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1;
-
-       c->icache.linesz = lsize;
-       c->icache.sets = sets;
-       c->icache.ways = ways;
-       icache_size = sets * ways * lsize;
-       c->icache.waysize = icache_size / c->icache.ways;
-
-       config = read_cpucfg(LOONGARCH_CPUCFG18);
-       lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE);
-       sets  = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS);
-       ways  = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1;
-
-       c->dcache.linesz = lsize;
-       c->dcache.sets = sets;
-       c->dcache.ways = ways;
-       dcache_size = sets * ways * lsize;
-       c->dcache.waysize = dcache_size / c->dcache.ways;
-
-       c->options |= LOONGARCH_CPU_PREFETCH;
-
-       pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n",
-               icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz);
-
-       pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n",
-               dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz);
+       int i, j, nr_nodes;
+       uint64_t addr = CSR_DMW0_BASE;
+       struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf;
+
+       nr_nodes = cache_private(cdesc) ? 1 : loongson_sysconf.nr_nodes;
+
+       do {
+               for (i = 0; i < cdesc->sets; i++) {
+                       for (j = 0; j < cdesc->ways; j++) {
+                               flush_cache_line(leaf, addr);
+                               addr++;
+                       }
+
+                       addr -= cdesc->ways;
+                       addr += cdesc->linesz;
+               }
+               addr += (1ULL << NODE_ADDRSPACE_SHIFT);
+       } while (--nr_nodes > 0);
 }
 
-static void probe_vcache(void)
+asmlinkage __visible void __flush_cache_all(void)
 {
-       struct cpuinfo_loongarch *c = &current_cpu_data;
-       unsigned int lsize, sets, ways;
-       unsigned int config;
-
-       config = read_cpucfg(LOONGARCH_CPUCFG19);
-       lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE);
-       sets  = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS);
-       ways  = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1;
-
-       c->vcache.linesz = lsize;
-       c->vcache.sets = sets;
-       c->vcache.ways = ways;
-       vcache_size = lsize * sets * ways;
-       c->vcache.waysize = vcache_size / c->vcache.ways;
-
-       pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
-               vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
+       int leaf;
+       struct cache_desc *cdesc = current_cpu_data.cache_leaves;
+       unsigned int cache_present = current_cpu_data.cache_leaves_present;
+
+       leaf = cache_present - 1;
+       if (cache_inclusive(cdesc + leaf)) {
+               flush_cache_leaf(leaf);
+               return;
+       }
+
+       for (leaf = 0; leaf < cache_present; leaf++)
+               flush_cache_leaf(leaf);
 }
 
-static void probe_scache(void)
-{
-       struct cpuinfo_loongarch *c = &current_cpu_data;
-       unsigned int lsize, sets, ways;
-       unsigned int config;
-
-       config = read_cpucfg(LOONGARCH_CPUCFG20);
-       lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE);
-       sets  = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS);
-       ways  = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1;
-
-       c->scache.linesz = lsize;
-       c->scache.sets = sets;
-       c->scache.ways = ways;
-       /* 4 cores. scaches are shared */
-       scache_size = lsize * sets * ways;
-       c->scache.waysize = scache_size / c->scache.ways;
-
-       pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
-               scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
-}
+#define L1IUPRE                (1 << 0)
+#define L1IUUNIFY      (1 << 1)
+#define L1DPRE         (1 << 2)
+
+#define LXIUPRE                (1 << 0)
+#define LXIUUNIFY      (1 << 1)
+#define LXIUPRIV       (1 << 2)
+#define LXIUINCL       (1 << 3)
+#define LXDPRE         (1 << 4)
+#define LXDPRIV                (1 << 5)
+#define LXDINCL                (1 << 6)
+
+#define populate_cache_properties(cfg0, cdesc, level, leaf)                            \
+do {                                                                                   \
+       unsigned int cfg1;                                                              \
+                                                                                       \
+       cfg1 = read_cpucfg(LOONGARCH_CPUCFG17 + leaf);                                  \
+       if (level == 1) {                                                               \
+               cdesc->flags |= CACHE_PRIVATE;                                          \
+       } else {                                                                        \
+               if (cfg0 & LXIUPRIV)                                                    \
+                       cdesc->flags |= CACHE_PRIVATE;                                  \
+               if (cfg0 & LXIUINCL)                                                    \
+                       cdesc->flags |= CACHE_INCLUSIVE;                                \
+       }                                                                               \
+       cdesc->level = level;                                                           \
+       cdesc->flags |= CACHE_PRESENT;                                                  \
+       cdesc->ways = ((cfg1 & CPUCFG_CACHE_WAYS_M) >> CPUCFG_CACHE_WAYS) + 1;          \
+       cdesc->sets = 1 << ((cfg1 & CPUCFG_CACHE_SETS_M) >> CPUCFG_CACHE_SETS);         \
+       cdesc->linesz = 1 << ((cfg1 & CPUCFG_CACHE_LSIZE_M) >> CPUCFG_CACHE_LSIZE);     \
+       cdesc++; leaf++;                                                                \
+} while (0)
 
 void cpu_cache_init(void)
 {
-       probe_pcache();
-       probe_vcache();
-       probe_scache();
-
+       unsigned int leaf = 0, level = 1;
+       unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16);
+       struct cache_desc *cdesc = current_cpu_data.cache_leaves;
+
+       if (config & L1IUPRE) {
+               if (config & L1IUUNIFY)
+                       cdesc->type = CACHE_TYPE_UNIFIED;
+               else
+                       cdesc->type = CACHE_TYPE_INST;
+               populate_cache_properties(config, cdesc, level, leaf);
+       }
+
+       if (config & L1DPRE) {
+               cdesc->type = CACHE_TYPE_DATA;
+               populate_cache_properties(config, cdesc, level, leaf);
+       }
+
+       config = config >> 3;
+       for (level = 2; level <= CACHE_LEVEL_MAX; level++) {
+               if (!config)
+                       break;
+
+               if (config & LXIUPRE) {
+                       if (config & LXIUUNIFY)
+                               cdesc->type = CACHE_TYPE_UNIFIED;
+                       else
+                               cdesc->type = CACHE_TYPE_INST;
+                       populate_cache_properties(config, cdesc, level, leaf);
+               }
+
+               if (config & LXDPRE) {
+                       cdesc->type = CACHE_TYPE_DATA;
+                       populate_cache_properties(config, cdesc, level, leaf);
+               }
+
+               config = config >> 7;
+       }
+
+       BUG_ON(leaf > CACHE_LEAVES_MAX);
+
+       current_cpu_data.cache_leaves_present = leaf;
+       current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
        shm_align_mask = PAGE_SIZE - 1;
 }
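
With the refactored probe, each detected leaf keeps its geometry in current_cpu_data.cache_leaves[], and the total size of a leaf is simply ways * sets * linesz. A hedged sketch of reproducing the old per-level size printout from those descriptors (illustrative only, field names as used above):

#include <linux/kernel.h>
#include <asm/cpu-info.h>

static void __init print_cache_leaves(void)
{
	unsigned int i;

	for (i = 0; i < current_cpu_data.cache_leaves_present; i++) {
		struct cache_desc *cdesc = &current_cpu_data.cache_leaves[i];
		unsigned long size = (unsigned long)cdesc->ways * cdesc->sets * cdesc->linesz;

		pr_info("L%d cache leaf %u: %lu KiB, %d-way, linesize %d bytes\n",
			cdesc->level, i, size >> 10, cdesc->ways, cdesc->linesz);
	}
}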
 
index 0532ed5..0800617 100644 (file)
@@ -152,6 +152,70 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 #endif
 
+static pte_t *fixmap_pte(unsigned long addr)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_k(addr);
+       p4d = p4d_offset(pgd, addr);
+
+       if (pgd_none(*pgd)) {
+               pud_t *new __maybe_unused;
+
+               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+               pgd_populate(&init_mm, pgd, new);
+#ifndef __PAGETABLE_PUD_FOLDED
+               pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+#endif
+       }
+
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud)) {
+               pmd_t *new __maybe_unused;
+
+               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+               pud_populate(&init_mm, pud, new);
+#ifndef __PAGETABLE_PMD_FOLDED
+               pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+#endif
+       }
+
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd)) {
+               pte_t *new __maybe_unused;
+
+               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+               pmd_populate_kernel(&init_mm, pmd, new);
+       }
+
+       return pte_offset_kernel(pmd, addr);
+}
+
+void __init __set_fixmap(enum fixed_addresses idx,
+                              phys_addr_t phys, pgprot_t flags)
+{
+       unsigned long addr = __fix_to_virt(idx);
+       pte_t *ptep;
+
+       BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
+
+       ptep = fixmap_pte(addr);
+       if (!pte_none(*ptep)) {
+               pte_ERROR(*ptep);
+               return;
+       }
+
+       if (pgprot_val(flags))
+               set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
+       else {
+               pte_clear(&init_mm, addr, ptep);
+               flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+       }
+}
+
 /*
  * Align swapper_pg_dir in to 64K, allows its address to be loaded
  * with a single LUI instruction in the TLB handlers.  If we used
index 381a569..fbe1a48 100644 (file)
@@ -3,6 +3,8 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/export.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 
@@ -116,3 +118,30 @@ int __virt_addr_valid(volatile void *kaddr)
        return pfn_valid(PFN_DOWN(PHYSADDR(kaddr)));
 }
 EXPORT_SYMBOL_GPL(__virt_addr_valid);
+
+/*
+ * You really shouldn't be using read() or write() on /dev/mem.  This might go
+ * away in the future.
+ */
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
+{
+       /*
+        * Check whether addr is covered by a memory region without the
+        * MEMBLOCK_NOMAP attribute, and whether that region covers the
+        * entire range. In theory, this could lead to false negatives
+        * if the range is covered by distinct but adjacent memory regions
+        * that only differ in other attributes. However, few such
+        * attributes have been defined, and it is debatable whether it
+        * follows that /dev/mem read() calls should be able to traverse
+        * such boundaries.
+        */
+       return memblock_is_region_memory(addr, size) && memblock_is_map_memory(addr);
+}
+
+/*
+ * Do not allow /dev/mem mappings beyond the supported physical range.
+ */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+{
+       return !(((pfn << PAGE_SHIFT) + size) & ~(GENMASK_ULL(cpu_pabits, 0)));
+}
index 9818ce1..da3681f 100644 (file)
@@ -258,7 +258,7 @@ extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
 void setup_tlb_handler(int cpu)
 {
        setup_ptwalker();
-       output_pgtable_bits_defines();
+       local_flush_tlb_all();
 
        /* The tlb handlers are generated only once */
        if (cpu == 0) {
@@ -301,6 +301,7 @@ void tlb_init(int cpu)
        write_csr_pagesize(PS_DEFAULT_SIZE);
        write_csr_stlbpgsize(PS_DEFAULT_SIZE);
        write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE);
+
        setup_tlb_handler(cpu);
-       local_flush_tlb_all();
+       output_pgtable_bits_defines();
 }
index 3974333..d8ee8fb 100644 (file)
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 
+#define PTRS_PER_PGD_BITS      (PAGE_SHIFT - 3)
+#define PTRS_PER_PUD_BITS      (PAGE_SHIFT - 3)
+#define PTRS_PER_PMD_BITS      (PAGE_SHIFT - 3)
+#define PTRS_PER_PTE_BITS      (PAGE_SHIFT - 3)
+
        .macro tlb_do_page_fault, write
        SYM_FUNC_START(tlb_do_page_fault_\write)
        SAVE_ALL
-       csrrd   a2, LOONGARCH_CSR_BADV
-       move    a0, sp
-       REG_S   a2, sp, PT_BVADDR
-       li.w    a1, \write
-       la.abs  t0, do_page_fault
-       jirl    ra, t0, 0
+       csrrd           a2, LOONGARCH_CSR_BADV
+       move            a0, sp
+       REG_S           a2, sp, PT_BVADDR
+       li.w            a1, \write
+       la.abs          t0, do_page_fault
+       jirl            ra, t0, 0
        RESTORE_ALL_AND_RET
        SYM_FUNC_END(tlb_do_page_fault_\write)
        .endm
 SYM_FUNC_START(handle_tlb_protect)
        BACKUP_T0T1
        SAVE_ALL
-       move    a0, sp
-       move    a1, zero
-       csrrd   a2, LOONGARCH_CSR_BADV
-       REG_S   a2, sp, PT_BVADDR
-       la.abs  t0, do_page_fault
-       jirl    ra, t0, 0
+       move            a0, sp
+       move            a1, zero
+       csrrd           a2, LOONGARCH_CSR_BADV
+       REG_S           a2, sp, PT_BVADDR
+       la.abs          t0, do_page_fault
+       jirl            ra, t0, 0
        RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_tlb_protect)
 
 SYM_FUNC_START(handle_tlb_load)
-       csrwr   t0, EXCEPTION_KS0
-       csrwr   t1, EXCEPTION_KS1
-       csrwr   ra, EXCEPTION_KS2
+       csrwr           t0, EXCEPTION_KS0
+       csrwr           t1, EXCEPTION_KS1
+       csrwr           ra, EXCEPTION_KS2
 
        /*
         * The vmalloc handling is not in the hotpath.
         */
-       csrrd   t0, LOONGARCH_CSR_BADV
-       bltz    t0, vmalloc_load
-       csrrd   t1, LOONGARCH_CSR_PGDL
+       csrrd           t0, LOONGARCH_CSR_BADV
+       bltz            t0, vmalloc_load
+       csrrd           t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_load:
        /* Get PGD offset in bytes */
-       srli.d  t0, t0, PGDIR_SHIFT
-       andi    t0, t0, (PTRS_PER_PGD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       bstrpick.d      ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+       alsl.d          t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PUD_SHIFT
-       andi    t0, t0, (PTRS_PER_PUD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PMD_SHIFT
-       andi    t0, t0, (PTRS_PER_PMD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
-       ld.d    ra, t1, 0
+       ld.d            ra, t1, 0
 
        /*
         * For huge tlb entries, pmde doesn't contain an address but
         * instead contains the tlb pte. Check the PAGE_HUGE bit and
         * see if we need to jump to huge tlb processing.
         */
-       andi    t0, ra, _PAGE_HUGE
-       bnez    t0, tlb_huge_update_load
+       rotri.d         ra, ra, _PAGE_HUGE_SHIFT + 1
+       bltz            ra, tlb_huge_update_load
 
-       csrrd   t0, LOONGARCH_CSR_BADV
-       srli.d  t0, t0, PAGE_SHIFT
-       andi    t0, t0, (PTRS_PER_PTE - 1)
-       slli.d  t0, t0, _PTE_T_LOG2
-       add.d   t1, ra, t0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       bstrpick.d      t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+       alsl.d          t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_load:
-#endif
-#ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
+       ll.d            t0, t1, 0
 #else
-       ld.d    t0, t1, 0
+       ld.d            t0, t1, 0
 #endif
-       tlbsrch
-
-       srli.d  ra, t0, _PAGE_PRESENT_SHIFT
-       andi    ra, ra, 1
-       beqz    ra, nopage_tlb_load
+       andi            ra, t0, _PAGE_PRESENT
+       beqz            ra, nopage_tlb_load
 
-       ori     t0, t0, _PAGE_VALID
+       ori             t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, smp_pgtable_change_load
+       sc.d            t0, t1, 0
+       beqz            t0, smp_pgtable_change_load
 #else
-       st.d    t0, t1, 0
+       st.d            t0, t1, 0
 #endif
-       ori     t1, t1, 8
-       xori    t1, t1, 8
-       ld.d    t0, t1, 0
-       ld.d    t1, t1, 8
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       csrwr   t1, LOONGARCH_CSR_TLBELO1
+       tlbsrch
+       bstrins.d       t1, zero, 3, 3
+       ld.d            t0, t1, 0
+       ld.d            t1, t1, 8
+       csrwr           t0, LOONGARCH_CSR_TLBELO0
+       csrwr           t1, LOONGARCH_CSR_TLBELO1
        tlbwr
-leave_load:
-       csrrd   t0, EXCEPTION_KS0
-       csrrd   t1, EXCEPTION_KS1
-       csrrd   ra, EXCEPTION_KS2
+
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
        ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_load:
-       la.abs  t1, swapper_pg_dir
-       b       vmalloc_done_load
+       la.abs          t1, swapper_pg_dir
+       b               vmalloc_done_load
 #endif
 
-       /*
-        * This is the entry point when build_tlbchange_handler_head
-        * spots a huge page.
-        */
+       /* This is the entry point of a huge page. */
 tlb_huge_update_load:
 #ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
-#else
-       ld.d    t0, t1, 0
+       ll.d            ra, t1, 0
 #endif
-       srli.d  ra, t0, _PAGE_PRESENT_SHIFT
-       andi    ra, ra, 1
-       beqz    ra, nopage_tlb_load
-       tlbsrch
+       andi            t0, ra, _PAGE_PRESENT
+       beqz            t0, nopage_tlb_load
 
-       ori     t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, tlb_huge_update_load
-       ld.d    t0, t1, 0
+       ori             t0, ra, _PAGE_VALID
+       sc.d            t0, t1, 0
+       beqz            t0, tlb_huge_update_load
+       ori             t0, ra, _PAGE_VALID
 #else
-       st.d    t0, t1, 0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       ori             t0, ra, _PAGE_VALID
+       st.d            t0, t1, 0
 #endif
+       tlbsrch
        addu16i.d       t1, zero, -(CSR_TLBIDX_EHINV >> 16)
        addi.d          ra, t1, 0
        csrxchg         ra, t1, LOONGARCH_CSR_TLBIDX
        tlbwr
 
-       csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
+       csrxchg         zero, t1, LOONGARCH_CSR_TLBIDX
 
        /*
         * A huge PTE describes an area the size of the
@@ -167,21 +154,20 @@ tlb_huge_update_load:
         * address space.
         */
        /* Huge page: Move Global bit */
-       xori    t0, t0, _PAGE_HUGE
-       lu12i.w t1, _PAGE_HGLOBAL >> 12
-       and     t1, t0, t1
-       srli.d  t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-       or      t0, t0, t1
+       xori            t0, t0, _PAGE_HUGE
+       lu12i.w         t1, _PAGE_HGLOBAL >> 12
+       and             t1, t0, t1
+       srli.d          t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+       or              t0, t0, t1
 
-       addi.d  ra, t0, 0
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       addi.d  t0, ra, 0
+       move            ra, t0
+       csrwr           ra, LOONGARCH_CSR_TLBELO0
 
        /* Convert to entrylo1 */
-       addi.d  t1, zero, 1
-       slli.d  t1, t1, (HPAGE_SHIFT - 1)
-       add.d   t0, t0, t1
-       csrwr   t0, LOONGARCH_CSR_TLBELO1
+       addi.d          t1, zero, 1
+       slli.d          t1, t1, (HPAGE_SHIFT - 1)
+       add.d           t0, t0, t1
+       csrwr           t0, LOONGARCH_CSR_TLBELO1
 
        /* Set huge page tlb entry size */
        addu16i.d       t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -194,136 +180,120 @@ tlb_huge_update_load:
        addu16i.d       t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
        csrxchg         t1, t0, LOONGARCH_CSR_TLBIDX
 
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
+       ertn
+
 nopage_tlb_load:
-       dbar    0
-       csrrd   ra, EXCEPTION_KS2
-       la.abs  t0, tlb_do_page_fault_0
-       jr      t0
+       dbar            0
+       csrrd           ra, EXCEPTION_KS2
+       la.abs          t0, tlb_do_page_fault_0
+       jr              t0
 SYM_FUNC_END(handle_tlb_load)
 
 SYM_FUNC_START(handle_tlb_store)
-       csrwr   t0, EXCEPTION_KS0
-       csrwr   t1, EXCEPTION_KS1
-       csrwr   ra, EXCEPTION_KS2
+       csrwr           t0, EXCEPTION_KS0
+       csrwr           t1, EXCEPTION_KS1
+       csrwr           ra, EXCEPTION_KS2
 
        /*
         * The vmalloc handling is not in the hotpath.
         */
-       csrrd   t0, LOONGARCH_CSR_BADV
-       bltz    t0, vmalloc_store
-       csrrd   t1, LOONGARCH_CSR_PGDL
+       csrrd           t0, LOONGARCH_CSR_BADV
+       bltz            t0, vmalloc_store
+       csrrd           t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_store:
        /* Get PGD offset in bytes */
-       srli.d  t0, t0, PGDIR_SHIFT
-       andi    t0, t0, (PTRS_PER_PGD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
-
+       bstrpick.d      ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+       alsl.d          t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PUD_SHIFT
-       andi    t0, t0, (PTRS_PER_PUD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PMD_SHIFT
-       andi    t0, t0, (PTRS_PER_PMD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
-       ld.d    ra, t1, 0
+       ld.d            ra, t1, 0
 
        /*
         * For huge tlb entries, pmde doesn't contain an address but
         * instead contains the tlb pte. Check the PAGE_HUGE bit and
         * see if we need to jump to huge tlb processing.
         */
-       andi    t0, ra, _PAGE_HUGE
-       bnez    t0, tlb_huge_update_store
+       rotri.d         ra, ra, _PAGE_HUGE_SHIFT + 1
+       bltz            ra, tlb_huge_update_store
 
-       csrrd   t0, LOONGARCH_CSR_BADV
-       srli.d  t0, t0, PAGE_SHIFT
-       andi    t0, t0, (PTRS_PER_PTE - 1)
-       slli.d  t0, t0, _PTE_T_LOG2
-       add.d   t1, ra, t0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       bstrpick.d      t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+       alsl.d          t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_store:
-#endif
-#ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
+       ll.d            t0, t1, 0
 #else
-       ld.d    t0, t1, 0
+       ld.d            t0, t1, 0
 #endif
-       tlbsrch
-
-       srli.d  ra, t0, _PAGE_PRESENT_SHIFT
-       andi    ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-       xori    ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-       bnez    ra, nopage_tlb_store
+       andi            ra, t0, _PAGE_PRESENT | _PAGE_WRITE
+       xori            ra, ra, _PAGE_PRESENT | _PAGE_WRITE
+       bnez            ra, nopage_tlb_store
 
-       ori     t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       ori             t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, smp_pgtable_change_store
+       sc.d            t0, t1, 0
+       beqz            t0, smp_pgtable_change_store
 #else
-       st.d    t0, t1, 0
+       st.d            t0, t1, 0
 #endif
-
-       ori     t1, t1, 8
-       xori    t1, t1, 8
-       ld.d    t0, t1, 0
-       ld.d    t1, t1, 8
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       csrwr   t1, LOONGARCH_CSR_TLBELO1
+       tlbsrch
+       bstrins.d       t1, zero, 3, 3
+       ld.d            t0, t1, 0
+       ld.d            t1, t1, 8
+       csrwr           t0, LOONGARCH_CSR_TLBELO0
+       csrwr           t1, LOONGARCH_CSR_TLBELO1
        tlbwr
-leave_store:
-       csrrd   t0, EXCEPTION_KS0
-       csrrd   t1, EXCEPTION_KS1
-       csrrd   ra, EXCEPTION_KS2
+
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
        ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_store:
-       la.abs  t1, swapper_pg_dir
-       b       vmalloc_done_store
+       la.abs          t1, swapper_pg_dir
+       b               vmalloc_done_store
 #endif
 
-       /*
-        * This is the entry point when build_tlbchange_handler_head
-        * spots a huge page.
-        */
+       /* This is the entry point of a huge page. */
 tlb_huge_update_store:
 #ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
-#else
-       ld.d    t0, t1, 0
+       ll.d            ra, t1, 0
 #endif
-       srli.d  ra, t0, _PAGE_PRESENT_SHIFT
-       andi    ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-       xori    ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-       bnez    ra, nopage_tlb_store
-
-       tlbsrch
-       ori     t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       andi            t0, ra, _PAGE_PRESENT | _PAGE_WRITE
+       xori            t0, t0, _PAGE_PRESENT | _PAGE_WRITE
+       bnez            t0, nopage_tlb_store
 
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, tlb_huge_update_store
-       ld.d    t0, t1, 0
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       sc.d            t0, t1, 0
+       beqz            t0, tlb_huge_update_store
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #else
-       st.d    t0, t1, 0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       st.d            t0, t1, 0
 #endif
+       tlbsrch
        addu16i.d       t1, zero, -(CSR_TLBIDX_EHINV >> 16)
        addi.d          ra, t1, 0
        csrxchg         ra, t1, LOONGARCH_CSR_TLBIDX
        tlbwr
 
-       csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
+       csrxchg         zero, t1, LOONGARCH_CSR_TLBIDX
        /*
         * A huge PTE describes an area the size of the
         * configured huge page size. This is twice the
@@ -334,21 +304,20 @@ tlb_huge_update_store:
         * address space.
         */
        /* Huge page: Move Global bit */
-       xori    t0, t0, _PAGE_HUGE
-       lu12i.w t1, _PAGE_HGLOBAL >> 12
-       and     t1, t0, t1
-       srli.d  t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-       or      t0, t0, t1
+       xori            t0, t0, _PAGE_HUGE
+       lu12i.w         t1, _PAGE_HGLOBAL >> 12
+       and             t1, t0, t1
+       srli.d          t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+       or              t0, t0, t1
 
-       addi.d  ra, t0, 0
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       addi.d  t0, ra, 0
+       move            ra, t0
+       csrwr           ra, LOONGARCH_CSR_TLBELO0
 
        /* Convert to entrylo1 */
-       addi.d  t1, zero, 1
-       slli.d  t1, t1, (HPAGE_SHIFT - 1)
-       add.d   t0, t0, t1
-       csrwr   t0, LOONGARCH_CSR_TLBELO1
+       addi.d          t1, zero, 1
+       slli.d          t1, t1, (HPAGE_SHIFT - 1)
+       add.d           t0, t0, t1
+       csrwr           t0, LOONGARCH_CSR_TLBELO1
 
        /* Set huge page tlb entry size */
        addu16i.d       t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -362,126 +331,110 @@ tlb_huge_update_store:
        addu16i.d       t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
        csrxchg         t1, t0, LOONGARCH_CSR_TLBIDX
 
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
+       ertn
+
 nopage_tlb_store:
-       dbar    0
-       csrrd   ra, EXCEPTION_KS2
-       la.abs  t0, tlb_do_page_fault_1
-       jr      t0
+       dbar            0
+       csrrd           ra, EXCEPTION_KS2
+       la.abs          t0, tlb_do_page_fault_1
+       jr              t0
 SYM_FUNC_END(handle_tlb_store)
 
 SYM_FUNC_START(handle_tlb_modify)
-       csrwr   t0, EXCEPTION_KS0
-       csrwr   t1, EXCEPTION_KS1
-       csrwr   ra, EXCEPTION_KS2
+       csrwr           t0, EXCEPTION_KS0
+       csrwr           t1, EXCEPTION_KS1
+       csrwr           ra, EXCEPTION_KS2
 
        /*
         * The vmalloc handling is not in the hotpath.
         */
-       csrrd   t0, LOONGARCH_CSR_BADV
-       bltz    t0, vmalloc_modify
-       csrrd   t1, LOONGARCH_CSR_PGDL
+       csrrd           t0, LOONGARCH_CSR_BADV
+       bltz            t0, vmalloc_modify
+       csrrd           t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_modify:
        /* Get PGD offset in bytes */
-       srli.d  t0, t0, PGDIR_SHIFT
-       andi    t0, t0, (PTRS_PER_PGD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       bstrpick.d      ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+       alsl.d          t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PUD_SHIFT
-       andi    t0, t0, (PTRS_PER_PUD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-       csrrd   t0, LOONGARCH_CSR_BADV
-       ld.d    t1, t1, 0
-       srli.d  t0, t0, PMD_SHIFT
-       andi    t0, t0, (PTRS_PER_PMD - 1)
-       slli.d  t0, t0, 3
-       add.d   t1, t1, t0
+       ld.d            t1, t1, 0
+       bstrpick.d      ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+       alsl.d          t1, ra, t1, 3
 #endif
-       ld.d    ra, t1, 0
+       ld.d            ra, t1, 0
 
        /*
         * For huge tlb entries, pmde doesn't contain an address but
         * instead contains the tlb pte. Check the PAGE_HUGE bit and
         * see if we need to jump to huge tlb processing.
         */
-       andi    t0, ra, _PAGE_HUGE
-       bnez    t0, tlb_huge_update_modify
+       rotri.d         ra, ra, _PAGE_HUGE_SHIFT + 1
+       bltz            ra, tlb_huge_update_modify
 
-       csrrd   t0, LOONGARCH_CSR_BADV
-       srli.d  t0, t0, PAGE_SHIFT
-       andi    t0, t0, (PTRS_PER_PTE - 1)
-       slli.d  t0, t0, _PTE_T_LOG2
-       add.d   t1, ra, t0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       bstrpick.d      t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+       alsl.d          t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_modify:
-#endif
-#ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
+       ll.d            t0, t1, 0
 #else
-       ld.d    t0, t1, 0
+       ld.d            t0, t1, 0
 #endif
-       tlbsrch
-
-       srli.d  ra, t0, _PAGE_WRITE_SHIFT
-       andi    ra, ra, 1
-       beqz    ra, nopage_tlb_modify
+       andi            ra, t0, _PAGE_WRITE
+       beqz            ra, nopage_tlb_modify
 
-       ori     t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       ori             t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, smp_pgtable_change_modify
+       sc.d            t0, t1, 0
+       beqz            t0, smp_pgtable_change_modify
 #else
-       st.d    t0, t1, 0
+       st.d            t0, t1, 0
 #endif
-       ori     t1, t1, 8
-       xori    t1, t1, 8
-       ld.d    t0, t1, 0
-       ld.d    t1, t1, 8
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       csrwr   t1, LOONGARCH_CSR_TLBELO1
+       tlbsrch
+       bstrins.d       t1, zero, 3, 3
+       ld.d            t0, t1, 0
+       ld.d            t1, t1, 8
+       csrwr           t0, LOONGARCH_CSR_TLBELO0
+       csrwr           t1, LOONGARCH_CSR_TLBELO1
        tlbwr
-leave_modify:
-       csrrd   t0, EXCEPTION_KS0
-       csrrd   t1, EXCEPTION_KS1
-       csrrd   ra, EXCEPTION_KS2
+
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
        ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_modify:
-       la.abs  t1, swapper_pg_dir
-       b       vmalloc_done_modify
+       la.abs          t1, swapper_pg_dir
+       b               vmalloc_done_modify
 #endif
 
-       /*
-        * This is the entry point when
-        * build_tlbchange_handler_head spots a huge page.
-        */
+       /* This is the entry point of a huge page. */
 tlb_huge_update_modify:
 #ifdef CONFIG_SMP
-       ll.d    t0, t1, 0
-#else
-       ld.d    t0, t1, 0
+       ll.d            ra, t1, 0
 #endif
-
-       srli.d  ra, t0, _PAGE_WRITE_SHIFT
-       andi    ra, ra, 1
-       beqz    ra, nopage_tlb_modify
-
-       tlbsrch
-       ori     t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       andi            t0, ra, _PAGE_WRITE
+       beqz            t0, nopage_tlb_modify
 
 #ifdef CONFIG_SMP
-       sc.d    t0, t1, 0
-       beqz    t0, tlb_huge_update_modify
-       ld.d    t0, t1, 0
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       sc.d            t0, t1, 0
+       beqz            t0, tlb_huge_update_modify
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #else
-       st.d    t0, t1, 0
+       rotri.d         ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+       ori             t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+       st.d            t0, t1, 0
 #endif
        /*
         * A huge PTE describes an area the size of the
@@ -493,21 +446,20 @@ tlb_huge_update_modify:
         * address space.
         */
        /* Huge page: Move Global bit */
-       xori    t0, t0, _PAGE_HUGE
-       lu12i.w t1, _PAGE_HGLOBAL >> 12
-       and     t1, t0, t1
-       srli.d  t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-       or      t0, t0, t1
+       xori            t0, t0, _PAGE_HUGE
+       lu12i.w         t1, _PAGE_HGLOBAL >> 12
+       and             t1, t0, t1
+       srli.d          t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+       or              t0, t0, t1
 
-       addi.d  ra, t0, 0
-       csrwr   t0, LOONGARCH_CSR_TLBELO0
-       addi.d  t0, ra, 0
+       move            ra, t0
+       csrwr           ra, LOONGARCH_CSR_TLBELO0
 
        /* Convert to entrylo1 */
-       addi.d  t1, zero, 1
-       slli.d  t1, t1, (HPAGE_SHIFT - 1)
-       add.d   t0, t0, t1
-       csrwr   t0, LOONGARCH_CSR_TLBELO1
+       addi.d          t1, zero, 1
+       slli.d          t1, t1, (HPAGE_SHIFT - 1)
+       add.d           t0, t0, t1
+       csrwr           t0, LOONGARCH_CSR_TLBELO1
 
        /* Set huge page tlb entry size */
        addu16i.d       t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -521,26 +473,31 @@ tlb_huge_update_modify:
        addu16i.d       t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
        csrxchg         t1, t0, LOONGARCH_CSR_TLBIDX
 
+       csrrd           t0, EXCEPTION_KS0
+       csrrd           t1, EXCEPTION_KS1
+       csrrd           ra, EXCEPTION_KS2
+       ertn
+
 nopage_tlb_modify:
-       dbar    0
-       csrrd   ra, EXCEPTION_KS2
-       la.abs  t0, tlb_do_page_fault_1
-       jr      t0
+       dbar            0
+       csrrd           ra, EXCEPTION_KS2
+       la.abs          t0, tlb_do_page_fault_1
+       jr              t0
 SYM_FUNC_END(handle_tlb_modify)
 
 SYM_FUNC_START(handle_tlb_refill)
-       csrwr   t0, LOONGARCH_CSR_TLBRSAVE
-       csrrd   t0, LOONGARCH_CSR_PGD
-       lddir   t0, t0, 3
+       csrwr           t0, LOONGARCH_CSR_TLBRSAVE
+       csrrd           t0, LOONGARCH_CSR_PGD
+       lddir           t0, t0, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-       lddir   t0, t0, 2
+       lddir           t0, t0, 2
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-       lddir   t0, t0, 1
+       lddir           t0, t0, 1
 #endif
-       ldpte   t0, 0
-       ldpte   t0, 1
+       ldpte           t0, 0
+       ldpte           t0, 1
        tlbfill
-       csrrd   t0, LOONGARCH_CSR_TLBRSAVE
+       csrrd           t0, LOONGARCH_CSR_TLBRSAVE
        ertn
 SYM_FUNC_END(handle_tlb_refill)
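The refactored handle_tlb_modify above now locates the PTE slot for the faulting address with a single bstrpick.d/alsl.d pair instead of the earlier shift/and/shift/add sequence; the index arithmetic itself is unchanged. A minimal C sketch of that computation, assuming 16 KiB base pages and 8-byte PTEs as in the Loongson-3 defconfig (the constants below are illustrative, not taken from the kernel headers):

	#include <stdint.h>

	#define PAGE_SHIFT	14	/* assumed: 16 KiB base pages        */
	#define PTRS_PER_PTE	2048	/* assumed: 16 KiB page / 8-byte PTE */

	/* bstrpick.d extracts the index bits, alsl.d scales and adds them. */
	static inline uint64_t *pte_slot(uint64_t *pte_table, uint64_t badvaddr)
	{
		return pte_table + ((badvaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
	}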
diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile
new file mode 100644 (file)
index 0000000..1ec12a0
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for arch/loongarch/net
+#
+# Copyright (C) 2022 Loongson Technology Corporation Limited
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
new file mode 100644 (file)
index 0000000..43f0a98
--- /dev/null
@@ -0,0 +1,1179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include "bpf_jit.h"
+
+#define REG_TCC                LOONGARCH_GPR_A6
+#define TCC_SAVED      LOONGARCH_GPR_S5
+
+#define SAVE_RA                BIT(0)
+#define SAVE_TCC       BIT(1)
+
+static const int regmap[] = {
+       /* return value from in-kernel function, and exit value for eBPF program */
+       [BPF_REG_0] = LOONGARCH_GPR_A5,
+       /* arguments from eBPF program to in-kernel function */
+       [BPF_REG_1] = LOONGARCH_GPR_A0,
+       [BPF_REG_2] = LOONGARCH_GPR_A1,
+       [BPF_REG_3] = LOONGARCH_GPR_A2,
+       [BPF_REG_4] = LOONGARCH_GPR_A3,
+       [BPF_REG_5] = LOONGARCH_GPR_A4,
+       /* callee saved registers that in-kernel function will preserve */
+       [BPF_REG_6] = LOONGARCH_GPR_S0,
+       [BPF_REG_7] = LOONGARCH_GPR_S1,
+       [BPF_REG_8] = LOONGARCH_GPR_S2,
+       [BPF_REG_9] = LOONGARCH_GPR_S3,
+       /* read-only frame pointer to access stack */
+       [BPF_REG_FP] = LOONGARCH_GPR_S4,
+       /* temporary register for blinding constants */
+       [BPF_REG_AX] = LOONGARCH_GPR_T0,
+};
+
+static void mark_call(struct jit_ctx *ctx)
+{
+       ctx->flags |= SAVE_RA;
+}
+
+static void mark_tail_call(struct jit_ctx *ctx)
+{
+       ctx->flags |= SAVE_TCC;
+}
+
+static bool seen_call(struct jit_ctx *ctx)
+{
+       return (ctx->flags & SAVE_RA);
+}
+
+static bool seen_tail_call(struct jit_ctx *ctx)
+{
+       return (ctx->flags & SAVE_TCC);
+}
+
+static u8 tail_call_reg(struct jit_ctx *ctx)
+{
+       if (seen_call(ctx))
+               return TCC_SAVED;
+
+       return REG_TCC;
+}
+
+/*
+ * eBPF prog stack layout:
+ *
+ *                                        high
+ * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
+ *                            |           $ra           |
+ *                            +-------------------------+
+ *                            |           $fp           |
+ *                            +-------------------------+
+ *                            |           $s0           |
+ *                            +-------------------------+
+ *                            |           $s1           |
+ *                            +-------------------------+
+ *                            |           $s2           |
+ *                            +-------------------------+
+ *                            |           $s3           |
+ *                            +-------------------------+
+ *                            |           $s4           |
+ *                            +-------------------------+
+ *                            |           $s5           |
+ *                            +-------------------------+ <--BPF_REG_FP
+ *                            |  prog->aux->stack_depth |
+ *                            |        (optional)       |
+ * current $sp -------------> +-------------------------+
+ *                                        low
+ */
+static void build_prologue(struct jit_ctx *ctx)
+{
+       int stack_adjust = 0, store_offset, bpf_stack_adjust;
+
+       bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+
+       /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
+       stack_adjust += sizeof(long) * 8;
+
+       stack_adjust = round_up(stack_adjust, 16);
+       stack_adjust += bpf_stack_adjust;
+
+       /*
+        * First instruction initializes the tail call count (TCC).
+        * On tail call we skip this instruction, and the TCC is
+        * passed in REG_TCC from the caller.
+        */
+       emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+
+       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
+
+       store_offset = stack_adjust - sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
+
+       store_offset -= sizeof(long);
+       emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
+
+       emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
+
+       if (bpf_stack_adjust)
+               emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
+
+       /*
+        * Program contains calls and tail calls, so REG_TCC needs
+        * to be saved across calls.
+        */
+       if (seen_tail_call(ctx) && seen_call(ctx))
+               move_reg(ctx, TCC_SAVED, REG_TCC);
+
+       ctx->stack_size = stack_adjust;
+}
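The prologue reserves a 16-byte-aligned save area for $ra, $fp and $s0-$s5 plus the program's own rounded-up stack depth. A quick check of that arithmetic for an assumed prog->aux->stack_depth of 24 bytes (round_up() below is a simplified stand-in, and sizeof(long) is 8 on 64-bit LoongArch):

	#include <assert.h>

	#define SZLONG		8	/* sizeof(long) on 64-bit LoongArch */
	#define round_up(x, a)	(((x) + (a) - 1) / (a) * (a))

	int main(void)
	{
		int stack_depth = 24;					/* assumed BPF stack usage */
		int bpf_stack_adjust = round_up(stack_depth, 16);	/* -> 32                   */
		int stack_adjust = round_up(8 * SZLONG, 16);		/* ra, fp, s0-s5 -> 64     */

		stack_adjust += bpf_stack_adjust;
		assert(stack_adjust == 96);	/* bytes reserved by the prologue */
		return 0;
	}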
+
+static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
+{
+       int stack_adjust = ctx->stack_size;
+       int load_offset;
+
+       load_offset = stack_adjust - sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
+
+       load_offset -= sizeof(long);
+       emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
+
+       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
+
+       if (!is_tail_call) {
+               /* Set return value */
+               move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
+               /* Return to the caller */
+               emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
+       } else {
+               /*
+                * Call the next bpf prog and skip the first instruction
+                * of TCC initialization.
+                */
+               emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
+       }
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+       __build_epilogue(ctx, false);
+}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+       return true;
+}
+
+/* initialized on the first pass of build_body() */
+static int out_offset = -1;
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
+{
+       int off;
+       u8 tcc = tail_call_reg(ctx);
+       u8 a1 = LOONGARCH_GPR_A1;
+       u8 a2 = LOONGARCH_GPR_A2;
+       u8 t1 = LOONGARCH_GPR_T1;
+       u8 t2 = LOONGARCH_GPR_T2;
+       u8 t3 = LOONGARCH_GPR_T3;
+       const int idx0 = ctx->idx;
+
+#define cur_offset (ctx->idx - idx0)
+#define jmp_offset (out_offset - (cur_offset))
+
+       /*
+        * a0: &ctx
+        * a1: &array
+        * a2: index
+        *
+        * if (index >= array->map.max_entries)
+        *       goto out;
+        */
+       off = offsetof(struct bpf_array, map.max_entries);
+       emit_insn(ctx, ldwu, t1, a1, off);
+       /* bgeu $a2, $t1, jmp_offset */
+       if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
+               goto toofar;
+
+       /*
+        * if (--TCC < 0)
+        *       goto out;
+        */
+       emit_insn(ctx, addid, REG_TCC, tcc, -1);
+       if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+               goto toofar;
+
+       /*
+        * prog = array->ptrs[index];
+        * if (!prog)
+        *       goto out;
+        */
+       emit_insn(ctx, alsld, t2, a2, a1, 2);
+       off = offsetof(struct bpf_array, ptrs);
+       emit_insn(ctx, ldd, t2, t2, off);
+       /* beq $t2, $zero, jmp_offset */
+       if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+               goto toofar;
+
+       /* goto *(prog->bpf_func + 4); */
+       off = offsetof(struct bpf_prog, bpf_func);
+       emit_insn(ctx, ldd, t3, t2, off);
+       __build_epilogue(ctx, true);
+
+       /* out: */
+       if (out_offset == -1)
+               out_offset = cur_offset;
+       if (cur_offset != out_offset) {
+               pr_err_once("tail_call out_offset = %d, expected %d!\n",
+                           cur_offset, out_offset);
+               return -1;
+       }
+
+       return 0;
+
+toofar:
+       pr_info_once("tail_call: jump too far\n");
+       return -1;
+#undef cur_offset
+#undef jmp_offset
+}
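Read together with its inline comments, the sequence emitted above implements the usual eBPF tail-call contract. A stand-alone C model of that contract, using simplified stand-in types rather than the kernel's struct bpf_array and struct bpf_prog:

	#include <stddef.h>

	struct fake_prog  { void (*bpf_func)(void); };
	struct fake_array { unsigned int max_entries; struct fake_prog *ptrs[16]; };

	static int model_tail_call(struct fake_array *array, unsigned int index, int *tcc)
	{
		if (index >= array->max_entries)	/* bgeu $a2, $t1, out       */
			return 0;
		if (--(*tcc) < 0)			/* addi.d -1, then blt, out */
			return 0;
		if (array->ptrs[index] == NULL)		/* beq  $t2, $zero, out     */
			return 0;

		/*
		 * The JIT jumps to prog->bpf_func + 4, skipping the TCC-init
		 * instruction; a plain call stands in for that transfer here.
		 */
		array->ptrs[index]->bpf_func();
		return 1;
	}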
+
+static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+       const u8 t1 = LOONGARCH_GPR_T1;
+       const u8 t2 = LOONGARCH_GPR_T2;
+       const u8 t3 = LOONGARCH_GPR_T3;
+       const u8 src = regmap[insn->src_reg];
+       const u8 dst = regmap[insn->dst_reg];
+       const s16 off = insn->off;
+       const s32 imm = insn->imm;
+       const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
+
+       move_imm(ctx, t1, off, false);
+       emit_insn(ctx, addd, t1, dst, t1);
+       move_reg(ctx, t3, src);
+
+       switch (imm) {
+       /* lock *(size *)(dst + off) <op>= src */
+       case BPF_ADD:
+               if (isdw)
+                       emit_insn(ctx, amaddd, t2, t1, src);
+               else
+                       emit_insn(ctx, amaddw, t2, t1, src);
+               break;
+       case BPF_AND:
+               if (isdw)
+                       emit_insn(ctx, amandd, t2, t1, src);
+               else
+                       emit_insn(ctx, amandw, t2, t1, src);
+               break;
+       case BPF_OR:
+               if (isdw)
+                       emit_insn(ctx, amord, t2, t1, src);
+               else
+                       emit_insn(ctx, amorw, t2, t1, src);
+               break;
+       case BPF_XOR:
+               if (isdw)
+                       emit_insn(ctx, amxord, t2, t1, src);
+               else
+                       emit_insn(ctx, amxorw, t2, t1, src);
+               break;
+       /* src = atomic_fetch_<op>(dst + off, src) */
+       case BPF_ADD | BPF_FETCH:
+               if (isdw) {
+                       emit_insn(ctx, amaddd, src, t1, t3);
+               } else {
+                       emit_insn(ctx, amaddw, src, t1, t3);
+                       emit_zext_32(ctx, src, true);
+               }
+               break;
+       case BPF_AND | BPF_FETCH:
+               if (isdw) {
+                       emit_insn(ctx, amandd, src, t1, t3);
+               } else {
+                       emit_insn(ctx, amandw, src, t1, t3);
+                       emit_zext_32(ctx, src, true);
+               }
+               break;
+       case BPF_OR | BPF_FETCH:
+               if (isdw) {
+                       emit_insn(ctx, amord, src, t1, t3);
+               } else {
+                       emit_insn(ctx, amorw, src, t1, t3);
+                       emit_zext_32(ctx, src, true);
+               }
+               break;
+       case BPF_XOR | BPF_FETCH:
+               if (isdw) {
+                       emit_insn(ctx, amxord, src, t1, t3);
+               } else {
+                       emit_insn(ctx, amxorw, src, t1, t3);
+                       emit_zext_32(ctx, src, true);
+               }
+               break;
+       /* src = atomic_xchg(dst + off, src); */
+       case BPF_XCHG:
+               if (isdw) {
+                       emit_insn(ctx, amswapd, src, t1, t3);
+               } else {
+                       emit_insn(ctx, amswapw, src, t1, t3);
+                       emit_zext_32(ctx, src, true);
+               }
+               break;
+       /* r0 = atomic_cmpxchg(dst + off, r0, src); */
+       case BPF_CMPXCHG:
+               u8 r0 = regmap[BPF_REG_0];
+
+               move_reg(ctx, t2, r0);
+               if (isdw) {
+                       emit_insn(ctx, lld, r0, t1, 0);
+                       emit_insn(ctx, bne, t2, r0, 4);
+                       move_reg(ctx, t3, src);
+                       emit_insn(ctx, scd, t3, t1, 0);
+                       emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
+               } else {
+                       emit_insn(ctx, llw, r0, t1, 0);
+                       emit_zext_32(ctx, t2, true);
+                       emit_zext_32(ctx, r0, true);
+                       emit_insn(ctx, bne, t2, r0, 4);
+                       move_reg(ctx, t3, src);
+                       emit_insn(ctx, scw, t3, t1, 0);
+                       emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
+                       emit_zext_32(ctx, r0, true);
+               }
+               break;
+       }
+}
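There is a LoongArch AM* instruction for each fetch-and-op case above, but BPF_CMPXCHG has no single-instruction equivalent and is open-coded as an ll/sc retry loop. A single-threaded C model of the semantics that loop provides (the atomicity itself comes from the ll.d/sc.d pair and cannot be expressed in plain C):

	#include <stdint.h>

	/* r0 = cmpxchg(dst + off, r0, src): returns the value found at addr. */
	static uint64_t model_cmpxchg64(uint64_t *addr, uint64_t old, uint64_t new)
	{
		uint64_t loaded = *addr;	/* ll.d $r0, $t1, 0              */

		if (loaded == old)		/* bne  $t2, $r0, done           */
			*addr = new;		/* sc.d $t3, $t1, 0 (retried)    */

		return loaded;			/* BPF r0 receives the old value */
	}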
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+       return cond == BPF_JSGT || cond == BPF_JSLT ||
+              cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
+{
+       const bool is32 = BPF_CLASS(insn->code) == BPF_ALU ||
+                         BPF_CLASS(insn->code) == BPF_JMP32;
+       const u8 code = insn->code;
+       const u8 cond = BPF_OP(code);
+       const u8 t1 = LOONGARCH_GPR_T1;
+       const u8 t2 = LOONGARCH_GPR_T2;
+       const u8 src = regmap[insn->src_reg];
+       const u8 dst = regmap[insn->dst_reg];
+       const s16 off = insn->off;
+       const s32 imm = insn->imm;
+       int jmp_offset;
+       int i = insn - ctx->prog->insnsi;
+
+       switch (code) {
+       /* dst = src */
+       case BPF_ALU | BPF_MOV | BPF_X:
+       case BPF_ALU64 | BPF_MOV | BPF_X:
+               move_reg(ctx, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = imm */
+       case BPF_ALU | BPF_MOV | BPF_K:
+       case BPF_ALU64 | BPF_MOV | BPF_K:
+               move_imm(ctx, dst, imm, is32);
+               break;
+
+       /* dst = dst + src */
+       case BPF_ALU | BPF_ADD | BPF_X:
+       case BPF_ALU64 | BPF_ADD | BPF_X:
+               emit_insn(ctx, addd, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst + imm */
+       case BPF_ALU | BPF_ADD | BPF_K:
+       case BPF_ALU64 | BPF_ADD | BPF_K:
+               if (is_signed_imm12(imm)) {
+                       emit_insn(ctx, addid, dst, dst, imm);
+               } else {
+                       move_imm(ctx, t1, imm, is32);
+                       emit_insn(ctx, addd, dst, dst, t1);
+               }
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst - src */
+       case BPF_ALU | BPF_SUB | BPF_X:
+       case BPF_ALU64 | BPF_SUB | BPF_X:
+               emit_insn(ctx, subd, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst - imm */
+       case BPF_ALU | BPF_SUB | BPF_K:
+       case BPF_ALU64 | BPF_SUB | BPF_K:
+               if (is_signed_imm12(-imm)) {
+                       emit_insn(ctx, addid, dst, dst, -imm);
+               } else {
+                       move_imm(ctx, t1, imm, is32);
+                       emit_insn(ctx, subd, dst, dst, t1);
+               }
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst * src */
+       case BPF_ALU | BPF_MUL | BPF_X:
+       case BPF_ALU64 | BPF_MUL | BPF_X:
+               emit_insn(ctx, muld, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst * imm */
+       case BPF_ALU | BPF_MUL | BPF_K:
+       case BPF_ALU64 | BPF_MUL | BPF_K:
+               move_imm(ctx, t1, imm, is32);
+               emit_insn(ctx, muld, dst, dst, t1);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst / src */
+       case BPF_ALU | BPF_DIV | BPF_X:
+       case BPF_ALU64 | BPF_DIV | BPF_X:
+               emit_zext_32(ctx, dst, is32);
+               move_reg(ctx, t1, src);
+               emit_zext_32(ctx, t1, is32);
+               emit_insn(ctx, divdu, dst, dst, t1);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst / imm */
+       case BPF_ALU | BPF_DIV | BPF_K:
+       case BPF_ALU64 | BPF_DIV | BPF_K:
+               move_imm(ctx, t1, imm, is32);
+               emit_zext_32(ctx, dst, is32);
+               emit_insn(ctx, divdu, dst, dst, t1);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst % src */
+       case BPF_ALU | BPF_MOD | BPF_X:
+       case BPF_ALU64 | BPF_MOD | BPF_X:
+               emit_zext_32(ctx, dst, is32);
+               move_reg(ctx, t1, src);
+               emit_zext_32(ctx, t1, is32);
+               emit_insn(ctx, moddu, dst, dst, t1);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst % imm */
+       case BPF_ALU | BPF_MOD | BPF_K:
+       case BPF_ALU64 | BPF_MOD | BPF_K:
+               move_imm(ctx, t1, imm, is32);
+               emit_zext_32(ctx, dst, is32);
+               emit_insn(ctx, moddu, dst, dst, t1);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = -dst */
+       case BPF_ALU | BPF_NEG:
+       case BPF_ALU64 | BPF_NEG:
+               move_imm(ctx, t1, imm, is32);
+               emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst & src */
+       case BPF_ALU | BPF_AND | BPF_X:
+       case BPF_ALU64 | BPF_AND | BPF_X:
+               emit_insn(ctx, and, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst & imm */
+       case BPF_ALU | BPF_AND | BPF_K:
+       case BPF_ALU64 | BPF_AND | BPF_K:
+               if (is_unsigned_imm12(imm)) {
+                       emit_insn(ctx, andi, dst, dst, imm);
+               } else {
+                       move_imm(ctx, t1, imm, is32);
+                       emit_insn(ctx, and, dst, dst, t1);
+               }
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst | src */
+       case BPF_ALU | BPF_OR | BPF_X:
+       case BPF_ALU64 | BPF_OR | BPF_X:
+               emit_insn(ctx, or, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst | imm */
+       case BPF_ALU | BPF_OR | BPF_K:
+       case BPF_ALU64 | BPF_OR | BPF_K:
+               if (is_unsigned_imm12(imm)) {
+                       emit_insn(ctx, ori, dst, dst, imm);
+               } else {
+                       move_imm(ctx, t1, imm, is32);
+                       emit_insn(ctx, or, dst, dst, t1);
+               }
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst ^ src */
+       case BPF_ALU | BPF_XOR | BPF_X:
+       case BPF_ALU64 | BPF_XOR | BPF_X:
+               emit_insn(ctx, xor, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst ^ imm */
+       case BPF_ALU | BPF_XOR | BPF_K:
+       case BPF_ALU64 | BPF_XOR | BPF_K:
+               if (is_unsigned_imm12(imm)) {
+                       emit_insn(ctx, xori, dst, dst, imm);
+               } else {
+                       move_imm(ctx, t1, imm, is32);
+                       emit_insn(ctx, xor, dst, dst, t1);
+               }
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       /* dst = dst << src (logical) */
+       case BPF_ALU | BPF_LSH | BPF_X:
+               emit_insn(ctx, sllw, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_LSH | BPF_X:
+               emit_insn(ctx, slld, dst, dst, src);
+               break;
+
+       /* dst = dst << imm (logical) */
+       case BPF_ALU | BPF_LSH | BPF_K:
+               emit_insn(ctx, slliw, dst, dst, imm);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_LSH | BPF_K:
+               emit_insn(ctx, sllid, dst, dst, imm);
+               break;
+
+       /* dst = dst >> src (logical) */
+       case BPF_ALU | BPF_RSH | BPF_X:
+               emit_insn(ctx, srlw, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_RSH | BPF_X:
+               emit_insn(ctx, srld, dst, dst, src);
+               break;
+
+       /* dst = dst >> imm (logical) */
+       case BPF_ALU | BPF_RSH | BPF_K:
+               emit_insn(ctx, srliw, dst, dst, imm);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_RSH | BPF_K:
+               emit_insn(ctx, srlid, dst, dst, imm);
+               break;
+
+       /* dst = dst >> src (arithmetic) */
+       case BPF_ALU | BPF_ARSH | BPF_X:
+               emit_insn(ctx, sraw, dst, dst, src);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_ARSH | BPF_X:
+               emit_insn(ctx, srad, dst, dst, src);
+               break;
+
+       /* dst = dst >> imm (arithmetic) */
+       case BPF_ALU | BPF_ARSH | BPF_K:
+               emit_insn(ctx, sraiw, dst, dst, imm);
+               emit_zext_32(ctx, dst, is32);
+               break;
+
+       case BPF_ALU64 | BPF_ARSH | BPF_K:
+               emit_insn(ctx, sraid, dst, dst, imm);
+               break;
+
+       /* dst = BSWAP##imm(dst) */
+       case BPF_ALU | BPF_END | BPF_FROM_LE:
+               switch (imm) {
+               case 16:
+                       /* zero-extend 16 bits into 64 bits */
+                       emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
+                       break;
+               case 32:
+                       /* zero-extend 32 bits into 64 bits */
+                       emit_zext_32(ctx, dst, is32);
+                       break;
+               case 64:
+                       /* do nothing */
+                       break;
+               }
+               break;
+
+       case BPF_ALU | BPF_END | BPF_FROM_BE:
+               switch (imm) {
+               case 16:
+                       emit_insn(ctx, revb2h, dst, dst);
+                       /* zero-extend 16 bits into 64 bits */
+                       emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
+                       break;
+               case 32:
+                       emit_insn(ctx, revb2w, dst, dst);
+                       /* zero-extend 32 bits into 64 bits */
+                       emit_zext_32(ctx, dst, is32);
+                       break;
+               case 64:
+                       emit_insn(ctx, revbd, dst, dst);
+                       break;
+               }
+               break;
+
+       /* PC += off if dst cond src */
+       case BPF_JMP | BPF_JEQ | BPF_X:
+       case BPF_JMP | BPF_JNE | BPF_X:
+       case BPF_JMP | BPF_JGT | BPF_X:
+       case BPF_JMP | BPF_JGE | BPF_X:
+       case BPF_JMP | BPF_JLT | BPF_X:
+       case BPF_JMP | BPF_JLE | BPF_X:
+       case BPF_JMP | BPF_JSGT | BPF_X:
+       case BPF_JMP | BPF_JSGE | BPF_X:
+       case BPF_JMP | BPF_JSLT | BPF_X:
+       case BPF_JMP | BPF_JSLE | BPF_X:
+       case BPF_JMP32 | BPF_JEQ | BPF_X:
+       case BPF_JMP32 | BPF_JNE | BPF_X:
+       case BPF_JMP32 | BPF_JGT | BPF_X:
+       case BPF_JMP32 | BPF_JGE | BPF_X:
+       case BPF_JMP32 | BPF_JLT | BPF_X:
+       case BPF_JMP32 | BPF_JLE | BPF_X:
+       case BPF_JMP32 | BPF_JSGT | BPF_X:
+       case BPF_JMP32 | BPF_JSGE | BPF_X:
+       case BPF_JMP32 | BPF_JSLT | BPF_X:
+       case BPF_JMP32 | BPF_JSLE | BPF_X:
+               jmp_offset = bpf2la_offset(i, off, ctx);
+               move_reg(ctx, t1, dst);
+               move_reg(ctx, t2, src);
+               if (is_signed_bpf_cond(BPF_OP(code))) {
+                       emit_sext_32(ctx, t1, is32);
+                       emit_sext_32(ctx, t2, is32);
+               } else {
+                       emit_zext_32(ctx, t1, is32);
+                       emit_zext_32(ctx, t2, is32);
+               }
+               if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* PC += off if dst cond imm */
+       case BPF_JMP | BPF_JEQ | BPF_K:
+       case BPF_JMP | BPF_JNE | BPF_K:
+       case BPF_JMP | BPF_JGT | BPF_K:
+       case BPF_JMP | BPF_JGE | BPF_K:
+       case BPF_JMP | BPF_JLT | BPF_K:
+       case BPF_JMP | BPF_JLE | BPF_K:
+       case BPF_JMP | BPF_JSGT | BPF_K:
+       case BPF_JMP | BPF_JSGE | BPF_K:
+       case BPF_JMP | BPF_JSLT | BPF_K:
+       case BPF_JMP | BPF_JSLE | BPF_K:
+       case BPF_JMP32 | BPF_JEQ | BPF_K:
+       case BPF_JMP32 | BPF_JNE | BPF_K:
+       case BPF_JMP32 | BPF_JGT | BPF_K:
+       case BPF_JMP32 | BPF_JGE | BPF_K:
+       case BPF_JMP32 | BPF_JLT | BPF_K:
+       case BPF_JMP32 | BPF_JLE | BPF_K:
+       case BPF_JMP32 | BPF_JSGT | BPF_K:
+       case BPF_JMP32 | BPF_JSGE | BPF_K:
+       case BPF_JMP32 | BPF_JSLT | BPF_K:
+       case BPF_JMP32 | BPF_JSLE | BPF_K:
+               u8 t7 = -1;
+               jmp_offset = bpf2la_offset(i, off, ctx);
+               if (imm) {
+                       move_imm(ctx, t1, imm, false);
+                       t7 = t1;
+               } else {
+                       /* If imm is 0, simply use zero register. */
+                       t7 = LOONGARCH_GPR_ZERO;
+               }
+               move_reg(ctx, t2, dst);
+               if (is_signed_bpf_cond(BPF_OP(code))) {
+                       emit_sext_32(ctx, t7, is32);
+                       emit_sext_32(ctx, t2, is32);
+               } else {
+                       emit_zext_32(ctx, t7, is32);
+                       emit_zext_32(ctx, t2, is32);
+               }
+               if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* PC += off if dst & src */
+       case BPF_JMP | BPF_JSET | BPF_X:
+       case BPF_JMP32 | BPF_JSET | BPF_X:
+               jmp_offset = bpf2la_offset(i, off, ctx);
+               emit_insn(ctx, and, t1, dst, src);
+               emit_zext_32(ctx, t1, is32);
+               if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* PC += off if dst & imm */
+       case BPF_JMP | BPF_JSET | BPF_K:
+       case BPF_JMP32 | BPF_JSET | BPF_K:
+               jmp_offset = bpf2la_offset(i, off, ctx);
+               move_imm(ctx, t1, imm, is32);
+               emit_insn(ctx, and, t1, dst, t1);
+               emit_zext_32(ctx, t1, is32);
+               if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* PC += off */
+       case BPF_JMP | BPF_JA:
+               jmp_offset = bpf2la_offset(i, off, ctx);
+               if (emit_uncond_jmp(ctx, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* function call */
+       case BPF_JMP | BPF_CALL:
+               int ret;
+               u64 func_addr;
+               bool func_addr_fixed;
+
+               mark_call(ctx);
+               ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+                                           &func_addr, &func_addr_fixed);
+               if (ret < 0)
+                       return ret;
+
+               move_imm(ctx, t1, func_addr, is32);
+               emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
+               move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
+               break;
+
+       /* tail call */
+       case BPF_JMP | BPF_TAIL_CALL:
+               mark_tail_call(ctx);
+               if (emit_bpf_tail_call(ctx) < 0)
+                       return -EINVAL;
+               break;
+
+       /* function return */
+       case BPF_JMP | BPF_EXIT:
+               emit_sext_32(ctx, regmap[BPF_REG_0], true);
+
+               if (i == ctx->prog->len - 1)
+                       break;
+
+               jmp_offset = epilogue_offset(ctx);
+               if (emit_uncond_jmp(ctx, jmp_offset) < 0)
+                       goto toofar;
+               break;
+
+       /* dst = imm64 */
+       case BPF_LD | BPF_IMM | BPF_DW:
+               u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
+
+               move_imm(ctx, dst, imm64, is32);
+               return 1;
+
+       /* dst = *(size *)(src + off) */
+       case BPF_LDX | BPF_MEM | BPF_B:
+       case BPF_LDX | BPF_MEM | BPF_H:
+       case BPF_LDX | BPF_MEM | BPF_W:
+       case BPF_LDX | BPF_MEM | BPF_DW:
+               switch (BPF_SIZE(code)) {
+               case BPF_B:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, ldbu, dst, src, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, ldxbu, dst, src, t1);
+                       }
+                       break;
+               case BPF_H:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, ldhu, dst, src, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, ldxhu, dst, src, t1);
+                       }
+                       break;
+               case BPF_W:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, ldwu, dst, src, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, ldptrw, dst, src, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, ldxwu, dst, src, t1);
+                       }
+                       break;
+               case BPF_DW:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, ldd, dst, src, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, ldptrd, dst, src, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, ldxd, dst, src, t1);
+                       }
+                       break;
+               }
+               break;
+
+       /* *(size *)(dst + off) = imm */
+       case BPF_ST | BPF_MEM | BPF_B:
+       case BPF_ST | BPF_MEM | BPF_H:
+       case BPF_ST | BPF_MEM | BPF_W:
+       case BPF_ST | BPF_MEM | BPF_DW:
+               switch (BPF_SIZE(code)) {
+               case BPF_B:
+                       move_imm(ctx, t1, imm, is32);
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, stb, t1, dst, off);
+                       } else {
+                               move_imm(ctx, t2, off, is32);
+                               emit_insn(ctx, stxb, t1, dst, t2);
+                       }
+                       break;
+               case BPF_H:
+                       move_imm(ctx, t1, imm, is32);
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, sth, t1, dst, off);
+                       } else {
+                               move_imm(ctx, t2, off, is32);
+                               emit_insn(ctx, stxh, t1, dst, t2);
+                       }
+                       break;
+               case BPF_W:
+                       move_imm(ctx, t1, imm, is32);
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, stw, t1, dst, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, stptrw, t1, dst, off);
+                       } else {
+                               move_imm(ctx, t2, off, is32);
+                               emit_insn(ctx, stxw, t1, dst, t2);
+                       }
+                       break;
+               case BPF_DW:
+                       move_imm(ctx, t1, imm, is32);
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, std, t1, dst, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, stptrd, t1, dst, off);
+                       } else {
+                               move_imm(ctx, t2, off, is32);
+                               emit_insn(ctx, stxd, t1, dst, t2);
+                       }
+                       break;
+               }
+               break;
+
+       /* *(size *)(dst + off) = src */
+       case BPF_STX | BPF_MEM | BPF_B:
+       case BPF_STX | BPF_MEM | BPF_H:
+       case BPF_STX | BPF_MEM | BPF_W:
+       case BPF_STX | BPF_MEM | BPF_DW:
+               switch (BPF_SIZE(code)) {
+               case BPF_B:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, stb, src, dst, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, stxb, src, dst, t1);
+                       }
+                       break;
+               case BPF_H:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, sth, src, dst, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, stxh, src, dst, t1);
+                       }
+                       break;
+               case BPF_W:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, stw, src, dst, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, stptrw, src, dst, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, stxw, src, dst, t1);
+                       }
+                       break;
+               case BPF_DW:
+                       if (is_signed_imm12(off)) {
+                               emit_insn(ctx, std, src, dst, off);
+                       } else if (is_signed_imm14(off)) {
+                               emit_insn(ctx, stptrd, src, dst, off);
+                       } else {
+                               move_imm(ctx, t1, off, is32);
+                               emit_insn(ctx, stxd, src, dst, t1);
+                       }
+                       break;
+               }
+               break;
+
+       case BPF_STX | BPF_ATOMIC | BPF_W:
+       case BPF_STX | BPF_ATOMIC | BPF_DW:
+               emit_atomic(insn, ctx);
+               break;
+
+       default:
+               pr_err("bpf_jit: unknown opcode %02x\n", code);
+               return -EINVAL;
+       }
+
+       return 0;
+
+toofar:
+       pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
+       return -E2BIG;
+}
+
+static int build_body(struct jit_ctx *ctx, bool extra_pass)
+{
+       int i;
+       const struct bpf_prog *prog = ctx->prog;
+
+       for (i = 0; i < prog->len; i++) {
+               const struct bpf_insn *insn = &prog->insnsi[i];
+               int ret;
+
+               if (ctx->image == NULL)
+                       ctx->offset[i] = ctx->idx;
+
+               ret = build_insn(insn, ctx, extra_pass);
+               if (ret > 0) {
+                       i++;
+                       if (ctx->image == NULL)
+                               ctx->offset[i] = ctx->idx;
+                       continue;
+               }
+               if (ret)
+                       return ret;
+       }
+
+       if (ctx->image == NULL)
+               ctx->offset[i] = ctx->idx;
+
+       return 0;
+}
+
+/* Fill space with break instructions */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+       u32 *ptr;
+
+       /* We are guaranteed to have aligned memory */
+       for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+               *ptr++ = INSN_BREAK;
+}
+
+static int validate_code(struct jit_ctx *ctx)
+{
+       int i;
+       union loongarch_instruction insn;
+
+       for (i = 0; i < ctx->idx; i++) {
+               insn = ctx->image[i];
+               /* Check INSN_BREAK */
+               if (insn.word == INSN_BREAK)
+                       return -1;
+       }
+
+       return 0;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+       bool tmp_blinded = false, extra_pass = false;
+       u8 *image_ptr;
+       int image_size;
+       struct jit_ctx ctx;
+       struct jit_data *jit_data;
+       struct bpf_binary_header *header;
+       struct bpf_prog *tmp, *orig_prog = prog;
+
+       /*
+        * If BPF JIT was not enabled then we must fall back to
+        * the interpreter.
+        */
+       if (!prog->jit_requested)
+               return orig_prog;
+
+       tmp = bpf_jit_blind_constants(prog);
+       /*
+        * If blinding was requested and we failed during blinding,
+        * we must fall back to the interpreter. Otherwise, we save
+        * the new JITed code.
+        */
+       if (IS_ERR(tmp))
+               return orig_prog;
+
+       if (tmp != prog) {
+               tmp_blinded = true;
+               prog = tmp;
+       }
+
+       jit_data = prog->aux->jit_data;
+       if (!jit_data) {
+               jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+               if (!jit_data) {
+                       prog = orig_prog;
+                       goto out;
+               }
+               prog->aux->jit_data = jit_data;
+       }
+       if (jit_data->ctx.offset) {
+               ctx = jit_data->ctx;
+               image_ptr = jit_data->image;
+               header = jit_data->header;
+               extra_pass = true;
+               image_size = sizeof(u32) * ctx.idx;
+               goto skip_init_ctx;
+       }
+
+       memset(&ctx, 0, sizeof(ctx));
+       ctx.prog = prog;
+
+       ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
+       if (ctx.offset == NULL) {
+               prog = orig_prog;
+               goto out_offset;
+       }
+
+       /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
+       build_prologue(&ctx);
+       if (build_body(&ctx, extra_pass)) {
+               prog = orig_prog;
+               goto out_offset;
+       }
+       ctx.epilogue_offset = ctx.idx;
+       build_epilogue(&ctx);
+
+       /*
+        * Now we know the actual image size.
+        * As each LoongArch instruction is 32 bits long, we translate
+        * the number of JITed instructions into the size required to
+        * store the JITed code.
+        */
+       image_size = sizeof(u32) * ctx.idx;
+       /* Now we know the size of the structure to make */
+       header = bpf_jit_binary_alloc(image_size, &image_ptr,
+                                     sizeof(u32), jit_fill_hole);
+       if (header == NULL) {
+               prog = orig_prog;
+               goto out_offset;
+       }
+
+       /* 2. Now, the actual pass to generate final JIT code */
+       ctx.image = (union loongarch_instruction *)image_ptr;
+
+skip_init_ctx:
+       ctx.idx = 0;
+
+       build_prologue(&ctx);
+       if (build_body(&ctx, extra_pass)) {
+               bpf_jit_binary_free(header);
+               prog = orig_prog;
+               goto out_offset;
+       }
+       build_epilogue(&ctx);
+
+       /* 3. Extra pass to validate JITed code */
+       if (validate_code(&ctx)) {
+               bpf_jit_binary_free(header);
+               prog = orig_prog;
+               goto out_offset;
+       }
+
+       /* And we're done */
+       if (bpf_jit_enable > 1)
+               bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+
+       /* Update the icache */
+       flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
+
+       if (!prog->is_func || extra_pass) {
+               if (extra_pass && ctx.idx != jit_data->ctx.idx) {
+                       pr_err_once("multi-func JIT bug %d != %d\n",
+                                   ctx.idx, jit_data->ctx.idx);
+                       bpf_jit_binary_free(header);
+                       prog->bpf_func = NULL;
+                       prog->jited = 0;
+                       prog->jited_len = 0;
+                       goto out_offset;
+               }
+               bpf_jit_binary_lock_ro(header);
+       } else {
+               jit_data->ctx = ctx;
+               jit_data->image = image_ptr;
+               jit_data->header = header;
+       }
+       prog->jited = 1;
+       prog->jited_len = image_size;
+       prog->bpf_func = (void *)ctx.image;
+
+       if (!prog->is_func || extra_pass) {
+               int i;
+
+               /* offset[prog->len] is the size of program */
+               for (i = 0; i <= prog->len; i++)
+                       ctx.offset[i] *= LOONGARCH_INSN_SIZE;
+               bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
+
+out_offset:
+               kvfree(ctx.offset);
+               kfree(jit_data);
+               prog->aux->jit_data = NULL;
+       }
+
+out:
+       if (tmp_blinded)
+               bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
+
+       out_offset = -1;
+
+       return prog;
+}
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
new file mode 100644 (file)
index 0000000..e665ddb
--- /dev/null
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+
+struct jit_ctx {
+       const struct bpf_prog *prog;
+       unsigned int idx;
+       unsigned int flags;
+       unsigned int epilogue_offset;
+       u32 *offset;
+       union loongarch_instruction *image;
+       u32 stack_size;
+};
+
+struct jit_data {
+       struct bpf_binary_header *header;
+       u8 *image;
+       struct jit_ctx ctx;
+};
+
+#define emit_insn(ctx, func, ...)                                              \
+do {                                                                           \
+       if (ctx->image != NULL) {                                               \
+               union loongarch_instruction *insn = &ctx->image[ctx->idx];      \
+               emit_##func(insn, ##__VA_ARGS__);                               \
+       }                                                                       \
+       ctx->idx++;                                                             \
+} while (0)
+
+#define is_signed_imm12(val)   signed_imm_check(val, 12)
+#define is_signed_imm14(val)   signed_imm_check(val, 14)
+#define is_signed_imm16(val)   signed_imm_check(val, 16)
+#define is_signed_imm26(val)   signed_imm_check(val, 26)
+#define is_signed_imm32(val)   signed_imm_check(val, 32)
+#define is_signed_imm52(val)   signed_imm_check(val, 52)
+#define is_unsigned_imm12(val) unsigned_imm_check(val, 12)
+
+static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx)
+{
+       /* BPF JMP offset is relative to the next instruction */
+       bpf_insn++;
+       /*
+        * LoongArch branch instructions, however, encode the offset
+        * relative to the branch instruction itself, so we must
+        * subtract 1 from the instruction offset.
+        */
+       return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1));
+}
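BPF encodes jump offsets relative to the instruction after the jump, while LoongArch branches encode the offset relative to the branch itself, so the helper indexes from bpf_insn + 1 and then adds one native slot back. A worked check with made-up native indices (the numbers are illustrative only):

	#include <assert.h>

	int main(void)
	{
		/* ctx->offset[] maps a BPF insn index to its first native index. */
		int offset_next   = 12;		/* ctx->offset[i + 1]       */
		int offset_target = 20;		/* ctx->offset[i + 1 + off] */

		/* The branch is the last native insn of BPF insn i: index 11. */
		int jmp_offset = offset_target - (offset_next - 1);

		assert(jmp_offset == 9);	/* nine native insns ahead of the branch */
		return 0;
	}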
+
+static inline int epilogue_offset(const struct jit_ctx *ctx)
+{
+       int from = ctx->idx;
+       int to = ctx->epilogue_offset;
+
+       return (to - from);
+}
+
+/* Zero-extend 32 bits into 64 bits */
+static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32)
+{
+       if (!is32)
+               return;
+
+       emit_insn(ctx, lu32id, reg, 0);
+}
+
+/* Signed-extend 32 bits into 64 bits */
+static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32)
+{
+       if (!is32)
+               return;
+
+       emit_insn(ctx, addiw, reg, reg, 0);
+}
+
+static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32)
+{
+       long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31;
+
+       /* or rd, $zero, $zero */
+       if (imm == 0) {
+               emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO);
+               return;
+       }
+
+       /* addiw rd, $zero, imm_11_0 */
+       if (is_signed_imm12(imm)) {
+               emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm);
+               goto zext;
+       }
+
+       /* ori rd, $zero, imm_11_0 */
+       if (is_unsigned_imm12(imm)) {
+               emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm);
+               goto zext;
+       }
+
+       /* lu52id rd, $zero, imm_63_52 */
+       imm_63_52 = (imm >> 52) & 0xfff;
+       imm_51_0 = imm & 0xfffffffffffff;
+       if (imm_63_52 != 0 && imm_51_0 == 0) {
+               emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, imm_63_52);
+               return;
+       }
+
+       /* lu12iw rd, imm_31_12 */
+       imm_31_12 = (imm >> 12) & 0xfffff;
+       emit_insn(ctx, lu12iw, rd, imm_31_12);
+
+       /* ori rd, rd, imm_11_0 */
+       imm_11_0 = imm & 0xfff;
+       if (imm_11_0 != 0)
+               emit_insn(ctx, ori, rd, rd, imm_11_0);
+
+       if (!is_signed_imm32(imm)) {
+               if (imm_51_0 != 0) {
+                       /*
+                        * If bit[51:31] is all 0 or all 1,
+                        * it means bit[51:32] is already sign-extended by
+                        * lu12iw, so there is no need to call lu32id for a
+                        * new fill operation.
+                        */
+                       imm_51_31 = (imm >> 31) & 0x1fffff;
+                       if (imm_51_31 != 0 && imm_51_31 != 0x1fffff) {
+                               /* lu32id rd, imm_51_32 */
+                               imm_51_32 = (imm >> 32) & 0xfffff;
+                               emit_insn(ctx, lu32id, rd, imm_51_32);
+                       }
+               }
+
+               /* lu52id rd, rd, imm_63_52 */
+               if (!is_signed_imm52(imm))
+                       emit_insn(ctx, lu52id, rd, rd, imm_63_52);
+       }
+
+zext:
+       emit_zext_32(ctx, rd, is32);
+}
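move_imm() builds a 64-bit constant from at most four immediate fields, consumed by ori, lu12i.w, lu32i.d and lu52i.d. A small check of the field split for an arbitrary sample constant, chosen only so that all four parts are non-trivial:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t imm = 0x1234567890abcdefLL;

		int64_t imm_11_0  = imm & 0xfff;		/* ori     */
		int64_t imm_31_12 = (imm >> 12) & 0xfffff;	/* lu12i.w */
		int64_t imm_51_32 = (imm >> 32) & 0xfffff;	/* lu32i.d */
		int64_t imm_63_52 = (imm >> 52) & 0xfff;	/* lu52i.d */

		assert(imm_11_0  == 0xdef);
		assert(imm_31_12 == 0x90abc);
		assert(imm_51_32 == 0x45678);
		assert(imm_63_52 == 0x123);
		return 0;
	}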
+
+static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd,
+                           enum loongarch_gpr rj)
+{
+       emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO);
+}
+
+static inline int invert_jmp_cond(u8 cond)
+{
+       switch (cond) {
+       case BPF_JEQ:
+               return BPF_JNE;
+       case BPF_JNE:
+       case BPF_JSET:
+               return BPF_JEQ;
+       case BPF_JGT:
+               return BPF_JLE;
+       case BPF_JGE:
+               return BPF_JLT;
+       case BPF_JLT:
+               return BPF_JGE;
+       case BPF_JLE:
+               return BPF_JGT;
+       case BPF_JSGT:
+               return BPF_JSLE;
+       case BPF_JSGE:
+               return BPF_JSLT;
+       case BPF_JSLT:
+               return BPF_JSGE;
+       case BPF_JSLE:
+               return BPF_JSGT;
+       }
+       return -1;
+}
+
+static inline void cond_jmp_offset(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+                                  enum loongarch_gpr rd, int jmp_offset)
+{
+       switch (cond) {
+       case BPF_JEQ:
+               /* PC += jmp_offset if rj == rd */
+               emit_insn(ctx, beq, rj, rd, jmp_offset);
+               return;
+       case BPF_JNE:
+       case BPF_JSET:
+               /* PC += jmp_offset if rj != rd */
+               emit_insn(ctx, bne, rj, rd, jmp_offset);
+               return;
+       case BPF_JGT:
+               /* PC += jmp_offset if rj > rd (unsigned) */
+               emit_insn(ctx, bltu, rd, rj, jmp_offset);
+               return;
+       case BPF_JLT:
+               /* PC += jmp_offset if rj < rd (unsigned) */
+               emit_insn(ctx, bltu, rj, rd, jmp_offset);
+               return;
+       case BPF_JGE:
+               /* PC += jmp_offset if rj >= rd (unsigned) */
+               emit_insn(ctx, bgeu, rj, rd, jmp_offset);
+               return;
+       case BPF_JLE:
+               /* PC += jmp_offset if rj <= rd (unsigned) */
+               emit_insn(ctx, bgeu, rd, rj, jmp_offset);
+               return;
+       case BPF_JSGT:
+               /* PC += jmp_offset if rj > rd (signed) */
+               emit_insn(ctx, blt, rd, rj, jmp_offset);
+               return;
+       case BPF_JSLT:
+               /* PC += jmp_offset if rj < rd (signed) */
+               emit_insn(ctx, blt, rj, rd, jmp_offset);
+               return;
+       case BPF_JSGE:
+               /* PC += jmp_offset if rj >= rd (signed) */
+               emit_insn(ctx, bge, rj, rd, jmp_offset);
+               return;
+       case BPF_JSLE:
+               /* PC += jmp_offset if rj <= rd (signed) */
+               emit_insn(ctx, bge, rd, rj, jmp_offset);
+               return;
+       }
+}
+
+static inline void cond_jmp_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+                                  enum loongarch_gpr rd, int jmp_offset)
+{
+       cond = invert_jmp_cond(cond);
+       cond_jmp_offset(ctx, cond, rj, rd, 2);
+       emit_insn(ctx, b, jmp_offset);
+}
+
+static inline void uncond_jmp_offs26(struct jit_ctx *ctx, int jmp_offset)
+{
+       emit_insn(ctx, b, jmp_offset);
+}
+
+static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+                               enum loongarch_gpr rd, int jmp_offset)
+{
+       /*
+        * A large PC-relative jump offset may overflow the immediate field
+        * of the native conditional branch instruction, which would force a
+        * conversion to an absolute jump; that jump sequence is particularly
+        * nasty. For now, use cond_jmp_offs26() directly to keep it simple.
+        * True far-branch support could be added later, but branch relaxation
+        * needs more than two passes to converge and the resulting code would
+        * be hard to follow; it is not clear it is worth the extra pain, so
+        * just leave it as it is for the sake of readability.
+        */
+       if (is_signed_imm26(jmp_offset)) {
+               cond_jmp_offs26(ctx, cond, rj, rd, jmp_offset);
+               return 0;
+       }
+
+       return -EINVAL;
+}
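When the conditional branch cannot reach the target on its own, cond_jmp_offs26() inverts the condition and hops over an unconditional b, which has 26 bits of reach. A plain-C analogue of that rewrite, with made-up operand values:

	#include <stdio.h>

	int main(void)
	{
		int rj = 3, rd = 5;

		/* near form:  if (rj < rd) goto target; */

		/* far form emitted by cond_jmp_offs26(): */
		if (!(rj < rd))		/* inverted condition, skips the b below */
			goto fallthrough;
		goto target;		/* b with the full 26-bit offset         */

	target:
		printf("branch taken\n");
		return 0;
	fallthrough:
		printf("branch not taken\n");
		return 0;
	}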
+
+static inline int emit_uncond_jmp(struct jit_ctx *ctx, int jmp_offset)
+{
+       if (is_signed_imm26(jmp_offset)) {
+               uncond_jmp_offs26(ctx, jmp_offset);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+                                   enum loongarch_gpr rd, int jmp_offset)
+{
+       if (is_signed_imm16(jmp_offset)) {
+               cond_jmp_offset(ctx, cond, rj, rd, jmp_offset);
+               return 0;
+       }
+
+       return -EINVAL;
+}
index bf92148..8235ec9 100644 (file)
@@ -83,6 +83,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci)
 }
 
 /*
+ * Create a PCI config space window
+ *  - reserve mem region
+ *  - alloc struct pci_config_window with space for all mappings
+ *  - ioremap the config space
+ */
+static struct pci_config_window *arch_pci_ecam_create(struct device *dev,
+               struct resource *cfgres, struct resource *busr, const struct pci_ecam_ops *ops)
+{
+       int bsz, bus_range, err;
+       struct resource *conflict;
+       struct pci_config_window *cfg;
+
+       if (busr->start > busr->end)
+               return ERR_PTR(-EINVAL);
+
+       cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+       if (!cfg)
+               return ERR_PTR(-ENOMEM);
+
+       cfg->parent = dev;
+       cfg->ops = ops;
+       cfg->busr.start = busr->start;
+       cfg->busr.end = busr->end;
+       cfg->busr.flags = IORESOURCE_BUS;
+       bus_range = resource_size(cfgres) >> ops->bus_shift;
+
+       bsz = 1 << ops->bus_shift;
+
+       cfg->res.start = cfgres->start;
+       cfg->res.end = cfgres->end;
+       cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       cfg->res.name = "PCI ECAM";
+
+       conflict = request_resource_conflict(&iomem_resource, &cfg->res);
+       if (conflict) {
+               err = -EBUSY;
+               dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n",
+                       &cfg->res, conflict->name, conflict);
+               goto err_exit;
+       }
+
+       cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz);
+       if (!cfg->win)
+               goto err_exit_iomap;
+
+       if (ops->init) {
+               err = ops->init(cfg);
+               if (err)
+                       goto err_exit;
+       }
+       dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr);
+
+       return cfg;
+
+err_exit_iomap:
+       err = -ENOMEM;
+       dev_err(dev, "ECAM ioremap failed\n");
+err_exit:
+       pci_ecam_free(cfg);
+       return ERR_PTR(err);
+}
+
+/*
  * Lookup the bus range for the domain in MCFG, and set up config space
  * mapping.
  */
@@ -106,11 +169,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
 
        bus_shift = ecam_ops->bus_shift ? : 20;
 
-       cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift);
-       cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1;
-       cfgres.flags = IORESOURCE_MEM;
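+       /*
+        * bus_shift == 20 is the standard ECAM layout (1 MiB of config space
+        * per bus), which the generic pci_ecam_create() handles; other shifts
+        * go through the Loongson-specific arch_pci_ecam_create() above.
+        */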
+       if (bus_shift == 20)
+               cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
+       else {
+               cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift);
+               cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1;
+               cfgres.end |= BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16);
+               cfgres.flags = IORESOURCE_MEM;
+               cfg = arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
+       }
 
-       cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
        if (IS_ERR(cfg)) {
                dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg));
                return NULL;
index e9b7c34..2726639 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
+#include <asm/cacheflush.h>
 #include <asm/loongson.h>
 
 #define PCI_DEVICE_ID_LOONGSON_HOST     0x7a00
@@ -45,12 +46,10 @@ static int __init pcibios_init(void)
        unsigned int lsize;
 
        /*
-        * Set PCI cacheline size to that of the highest level in the
+        * Set PCI cacheline size to that of the last level in the
         * cache hierarchy.
         */
-       lsize = cpu_dcache_line_size();
-       lsize = cpu_vcache_line_size() ? : lsize;
-       lsize = cpu_scache_line_size() ? : lsize;
+       lsize = cpu_last_level_cache_line_size();
 
        BUG_ON(!lsize);
 
index b437847..dbd3277 100644 (file)
@@ -3,6 +3,8 @@ if MIPS
 source "drivers/platform/mips/Kconfig"
 endif
 
+source "drivers/platform/loongarch/Kconfig"
+
 source "drivers/platform/goldfish/Kconfig"
 
 source "drivers/platform/chrome/Kconfig"
index 4de08ef..4164017 100644 (file)
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_X86)              += x86/
+obj-$(CONFIG_LOONGARCH)                += loongarch/
 obj-$(CONFIG_MELLANOX_PLATFORM)        += mellanox/
 obj-$(CONFIG_MIPS)             += mips/
 obj-$(CONFIG_OLPC_EC)          += olpc/
diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig
new file mode 100644 (file)
index 0000000..5633e4d
--- /dev/null
@@ -0,0 +1,31 @@
+#
+# LoongArch Platform Specific Drivers
+#
+
+menuconfig LOONGARCH_PLATFORM_DEVICES
+       bool "LoongArch Platform Specific Device Drivers"
+       default y
+       depends on LOONGARCH
+       help
+         Say Y here to get to see options for device drivers of various
+         LoongArch platforms, including vendor-specific laptop/desktop
+         extension and hardware monitor drivers. This option itself does
+         not add any kernel code.
+
+         If you say N, all options in this submenu will be skipped and disabled.
+
+if LOONGARCH_PLATFORM_DEVICES
+
+config LOONGSON_LAPTOP
+       tristate "Generic Loongson-3 Laptop Driver"
+       depends on ACPI
+       depends on BACKLIGHT_CLASS_DEVICE
+       depends on INPUT
+       depends on MACH_LOONGSON64
+       select ACPI_VIDEO
+       select INPUT_SPARSEKMAP
+       default y
+       help
+         Generic driver for ACPI-based Loongson-3 family laptops.
+
+endif # LOONGARCH_PLATFORM_DEVICES
diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile
new file mode 100644 (file)
index 0000000..f43ab03
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_LOONGSON_LAPTOP) += loongson-laptop.o
diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
new file mode 100644 (file)
index 0000000..f0166ad
--- /dev/null
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Generic Loongson processor based LAPTOP/ALL-IN-ONE driver
+ *
+ *  Jianmin Lv <lvjianmin@loongson.cn>
+ *  Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <acpi/video.h>
+
+/* 1. Driver-wide structs and misc. variables */
+
+/* ACPI HIDs */
+#define LOONGSON_ACPI_EC_HID   "PNP0C09"
+#define LOONGSON_ACPI_HKEY_HID "LOON0000"
+
+#define ACPI_LAPTOP_NAME "loongson-laptop"
+#define ACPI_LAPTOP_ACPI_EVENT_PREFIX "loongson"
+
+#define MAX_ACPI_ARGS                  3
+#define GENERIC_HOTKEY_MAP_MAX         64
+
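+/* Hotkey events carry the event type in bits 15:12 and the scan code in bits 11:0 */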
+#define GENERIC_EVENT_TYPE_OFF         12
+#define GENERIC_EVENT_TYPE_MASK                0xF000
+#define GENERIC_EVENT_CODE_MASK                0x0FFF
+
+struct generic_sub_driver {
+       u32 type;
+       char *name;
+       acpi_handle *handle;
+       struct acpi_device *device;
+       struct platform_driver *driver;
+       int (*init)(struct generic_sub_driver *sub_driver);
+       void (*notify)(struct generic_sub_driver *sub_driver, u32 event);
+       u8 acpi_notify_installed;
+};
+
+static u32 input_device_registered;
+static struct input_dev *generic_inputdev;
+
+static acpi_handle hotkey_handle;
+static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX];
+
+int loongson_laptop_turn_on_backlight(void);
+int loongson_laptop_turn_off_backlight(void);
+static int loongson_laptop_backlight_update(struct backlight_device *bd);
+
+/* 2. ACPI Helpers and device model */
+
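+/*
+ * Evaluate an ACPI method with a printf-like format string: an optional
+ * leading 'q' suppresses error messages, the next character selects the
+ * result type ('d' = integer, 'v' = void), and any further characters
+ * describe the arguments (currently only 'd' = int). Returns non-zero on
+ * success, zero on failure.
+ */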
+static int acpi_evalf(acpi_handle handle, int *res, char *method, char *fmt, ...)
+{
+       char res_type;
+       char *fmt0 = fmt;
+       va_list ap;
+       int success, quiet;
+       acpi_status status;
+       struct acpi_object_list params;
+       struct acpi_buffer result, *resultp;
+       union acpi_object in_objs[MAX_ACPI_ARGS], out_obj;
+
+       if (!*fmt) {
+               pr_err("acpi_evalf() called with empty format\n");
+               return 0;
+       }
+
+       if (*fmt == 'q') {
+               quiet = 1;
+               fmt++;
+       } else
+               quiet = 0;
+
+       res_type = *(fmt++);
+
+       params.count = 0;
+       params.pointer = &in_objs[0];
+
+       va_start(ap, fmt);
+       while (*fmt) {
+               char c = *(fmt++);
+               switch (c) {
+               case 'd':       /* int */
+                       in_objs[params.count].integer.value = va_arg(ap, int);
+                       in_objs[params.count++].type = ACPI_TYPE_INTEGER;
+                       break;
+                       /* add more types as needed */
+               default:
+                       pr_err("acpi_evalf() called with invalid format character '%c'\n", c);
+                       va_end(ap);
+                       return 0;
+               }
+       }
+       va_end(ap);
+
+       if (res_type != 'v') {
+               result.length = sizeof(out_obj);
+               result.pointer = &out_obj;
+               resultp = &result;
+       } else
+               resultp = NULL;
+
+       status = acpi_evaluate_object(handle, method, &params, resultp);
+
+       switch (res_type) {
+       case 'd':               /* int */
+               success = (status == AE_OK && out_obj.type == ACPI_TYPE_INTEGER);
+               if (success && res)
+                       *res = out_obj.integer.value;
+               break;
+       case 'v':               /* void */
+               success = status == AE_OK;
+               break;
+               /* add more types as needed */
+       default:
+               pr_err("acpi_evalf() called with invalid format character '%c'\n", res_type);
+               return 0;
+       }
+
+       if (!success && !quiet)
+               pr_err("acpi_evalf(%s, %s, ...) failed: %s\n",
+                      method, fmt0, acpi_format_exception(status));
+
+       return success;
+}
+
+static int hotkey_status_get(int *status)
+{
+       if (!acpi_evalf(hotkey_handle, status, "GSWS", "d"))
+               return -EIO;
+
+       return 0;
+}
+
+static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data)
+{
+       struct generic_sub_driver *sub_driver = data;
+
+       if (!sub_driver || !sub_driver->notify)
+               return;
+       sub_driver->notify(sub_driver, event);
+}
+
+static int __init setup_acpi_notify(struct generic_sub_driver *sub_driver)
+{
+       acpi_status status;
+
+       if (!*sub_driver->handle)
+               return 0;
+
+       sub_driver->device = acpi_fetch_acpi_dev(*sub_driver->handle);
+       if (!sub_driver->device) {
+               pr_err("acpi_fetch_acpi_dev(%s) failed\n", sub_driver->name);
+               return -ENODEV;
+       }
+
+       sub_driver->device->driver_data = sub_driver;
+       sprintf(acpi_device_class(sub_driver->device), "%s/%s",
+               ACPI_LAPTOP_ACPI_EVENT_PREFIX, sub_driver->name);
+
+       status = acpi_install_notify_handler(*sub_driver->handle,
+                       sub_driver->type, dispatch_acpi_notify, sub_driver);
+       if (ACPI_FAILURE(status)) {
+               if (status == AE_ALREADY_EXISTS) {
+                       pr_notice("Another device driver is already handling %s events\n",
+                                 sub_driver->name);
+               } else {
+                       pr_err("acpi_install_notify_handler(%s) failed: %s\n",
+                              sub_driver->name, acpi_format_exception(status));
+               }
+               return -ENODEV;
+       }
+       sub_driver->acpi_notify_installed = 1;
+
+       return 0;
+}
+
+static int loongson_hotkey_suspend(struct device *dev)
+{
+       return 0;
+}
+
+static int loongson_hotkey_resume(struct device *dev)
+{
+       int status = 0;
+       struct key_entry ke;
+       struct backlight_device *bd;
+
+       /*
+        * We can handle the lid event only if the firmware supports the SW_LID
+        * event model. This accounts for development boards without an EC.
+        */
+       if (test_bit(SW_LID, generic_inputdev->swbit)) {
+               if (hotkey_status_get(&status) < 0)
+                       return -EIO;
+               /*
+                * The input device's sw element records the last lid status.
+                * When the system is woken by another wake-up source, a lid
+                * event is still reported; checking the SW_LID bit in the sw
+                * element avoids acting on it in that case.
+                *
+                * The input subsystem drops a lid event when its value matches
+                * the last recorded lid status, so the driver does not report
+                * repeated events.
+                *
+                * The lid status is normally 0, but hardware exceptions are
+                * possible, so the lid status is confirmed as well.
+                */
+               if (test_bit(SW_LID, generic_inputdev->sw) && !(status & (1 << SW_LID))) {
+                       ke.type = KE_SW;
+                       ke.sw.value = (u8)status;
+                       ke.sw.code = SW_LID;
+                       sparse_keymap_report_entry(generic_inputdev, &ke, 1, true);
+               }
+       }
+
+       bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM);
+       if (bd) {
+               loongson_laptop_backlight_update(bd) ?
+               pr_warn("Loongson_backlight: resume brightness failed\n") :
+               pr_info("Loongson_backlight: resume brightness %d\n", bd->props.brightness);
+       }
+
+       return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(loongson_hotkey_pm,
+               loongson_hotkey_suspend, loongson_hotkey_resume);
+
+static int loongson_hotkey_probe(struct platform_device *pdev)
+{
+       hotkey_handle = ACPI_HANDLE(&pdev->dev);
+
+       if (!hotkey_handle)
+               return -ENODEV;
+
+       return 0;
+}
+
+static const struct acpi_device_id loongson_device_ids[] = {
+       {LOONGSON_ACPI_HKEY_HID, 0},
+       {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, loongson_device_ids);
+
+static struct platform_driver loongson_hotkey_driver = {
+       .probe          = loongson_hotkey_probe,
+       .driver         = {
+               .name   = "loongson-hotkey",
+               .owner  = THIS_MODULE,
+               .pm     = pm_ptr(&loongson_hotkey_pm),
+               .acpi_match_table = loongson_device_ids,
+       },
+};
+
+static int hotkey_map(void)
+{
+       u32 index;
+       acpi_status status;
+       struct acpi_buffer buf;
+       union acpi_object *pack;
+
+       buf.length = ACPI_ALLOCATE_BUFFER;
+       status = acpi_evaluate_object_typed(hotkey_handle, "KMAP", NULL, &buf, ACPI_TYPE_PACKAGE);
+       if (status != AE_OK) {
+               pr_err("ACPI exception: %s\n", acpi_format_exception(status));
+               return -1;
+       }
+       pack = buf.pointer;
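+       /* Each KMAP entry is a package of { event type, scan code, keycode } */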
+       for (index = 0; index < pack->package.count; index++) {
+               union acpi_object *element, *sub_pack;
+
+               sub_pack = &pack->package.elements[index];
+
+               element = &sub_pack->package.elements[0];
+               hotkey_keycode_map[index].type = element->integer.value;
+               element = &sub_pack->package.elements[1];
+               hotkey_keycode_map[index].code = element->integer.value;
+               element = &sub_pack->package.elements[2];
+               hotkey_keycode_map[index].keycode = element->integer.value;
+       }
+
+       return 0;
+}
+
+static int hotkey_backlight_set(bool enable)
+{
+       if (!acpi_evalf(hotkey_handle, NULL, "VCBL", "vd", enable ? 1 : 0))
+               return -EIO;
+
+       return 0;
+}
+
+static int ec_get_brightness(void)
+{
+       int status = 0;
+
+       if (!hotkey_handle)
+               return -ENXIO;
+
+       if (!acpi_evalf(hotkey_handle, &status, "ECBG", "d"))
+               return -EIO;
+
+       return status;
+}
+
+static int ec_set_brightness(int level)
+{
+       int ret = 0;
+
+       if (!hotkey_handle)
+               return -ENXIO;
+
+       if (!acpi_evalf(hotkey_handle, NULL, "ECBS", "vd", level))
+               ret = -EIO;
+
+       return ret;
+}
+
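+/*
+ * Clamp the requested brightness level to the range the EC reports; ECLL and
+ * ECSL are assumed here to return the highest and lowest usable levels.
+ */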
+static int ec_backlight_level(u8 level)
+{
+       int status = 0;
+
+       if (!hotkey_handle)
+               return -ENXIO;
+
+       if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d"))
+               return -EIO;
+
+       if ((status < 0) || (level > status))
+               return status;
+
+       if (!acpi_evalf(hotkey_handle, &status, "ECSL", "d"))
+               return -EIO;
+
+       if ((status < 0) || (level < status))
+               return status;
+
+       return level;
+}
+
+static int loongson_laptop_backlight_update(struct backlight_device *bd)
+{
+       int lvl = ec_backlight_level(bd->props.brightness);
+
+       if (lvl < 0)
+               return -EIO;
+       if (ec_set_brightness(lvl))
+               return -EIO;
+
+       return 0;
+}
+
+static int loongson_laptop_get_brightness(struct backlight_device *bd)
+{
+       int level;
+
+       level = ec_get_brightness();
+       if (level < 0)
+               return -EIO;
+
+       return level;
+}
+
+static const struct backlight_ops backlight_laptop_ops = {
+       .update_status = loongson_laptop_backlight_update,
+       .get_brightness = loongson_laptop_get_brightness,
+};
+
+static int laptop_backlight_register(void)
+{
+       int status = 0;
+       struct backlight_properties props;
+
+       memset(&props, 0, sizeof(props));
+
+       if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d"))
+               return -EIO;
+
+       props.brightness = 1;
+       props.max_brightness = status;
+       props.type = BACKLIGHT_PLATFORM;
+
+       backlight_device_register("loongson_laptop",
+                               NULL, NULL, &backlight_laptop_ops, &props);
+
+       return 0;
+}
+
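+/* Switch the backlight on/off through the firmware-global \BLSW method */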
+int loongson_laptop_turn_on_backlight(void)
+{
+       int status;
+       union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+       struct acpi_object_list args = { 1, &arg0 };
+
+       arg0.integer.value = 1;
+       status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL);
+       if (ACPI_FAILURE(status)) {
+               pr_info("Loongson lvds error: 0x%x\n", status);
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+int loongson_laptop_turn_off_backlight(void)
+{
+       int status;
+       union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+       struct acpi_object_list args = { 1, &arg0 };
+
+       arg0.integer.value = 0;
+       status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL);
+       if (ACPI_FAILURE(status)) {
+               pr_info("Loongson lvds error: 0x%x\n", status);
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static int __init event_init(struct generic_sub_driver *sub_driver)
+{
+       int ret;
+
+       ret = hotkey_map();
+       if (ret < 0) {
+               pr_err("Failed to parse keymap from DSDT\n");
+               return ret;
+       }
+
+       ret = sparse_keymap_setup(generic_inputdev, hotkey_keycode_map, NULL);
+       if (ret < 0) {
+               pr_err("Failed to setup input device keymap\n");
+               input_free_device(generic_inputdev);
+
+               return ret;
+       }
+
+       /*
+        * This hotkey driver handles backlight events when
+        * acpi_video_get_backlight_type() returns acpi_backlight_vendor.
+        */
+       if (acpi_video_get_backlight_type() == acpi_backlight_vendor)
+               hotkey_backlight_set(true);
+       else
+               hotkey_backlight_set(false);
+
+       pr_info("ACPI: enabling firmware HKEY event interface...\n");
+
+       return ret;
+}
+
+static void event_notify(struct generic_sub_driver *sub_driver, u32 event)
+{
+       int type, scan_code;
+       struct key_entry *ke = NULL;
+
+       scan_code = event & GENERIC_EVENT_CODE_MASK;
+       type = (event & GENERIC_EVENT_TYPE_MASK) >> GENERIC_EVENT_TYPE_OFF;
+       ke = sparse_keymap_entry_from_scancode(generic_inputdev, scan_code);
+       if (ke) {
+               if (type == KE_SW) {
+                       int status = 0;
+
+                       if (hotkey_status_get(&status) < 0)
+                               return;
+
+                       ke->sw.value = !!(status & (1 << ke->sw.code));
+               }
+               sparse_keymap_report_entry(generic_inputdev, ke, 1, true);
+       }
+}
+
+/* 3. Infrastructure */
+
+static void generic_subdriver_exit(struct generic_sub_driver *sub_driver);
+
+static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver)
+{
+       int ret;
+
+       if (!sub_driver || !sub_driver->driver)
+               return -EINVAL;
+
+       ret = platform_driver_register(sub_driver->driver);
+       if (ret)
+               return -EINVAL;
+
+       if (sub_driver->init)
+               sub_driver->init(sub_driver);
+
+       if (sub_driver->notify) {
+               ret = setup_acpi_notify(sub_driver);
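+               /* A missing ACPI notify device is not fatal: clean up but return 0 */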
+               if (ret == -ENODEV) {
+                       ret = 0;
+                       goto err_out;
+               }
+               if (ret < 0)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       generic_subdriver_exit(sub_driver);
+       return (ret < 0) ? ret : 0;
+}
+
+static void generic_subdriver_exit(struct generic_sub_driver *sub_driver)
+{
+       if (sub_driver->acpi_notify_installed) {
+               acpi_remove_notify_handler(*sub_driver->handle,
+                                          sub_driver->type, dispatch_acpi_notify);
+               sub_driver->acpi_notify_installed = 0;
+       }
+       platform_driver_unregister(sub_driver->driver);
+}
+
+static struct generic_sub_driver generic_sub_drivers[] __refdata = {
+       {
+               .name = "hotkey",
+               .init = event_init,
+               .notify = event_notify,
+               .handle = &hotkey_handle,
+               .type = ACPI_DEVICE_NOTIFY,
+               .driver = &loongson_hotkey_driver,
+       },
+};
+
+static int __init generic_acpi_laptop_init(void)
+{
+       bool ec_found;
+       int i, ret, status;
+
+       if (acpi_disabled)
+               return -ENODEV;
+
+       /* The EC device is required */
+       ec_found = acpi_dev_found(LOONGSON_ACPI_EC_HID);
+       if (!ec_found)
+               return -ENODEV;
+
+       /* Enable SCI for EC */
+       acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1);
+
+       generic_inputdev = input_allocate_device();
+       if (!generic_inputdev) {
+               pr_err("Unable to allocate input device\n");
+               return -ENOMEM;
+       }
+
+       /* Prepare input device, but don't register */
+       generic_inputdev->name =
+               "Loongson Generic Laptop/All-in-One Extra Buttons";
+       generic_inputdev->phys = ACPI_LAPTOP_NAME "/input0";
+       generic_inputdev->id.bustype = BUS_HOST;
+       generic_inputdev->dev.parent = NULL;
+
+       /* Init subdrivers */
+       for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) {
+               ret = generic_subdriver_init(&generic_sub_drivers[i]);
+               if (ret < 0) {
+                       input_free_device(generic_inputdev);
+                       while (--i >= 0)
+                               generic_subdriver_exit(&generic_sub_drivers[i]);
+                       return ret;
+               }
+       }
+
+       ret = input_register_device(generic_inputdev);
+       if (ret < 0) {
+               input_free_device(generic_inputdev);
+               while (--i >= 0)
+                       generic_subdriver_exit(&generic_sub_drivers[i]);
+               pr_err("Unable to register input device\n");
+               return ret;
+       }
+
+       input_device_registered = 1;
+
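+       /* Register a platform backlight device only if the EC reports a brightness */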
+       if (acpi_evalf(hotkey_handle, &status, "ECBG", "d")) {
+               pr_info("Loongson Laptop used, initial brightness is 0x%x\n", status);
+               ret = laptop_backlight_register();
+               if (ret < 0)
+                       pr_err("Loongson Laptop: laptop-backlight device register failed\n");
+       }
+
+       return 0;
+}
+
+static void __exit generic_acpi_laptop_exit(void)
+{
+       if (generic_inputdev) {
+               if (input_device_registered)
+                       input_unregister_device(generic_inputdev);
+               else
+                       input_free_device(generic_inputdev);
+       }
+}
+
+module_init(generic_acpi_laptop_init);
+module_exit(generic_acpi_laptop_exit);
+
+MODULE_AUTHOR("Jianmin Lv <lvjianmin@loongson.cn>");
+MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
+MODULE_DESCRIPTION("Loongson Laptop/All-in-One ACPI Driver");
+MODULE_LICENSE("GPL");