Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 Aug 2022 17:37:00 +0000 (10:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 1 Aug 2022 17:37:00 +0000 (10:37 -0700)
Pull arm64 updates from Will Deacon:
 "Highlights include a major rework of our kPTI page-table rewriting
  code (which makes it both more maintainable and considerably faster in
  the cases where it is required) as well as significant changes to our
  early boot code to reduce the need for data cache maintenance and
  greatly simplify the KASLR relocation dance.

  Summary:

   - Remove unused generic cpuidle support (replaced by PSCI version)

   - Fix documentation describing the kernel virtual address space

   - Handling of some new CPU errata in Arm implementations

   - Rework of our exception table code in preparation for handling
     machine checks (i.e. RAS errors) more gracefully

   - Switch over to the generic implementation of ioremap()

   - Fix lockdep tracking in NMI context

   - Instrument our memory barrier macros for KCSAN

   - Rework of the kPTI G->nG page-table repainting so that the MMU
     remains enabled and the boot time is no longer slowed to a crawl
     for systems which require the late remapping

   - Enable support for direct swapping of 2MiB transparent huge-pages
     on systems without MTE

   - Fix handling of MTE tags when allocating new pages with HW KASAN

   - Expose the SMIDR register to userspace via sysfs

   - Continued rework of the stack unwinder, particularly improving the
     behaviour under KASAN

   - More repainting of our system register definitions to match the
     architectural terminology

   - Improvements to the layout of the vDSO objects

   - Support for allocating additional bits of HWCAP2 and exposing
     FEAT_EBF16 to userspace on CPUs that support it

   - Considerable rework and optimisation of our early boot code to
     reduce the need for cache maintenance and avoid jumping in and out
     of the kernel when handling relocation under KASLR

   - Support for disabling SVE and SME on the kernel command-line

   - Support for the Hisilicon HNS3 PMU

   - Miscellaneous cleanups, trivial updates and minor fixes"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (136 commits)
  arm64: Delay initialisation of cpuinfo_arm64::reg_{zcr,smcr}
  arm64: fix KASAN_INLINE
  arm64/hwcap: Support FEAT_EBF16
  arm64/cpufeature: Store elf_hwcaps as a bitmap rather than unsigned long
  arm64/hwcap: Document allocation of upper bits of AT_HWCAP
  arm64: enable THP_SWAP for arm64
  arm64/mm: use GENMASK_ULL for TTBR_BADDR_MASK_52
  arm64: errata: Remove AES hwcap for COMPAT tasks
  arm64: numa: Don't check node against MAX_NUMNODES
  drivers/perf: arm_spe: Fix consistency of SYS_PMSCR_EL1.CX
  perf: RISC-V: Add of_node_put() when breaking out of for_each_of_cpu_node()
  docs: perf: Include hns3-pmu.rst in toctree to fix 'htmldocs' WARNING
  arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"
  mm: kasan: Skip page unpoisoning only if __GFP_SKIP_KASAN_UNPOISON
  mm: kasan: Skip unpoisoning of user pages
  mm: kasan: Ensure the tags are visible before the tag in page->flags
  drivers/perf: hisi: add driver for HNS3 PMU
  drivers/perf: hisi: Add description for HNS3 PMU driver
  drivers/perf: riscv_pmu_sbi: perf format
  perf/arm-cci: Use the bitmap API to allocate bitmaps
  ...

123 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/perf/hns3-pmu.rst [new file with mode: 0644]
Documentation/admin-guide/perf/index.rst
Documentation/arm64/elf_hwcaps.rst
Documentation/arm64/memory.rst
Documentation/arm64/silicon-errata.rst
Documentation/features/vm/ioremap_prot/arch-support.txt
Documentation/memory-barriers.txt
Documentation/virt/kvm/arm/hyp-abi.rst
MAINTAINERS
arch/Kconfig
arch/arm/include/asm/io.h
arch/arm/mm/ioremap.c
arch/arm/mm/nommu.c
arch/arm64/Kconfig
arch/arm64/boot/Makefile
arch/arm64/include/asm/asm-extable.h
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/asm_pointer_auth.h
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/barrier.h
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpu_ops.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cpuidle.h
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/io.h
arch/arm64/include/asm/kernel-pgtable.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/asm/virt.h
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/acpi.c
arch/arm64/kernel/acpi_numa.c
arch/arm64/kernel/alternative.c
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuidle.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/hyp-stub.S
arch/arm64/kernel/idreg-override.c
arch/arm64/kernel/image-vars.h
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/kuser32.S
arch/arm64/kernel/mte.c
arch/arm64/kernel/pi/Makefile [new file with mode: 0644]
arch/arm64/kernel/pi/kaslr_early.c [new file with mode: 0644]
arch/arm64/kernel/signal.c
arch/arm64/kernel/sigreturn32.S
arch/arm64/kernel/sleep.S
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso/vdso.lds.S
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kernel/vdso32/vdso.lds.S
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
arch/arm64/kvm/hyp/nvhe/sys_regs.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/mte.S
arch/arm64/mm/cache.S
arch/arm64/mm/copypage.c
arch/arm64/mm/dma-mapping.c
arch/arm64/mm/extable.c
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/init.c
arch/arm64/mm/ioremap.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/mteswap.c
arch/arm64/mm/proc.S
arch/arm64/tools/cpucaps
arch/arm64/tools/gen-sysreg.awk
arch/arm64/tools/sysreg
arch/x86/Kconfig
arch/x86/Kconfig.debug
drivers/cpuidle/Kconfig.arm
drivers/perf/arm-cci.c
drivers/perf/arm-ccn.c
drivers/perf/arm_spe_pmu.c
drivers/perf/fsl_imx8_ddr_perf.c
drivers/perf/hisilicon/Kconfig
drivers/perf/hisilicon/Makefile
drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.c
drivers/perf/hisilicon/hisi_uncore_pmu.h
drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
drivers/perf/hisilicon/hns3_pmu.c [new file with mode: 0644]
drivers/perf/marvell_cn10k_tad_pmu.c
drivers/perf/riscv_pmu.c
drivers/perf/riscv_pmu_sbi.c
include/asm-generic/barrier.h
include/asm-generic/io.h
include/linux/cpuhotplug.h
include/linux/gfp.h
include/linux/huge_mm.h
include/linux/perf/riscv_pmu.h
mm/ioremap.c
mm/kasan/common.c
mm/page_alloc.c
mm/swap_slots.c

index bcc974d..df79e12 100644 (file)
@@ -493,12 +493,13 @@ What:             /sys/devices/system/cpu/cpuX/regs/
                /sys/devices/system/cpu/cpuX/regs/identification/
                /sys/devices/system/cpu/cpuX/regs/identification/midr_el1
                /sys/devices/system/cpu/cpuX/regs/identification/revidr_el1
+               /sys/devices/system/cpu/cpuX/regs/identification/smidr_el1
 Date:          June 2016
 Contact:       Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
 Description:   AArch64 CPU registers
 
                'identification' directory exposes the CPU ID registers for
-               identifying model and revision of the CPU.
+               identifying the model and revision of the CPU and SMCU.
 
 What:          /sys/devices/system/cpu/aarch32_el0
 Date:          May 2021
index cc3ea8f..5e9147f 100644 (file)
        arm64.nomte     [ARM64] Unconditionally disable Memory Tagging Extension
                        support
 
+       arm64.nosve     [ARM64] Unconditionally disable Scalable Vector
+                       Extension support
+
+       arm64.nosme     [ARM64] Unconditionally disable Scalable Matrix
+                       Extension support
+
        ataflop=        [HW,M68k]
 
        atarimouse=     [HW,MOUSE] Atari Mouse
                                improves system performance, but it may also
                                expose users to several CPU vulnerabilities.
                                Equivalent to: nopti [X86,PPC]
-                                              kpti=0 [ARM64]
+                                              if nokaslr then kpti=0 [ARM64]
                                               nospectre_v1 [X86,PPC]
                                               nobp=0 [S390]
                                               nospectre_v2 [X86,PPC,S390,ARM64]
diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
new file mode 100644 (file)
index 0000000..578407e
--- /dev/null
@@ -0,0 +1,136 @@
+======================================
+HNS3 Performance Monitoring Unit (PMU)
+======================================
+
+HNS3 (HiSilicon network system 3) Performance Monitoring Unit (PMU) is an
+end point device used to collect performance statistics of the HiSilicon
+SoC NIC. On HiP09, each SICL (super I/O cluster) has one PMU device.
+
+HNS3 PMU supports collection of performance statistics such as bandwidth,
+latency, packet rate and interrupt rate.
+
+Each HNS3 PMU supports 8 hardware events.
+
+HNS3 PMU driver
+===============
+
+The HNS3 PMU driver registers a perf PMU with the name of its SICL id::
+
+  /sys/devices/hns3_pmu_sicl_<sicl_id>
+
+The PMU driver provides descriptions of the available events, filter
+modes, formats, identifier and cpumask in sysfs.
+
+The "events" directory describes the event codes of all supported events,
+as shown in perf list.
+
+The "filtermode" directory describes the supported filter modes of each
+event.
+
+The "format" directory describes all formats of the config (events) and
+config1 (filter options) fields of the perf_event_attr structure.
+
+The "identifier" file shows the version of the PMU hardware device.
+
+The "bdf_min" and "bdf_max" files show the supported BDF range of each
+PMU device.
+
+The "hw_clk_freq" file shows the hardware clock frequency of each PMU
+device.
+
+Example usage of checking event code and subevent code::
+
+  $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+  config=0x00204
+  $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+  config=0x10204
+
+Each performance statistic has a pair of events; userspace reads both
+counter values and combines them to compute the real performance figure.
+
+Bits 0~15 of config (here 0x0204) are the true hardware event code. If
+two events have the same value in bits 0~15 of config, they form an
+event pair. Bit 16 of config selects counter 0 or counter 1 of the
+hardware event.
+
+After reading the two values of an event pair in userspace, the real
+performance data is calculated as::
+
+  counter 0 / counter 1
+
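+A minimal C sketch of decoding config and combining an event pair in
+userspace (the macro and function names here are illustrative, not part
+of the driver ABI)::
+
+  #include <stdint.h>
+
+  #define HNS3_PMU_EVENT_MASK  0xffffu   /* bits 0~15: hardware event code */
+  #define HNS3_PMU_COUNTER_BIT 16        /* bit 16: counter 0 or counter 1 */
+
+  static double hns3_pmu_combine(uint32_t cfg0, uint64_t val0,
+                                 uint32_t cfg1, uint64_t val1)
+  {
+          /* a valid event pair shares the same hardware event code */
+          if ((cfg0 & HNS3_PMU_EVENT_MASK) != (cfg1 & HNS3_PMU_EVENT_MASK))
+                  return -1.0;
+
+          /* return counter 0 / counter 1, regardless of argument order */
+          if (cfg0 & (1u << HNS3_PMU_COUNTER_BIT))
+                  return (double)val1 / (double)val0;
+          return (double)val0 / (double)val1;
+  }
+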
+Example usage of checking supported filter mode::
+
+  $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+  filter mode supported: global/port/port-tc/func/func-queue/
+
+Example usage of perf::
+
+  $# perf list
+  hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event]
+  hns3_pmu_sicl_0/bw_ssu_rpu_time/     [kernel PMU event]
+  ------------------------------------------
+
+  $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000
+  or
+  $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000
+
+
+Filter modes
+--------------
+
+1. global mode
+The PMU collects performance statistics for all HNS3 PCIe functions of
+the IO DIE. Setting the "global" filter option to 1 enables this mode.
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000
+
+2. port mode
+The PMU collects performance statistics of one whole physical port. The
+port id is the same as the mac id. The "tc" filter option must be set to
+0xF in this mode; here tc stands for traffic class.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000
+
+3. port-tc mode
+The PMU collects performance statistics of one tc of a physical port.
+The port id is the same as the mac id. The "tc" filter option must be
+set to 0 ~ 7 in this mode.
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000
+
+4. func mode
+The PMU collects performance statistics of one PF/VF. The function id is
+the BDF of the PF/VF; its conversion formula (also shown as a C helper at
+the end of this section) is::
+
+  func = (bus << 8) + (device << 3) + (function)
+
+for example::
+  BDF         func
+  35:00.0    0x3500
+  35:00.1    0x3501
+  35:01.0    0x3508
+
+In this mode, the "queue" filter option must be set to 0xFFFF.
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000
+
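+The conversion above written as a small C helper (the helper name is
+made up for this example)::
+
+  #include <stdint.h>
+
+  /* func = (bus << 8) + (device << 3) + (function) */
+  static inline uint16_t hns3_pmu_func_id(uint8_t bus, uint8_t dev, uint8_t fn)
+  {
+          return ((uint16_t)bus << 8) | ((uint16_t)(dev & 0x1f) << 3) |
+                 (fn & 0x7);
+  }
+
+  /* hns3_pmu_func_id(0x35, 0, 0) == 0x3500 */
+  /* hns3_pmu_func_id(0x35, 1, 0) == 0x3508 */
+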
+5. func-queue mode
+The PMU collects performance statistics of one queue of a PF/VF. The
+function id is the BDF of the PF/VF; the "queue" filter option must be
+set to the exact queue id of the function.
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000
+
+6. func-intr mode
+The PMU collects performance statistics of one interrupt of a PF/VF. The
+function id is the BDF of the PF/VF; the "intr" filter option must be set
+to the exact interrupt id of the function.
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000
index 69b23f0..9c9ece8 100644 (file)
@@ -9,6 +9,7 @@ Performance monitor support
 
    hisi-pmu
    hisi-pcie-pmu
+   hns3-pmu
    imx-ddr
    qcom_l2_pmu
    qcom_l3_pmu
index 3d116fb..31fc10b 100644 (file)
@@ -301,6 +301,10 @@ HWCAP2_WFXT
 
     Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
 
+HWCAP2_EBF16
+
+    Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
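+
+    As with other hwcaps, userspace can probe for this feature at
+    runtime via getauxval(3); a minimal sketch (the helper name is
+    illustrative)::
+
+      #include <sys/auxv.h>
+      #include <asm/hwcap.h>
+
+      /* non-zero if FEAT_EBF16 is exposed to userspace */
+      static inline int have_ebf16(void)
+      {
+              return !!(getauxval(AT_HWCAP2) & HWCAP2_EBF16);
+      }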
+
 4. Unused AT_HWCAP bits
 -----------------------
 
index 901cd09..2a641ba 100644 (file)
@@ -33,9 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   0000000000000000     0000ffffffffffff         256TB          user
   ffff000000000000     ffff7fffffffffff         128TB          kernel logical memory map
  [ffff600000000000     ffff7fffffffffff]         32TB          [kasan shadow region]
-  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
-  ffff800008000000     ffff80000fffffff         128MB          modules
-  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  ffff800000000000     ffff800007ffffff         128MB          modules
+  ffff800008000000     fffffbffefffffff         124TB          vmalloc
   fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
   fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
   fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
@@ -51,9 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   0000000000000000     000fffffffffffff           4PB          user
   fff0000000000000     ffff7fffffffffff          ~4PB          kernel logical memory map
  [fffd800000000000     ffff7fffffffffff]        512TB          [kasan shadow region]
-  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
-  ffff800008000000     ffff80000fffffff         128MB          modules
-  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  ffff800000000000     ffff800007ffffff         128MB          modules
+  ffff800008000000     fffffbffefffffff         124TB          vmalloc
   fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
   fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
   fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
index d27db84..33b04db 100644 (file)
@@ -82,10 +82,14 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #1742098        | ARM64_ERRATUM_1742098       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A72      | #1655431        | ARM64_ERRATUM_1742098       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
@@ -102,6 +106,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
index b01bf7b..6bd78eb 100644 (file)
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: |  ok  |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
index b12df91..832b5d3 100644 (file)
@@ -1894,6 +1894,7 @@ There are some more advanced barrier functions:
 
  (*) dma_wmb();
  (*) dma_rmb();
+ (*) dma_mb();
 
      These are for use with consistent memory to guarantee the ordering
      of writes or reads of shared memory accessible to both the CPU and a
@@ -1925,11 +1926,11 @@ There are some more advanced barrier functions:
     The dma_rmb() allows us to guarantee the device has released ownership
      before we read the data from the descriptor, and the dma_wmb() allows
      us to guarantee the data is written to the descriptor before the device
-     can see it now has ownership.  Note that, when using writel(), a prior
-     wmb() is not needed to guarantee that the cache coherent memory writes
-     have completed before writing to the MMIO region.  The cheaper
-     writel_relaxed() does not provide this guarantee and must not be used
-     here.
+     can see it now has ownership.  The dma_mb() implies both a dma_rmb() and
+     a dma_wmb().  Note that, when using writel(), a prior wmb() is not needed
+     to guarantee that the cache coherent memory writes have completed before
+     writing to the MMIO region.  The cheaper writel_relaxed() does not provide
+     this guarantee and must not be used here.
 
      See the subsection "Kernel I/O barrier effects" for more information on
      relaxed I/O accessors and the Documentation/core-api/dma-api.rst file for
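
A sketch of the ownership-handoff pattern described above (the descriptor
layout, DEVICE_OWN flag and doorbell register are made up for this
illustration)::

	/* producer: publish a DMA descriptor, then ring the doorbell */
	desc->addr = cpu_to_le64(buf_dma);
	desc->len  = cpu_to_le32(len);

	/* order the descriptor payload writes before the ownership flag */
	dma_wmb();
	desc->status = cpu_to_le32(DEVICE_OWN);

	/*
	 * writel() orders the coherent memory writes above before the
	 * MMIO write; writel_relaxed() would not.
	 */
	writel(DESC_NOTIFY, dev->doorbell);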
index 4d43fbc..412b276 100644 (file)
@@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
 
 * ::
 
-    x0 = HVC_VHE_RESTART (arm64 only)
+    x0 = HVC_FINALISE_EL2 (arm64 only)
 
-  Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
-  the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
-  being off, and VHE not being disabled by any other means (command line
-  option, for example).
+  Finish configuring EL2 depending on the command-line options,
+  including an attempt to upgrade the kernel's exception level from
+  EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU
+  supporting VHE, the EL2 MMU being off, and VHE not being disabled by
+  any other means (command line option, for example).
 
 Any other value of r0/x0 triggers a hypervisor-specific handling,
 which is not documented here.
index e10b183..04ec80e 100644 (file)
@@ -9038,6 +9038,12 @@ F:       Documentation/admin-guide/perf/hisi-pcie-pmu.rst
 F:     Documentation/admin-guide/perf/hisi-pmu.rst
 F:     drivers/perf/hisilicon
 
+HISILICON HNS3 PMU DRIVER
+M:     Guangbin Huang <huangguangbin2@huawei.com>
+S:     Supported
+F:     Documentation/admin-guide/perf/hns3-pmu.rst
+F:     drivers/perf/hisilicon/hns3_pmu.c
+
 HISILICON QM AND ZIP Controller DRIVER
 M:     Zhou Wang <wangzhou1@hisilicon.com>
 L:     linux-crypto@vger.kernel.org
index 71b9272..5ea3e38 100644 (file)
@@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS
 config TRACE_IRQFLAGS_SUPPORT
        bool
 
+config TRACE_IRQFLAGS_NMI_SUPPORT
+       bool
+
 #
 # An arch should select this if it provides all these things:
 #
index eba7cbc..7fcdc78 100644 (file)
@@ -139,11 +139,9 @@ extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
 extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 void __arm_iomem_set_ro(void __iomem *ptr, size_t size);
-extern void __iounmap(volatile void __iomem *addr);
 
 extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
        unsigned int, void *);
-extern void (*arch_iounmap)(volatile void __iomem *);
 
 /*
  * Bad read/write accesses...
@@ -380,7 +378,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
 #define ioremap_wc ioremap_wc
 #define ioremap_wt ioremap_wc
 
-void iounmap(volatile void __iomem *iomem_cookie);
+void iounmap(volatile void __iomem *io_addr);
 #define iounmap iounmap
 
 void *arch_memremap_wb(phys_addr_t phys_addr, size_t size);
index 576c0e6..2129070 100644 (file)
@@ -418,7 +418,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
                                                   __builtin_return_address(0));
 }
 
-void __iounmap(volatile void __iomem *io_addr)
+void iounmap(volatile void __iomem *io_addr)
 {
        void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
        struct static_vm *svm;
@@ -446,13 +446,6 @@ void __iounmap(volatile void __iomem *io_addr)
 
        vunmap(addr);
 }
-
-void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
-
-void iounmap(volatile void __iomem *cookie)
-{
-       arch_iounmap(cookie);
-}
 EXPORT_SYMBOL(iounmap);
 
 #if defined(CONFIG_PCI) || IS_ENABLED(CONFIG_PCMCIA)
index 2658f52..c42deba 100644 (file)
@@ -230,14 +230,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
        return (void *)phys_addr;
 }
 
-void __iounmap(volatile void __iomem *addr)
-{
-}
-EXPORT_SYMBOL(__iounmap);
-
-void (*arch_iounmap)(volatile void __iomem *);
-
-void iounmap(volatile void __iomem *addr)
+void iounmap(volatile void __iomem *io_addr)
 {
 }
 EXPORT_SYMBOL(iounmap);
index 1652a98..340e611 100644 (file)
@@ -101,6 +101,7 @@ config ARM64
        select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
        select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WANTS_NO_INSTR
+       select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARM_AMBA
        select ARM_ARCH_TIMER
@@ -126,6 +127,7 @@ config ARM64
        select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
        select GENERIC_IDLE_POLL_SETUP
+       select GENERIC_IOREMAP
        select GENERIC_IRQ_IPI
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
@@ -188,6 +190,7 @@ config ARM64
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
+       select HAVE_IOREMAP_PROT
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_KVM
        select HAVE_NMI
@@ -226,6 +229,7 @@ config ARM64
        select THREAD_INFO_IN_TASK
        select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
        select TRACE_IRQFLAGS_SUPPORT
+       select TRACE_IRQFLAGS_NMI_SUPPORT
        help
          ARM 64-bit (AArch64) Linux support.
 
@@ -503,6 +507,22 @@ config ARM64_ERRATUM_834220
 
          If unsure, say Y.
 
+config ARM64_ERRATUM_1742098
+       bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
+       depends on COMPAT
+       default y
+       help
+         This option removes the AES hwcap for aarch32 user-space to
+         work around erratum 1742098 on Cortex-A57 and Cortex-A72.
+
+         Affected parts may corrupt the AES state if an interrupt is
+         taken between a pair of AES instructions. These instructions
+         are only present if the cryptography extensions are present.
+         All software should have a fallback implementation for CPUs
+         that don't implement the cryptography extensions.
+
+         If unsure, say Y.
+
 config ARM64_ERRATUM_845719
        bool "Cortex-A53: 845719: a load might read incorrect data"
        depends on COMPAT
@@ -821,6 +841,23 @@ config ARM64_ERRATUM_2224489
 
          If unsure, say Y.
 
+config ARM64_ERRATUM_2441009
+       bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
+       default y
+       select ARM64_WORKAROUND_REPEAT_TLBI
+       help
+         This option adds a workaround for ARM Cortex-A510 erratum #2441009.
+
+         Under very rare circumstances, affected Cortex-A510 CPUs
+         may not handle a race between a break-before-make sequence on one
+         CPU, and another CPU accessing the same page. This could allow a
+         store to a page that has been unmapped.
+
+         Work around this by adding the affected CPUs to the list that needs
+         TLB sequences to be done twice.
+
+         If unsure, say Y.
+
 config ARM64_ERRATUM_2064142
        bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
        depends on CORESIGHT_TRBE
index ebe80fa..a0e3ded 100644 (file)
@@ -16,7 +16,7 @@
 
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo
+targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst
 
 $(obj)/Image: vmlinux FORCE
        $(call if_changed,objcopy)
@@ -35,3 +35,6 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
 
 $(obj)/Image.lzo: $(obj)/Image FORCE
        $(call if_changed,lzo)
+
+$(obj)/Image.zst: $(obj)/Image FORCE
+       $(call if_changed,zstd)
index c39f243..980d1dd 100644 (file)
@@ -2,12 +2,27 @@
 #ifndef __ASM_ASM_EXTABLE_H
 #define __ASM_ASM_EXTABLE_H
 
+#include <linux/bits.h>
+#include <asm/gpr-num.h>
+
 #define EX_TYPE_NONE                   0
-#define EX_TYPE_FIXUP                  1
-#define EX_TYPE_BPF                    2
-#define EX_TYPE_UACCESS_ERR_ZERO       3
+#define EX_TYPE_BPF                    1
+#define EX_TYPE_UACCESS_ERR_ZERO       2
+#define EX_TYPE_KACCESS_ERR_ZERO       3
 #define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
 
+/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
+#define EX_DATA_REG_ERR_SHIFT  0
+#define EX_DATA_REG_ERR                GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO       GENMASK(9, 5)
+
+/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA       GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR       GENMASK(9, 5)
+
 #ifdef __ASSEMBLY__
 
 #define __ASM_EXTABLE_RAW(insn, fixup, type, data)     \
        .short          (data);                         \
        .popsection;
 
+#define EX_DATA_REG(reg, gpr)  \
+       (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT)
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)          \
+       __ASM_EXTABLE_RAW(insn, fixup,                                  \
+                         EX_TYPE_UACCESS_ERR_ZERO,                     \
+                         (                                             \
+                           EX_DATA_REG(ERR, err) |                     \
+                           EX_DATA_REG(ZERO, zero)                     \
+                         ))
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)                     \
+       _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup)                              \
+       _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
 /*
- * Create an exception table entry for `insn`, which will branch to `fixup`
+ * Create an exception table entry for uaccess `insn`, which will branch to `fixup`
  * when an unhandled fault is taken.
  */
-       .macro          _asm_extable, insn, fixup
-       __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
+       .macro          _asm_extable_uaccess, insn, fixup
+       _ASM_EXTABLE_UACCESS(\insn, \fixup)
        .endm
 
 /*
  * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
  * do nothing.
  */
-       .macro          _cond_extable, insn, fixup
-       .ifnc           \fixup,
-       _asm_extable    \insn, \fixup
+       .macro          _cond_uaccess_extable, insn, fixup
+       .ifnc                   \fixup,
+       _asm_extable_uaccess    \insn, \fixup
        .endif
        .endm
 
 #else /* __ASSEMBLY__ */
 
-#include <linux/bits.h>
 #include <linux/stringify.h>
 
-#include <asm/gpr-num.h>
-
 #define __ASM_EXTABLE_RAW(insn, fixup, type, data)     \
        ".pushsection   __ex_table, \"a\"\n"            \
        ".align         2\n"                            \
        ".short         (" data ")\n"                   \
        ".popsection\n"
 
-#define _ASM_EXTABLE(insn, fixup) \
-       __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
-
-#define EX_DATA_REG_ERR_SHIFT  0
-#define EX_DATA_REG_ERR                GENMASK(4, 0)
-#define EX_DATA_REG_ZERO_SHIFT 5
-#define EX_DATA_REG_ZERO       GENMASK(9, 5)
-
 #define EX_DATA_REG(reg, gpr)                                          \
        "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
 
                            EX_DATA_REG(ZERO, zero)                     \
                          ")")
 
+#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero)          \
+       __DEFINE_ASM_GPR_NUMS                                           \
+       __ASM_EXTABLE_RAW(#insn, #fixup,                                \
+                         __stringify(EX_TYPE_KACCESS_ERR_ZERO),        \
+                         "("                                           \
+                           EX_DATA_REG(ERR, err) " | "                 \
+                           EX_DATA_REG(ZERO, zero)                     \
+                         ")")
+
 #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)                     \
        _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
 
-#define EX_DATA_REG_DATA_SHIFT 0
-#define EX_DATA_REG_DATA       GENMASK(4, 0)
-#define EX_DATA_REG_ADDR_SHIFT 5
-#define EX_DATA_REG_ADDR       GENMASK(9, 5)
+#define _ASM_EXTABLE_UACCESS(insn, fixup)                              \
+       _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err)                     \
+       _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
 
 #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)           \
        __DEFINE_ASM_GPR_NUMS                                                   \
index 0557af8..75b211c 100644 (file)
@@ -61,7 +61,7 @@ alternative_else_nop_endif
 
 #define USER(l, x...)                          \
 9999:  x;                                      \
-       _asm_extable    9999b, l
+       _asm_extable_uaccess    9999b, l
 
 /*
  * Generate the assembly for LDTR/STTR with exception table entries.
@@ -73,8 +73,8 @@ alternative_else_nop_endif
 8889:          ldtr    \reg2, [\addr, #8];
                add     \addr, \addr, \post_inc;
 
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
+               _asm_extable_uaccess    8888b, \l;
+               _asm_extable_uaccess    8889b, \l;
        .endm
 
        .macro user_stp l, reg1, reg2, addr, post_inc
@@ -82,14 +82,14 @@ alternative_else_nop_endif
 8889:          sttr    \reg2, [\addr, #8];
                add     \addr, \addr, \post_inc;
 
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
+               _asm_extable_uaccess    8888b,\l;
+               _asm_extable_uaccess    8889b,\l;
        .endm
 
        .macro user_ldst l, inst, reg, addr, post_inc
 8888:          \inst           \reg, [\addr];
                add             \addr, \addr, \post_inc;
 
-               _asm_extable    8888b,\l;
+               _asm_extable_uaccess    8888b, \l;
        .endm
 #endif
index ead62f7..13ecc79 100644 (file)
@@ -59,9 +59,9 @@ alternative_else_nop_endif
 
        .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
        mrs     \tmp1, id_aa64isar1_el1
-       ubfx    \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
+       ubfx    \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8
        mrs_s   \tmp2, SYS_ID_AA64ISAR2_EL1
-       ubfx    \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4
+       ubfx    \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4
        orr     \tmp1, \tmp1, \tmp2
        cbz     \tmp1, .Lno_addr_auth\@
        mov_q   \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
index 8c5a61a..5846145 100644 (file)
@@ -360,6 +360,20 @@ alternative_cb_end
        .endm
 
 /*
+ * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of _end.
+ */
+       .macro  idmap_get_t0sz, reg
+       adrp    \reg, _end
+       orr     \reg, \reg, #(1 << VA_BITS_MIN) - 1
+       clz     \reg, \reg
+       .endm
+
+/*
  * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
  * ID_AA64MMFR0_EL1.PARange value
  *
@@ -423,7 +437,7 @@ alternative_endif
        b.lo    .Ldcache_op\@
        dsb     \domain
 
-       _cond_extable .Ldcache_op\@, \fixup
+       _cond_uaccess_extable .Ldcache_op\@, \fixup
        .endm
 
 /*
@@ -462,7 +476,19 @@ alternative_endif
        dsb     ish
        isb
 
-       _cond_extable .Licache_op\@, \fixup
+       _cond_uaccess_extable .Licache_op\@, \fixup
+       .endm
+
+/*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ * pgtbl preserved
+ * tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+       .macro          load_ttbr1, pgtbl, tmp1, tmp2
+       phys_to_ttbr    \tmp1, \pgtbl
+       offset_ttbr1    \tmp1, \tmp2
+       msr             ttbr1_el1, \tmp1
+       isb
        .endm
 
 /*
@@ -478,10 +504,7 @@ alternative_endif
        isb
        tlbi    vmalle1
        dsb     nsh
-       phys_to_ttbr \tmp, \page_table
-       offset_ttbr1 \tmp, \tmp2
-       msr     ttbr1_el1, \tmp
-       isb
+       load_ttbr1 \page_table, \tmp, \tmp2
        .endm
 
 /*
index 9f3e2c3..2cfc424 100644 (file)
 #define pmr_sync()     do {} while (0)
 #endif
 
-#define mb()           dsb(sy)
-#define rmb()          dsb(ld)
-#define wmb()          dsb(st)
+#define __mb()         dsb(sy)
+#define __rmb()                dsb(ld)
+#define __wmb()                dsb(st)
 
-#define dma_mb()       dmb(osh)
-#define dma_rmb()      dmb(oshld)
-#define dma_wmb()      dmb(oshst)
+#define __dma_mb()     dmb(osh)
+#define __dma_rmb()    dmb(oshld)
+#define __dma_wmb()    dmb(oshst)
 
 #define io_stop_wc()   dgh()
 
index 7c2181c..ca9b487 100644 (file)
@@ -5,34 +5,9 @@
 #ifndef __ASM_CACHE_H
 #define __ASM_CACHE_H
 
-#include <asm/cputype.h>
-#include <asm/mte-def.h>
-
-#define CTR_L1IP_SHIFT         14
-#define CTR_L1IP_MASK          3
-#define CTR_DMINLINE_SHIFT     16
-#define CTR_IMINLINE_SHIFT     0
-#define CTR_IMINLINE_MASK      0xf
-#define CTR_ERG_SHIFT          20
-#define CTR_CWG_SHIFT          24
-#define CTR_CWG_MASK           15
-#define CTR_IDC_SHIFT          28
-#define CTR_DIC_SHIFT          29
-
-#define CTR_CACHE_MINLINE_MASK \
-       (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
-
-#define CTR_L1IP(ctr)          (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
-
-#define ICACHE_POLICY_VPIPT    0
-#define ICACHE_POLICY_RESERVED 1
-#define ICACHE_POLICY_VIPT     2
-#define ICACHE_POLICY_PIPT     3
-
 #define L1_CACHE_SHIFT         (6)
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
-
 #define CLIDR_LOUU_SHIFT       27
 #define CLIDR_LOC_SHIFT                24
 #define CLIDR_LOUIS_SHIFT      21
 #include <linux/bitops.h>
 #include <linux/kasan-enabled.h>
 
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+#include <asm/sysreg.h>
+
 #ifdef CONFIG_KASAN_SW_TAGS
 #define ARCH_SLAB_MINALIGN     (1ULL << KASAN_SHADOW_SCALE_SHIFT)
 #elif defined(CONFIG_KASAN_HW_TAGS)
@@ -66,6 +45,12 @@ static inline unsigned int arch_slab_minalign(void)
 #define arch_slab_minalign() arch_slab_minalign()
 #endif
 
+#define CTR_CACHE_MINLINE_MASK \
+       (0xf << CTR_EL0_DMINLINE_SHIFT | \
+        CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT)
+
+#define CTR_L1IP(ctr)          SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
+
 #define ICACHEF_ALIASING       0
 #define ICACHEF_VPIPT          1
 extern unsigned long __icache_flags;
@@ -86,7 +71,7 @@ static __always_inline int icache_is_vpipt(void)
 
 static inline u32 cache_type_cwg(void)
 {
-       return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+       return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK;
 }
 
 #define __read_mostly __section(".data..read_mostly")
@@ -120,12 +105,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
 {
        u32 ctr = read_cpuid_cachetype();
 
-       if (!(ctr & BIT(CTR_IDC_SHIFT))) {
+       if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
                u64 clidr = read_sysreg(clidr_el1);
 
                if (CLIDR_LOC(clidr) == 0 ||
                    (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
-                       ctr |= BIT(CTR_IDC_SHIFT);
+                       ctr |= BIT(CTR_EL0_IDC_SHIFT);
        }
 
        return ctr;
index 5a228e2..37185e9 100644 (file)
@@ -105,13 +105,6 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
 #define flush_icache_range flush_icache_range
 
 /*
- * Cache maintenance functions used by the DMA API. No to be used directly.
- */
-extern void __dma_map_area(const void *, size_t, int);
-extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_area(const void *, size_t);
-
-/*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
  * space" model to handle this.
index 115cdec..fd7a922 100644 (file)
@@ -46,6 +46,7 @@ struct cpuinfo_arm64 {
        u64             reg_midr;
        u64             reg_revidr;
        u64             reg_gmid;
+       u64             reg_smidr;
 
        u64             reg_id_aa64dfr0;
        u64             reg_id_aa64dfr1;
index e95c4df..a444c89 100644 (file)
  * @cpu_die:   Makes a cpu leave the kernel. Must not fail. Called from the
  *             cpu being killed.
  * @cpu_kill:  Ensures a cpu has left the kernel. Called from another cpu.
- * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for
- *                a proposed logical id.
- * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
- *               to wrong parameters or error conditions. Called from the
- *               CPU being suspended. Must be called with IRQs disabled.
  */
 struct cpu_operations {
        const char      *name;
@@ -49,10 +44,6 @@ struct cpu_operations {
        void            (*cpu_die)(unsigned int cpu);
        int             (*cpu_kill)(unsigned int cpu);
 #endif
-#ifdef CONFIG_CPU_IDLE
-       int             (*cpu_init_idle)(unsigned int);
-       int             (*cpu_suspend)(unsigned long);
-#endif
 };
 
 int __init init_cpu_ops(int cpu);
index 14a8f3d..fd7d75a 100644 (file)
@@ -11,7 +11,7 @@
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
 
-#define MAX_CPU_FEATURES       64
+#define MAX_CPU_FEATURES       128
 #define cpu_feature(x)         KERNEL_HWCAP_ ## x
 
 #ifndef __ASSEMBLY__
@@ -673,7 +673,7 @@ static inline bool supports_clearbhb(int scope)
                isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
 
        return cpuid_feature_extract_unsigned_field(isar2,
-                                                   ID_AA64ISAR2_CLEARBHB_SHIFT);
+                                                   ID_AA64ISAR2_EL1_BC_SHIFT);
 }
 
 const struct cpumask *system_32bit_el0_cpumask(void);
@@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
 extern struct arm64_ftr_override id_aa64isar2_override;
 
index 14a19d1..2047713 100644 (file)
@@ -4,21 +4,6 @@
 
 #include <asm/proc-fns.h>
 
-#ifdef CONFIG_CPU_IDLE
-extern int arm_cpuidle_init(unsigned int cpu);
-extern int arm_cpuidle_suspend(int index);
-#else
-static inline int arm_cpuidle_init(unsigned int cpu)
-{
-       return -EOPNOTSUPP;
-}
-
-static inline int arm_cpuidle_suspend(int index)
-{
-       return -EOPNOTSUPP;
-}
-#endif
-
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 #include <asm/arch_gicv3.h>
 
index 34ceff0..2630faa 100644 (file)
        msr     cptr_el2, x0                    // Disable copro. traps to EL2
 .endm
 
-/* SVE register access */
-.macro __init_el2_nvhe_sve
-       mrs     x1, id_aa64pfr0_el1
-       ubfx    x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
-       cbz     x1, .Lskip_sve_\@
-
-       bic     x0, x0, #CPTR_EL2_TZ            // Also disable SVE traps
-       msr     cptr_el2, x0                    // Disable copro. traps to EL2
-       isb
-       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
-       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
-.Lskip_sve_\@:
-.endm
-
-/* SME register access and priority mapping */
-.macro __init_el2_nvhe_sme
-       mrs     x1, id_aa64pfr1_el1
-       ubfx    x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
-       cbz     x1, .Lskip_sme_\@
-
-       bic     x0, x0, #CPTR_EL2_TSM           // Also disable SME traps
-       msr     cptr_el2, x0                    // Disable copro. traps to EL2
-       isb
-
-       mrs     x1, sctlr_el2
-       orr     x1, x1, #SCTLR_ELx_ENTP2        // Disable TPIDR2 traps
-       msr     sctlr_el2, x1
-       isb
-
-       mov     x1, #0                          // SMCR controls
-
-       mrs_s   x2, SYS_ID_AA64SMFR0_EL1
-       ubfx    x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
-       cbz     x2, .Lskip_sme_fa64_\@
-
-       orr     x1, x1, SMCR_ELx_FA64_MASK
-.Lskip_sme_fa64_\@:
-
-       orr     x1, x1, #SMCR_ELx_LEN_MASK      // Enable full SME vector
-       msr_s   SYS_SMCR_EL2, x1                // length for EL1.
-
-       mrs_s   x1, SYS_SMIDR_EL1               // Priority mapping supported?
-       ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
-       cbz     x1, .Lskip_sme_\@
-
-       msr_s   SYS_SMPRIMAP_EL2, xzr           // Make all priorities equal
-
-       mrs     x1, id_aa64mmfr1_el1            // HCRX_EL2 present?
-       ubfx    x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
-       cbz     x1, .Lskip_sme_\@
-
-       mrs_s   x1, SYS_HCRX_EL2
-       orr     x1, x1, #HCRX_EL2_SMPME_MASK    // Enable priority mapping
-       msr_s   SYS_HCRX_EL2, x1
-
-.Lskip_sme_\@:
-.endm
-
 /* Disable any fine grained traps */
 .macro __init_el2_fgt
        mrs     x1, id_aa64mmfr0_el1
        __init_el2_hstr
        __init_el2_nvhe_idregs
        __init_el2_nvhe_cptr
-       __init_el2_nvhe_sve
-       __init_el2_nvhe_sme
        __init_el2_fgt
        __init_el2_nvhe_prepare_eret
 .endm
index daff882..71ed5fd 100644 (file)
@@ -62,10 +62,12 @@ enum fixed_addresses {
 #endif /* CONFIG_ACPI_APEI_GHES */
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_RELOCATABLE
+       FIX_ENTRY_TRAMP_TEXT4,  /* one extra slot for the data page */
+#endif
        FIX_ENTRY_TRAMP_TEXT3,
        FIX_ENTRY_TRAMP_TEXT2,
        FIX_ENTRY_TRAMP_TEXT1,
-       FIX_ENTRY_TRAMP_DATA,
 #define TRAMP_VALIAS           (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
        __end_of_permanent_fixed_addresses,
index aa443d8..cef4ae7 100644 (file)
@@ -85,7 +85,7 @@
 #define KERNEL_HWCAP_PACA              __khwcap_feature(PACA)
 #define KERNEL_HWCAP_PACG              __khwcap_feature(PACG)
 
-#define __khwcap2_feature(x)           (const_ilog2(HWCAP2_ ## x) + 32)
+#define __khwcap2_feature(x)           (const_ilog2(HWCAP2_ ## x) + 64)
 #define KERNEL_HWCAP_DCPODP            __khwcap2_feature(DCPODP)
 #define KERNEL_HWCAP_SVE2              __khwcap2_feature(SVE2)
 #define KERNEL_HWCAP_SVEAES            __khwcap2_feature(SVEAES)
 #define KERNEL_HWCAP_SME_F32F32                __khwcap2_feature(SME_F32F32)
 #define KERNEL_HWCAP_SME_FA64          __khwcap2_feature(SME_FA64)
 #define KERNEL_HWCAP_WFXT              __khwcap2_feature(WFXT)
+#define KERNEL_HWCAP_EBF16             __khwcap2_feature(EBF16)
 
 /*
  * This yields a mask that user programs can use to figure out what
index 3995652..87dd42d 100644 (file)
@@ -163,13 +163,16 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
 /*
  * I/O memory mapping functions.
  */
-extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
-extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 
-#define ioremap(addr, size)            __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_wc(addr, size)         __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
-#define ioremap_np(addr, size)         __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot);
+#define ioremap_allowed ioremap_allowed
+
+#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
+
+#define ioremap_wc(addr, size) \
+       ioremap_prot((addr), (size), PROT_NORMAL_NC)
+#define ioremap_np(addr, size) \
+       ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
 
 /*
  * io{read,write}{16,32,64}be() macros
@@ -184,6 +187,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 
 #include <asm-generic/io.h>
 
+#define ioremap_cache ioremap_cache
+static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
+{
+       if (pfn_is_map_memory(__phys_to_pfn(addr)))
+               return (void __iomem *)__phys_to_virt(addr);
+
+       return ioremap_prot(addr, size, PROT_NORMAL);
+}
+
 /*
  * More restrictive address range checking than the default implementation
  * (PHYS_OFFSET and PHYS_MASK taken into account).
index 96dc0f7..02e59fa 100644 (file)
@@ -8,6 +8,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/boot.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sparsemem.h>
 
  */
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#define IDMAP_PGTABLE_LEVELS   (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
 #else
 #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#define IDMAP_PGTABLE_LEVELS   (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
 #endif
 
 
                        + EARLY_PUDS((vstart), (vend))  /* each PUD needs a next level page table */    \
                        + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
 #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
-#define IDMAP_DIR_SIZE         (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* the initial ID map may need two extra pages if it needs to be extended */
+#if VA_BITS < 48
+#define INIT_IDMAP_DIR_SIZE    ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
+#else
+#define INIT_IDMAP_DIR_SIZE    (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
+#endif
+#define INIT_IDMAP_DIR_PAGES   EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
 
 /* Initial memory map size */
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_PMD_FLAGS      (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
 #if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_MM_MMUFLAGS    (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RW_MMUFLAGS    (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RX_MMUFLAGS    (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
 #else
-#define SWAPPER_MM_MMUFLAGS    (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RW_MMUFLAGS    (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RX_MMUFLAGS    (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
 /*
index 0af70d9..227d256 100644 (file)
 #include <linux/types.h>
 #include <asm/bug.h>
 
+#if VA_BITS > 48
 extern u64                     vabits_actual;
+#else
+#define vabits_actual          ((u64)VA_BITS)
+#endif
 
 extern s64                     memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
@@ -351,6 +355,11 @@ static inline void *phys_to_virt(phys_addr_t x)
 })
 
 void dump_mem_limit(void);
+
+static inline bool defer_reserve_crashkernel(void)
+{
+       return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32);
+}
 #endif /* !ASSEMBLY */
 
 /*
index 6770667..c7ccd82 100644 (file)
@@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
  * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
  * physical memory, in which case it will be smaller.
  */
-extern u64 idmap_t0sz;
-extern u64 idmap_ptrs_per_pgd;
+extern int idmap_t0sz;
 
 /*
  * Ensure TCR.T0SZ is set to the provided value.
@@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void)
                cpu_switch_mm(mm->pgd, mm);
 }
 
-static inline void cpu_install_idmap(void)
+static inline void __cpu_install_idmap(pgd_t *idmap)
 {
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
        cpu_set_idmap_tcr_t0sz();
 
-       cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
+       cpu_switch_mm(lm_alias(idmap), &init_mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+       __cpu_install_idmap(idmap_pg_dir);
 }
 
 /*
@@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 {
        typedef void (ttbr_replace_func)(phys_addr_t);
        extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
 
        replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
 
-       cpu_install_idmap();
+       __cpu_install_idmap(idmap);
        replace_phys(ttbr1);
        cpu_uninstall_idmap();
 }
index dd3d12b..5ab8d16 100644 (file)
  */
 #ifdef CONFIG_ARM64_PA_BITS_52
 /*
- * This should be GENMASK_ULL(47, 2).
  * TTBR_ELx[1] is RES0 in this configuration.
  */
-#define TTBR_BADDR_MASK_52     (((UL(1) << 46) - 1) << 2)
+#define TTBR_BADDR_MASK_52     GENMASK_ULL(47, 2)
 #endif
 
 #ifdef CONFIG_ARM64_VA_BITS_52
index 0b6632f..b5df82a 100644 (file)
        __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool arch_thp_swp_supported(void)
+{
+       return !system_supports_mte();
+}
+#define arch_thp_swp_supported arch_thp_swp_supported
+
 /*
  * Outside of a few very special situations (e.g. hibernation), we always
  * use broadcast TLB invalidation instructions, therefore a spurious page
@@ -427,6 +433,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
        return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
 }
 
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+       unsigned long pfn = pte_pfn(pte);
+
+       return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * See the comment in include/linux/pgtable.h
index 9e58749..86eb0bf 100644 (file)
@@ -272,8 +272,9 @@ void tls_preserve_current_state(void);
 
 static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
 {
+       s32 previous_syscall = regs->syscallno;
        memset(regs, 0, sizeof(*regs));
-       forget_syscall(regs);
+       regs->syscallno = previous_syscall;
        regs->pc = pc;
 
        if (system_uses_irq_prio_masking())
index 42ff95d..7c71358 100644 (file)
 
 #define SYS_ID_AA64PFR0_EL1            sys_reg(3, 0, 0, 4, 0)
 #define SYS_ID_AA64PFR1_EL1            sys_reg(3, 0, 0, 4, 1)
-#define SYS_ID_AA64ZFR0_EL1            sys_reg(3, 0, 0, 4, 4)
-#define SYS_ID_AA64SMFR0_EL1           sys_reg(3, 0, 0, 4, 5)
 
 #define SYS_ID_AA64DFR0_EL1            sys_reg(3, 0, 0, 5, 0)
 #define SYS_ID_AA64DFR1_EL1            sys_reg(3, 0, 0, 5, 1)
 #define SYS_ID_AA64AFR0_EL1            sys_reg(3, 0, 0, 5, 4)
 #define SYS_ID_AA64AFR1_EL1            sys_reg(3, 0, 0, 5, 5)
 
-#define SYS_ID_AA64ISAR1_EL1           sys_reg(3, 0, 0, 6, 1)
-#define SYS_ID_AA64ISAR2_EL1           sys_reg(3, 0, 0, 6, 2)
-
 #define SYS_ID_AA64MMFR0_EL1           sys_reg(3, 0, 0, 7, 0)
 #define SYS_ID_AA64MMFR1_EL1           sys_reg(3, 0, 0, 7, 1)
 #define SYS_ID_AA64MMFR2_EL1           sys_reg(3, 0, 0, 7, 2)
 #define SYS_MAIR_EL1                   sys_reg(3, 0, 10, 2, 0)
 #define SYS_AMAIR_EL1                  sys_reg(3, 0, 10, 3, 0)
 
-#define SYS_LORSA_EL1                  sys_reg(3, 0, 10, 4, 0)
-#define SYS_LOREA_EL1                  sys_reg(3, 0, 10, 4, 1)
-#define SYS_LORN_EL1                   sys_reg(3, 0, 10, 4, 2)
-#define SYS_LORC_EL1                   sys_reg(3, 0, 10, 4, 3)
-#define SYS_LORID_EL1                  sys_reg(3, 0, 10, 4, 7)
-
 #define SYS_VBAR_EL1                   sys_reg(3, 0, 12, 0, 0)
 #define SYS_DISR_EL1                   sys_reg(3, 0, 12, 1, 1)
 
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_CCSIDR_EL1                 sys_reg(3, 1, 0, 0, 0)
-#define SYS_GMID_EL1                   sys_reg(3, 1, 0, 0, 4)
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 
 #define SMIDR_EL1_IMPLEMENTER_SHIFT    24
 #define SMIDR_EL1_SMPS_SHIFT   15
 #define SMIDR_EL1_AFFINITY_SHIFT       0
 
-#define SYS_CTR_EL0                    sys_reg(3, 3, 0, 0, 1)
-#define SYS_DCZID_EL0                  sys_reg(3, 3, 0, 0, 7)
-
 #define SYS_RNDR_EL0                   sys_reg(3, 3, 2, 4, 0)
 #define SYS_RNDRRS_EL0                 sys_reg(3, 3, 2, 4, 1)
 
 /* Position the attr at the correct index */
 #define MAIR_ATTRIDX(attr, idx)                ((attr) << ((idx) * 8))
 
-/* id_aa64isar1 */
-#define ID_AA64ISAR1_I8MM_SHIFT                52
-#define ID_AA64ISAR1_DGH_SHIFT         48
-#define ID_AA64ISAR1_BF16_SHIFT                44
-#define ID_AA64ISAR1_SPECRES_SHIFT     40
-#define ID_AA64ISAR1_SB_SHIFT          36
-#define ID_AA64ISAR1_FRINTTS_SHIFT     32
-#define ID_AA64ISAR1_GPI_SHIFT         28
-#define ID_AA64ISAR1_GPA_SHIFT         24
-#define ID_AA64ISAR1_LRCPC_SHIFT       20
-#define ID_AA64ISAR1_FCMA_SHIFT                16
-#define ID_AA64ISAR1_JSCVT_SHIFT       12
-#define ID_AA64ISAR1_API_SHIFT         8
-#define ID_AA64ISAR1_APA_SHIFT         4
-#define ID_AA64ISAR1_DPB_SHIFT         0
-
-#define ID_AA64ISAR1_APA_NI                    0x0
-#define ID_AA64ISAR1_APA_ARCHITECTED           0x1
-#define ID_AA64ISAR1_APA_ARCH_EPAC             0x2
-#define ID_AA64ISAR1_APA_ARCH_EPAC2            0x3
-#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC       0x4
-#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB   0x5
-#define ID_AA64ISAR1_API_NI                    0x0
-#define ID_AA64ISAR1_API_IMP_DEF               0x1
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC          0x2
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2         0x3
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC    0x4
-#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB        0x5
-#define ID_AA64ISAR1_GPA_NI                    0x0
-#define ID_AA64ISAR1_GPA_ARCHITECTED           0x1
-#define ID_AA64ISAR1_GPI_NI                    0x0
-#define ID_AA64ISAR1_GPI_IMP_DEF               0x1
-
-/* id_aa64isar2 */
-#define ID_AA64ISAR2_CLEARBHB_SHIFT    28
-#define ID_AA64ISAR2_APA3_SHIFT                12
-#define ID_AA64ISAR2_GPA3_SHIFT                8
-#define ID_AA64ISAR2_RPRES_SHIFT       4
-#define ID_AA64ISAR2_WFXT_SHIFT                0
-
-#define ID_AA64ISAR2_RPRES_8BIT                0x0
-#define ID_AA64ISAR2_RPRES_12BIT       0x1
-/*
- * Value 0x1 has been removed from the architecture, and is
- * reserved, but has not yet been removed from the ARM ARM
- * as of ARM DDI 0487G.b.
- */
-#define ID_AA64ISAR2_WFXT_NI           0x0
-#define ID_AA64ISAR2_WFXT_SUPPORTED    0x2
-
-#define ID_AA64ISAR2_APA3_NI                   0x0
-#define ID_AA64ISAR2_APA3_ARCHITECTED          0x1
-#define ID_AA64ISAR2_APA3_ARCH_EPAC            0x2
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2           0x3
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC      0x4
-#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB  0x5
-
-#define ID_AA64ISAR2_GPA3_NI                   0x0
-#define ID_AA64ISAR2_GPA3_ARCHITECTED          0x1
-
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT         60
 #define ID_AA64PFR0_CSV2_SHIFT         56
 #define ID_AA64PFR1_MTE                        0x2
 #define ID_AA64PFR1_MTE_ASYMM          0x3
 
-/* id_aa64zfr0 */
-#define ID_AA64ZFR0_F64MM_SHIFT                56
-#define ID_AA64ZFR0_F32MM_SHIFT                52
-#define ID_AA64ZFR0_I8MM_SHIFT         44
-#define ID_AA64ZFR0_SM4_SHIFT          40
-#define ID_AA64ZFR0_SHA3_SHIFT         32
-#define ID_AA64ZFR0_BF16_SHIFT         20
-#define ID_AA64ZFR0_BITPERM_SHIFT      16
-#define ID_AA64ZFR0_AES_SHIFT          4
-#define ID_AA64ZFR0_SVEVER_SHIFT       0
-
-#define ID_AA64ZFR0_F64MM              0x1
-#define ID_AA64ZFR0_F32MM              0x1
-#define ID_AA64ZFR0_I8MM               0x1
-#define ID_AA64ZFR0_BF16               0x1
-#define ID_AA64ZFR0_SM4                        0x1
-#define ID_AA64ZFR0_SHA3               0x1
-#define ID_AA64ZFR0_BITPERM            0x1
-#define ID_AA64ZFR0_AES                        0x1
-#define ID_AA64ZFR0_AES_PMULL          0x2
-#define ID_AA64ZFR0_SVEVER_SVE2                0x1
-
-/* id_aa64smfr0 */
-#define ID_AA64SMFR0_FA64_SHIFT                63
-#define ID_AA64SMFR0_I16I64_SHIFT      52
-#define ID_AA64SMFR0_F64F64_SHIFT      48
-#define ID_AA64SMFR0_I8I32_SHIFT       36
-#define ID_AA64SMFR0_F16F32_SHIFT      35
-#define ID_AA64SMFR0_B16F32_SHIFT      34
-#define ID_AA64SMFR0_F32F32_SHIFT      32
-
-#define ID_AA64SMFR0_FA64              0x1
-#define ID_AA64SMFR0_I16I64            0xf
-#define ID_AA64SMFR0_F64F64            0x1
-#define ID_AA64SMFR0_I8I32             0xf
-#define ID_AA64SMFR0_F16F32            0x1
-#define ID_AA64SMFR0_B16F32            0x1
-#define ID_AA64SMFR0_F32F32            0x1
-
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_ECV_SHIFT         60
 #define ID_AA64MMFR0_FGT_SHIFT         56
 
 /* id_aa64mmfr1 */
 #define ID_AA64MMFR1_ECBHB_SHIFT       60
+#define ID_AA64MMFR1_TIDCP1_SHIFT      52
 #define ID_AA64MMFR1_AFP_SHIFT         44
 #define ID_AA64MMFR1_HCX_SHIFT         40
 #define ID_AA64MMFR1_ETS_SHIFT         36
 #define ID_AA64MMFR1_VMIDBITS_8                0
 #define ID_AA64MMFR1_VMIDBITS_16       2
 
+#define ID_AA64MMFR1_TIDCP1_NI         0
+#define ID_AA64MMFR1_TIDCP1_IMP                1
+
 /* id_aa64mmfr2 */
 #define ID_AA64MMFR2_E0PD_SHIFT                60
 #define ID_AA64MMFR2_EVT_SHIFT         56
 #define MVFR2_FPMISC_SHIFT             4
 #define MVFR2_SIMDMISC_SHIFT           0
 
-#define DCZID_DZP_SHIFT                        4
-#define DCZID_BS_SHIFT                 0
-
 #define CPACR_EL1_FPEN_EL1EN   (BIT(20)) /* enable EL1 access */
 #define CPACR_EL1_FPEN_EL0EN   (BIT(21)) /* enable EL0 access, if EL1EN set */
 
 #define SYS_RGSR_EL1_SEED_MASK 0xffffUL
 
 /* GMID_EL1 field definitions */
-#define SYS_GMID_EL1_BS_SHIFT  0
-#define SYS_GMID_EL1_BS_SIZE   4
+#define GMID_EL1_BS_SHIFT      0
+#define GMID_EL1_BS_SIZE       4
 
 /* TFSR{,E0}_EL1 bit definitions */
 #define SYS_TFSR_EL1_TF0_SHIFT 0
 
 #endif
 
+#define SYS_FIELD_GET(reg, field, val)         \
+                FIELD_GET(reg##_##field##_MASK, val)
+
 #define SYS_FIELD_PREP(reg, field, val)                \
                 FIELD_PREP(reg##_##field##_MASK, val)
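SYS_FIELD_GET() is the read-side counterpart of the existing SYS_FIELD_PREP(): both paste the register and field names into a single <reg>_<field>_MASK token and defer to the generic FIELD_GET()/FIELD_PREP() helpers. A short sketch with a hypothetical register FOO and field BAR (FOO_BAR_MASK is invented here, not a real definition):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define FOO_BAR_MASK	GENMASK(7, 4)	/* hypothetical field, bits [7:4] */

static u64 foo_read_bar(u64 foo)
{
	/* expands to FIELD_GET(FOO_BAR_MASK, foo): mask and shift down */
	return SYS_FIELD_GET(FOO, BAR, foo);
}

static u64 foo_make_bar(u64 bar)
{
	/* expands to FIELD_PREP(FOO_BAR_MASK, bar): shift into position */
	return SYS_FIELD_PREP(FOO, BAR, bar);
}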
 
index 63f9c82..2fc9f08 100644
@@ -232,34 +232,34 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
  * The "__xxx_error" versions set the third argument to -EFAULT if an error
  * occurs, and leave it unchanged on success.
  */
-#define __get_mem_asm(load, reg, x, addr, err)                         \
+#define __get_mem_asm(load, reg, x, addr, err, type)                   \
        asm volatile(                                                   \
        "1:     " load "        " reg "1, [%2]\n"                       \
        "2:\n"                                                          \
-       _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1)                 \
+       _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1)          \
        : "+r" (err), "=&r" (x)                                         \
        : "r" (addr))
 
-#define __raw_get_mem(ldr, x, ptr, err)                                        \
-do {                                                                   \
-       unsigned long __gu_val;                                         \
-       switch (sizeof(*(ptr))) {                                       \
-       case 1:                                                         \
-               __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err));   \
-               break;                                                  \
-       case 2:                                                         \
-               __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err));   \
-               break;                                                  \
-       case 4:                                                         \
-               __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err));       \
-               break;                                                  \
-       case 8:                                                         \
-               __get_mem_asm(ldr, "%x",  __gu_val, (ptr), (err));      \
-               break;                                                  \
-       default:                                                        \
-               BUILD_BUG();                                            \
-       }                                                               \
-       (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
+#define __raw_get_mem(ldr, x, ptr, err, type)                                  \
+do {                                                                           \
+       unsigned long __gu_val;                                                 \
+       switch (sizeof(*(ptr))) {                                               \
+       case 1:                                                                 \
+               __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type);     \
+               break;                                                          \
+       case 2:                                                                 \
+               __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type);     \
+               break;                                                          \
+       case 4:                                                                 \
+               __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type);         \
+               break;                                                          \
+       case 8:                                                                 \
+               __get_mem_asm(ldr, "%x",  __gu_val, (ptr), (err), type);        \
+               break;                                                          \
+       default:                                                                \
+               BUILD_BUG();                                                    \
+       }                                                                       \
+       (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
 } while (0)
 
 /*
@@ -274,7 +274,7 @@ do {                                                                        \
        __chk_user_ptr(ptr);                                            \
                                                                        \
        uaccess_ttbr0_enable();                                         \
-       __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err);               \
+       __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U);            \
        uaccess_ttbr0_disable();                                        \
                                                                        \
        (x) = __rgu_val;                                                \
@@ -314,40 +314,40 @@ do {                                                                      \
                                                                        \
        __uaccess_enable_tco_async();                                   \
        __raw_get_mem("ldr", *((type *)(__gkn_dst)),                    \
-                     (__force type *)(__gkn_src), __gkn_err);          \
+                     (__force type *)(__gkn_src), __gkn_err, K);       \
        __uaccess_disable_tco_async();                                  \
                                                                        \
        if (unlikely(__gkn_err))                                        \
                goto err_label;                                         \
 } while (0)
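The new type argument is pasted into the _ASM_EXTABLE_##type##ACCESS_* macro names, so each accessor emits either a user-access (U) or kernel-access (K) exception-table entry; a fault taken in get_kernel_nofault() is then no longer classified as a user access. A rough preprocessor-only illustration of the dispatch (demo names and string bodies are stand-ins, not the real extable entries):

#define _DEMO_EXTABLE_UACCESS(insn, fixup)	"user-access extable entry"
#define _DEMO_EXTABLE_KACCESS(insn, fixup)	"kernel-access extable entry"
#define _DEMO_EXTABLE(type, insn, fixup) \
	_DEMO_EXTABLE_##type##ACCESS(insn, fixup)

/* _DEMO_EXTABLE(U, 1b, 2b) expands to _DEMO_EXTABLE_UACCESS(1b, 2b) */
/* _DEMO_EXTABLE(K, 1b, 2b) expands to _DEMO_EXTABLE_KACCESS(1b, 2b) */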
 
-#define __put_mem_asm(store, reg, x, addr, err)                                \
+#define __put_mem_asm(store, reg, x, addr, err, type)                  \
        asm volatile(                                                   \
        "1:     " store "       " reg "1, [%2]\n"                       \
        "2:\n"                                                          \
-       _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)                           \
+       _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0)                    \
        : "+r" (err)                                                    \
        : "r" (x), "r" (addr))
 
-#define __raw_put_mem(str, x, ptr, err)                                        \
-do {                                                                   \
-       __typeof__(*(ptr)) __pu_val = (x);                              \
-       switch (sizeof(*(ptr))) {                                       \
-       case 1:                                                         \
-               __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err));   \
-               break;                                                  \
-       case 2:                                                         \
-               __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err));   \
-               break;                                                  \
-       case 4:                                                         \
-               __put_mem_asm(str, "%w", __pu_val, (ptr), (err));       \
-               break;                                                  \
-       case 8:                                                         \
-               __put_mem_asm(str, "%x", __pu_val, (ptr), (err));       \
-               break;                                                  \
-       default:                                                        \
-               BUILD_BUG();                                            \
-       }                                                               \
+#define __raw_put_mem(str, x, ptr, err, type)                                  \
+do {                                                                           \
+       __typeof__(*(ptr)) __pu_val = (x);                                      \
+       switch (sizeof(*(ptr))) {                                               \
+       case 1:                                                                 \
+               __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type);     \
+               break;                                                          \
+       case 2:                                                                 \
+               __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type);     \
+               break;                                                          \
+       case 4:                                                                 \
+               __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type);         \
+               break;                                                          \
+       case 8:                                                                 \
+               __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type);         \
+               break;                                                          \
+       default:                                                                \
+               BUILD_BUG();                                                    \
+       }                                                                       \
 } while (0)
 
 /*
@@ -362,7 +362,7 @@ do {                                                                        \
        __chk_user_ptr(__rpu_ptr);                                      \
                                                                        \
        uaccess_ttbr0_enable();                                         \
-       __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err);               \
+       __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U);            \
        uaccess_ttbr0_disable();                                        \
 } while (0)
 
@@ -400,7 +400,7 @@ do {                                                                        \
                                                                        \
        __uaccess_enable_tco_async();                                   \
        __raw_put_mem("str", *((type *)(__pkn_src)),                    \
-                     (__force type *)(__pkn_dst), __pkn_err);          \
+                     (__force type *)(__pkn_dst), __pkn_err, K);       \
        __uaccess_disable_tco_async();                                  \
                                                                        \
        if (unlikely(__pkn_err))                                        \
index 0e80db4..4eb601e 100644
@@ -36,9 +36,9 @@
 #define HVC_RESET_VECTORS 2
 
 /*
- * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
  */
-#define HVC_VHE_RESTART        3
+#define HVC_FINALISE_EL2       3
 
 /* Max number of HYP stub hypercalls */
 #define HVC_STUB_HCALL_NR 4
 #define BOOT_CPU_MODE_EL1      (0xe11)
 #define BOOT_CPU_MODE_EL2      (0xe12)
 
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H      BIT_ULL(32)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
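Since the flag lives above bit 31, it can travel alongside the 32-bit boot-mode value and be stripped before the mode itself is examined. Hypothetical helpers sketching that split (these are not part of the patch):

static inline bool boot_status_has_e2h(u64 boot_status)
{
	return boot_status & BOOT_CPU_FLAG_E2H;
}

static inline u32 boot_status_mode(u64 boot_status)
{
	/* low 32 bits hold e.g. BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 */
	return (u32)boot_status;
}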
index 4bb2cc8..1ad2568 100644
@@ -19,6 +19,9 @@
 
 /*
  * HWCAP flags - for AT_HWCAP
+ *
+ * Bits 62 and 63 are reserved for use by libc.
+ * Bits 32-61 are unallocated for potential use by libc.
  */
 #define HWCAP_FP               (1 << 0)
 #define HWCAP_ASIMD            (1 << 1)
@@ -88,5 +91,6 @@
 #define HWCAP2_SME_F32F32      (1 << 29)
 #define HWCAP2_SME_FA64                (1 << 30)
 #define HWCAP2_WFXT            (1UL << 31)
+#define HWCAP2_EBF16           (1UL << 32)
 
 #endif /* _UAPI__ASM_HWCAP_H */
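Note that HWCAP2_EBF16 is the first hwcap past bit 31, hence the 1UL shift. Userspace can test it through the auxiliary vector; a minimal sketch (the fallback #define mirrors the UAPI value above, for builds against older headers):

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16	(1UL << 32)
#endif

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("FEAT_EBF16: %s\n",
	       (hwcap2 & HWCAP2_EBF16) ? "supported" : "not supported");
	return 0;
}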
index fa7981d..1add7b0 100644
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_syscall.o         = -fstack-protector -fstack-protector-strong
 CFLAGS_syscall.o       += -fno-stack-protector
 
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
 # It's not safe to invoke KCOV when portions of the kernel environment aren't
 # available or are out-of-sync with HW state. Since `noinstr` doesn't always
 # inhibit KCOV instrumentation, disable it for the entire compilation unit.
@@ -59,7 +64,7 @@ obj-$(CONFIG_ACPI)                    += acpi.o
 obj-$(CONFIG_ACPI_NUMA)                        += acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)      += acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)                 += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)           += kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE)           += kaslr.o pi/
 obj-$(CONFIG_HIBERNATION)              += hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)                 += elfcore.o
 obj-$(CONFIG_KEXEC_CORE)               += machine_kexec.o relocate_kernel.o    \
index e4dea8d..a5a256e 100644
@@ -351,7 +351,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
                                prot = __acpi_get_writethrough_mem_attribute();
                }
        }
-       return __ioremap(phys, size, prot);
+       return ioremap_prot(phys, size, pgprot_val(prot));
 }
 
 /*
index fdfecf0..e51535a 100644
@@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
        pxm = pa->proximity_domain;
        node = acpi_map_pxm_to_node(pxm);
 
-       if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+       if (node == NUMA_NO_NODE) {
                pr_err("SRAT: Too many proximity domains %d\n", pxm);
                bad_srat();
                return;
index 7bbf510..9bcaa5e 100644
@@ -121,7 +121,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
 
        ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
        d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
-                                                          CTR_DMINLINE_SHIFT);
+                                                          CTR_EL0_DminLine_SHIFT);
        cur = start & ~(d_size - 1);
        do {
                /*
index 6875a16..fb0e7c7 100644
@@ -59,6 +59,7 @@ struct insn_emulation {
 static LIST_HEAD(insn_emulation);
 static int nr_insn_emulated __initdata;
 static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+static DEFINE_MUTEX(insn_emulation_mutex);
 
 static void register_emulation_hooks(struct insn_emulation_ops *ops)
 {
@@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
                                  loff_t *ppos)
 {
        int ret = 0;
-       struct insn_emulation *insn = (struct insn_emulation *) table->data;
+       struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
        enum insn_emulation_mode prev_mode = insn->current_mode;
 
-       table->data = &insn->current_mode;
+       mutex_lock(&insn_emulation_mutex);
        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write || prev_mode == insn->current_mode)
@@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
                update_insn_emulation_mode(insn, INSN_UNDEF);
        }
 ret:
-       table->data = insn;
+       mutex_unlock(&insn_emulation_mutex);
        return ret;
 }
 
@@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void)
                sysctl->maxlen = sizeof(int);
 
                sysctl->procname = insn->ops->name;
-               sysctl->data = insn;
+               sysctl->data = &insn->current_mode;
                sysctl->extra1 = &insn->min;
                sysctl->extra2 = &insn->max;
                sysctl->proc_handler = emulation_proc_handler;
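The old handler swapped table->data back and forth around proc_dointvec_minmax(), which is racy when two writers run concurrently; pointing ->data permanently at ->current_mode and recovering the enclosing struct with container_of() (serialised by the new mutex) closes that window. A standalone model of the container_of() recovery, with a simplified demo macro and struct:

#include <stddef.h>
#include <stdio.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_insn_emulation {
	int current_mode;
	int min, max;
};

int main(void)
{
	struct demo_insn_emulation insn = { .current_mode = 2 };
	int *data = &insn.current_mode;		/* what sysctl->data now points at */
	struct demo_insn_emulation *owner =
		demo_container_of(data, struct demo_insn_emulation, current_mode);

	printf("mode=%d\n", owner->current_mode);	/* prints mode=2 */
	return 0;
}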
index c05cc3b..7e6289e 100644
@@ -187,7 +187,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
                                int scope)
 {
        u32 midr = read_cpuid_id();
-       bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+       bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
        const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
 
        WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -212,6 +212,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
                ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
        },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_2441009
+       {
+               /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */
+               ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+       },
+#endif
        {},
 };
 #endif
@@ -395,6 +401,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
 };
 #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
 
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+static struct midr_range broken_aarch32_aes[] = {
+       MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf),
+       MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+       {},
+};
+#endif /* CONFIG_ARM64_ERRATUM_1742098 */
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
        {
@@ -480,7 +494,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
        {
-               .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
+               .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009",
                .capability = ARM64_WORKAROUND_REPEAT_TLBI,
                .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
                .matches = cpucap_multi_entry_cap_matches,
@@ -658,6 +672,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
        },
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+       {
+               .desc = "ARM erratum 1742098",
+               .capability = ARM64_WORKAROUND_1742098,
+               CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+       },
+#endif
        {
        }
 };
index 8d88433..ad64cab 100644
@@ -79,6 +79,7 @@
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/fpsimd.h>
+#include <asm/hwcap.h>
 #include <asm/insn.h>
 #include <asm/kvm_host.h>
 #include <asm/mmu_context.h>
@@ -91,7 +92,7 @@
 #include <asm/virt.h>
 
 /* Kernel representation of AT_HWCAP and AT_HWCAP2 */
-static unsigned long elf_hwcap __read_mostly;
+static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_ELF_HWCAP_DEFAULT       \
@@ -209,35 +210,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -276,41 +277,41 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
+                      FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
        ARM64_FTR_END,
 };
 
@@ -361,6 +362,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
@@ -396,18 +398,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1),
        /*
         * Linux can handle differing I-cache policies. Userspace JITs will
         * make use of *minLine.
         * If we have differing I-cache policies, report it as the weakest - VIPT.
         */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT),   /* L1Ip */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT),        /* L1Ip */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -453,13 +455,13 @@ static const struct arm64_ftr_bits ftr_mvfr2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_dczid[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1),
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_gmid[] = {
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0),
        ARM64_FTR_END,
 };
 
@@ -561,7 +563,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
 
 static const struct arm64_ftr_bits ftr_id_dfr0[] = {
        /* [31:28] TraceFilt */
-       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf),
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0),
@@ -631,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = {
        __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
 
@@ -668,11 +673,14 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
 
        /* Op1 = 0, CRn = 0, CRm = 4 */
-       ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+       ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
+                              &id_aa64pfr0_override),
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
                               &id_aa64pfr1_override),
-       ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
-       ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
+       ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
+                              &id_aa64zfr0_override),
+       ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
+                              &id_aa64smfr0_override),
 
        /* Op1 = 0, CRn = 0, CRm = 5 */
        ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -993,15 +1001,24 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
        if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
                init_32bit_cpu_features(&info->aarch32);
 
-       if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+       if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+           id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+               info->reg_zcr = read_zcr_features();
                init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
                vec_init_vq_map(ARM64_VEC_SVE);
        }
 
-       if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+       if (IS_ENABLED(CONFIG_ARM64_SME) &&
+           id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+               info->reg_smcr = read_smcr_features();
+               /*
+                * We mask out SMPS since, even if the hardware
+                * supports priorities, the kernel does not at present,
+                * and we block access to them.
+                */
+               info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
                init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
-               if (IS_ENABLED(CONFIG_ARM64_SME))
-                       vec_init_vq_map(ARM64_VEC_SME);
+               vec_init_vq_map(ARM64_VEC_SME);
        }
 
        if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
@@ -1233,23 +1250,31 @@ void update_cpu_features(int cpu,
        taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
                                      info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
 
-       if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
+       if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+           id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+               info->reg_zcr = read_zcr_features();
                taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
                                        info->reg_zcr, boot->reg_zcr);
 
-               /* Probe vector lengths, unless we already gave up on SVE */
-               if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
-                   !system_capabilities_finalized())
+               /* Probe vector lengths */
+               if (!system_capabilities_finalized())
                        vec_update_vq_map(ARM64_VEC_SVE);
        }
 
-       if (id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+       if (IS_ENABLED(CONFIG_ARM64_SME) &&
+           id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+               info->reg_smcr = read_smcr_features();
+               /*
+                * We mask out SMPS since, even if the hardware
+                * supports priorities, the kernel does not at present,
+                * and we block access to them.
+                */
+               info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
                taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
                                        info->reg_smcr, boot->reg_smcr);
 
-               /* Probe vector lengths, unless we already gave up on SME */
-               if (id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)) &&
-                   !system_capabilities_finalized())
+               /* Probe vector lengths */
+               if (!system_capabilities_finalized())
                        vec_update_vq_map(ARM64_VEC_SME);
        }
 
@@ -1480,7 +1505,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
        else
                ctr = read_cpuid_effective_cachetype();
 
-       return ctr & BIT(CTR_IDC_SHIFT);
+       return ctr & BIT(CTR_EL0_IDC_SHIFT);
 }
 
 static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
@@ -1491,7 +1516,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu
         * to the CTR_EL0 on this CPU and emulate it with the real/safe
         * value.
         */
-       if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT)))
+       if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT)))
                sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
 }
 
@@ -1505,7 +1530,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
        else
                ctr = read_cpuid_cachetype();
 
-       return ctr & BIT(CTR_DIC_SHIFT);
+       return ctr & BIT(CTR_EL0_DIC_SHIFT);
 }
 
 static bool __maybe_unused
@@ -1645,14 +1670,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 }
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define KPTI_NG_TEMP_VA                (-(1UL << PMD_SHIFT))
+
+extern
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+                            phys_addr_t size, pgprot_t prot,
+                            phys_addr_t (*pgtable_alloc)(int), int flags);
+
+static phys_addr_t kpti_ng_temp_alloc;
+
+static phys_addr_t kpti_ng_pgd_alloc(int shift)
+{
+       kpti_ng_temp_alloc -= PAGE_SIZE;
+       return kpti_ng_temp_alloc;
+}
+
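kpti_ng_pgd_alloc() is a bump-down allocator: the cursor is initialised to the PGD page (the highest of the contiguous pages allocated in kpti_install_ng_mappings() below) and each call returns the page beneath it, which is the order in which create_kpti_ng_temp_pgd() asks for fresh tables as it descends. A toy userspace model, assuming 4 page-table levels:

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096UL

static unsigned long demo_cursor;

static unsigned long demo_pgd_alloc(void)
{
	demo_cursor -= DEMO_PAGE_SIZE;		/* hand out the page below */
	return demo_cursor;
}

int main(void)
{
	unsigned long base = 0x80000000UL;	/* pretend __get_free_pages() */
	int levels = 4;				/* layout: [PTE][PMD][PUD][PGD] */

	demo_cursor = base + (levels - 1) * DEMO_PAGE_SIZE;
	printf("PGD %#lx\n", demo_cursor);		/* base + 3 pages */
	printf("PUD %#lx\n", demo_pgd_alloc());	/* base + 2 pages */
	printf("PMD %#lx\n", demo_pgd_alloc());	/* base + 1 page  */
	printf("PTE %#lx\n", demo_pgd_alloc());	/* base + 0       */
	return 0;
}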
 static void __nocfi
 kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 {
-       typedef void (kpti_remap_fn)(int, int, phys_addr_t);
+       typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
        extern kpti_remap_fn idmap_kpti_install_ng_mappings;
        kpti_remap_fn *remap_fn;
 
        int cpu = smp_processor_id();
+       int levels = CONFIG_PGTABLE_LEVELS;
+       int order = order_base_2(levels);
+       u64 kpti_ng_temp_pgd_pa = 0;
+       pgd_t *kpti_ng_temp_pgd;
+       u64 alloc = 0;
 
        if (__this_cpu_read(this_cpu_vector) == vectors) {
                const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
@@ -1670,12 +1715,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 
        remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings));
 
+       if (!cpu) {
+               alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
+               kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
+               kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
+
+               //
+               // Create a minimal page table hierarchy that permits us to map
+               // the swapper page tables temporarily as we traverse them.
+               //
+               // The physical pages are laid out as follows:
+               //
+               // +--------+-/-------+-/------ +-\\--------+
+               // :  PTE[] : | PMD[] : | PUD[] : || PGD[]  :
+               // +--------+-\-------+-\------ +-//--------+
+               //      ^
+               // The first page is mapped into this hierarchy at a PMD_SHIFT
+               // aligned virtual address, so that we can manipulate the PTE
+               // level entries while the mapping is active. The first entry
+               // covers the PTE[] page itself; the remaining entries are free
+               // to be used as an ad-hoc fixmap.
+               //
+               create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
+                                       KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
+                                       kpti_ng_pgd_alloc, 0);
+       }
+
        cpu_install_idmap();
-       remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
+       remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
        cpu_uninstall_idmap();
 
-       if (!cpu)
+       if (!cpu) {
+               free_pages(alloc, order);
                arm64_use_ng_mappings = true;
+       }
 }
 #else
 static void
@@ -1971,6 +2044,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_MTE */
 
+static void elf_hwcap_fixup(void)
+{
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+       if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
+               compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
+#endif /* CONFIG_ARM64_ERRATUM_1742098 */
+}
+
 #ifdef CONFIG_KVM
 static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
 {
@@ -1978,6 +2059,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in
 }
 #endif /* CONFIG_KVM */
 
+static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused)
+{
+       sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP);
+}
+
 /* Internal helper functions to match cpu capability type */
 static bool
 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2132,7 +2218,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
-               .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT,
                .field_width = 4,
                .min_field_value = 1,
        },
@@ -2143,7 +2229,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT,
                .field_width = 4,
                .min_field_value = 2,
        },
@@ -2303,7 +2389,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cpuid_feature,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
-               .field_pos = ID_AA64ISAR1_SB_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT,
                .field_width = 4,
                .sign = FTR_UNSIGNED,
                .min_field_value = 1,
@@ -2315,9 +2401,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_APA_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
+               .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth,
                .matches = has_address_auth_cpucap,
        },
        {
@@ -2326,9 +2412,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR2_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR2_APA3_SHIFT,
+               .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED,
+               .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth,
                .matches = has_address_auth_cpucap,
        },
        {
@@ -2337,9 +2423,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_API_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_API_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR1_API_IMP_DEF,
+               .min_field_value = ID_AA64ISAR1_EL1_API_PAuth,
                .matches = has_address_auth_cpucap,
        },
        {
@@ -2353,9 +2439,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_GPA_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
+               .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP,
                .matches = has_cpuid_feature,
        },
        {
@@ -2364,9 +2450,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR2_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR2_GPA3_SHIFT,
+               .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED,
+               .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP,
                .matches = has_cpuid_feature,
        },
        {
@@ -2375,9 +2461,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_GPI_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT,
                .field_width = 4,
-               .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
+               .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP,
                .matches = has_cpuid_feature,
        },
        {
@@ -2478,7 +2564,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR1_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+               .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT,
                .field_width = 4,
                .matches = has_cpuid_feature,
                .min_field_value = 1,
@@ -2503,9 +2589,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .capability = ARM64_SME_FA64,
                .sys_reg = SYS_ID_AA64SMFR0_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64SMFR0_FA64_SHIFT,
+               .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT,
                .field_width = 1,
-               .min_field_value = ID_AA64SMFR0_FA64,
+               .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP,
                .matches = has_cpuid_feature,
                .cpu_enable = fa64_kernel_enable,
        },
@@ -2516,10 +2602,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .sys_reg = SYS_ID_AA64ISAR2_EL1,
                .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64ISAR2_WFXT_SHIFT,
+               .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT,
                .field_width = 4,
                .matches = has_cpuid_feature,
-               .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED,
+               .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP,
+       },
+       {
+               .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality",
+               .capability = ARM64_HAS_TIDCP1,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .sys_reg = SYS_ID_AA64MMFR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT,
+               .field_width = 4,
+               .min_field_value = ID_AA64MMFR1_TIDCP1_IMP,
+               .matches = has_cpuid_feature,
+               .cpu_enable = cpu_trap_el0_impdef,
        },
        {},
 };
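Every entry in the table above is evaluated the same way: read the named ID register, extract the field at field_pos with the given field_width, and compare it against min_field_value. A simplified model of the unsigned-field case (hypothetical helper; the real code also handles signed fields and boot-time register overrides):

#include <stdbool.h>
#include <stdint.h>

static bool demo_has_feature(uint64_t reg, unsigned int shift,
			     unsigned int width, uint64_t min)
{
	uint64_t field = (reg >> shift) & ((1ULL << width) - 1);

	return field >= min;	/* FTR_UNSIGNED comparison */
}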
@@ -2560,33 +2658,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 #ifdef CONFIG_ARM64_PTR_AUTH
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT,
                                  4, FTR_UNSIGNED,
-                                 ID_AA64ISAR1_APA_ARCHITECTED)
+                                 ID_AA64ISAR1_EL1_APA_PAuth)
        },
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT,
-                                 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED)
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT,
+                                 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth)
        },
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
-                                 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT,
+                                 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth)
        },
        {},
 };
 
 static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
-                                 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT,
+                                 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP)
        },
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT,
-                                 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED)
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT,
+                                 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP)
        },
        {
-               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
-                                 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+               HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT,
+                                 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP)
        },
        {},
 };
@@ -2614,30 +2712,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
-       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
+       HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
        HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
        HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
-       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+       HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
 #endif
        HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_BTI
@@ -2653,17 +2752,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 #endif /* CONFIG_ARM64_MTE */
        HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
        HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
-       HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
-       HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+       HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+       HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
 #ifdef CONFIG_ARM64_SME
        HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
-       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+       HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
 #endif /* CONFIG_ARM64_SME */
        {},
 };
@@ -3098,14 +3197,12 @@ static bool __maybe_unused __system_matches_cap(unsigned int n)
 
 void cpu_set_feature(unsigned int num)
 {
-       WARN_ON(num >= MAX_CPU_FEATURES);
-       elf_hwcap |= BIT(num);
+       set_bit(num, elf_hwcap);
 }
 
 bool cpu_have_feature(unsigned int num)
 {
-       WARN_ON(num >= MAX_CPU_FEATURES);
-       return elf_hwcap & BIT(num);
+       return test_bit(num, elf_hwcap);
 }
 EXPORT_SYMBOL_GPL(cpu_have_feature);
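
With elf_hwcap stored as a bitmap, the 64-feature ceiling of the old unsigned long goes away and the accessors become one-liners over set_bit()/test_bit(); the first two 64-bit words are what cpu_get_elf_hwcap()/cpu_get_elf_hwcap2() hand to userspace as AT_HWCAP/AT_HWCAP2. A self-contained model of the new layout (illustrative only; the real capacity and helpers come from the kernel headers):

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_CPU_FEATURES 128    /* assumed capacity; >64 is the point */

    /* word 0 -> AT_HWCAP, word 1 -> AT_HWCAP2 */
    static uint64_t elf_hwcap[MAX_CPU_FEATURES / 64];

    static void cpu_set_feature_model(unsigned int num)
    {
            elf_hwcap[num / 64] |= 1ULL << (num % 64);         /* set_bit() */
    }

    static bool cpu_have_feature_model(unsigned int num)
    {
            return elf_hwcap[num / 64] & (1ULL << (num % 64)); /* test_bit() */
    }
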
 
@@ -3116,12 +3213,12 @@ unsigned long cpu_get_elf_hwcap(void)
         * note that for userspace compatibility we guarantee that bits 62
         * and 63 will always be returned as 0.
         */
-       return lower_32_bits(elf_hwcap);
+       return elf_hwcap[0];
 }
 
 unsigned long cpu_get_elf_hwcap2(void)
 {
-       return upper_32_bits(elf_hwcap);
+       return elf_hwcap[1];
 }
 
 static void __init setup_system_capabilities(void)
@@ -3143,8 +3240,10 @@ void __init setup_cpu_features(void)
        setup_system_capabilities();
        setup_elf_hwcaps(arm64_elf_hwcaps);
 
-       if (system_supports_32bit_el0())
+       if (system_supports_32bit_el0()) {
                setup_elf_hwcaps(compat_elf_hwcaps);
+               elf_hwcap_fixup();
+       }
 
        if (system_uses_ttbr0_pan())
                pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
@@ -3197,6 +3296,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
                                                         cpu_active_mask);
        get_cpu_device(lucky_winner)->offline_disabled = true;
        setup_elf_hwcaps(compat_elf_hwcaps);
+       elf_hwcap_fixup();
        pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
                cpu, lucky_winner);
        return 0;
@@ -3218,7 +3318,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
 
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
-       cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+       cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
 
 /*
arch/arm64/kernel/cpuidle.c
index 3006f43..4150e30 100644
 #include <linux/of_device.h>
 #include <linux/psci.h>
 
-#include <asm/cpuidle.h>
-#include <asm/cpu_ops.h>
-
-int arm_cpuidle_init(unsigned int cpu)
-{
-       const struct cpu_operations *ops = get_cpu_ops(cpu);
-       int ret = -EOPNOTSUPP;
-
-       if (ops && ops->cpu_suspend && ops->cpu_init_idle)
-               ret = ops->cpu_init_idle(cpu);
-
-       return ret;
-}
-
-/**
- * arm_cpuidle_suspend() - function to enter a low-power idle state
- * @index: argument to pass to CPU suspend operations
- *
- * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
- * operations back-end error code otherwise.
- */
-int arm_cpuidle_suspend(int index)
-{
-       int cpu = smp_processor_id();
-       const struct cpu_operations *ops = get_cpu_ops(cpu);
-
-       return ops->cpu_suspend(index);
-}
-
 #ifdef CONFIG_ACPI
 
 #include <acpi/processor.h>
arch/arm64/kernel/cpuinfo.c
index 8eff0a3..d7702f3 100644
 DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 static struct cpuinfo_arm64 boot_cpu_data;
 
-static const char *icache_policy_str[] = {
-       [ICACHE_POLICY_VPIPT]           = "VPIPT",
-       [ICACHE_POLICY_RESERVED]        = "RESERVED/UNKNOWN",
-       [ICACHE_POLICY_VIPT]            = "VIPT",
-       [ICACHE_POLICY_PIPT]            = "PIPT",
-};
+static inline const char *icache_policy_str(int l1ip)
+{
+       switch (l1ip) {
+       case CTR_EL0_L1Ip_VPIPT:
+               return "VPIPT";
+       case CTR_EL0_L1Ip_VIPT:
+               return "VIPT";
+       case CTR_EL0_L1Ip_PIPT:
+               return "PIPT";
+       default:
+               return "RESERVED/UNKNOWN";
+       }
+}
 
 unsigned long __icache_flags;
 
@@ -107,6 +114,7 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_SME_F32F32]       = "smef32f32",
        [KERNEL_HWCAP_SME_FA64]         = "smefa64",
        [KERNEL_HWCAP_WFXT]             = "wfxt",
+       [KERNEL_HWCAP_EBF16]            = "ebf16",
 };
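
The new "ebf16" string surfaces KERNEL_HWCAP_EBF16 in /proc/cpuinfo; userspace can probe the same bit through the auxiliary vector. A hedged example (the HWCAP2_EBF16 value is taken from this series' uapi header; the guard keeps the snippet buildable against older headers):

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_EBF16
    #define HWCAP2_EBF16    (1UL << 32)     /* assumed, per this series */
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("FEAT_EBF16: %s\n",
                   (hwcap2 & HWCAP2_EBF16) ? "present" : "absent");
            return 0;
    }
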
 
 #ifdef CONFIG_COMPAT
@@ -267,6 +275,7 @@ static struct kobj_type cpuregs_kobj_type = {
 
 CPUREGS_ATTR_RO(midr_el1, midr);
 CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(smidr_el1, smidr);
 
 static struct attribute *cpuregs_id_attrs[] = {
        &cpuregs_attr_midr_el1.attr,
@@ -279,6 +288,16 @@ static const struct attribute_group cpuregs_attr_group = {
        .name = "identification"
 };
 
+static struct attribute *sme_cpuregs_id_attrs[] = {
+       &cpuregs_attr_smidr_el1.attr,
+       NULL
+};
+
+static const struct attribute_group sme_cpuregs_attr_group = {
+       .attrs = sme_cpuregs_id_attrs,
+       .name = "identification"
+};
+
 static int cpuid_cpu_online(unsigned int cpu)
 {
        int rc;
@@ -296,6 +315,8 @@ static int cpuid_cpu_online(unsigned int cpu)
        rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group);
        if (rc)
                kobject_del(&info->kobj);
+       if (system_supports_sme())
+               rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group);
 out:
        return rc;
 }
@@ -342,19 +363,19 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
        u32 l1ip = CTR_L1IP(info->reg_ctr);
 
        switch (l1ip) {
-       case ICACHE_POLICY_PIPT:
+       case CTR_EL0_L1Ip_PIPT:
                break;
-       case ICACHE_POLICY_VPIPT:
+       case CTR_EL0_L1Ip_VPIPT:
                set_bit(ICACHEF_VPIPT, &__icache_flags);
                break;
-       case ICACHE_POLICY_RESERVED:
-       case ICACHE_POLICY_VIPT:
+       case CTR_EL0_L1Ip_VIPT:
+       default:
                /* Assume aliasing */
                set_bit(ICACHEF_ALIASING, &__icache_flags);
                break;
        }
 
-       pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+       pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu);
 }
 
 static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
@@ -418,14 +439,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
                __cpuinfo_store_cpu_32bit(&info->aarch32);
 
-       if (IS_ENABLED(CONFIG_ARM64_SVE) &&
-           id_aa64pfr0_sve(info->reg_id_aa64pfr0))
-               info->reg_zcr = read_zcr_features();
-
-       if (IS_ENABLED(CONFIG_ARM64_SME) &&
-           id_aa64pfr1_sme(info->reg_id_aa64pfr1))
-               info->reg_smcr = read_smcr_features();
-
        cpuinfo_detect_icache_policy(info);
 }
 
arch/arm64/kernel/entry.S
index 5b82b92..254fe31 100644
@@ -636,18 +636,28 @@ alternative_else_nop_endif
         */
        .endm
 
-       .macro tramp_data_page  dst
-       adr_l   \dst, .entry.tramp.text
-       sub     \dst, \dst, PAGE_SIZE
-       .endm
-
-       .macro tramp_data_read_var      dst, var
-#ifdef CONFIG_RANDOMIZE_BASE
-       tramp_data_page         \dst
-       add     \dst, \dst, #:lo12:__entry_tramp_data_\var
-       ldr     \dst, [\dst]
+       .macro          tramp_data_read_var     dst, var
+#ifdef CONFIG_RELOCATABLE
+       ldr             \dst, .L__tramp_data_\var
+       .ifndef         .L__tramp_data_\var
+       .pushsection    ".entry.tramp.rodata", "a", %progbits
+       .align          3
+.L__tramp_data_\var:
+       .quad           \var
+       .popsection
+       .endif
 #else
-       ldr     \dst, =\var
+       /*
+        * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a
+        * compile time constant (and hence not secret and not worth hiding).
+        *
+        * As statically allocated kernel code and data always live in the top
+        * 47 bits of the address space we can sign-extend bit 47 and avoid an
+        * instruction to load the upper 16 bits (which must be 0xFFFF).
+        */
+       movz            \dst, :abs_g2_s:\var
+       movk            \dst, :abs_g1_nc:\var
+       movk            \dst, :abs_g0_nc:\var
 #endif
        .endm
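
The #else branch leans on the assembler picking MOVN for the :abs_g2_s: relocation when bit 47 is set, so bits 63:48 come out as 0xffff without a fourth instruction. A C model of what the relocated three-instruction sequence computes (a sketch; g2/g1/g0 stand for the 16-bit chunks of the address):

    #include <stdint.h>

    static uint64_t materialise_va(uint16_t g2, uint16_t g1, uint16_t g0)
    {
            uint64_t va;

            va = ~((uint64_t)(uint16_t)~g2 << 32);             /* movn: 63:48 = 0xffff */
            va = (va & ~0xffff0000ULL) | ((uint64_t)g1 << 16); /* movk, lsl #16 */
            va = (va & ~0xffffULL) | g0;                       /* movk */
            return va;
    }
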
 
@@ -695,7 +705,7 @@ alternative_else_nop_endif
        msr     vbar_el1, x30
        isb
        .else
-       ldr     x30, =vectors
+       adr_l   x30, vectors
        .endif // \kpti == 1
 
        .if     \bhb == BHB_MITIGATION_FW
@@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native)
 SYM_CODE_START(tramp_exit_compat)
        tramp_exit      32
 SYM_CODE_END(tramp_exit_compat)
-
-       .ltorg
        .popsection                             // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
-       .pushsection ".rodata", "a"
-       .align PAGE_SHIFT
-SYM_DATA_START(__entry_tramp_data_start)
-__entry_tramp_data_vectors:
-       .quad   vectors
-#ifdef CONFIG_ARM_SDE_INTERFACE
-__entry_tramp_data___sdei_asm_handler:
-       .quad   __sdei_asm_handler
-#endif /* CONFIG_ARM_SDE_INTERFACE */
-__entry_tramp_data_this_cpu_vector:
-       .quad   this_cpu_vector
-SYM_DATA_END(__entry_tramp_data_start)
-       .popsection                             // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 /*
@@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack)
  * This clobbers x4, __sdei_handler() will restore this from firmware's
  * copy.
  */
-.ltorg
 .pushsection ".entry.tramp.text", "ax"
 SYM_CODE_START(__sdei_asm_entry_trampoline)
        mrs     x4, ttbr1_el1
@@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline)
 1:     sdei_handler_exit exit_mode=x2
 SYM_CODE_END(__sdei_asm_exit_trampoline)
 NOKPROBE(__sdei_asm_exit_trampoline)
-       .ltorg
 .popsection            // .entry.tramp.text
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
arch/arm64/kernel/fpsimd.c
index aecf307..dd63ffc 100644
@@ -445,7 +445,6 @@ static void fpsimd_save(void)
 
        if (system_supports_sme()) {
                u64 *svcr = last->svcr;
-               *svcr = read_sysreg_s(SYS_SVCR);
 
                *svcr = read_sysreg_s(SYS_SVCR);
 
arch/arm64/kernel/head.S
index 6a98f1a..cefe6a7 100644
@@ -37,8 +37,6 @@
 
 #include "efi-header.S"
 
-#define __PHYS_OFFSET  KERNEL_START
-
 #if (PAGE_OFFSET & 0x1fffff) != 0
 #error PAGE_OFFSET must be at least 2MB aligned
 #endif
@@ -51,9 +49,6 @@
  *   MMU = off, D-cache = off, I-cache = on or off,
  *   x0 = physical address to the FDT blob.
  *
- * This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET).
- *
  * Note that the callee-saved registers are used for storing variables
  * that are useful before the MMU is enabled. The allocations are described
  * in the entry routines.
         * primary lowlevel boot path:
         *
         *  Register   Scope                      Purpose
+        *  x20        primary_entry() .. __primary_switch()    CPU boot mode
         *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
+        *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
         *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
-        *  x28        __create_page_tables()                   callee preserved temp register
-        *  x19/x20    __primary_switch()                       callee preserved temp registers
-        *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
+        *  x24        __primary_switch()                       linear map KASLR seed
+        *  x25        primary_entry() .. start_kernel()        supported VA size
+        *  x28        create_idmap()                           callee preserved temp register
         */
 SYM_CODE_START(primary_entry)
        bl      preserve_boot_args
        bl      init_kernel_el                  // w0=cpu_boot_mode
-       adrp    x23, __PHYS_OFFSET
-       and     x23, x23, MIN_KIMG_ALIGN - 1    // KASLR offset, defaults to 0
-       bl      set_cpu_boot_mode_flag
-       bl      __create_page_tables
+       mov     x20, x0
+       bl      create_idmap
+
        /*
         * The following calls CPU setup code, see arch/arm64/mm/proc.S for
         * details.
         * On return, the CPU will be ready for the MMU to be turned on and
         * the TCR will have been set.
         */
+#if VA_BITS > 48
+       mrs_s   x0, SYS_ID_AA64MMFR2_EL1
+       tst     x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+       mov     x0, #VA_BITS
+       mov     x25, #VA_BITS_MIN
+       csel    x25, x25, x0, eq
+       mov     x0, x25
+#endif
        bl      __cpu_setup                     // initialise processor
        b       __primary_switch
 SYM_CODE_END(primary_entry)
@@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
        b       dcache_inval_poc                // tail call
 SYM_CODE_END(preserve_boot_args)
 
-/*
- * Macro to create a table entry to the next page.
- *
- *     tbl:    page table address
- *     virt:   virtual address
- *     shift:  #imm page table shift
- *     ptrs:   #imm pointers per table page
- *
- * Preserves:  virt
- * Corrupts:   ptrs, tmp1, tmp2
- * Returns:    tbl -> next level table page address
- */
-       .macro  create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
-       add     \tmp1, \tbl, #PAGE_SIZE
-       phys_to_pte \tmp2, \tmp1
-       orr     \tmp2, \tmp2, #PMD_TYPE_TABLE   // address of next table and entry type
-       lsr     \tmp1, \virt, #\shift
-       sub     \ptrs, \ptrs, #1
-       and     \tmp1, \tmp1, \ptrs             // table index
-       str     \tmp2, [\tbl, \tmp1, lsl #3]
-       add     \tbl, \tbl, #PAGE_SIZE          // next level table page
-       .endm
+SYM_FUNC_START_LOCAL(clear_page_tables)
+       /*
+        * Clear the init page tables.
+        */
+       adrp    x0, init_pg_dir
+       adrp    x1, init_pg_end
+       sub     x2, x1, x0
+       mov     x1, xzr
+       b       __pi_memset                     // tail call
+SYM_FUNC_END(clear_page_tables)
 
 /*
  * Macro to populate page table entries, these entries can be pointers to the next level
@@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args)
  *     vstart: virtual address of start of range
  *     vend:   virtual address of end of range - we map [vstart, vend]
  *     shift:  shift used to transform virtual address into index
- *     ptrs:   number of entries in page table
+ *     order:  #imm 2log(number of entries in page table)
  *     istart: index in table corresponding to vstart
  *     iend:   index in table corresponding to vend
  *     count:  On entry: how many extra entries were required in previous level, scales
  *                       our end index.
  *             On exit: returns how many extra entries required for next page table level
  *
- * Preserves:  vstart, vend, shift, ptrs
+ * Preserves:  vstart, vend
  * Returns:    istart, iend, count
  */
-       .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
-       lsr     \iend, \vend, \shift
-       mov     \istart, \ptrs
-       sub     \istart, \istart, #1
-       and     \iend, \iend, \istart   // iend = (vend >> shift) & (ptrs - 1)
-       mov     \istart, \ptrs
-       mul     \istart, \istart, \count
-       add     \iend, \iend, \istart   // iend += count * ptrs
-                                       // our entries span multiple tables
-
-       lsr     \istart, \vstart, \shift
-       mov     \count, \ptrs
-       sub     \count, \count, #1
-       and     \istart, \istart, \count
-
+       .macro compute_indices, vstart, vend, shift, order, istart, iend, count
+       ubfx    \istart, \vstart, \shift, \order
+       ubfx    \iend, \vend, \shift, \order
+       add     \iend, \iend, \count, lsl \order
        sub     \count, \iend, \istart
        .endm
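
The ubfx rewrite drops the old mask/multiply dance: each index is now a plain bitfield extract, and the spill count from the previous level scales the end index. The same arithmetic in C (model only, mirroring the macro's in/out registers):

    #include <stdint.h>

    static void compute_indices(uint64_t vstart, uint64_t vend,
                                unsigned int shift, unsigned int order,
                                uint64_t *istart, uint64_t *iend,
                                uint64_t *count)
    {
            *istart = (vstart >> shift) & ((1ULL << order) - 1); /* ubfx */
            *iend   = (vend >> shift) & ((1ULL << order) - 1);   /* ubfx */
            *iend  += *count << order;  /* entries span multiple tables */
            *count  = *iend - *istart;
    }
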
 
@@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args)
  *     vend:   virtual address of end of range - we map [vstart, vend - 1]
  *     flags:  flags to use to map last level entries
  *     phys:   physical address corresponding to vstart - physical memory is contiguous
- *     pgds:   the number of pgd entries
+ *     order:  #imm 2log(number of entries in PGD table)
+ *
+ * If extra_shift is set, an extra level will be populated if the end address does
+ * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
  *
  * Temporaries:        istart, iend, tmp, count, sv - these need to be different registers
  * Preserves:  vstart, flags
  * Corrupts:   tbl, rtbl, vend, istart, iend, tmp, count, sv
  */
-       .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+       .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
        sub \vend, \vend, #1
        add \rtbl, \tbl, #PAGE_SIZE
-       mov \sv, \rtbl
        mov \count, #0
-       compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+
+       .ifnb   \extra_shift
+       tst     \vend, #~((1 << (\extra_shift)) - 1)
+       b.eq    .L_\@
+       compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
+       mov \sv, \rtbl
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
+       .endif
+.L_\@:
+       compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
        mov \sv, \rtbl
+       populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+       mov \tbl, \sv
 
 #if SWAPPER_PGTABLE_LEVELS > 3
-       compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+       compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+       mov \sv, \rtbl
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
-       mov \sv, \rtbl
 #endif
 
 #if SWAPPER_PGTABLE_LEVELS > 2
-       compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+       compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+       mov \sv, \rtbl
        populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
        mov \tbl, \sv
 #endif
 
-       compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
-       bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
-       populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
+       compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+       bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
+       populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
        .endm
 
 /*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- *   - identity mapping to enable the MMU (low address, TTBR0)
- *   - first few MB of the kernel linear mapping to jump to once the MMU has
- *     been enabled
+ * Remap a subregion created with the map_memory macro with modified attributes
+ * or output address. The entire remapped region must have been covered in the
+ * invocation of map_memory.
+ *
+ * x0: last level table address (returned in first argument to map_memory)
+ * x1: start VA of the existing mapping
+ * x2: start VA of the region to update
+ * x3: end VA of the region to update (exclusive)
+ * x4: start PA associated with the region to update
+ * x5: attributes to set on the updated region
+ * x6: order of the last level mappings
  */
-SYM_FUNC_START_LOCAL(__create_page_tables)
-       mov     x28, lr
+SYM_FUNC_START_LOCAL(remap_region)
+       sub     x3, x3, #1              // make end inclusive
 
-       /*
-        * Invalidate the init page tables to avoid potential dirty cache lines
-        * being evicted. Other page tables are allocated in rodata as part of
-        * the kernel image, and thus are clean to the PoC per the boot
-        * protocol.
-        */
-       adrp    x0, init_pg_dir
-       adrp    x1, init_pg_end
-       bl      dcache_inval_poc
+       // Get the index offset for the start of the last level table
+       lsr     x1, x1, x6
+       bfi     x1, xzr, #0, #PAGE_SHIFT - 3
 
-       /*
-        * Clear the init page tables.
-        */
-       adrp    x0, init_pg_dir
-       adrp    x1, init_pg_end
-       sub     x1, x1, x0
-1:     stp     xzr, xzr, [x0], #16
-       stp     xzr, xzr, [x0], #16
-       stp     xzr, xzr, [x0], #16
-       stp     xzr, xzr, [x0], #16
-       subs    x1, x1, #64
-       b.ne    1b
+       // Derive the start and end indexes into the last level table
+       // associated with the provided region
+       lsr     x2, x2, x6
+       lsr     x3, x3, x6
+       sub     x2, x2, x1
+       sub     x3, x3, x1
 
-       mov     x7, SWAPPER_MM_MMUFLAGS
+       mov     x1, #1
+       lsl     x6, x1, x6              // block size at this level
 
-       /*
-        * Create the identity mapping.
-        */
-       adrp    x0, idmap_pg_dir
-       adrp    x3, __idmap_text_start          // __pa(__idmap_text_start)
-
-#ifdef CONFIG_ARM64_VA_BITS_52
-       mrs_s   x6, SYS_ID_AA64MMFR2_EL1
-       and     x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
-       mov     x5, #52
-       cbnz    x6, 1f
-#endif
-       mov     x5, #VA_BITS_MIN
-1:
-       adr_l   x6, vabits_actual
-       str     x5, [x6]
-       dmb     sy
-       dc      ivac, x6                // Invalidate potentially stale cache line
+       populate_entries x0, x4, x2, x3, x5, x6, x7
+       ret
+SYM_FUNC_END(remap_region)
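
remap_region() relies on the target range having been covered by one map_memory invocation, so the last-level descriptors it rewrites are contiguous in memory. The index derivation above, modelled in C (a sketch; 4k pages assumed for PAGE_SHIFT):

    #include <stdint.h>

    #define PAGE_SHIFT 12   /* assumed granule for the model */

    static void remap_region_indices(uint64_t map_start_va, uint64_t va,
                                     uint64_t end_va, unsigned int blk_shift,
                                     uint64_t *istart, uint64_t *iend)
    {
            /* index offset of the first entry in the last-level table */
            uint64_t base = (map_start_va >> blk_shift) &
                            ~(((uint64_t)1 << (PAGE_SHIFT - 3)) - 1);

            *istart = (va >> blk_shift) - base;
            *iend   = ((end_va - 1) >> blk_shift) - base;  /* end inclusive */
    }
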
 
+SYM_FUNC_START_LOCAL(create_idmap)
+       mov     x28, lr
        /*
-        * VA_BITS may be too small to allow for an ID mapping to be created
-        * that covers system RAM if that is located sufficiently high in the
-        * physical address space. So for the ID map, use an extended virtual
-        * range in that case, and configure an additional translation level
-        * if needed.
+        * The ID map carries a 1:1 mapping of the physical address range
+        * covered by the loaded image, which could be anywhere in DRAM. This
+        * means that the required size of the VA (== PA) space is decided at
+        * boot time, and could be more than the configured size of the VA
+        * space for ordinary kernel and user space mappings.
+        *
+        * There are three cases to consider here:
+        * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
+        *   the placement of the image. In this case, we configure one extra
+        *   level of translation on the fly for the ID map only. (This case
+        *   also covers 42-bit VA/52-bit PA on 64k pages).
         *
-        * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
-        * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
-        * this number conveniently equals the number of leading zeroes in
-        * the physical address of __idmap_text_end.
+        * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
+        *   only happen when using 64k pages, in which case we need to extend
+        *   the root level table rather than add a level. Note that we can
+        *   treat this case as 'always extended' as long as we take care not
+        *   to program an unsupported T0SZ value into the TCR register.
+        *
+        * - Combinations that would require two additional levels of
+        *   translation are not supported, e.g., VA_BITS==36 on 16k pages, or
+        *   VA_BITS==39/4k pages with 5-level paging, where the input address
+        *   requires more than 47 or 48 bits, respectively.
         */
-       adrp    x5, __idmap_text_end
-       clz     x5, x5
-       cmp     x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
-       b.ge    1f                      // .. then skip VA range extension
-
-       adr_l   x6, idmap_t0sz
-       str     x5, [x6]
-       dmb     sy
-       dc      ivac, x6                // Invalidate potentially stale cache line
-
 #if (VA_BITS < 48)
+#define IDMAP_PGD_ORDER        (VA_BITS - PGDIR_SHIFT)
 #define EXTRA_SHIFT    (PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS     (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
 
        /*
         * If VA_BITS < 48, we have to configure an additional table level.
@@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 #if VA_BITS != EXTRA_SHIFT
 #error "Mismatch between VA_BITS and page size/number of translation levels"
 #endif
-
-       mov     x4, EXTRA_PTRS
-       create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
 #else
+#define IDMAP_PGD_ORDER        (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+#define EXTRA_SHIFT
        /*
         * If VA_BITS == 48, we don't have to configure an additional
         * translation level, but the top-level table has more entries.
         */
-       mov     x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
-       str_l   x4, idmap_ptrs_per_pgd, x5
 #endif
-1:
-       ldr_l   x4, idmap_ptrs_per_pgd
-       adr_l   x6, __idmap_text_end            // __pa(__idmap_text_end)
-
-       map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
-
-       /*
-        * Map the kernel image (starting with PHYS_OFFSET).
-        */
-       adrp    x0, init_pg_dir
-       mov_q   x5, KIMAGE_VADDR                // compile time __va(_text)
-       add     x5, x5, x23                     // add KASLR displacement
-       mov     x4, PTRS_PER_PGD
-       adrp    x6, _end                        // runtime __pa(_end)
-       adrp    x3, _text                       // runtime __pa(_text)
-       sub     x6, x6, x3                      // _end - _text
-       add     x6, x6, x5                      // runtime __va(_end)
-
-       map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
+       adrp    x0, init_idmap_pg_dir
+       adrp    x3, _text
+       adrp    x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+       mov     x7, SWAPPER_RX_MMUFLAGS
+
+       map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
+
+       /* Remap the kernel page tables r/w in the ID map */
+       adrp    x1, _text
+       adrp    x2, init_pg_dir
+       adrp    x3, init_pg_end
+       bic     x4, x2, #SWAPPER_BLOCK_SIZE - 1
+       mov     x5, SWAPPER_RW_MMUFLAGS
+       mov     x6, #SWAPPER_BLOCK_SHIFT
+       bl      remap_region
+
+       /* Remap the FDT after the kernel image */
+       adrp    x1, _text
+       adrp    x22, _end + SWAPPER_BLOCK_SIZE
+       bic     x2, x22, #SWAPPER_BLOCK_SIZE - 1
+       bfi     x22, x21, #0, #SWAPPER_BLOCK_SHIFT              // remapped FDT address
+       add     x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+       bic     x4, x21, #SWAPPER_BLOCK_SIZE - 1
+       mov     x5, SWAPPER_RW_MMUFLAGS
+       mov     x6, #SWAPPER_BLOCK_SHIFT
+       bl      remap_region
 
        /*
         * Since the page tables have been populated with non-cacheable
@@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
         */
        dmb     sy
 
-       adrp    x0, idmap_pg_dir
-       adrp    x1, idmap_pg_end
+       adrp    x0, init_idmap_pg_dir
+       adrp    x1, init_idmap_pg_end
        bl      dcache_inval_poc
+       ret     x28
+SYM_FUNC_END(create_idmap)
 
+SYM_FUNC_START_LOCAL(create_kernel_mapping)
        adrp    x0, init_pg_dir
-       adrp    x1, init_pg_end
-       bl      dcache_inval_poc
+       mov_q   x5, KIMAGE_VADDR                // compile time __va(_text)
+       add     x5, x5, x23                     // add KASLR displacement
+       adrp    x6, _end                        // runtime __pa(_end)
+       adrp    x3, _text                       // runtime __pa(_text)
+       sub     x6, x6, x3                      // _end - _text
+       add     x6, x6, x5                      // runtime __va(_end)
+       mov     x7, SWAPPER_RW_MMUFLAGS
 
-       ret     x28
-SYM_FUNC_END(__create_page_tables)
+       map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
+
+       dsb     ishst                           // sync with page table walker
+       ret
+SYM_FUNC_END(create_kernel_mapping)
 
        /*
         * Initialize CPU registers with task-specific and cpu-specific context.
@@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables)
 /*
  * The following fragment of code is executed with the MMU enabled.
  *
- *   x0 = __PHYS_OFFSET
+ *   x0 = __pa(KERNEL_START)
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
        adr_l   x4, init_task
@@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
        sub     x4, x4, x0                      // the kernel virtual and
        str_l   x4, kimage_voffset, x5          // physical mappings
 
+       mov     x0, x20
+       bl      set_cpu_boot_mode_flag
+
        // Clear BSS
        adr_l   x0, __bss_start
        mov     x1, xzr
@@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched)
        bl      __pi_memset
        dsb     ishst                           // Make zero page visible to PTW
 
+#if VA_BITS > 48
+       adr_l   x8, vabits_actual               // Set this early so KASAN early init
+       str     x25, [x8]                       // ... observes the correct value
+       dc      civac, x8                       // Make visible to booting secondaries
+#endif
+
+#ifdef CONFIG_RANDOMIZE_BASE
+       adrp    x5, memstart_offset_seed        // Save KASLR linear map seed
+       strh    w24, [x5, :lo12:memstart_offset_seed]
+#endif
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        bl      kasan_early_init
 #endif
        mov     x0, x21                         // pass FDT address in x0
        bl      early_fdt_map                   // Try mapping the FDT early
+       mov     x0, x20                         // pass the full boot status
        bl      init_feature_override           // Parse cpu feature overrides
-#ifdef CONFIG_RANDOMIZE_BASE
-       tst     x23, ~(MIN_KIMG_ALIGN - 1)      // already running randomized?
-       b.ne    0f
-       bl      kaslr_early_init                // parse FDT for KASLR options
-       cbz     x0, 0f                          // KASLR disabled? just proceed
-       orr     x23, x23, x0                    // record KASLR offset
-       ldp     x29, x30, [sp], #16             // we must enable KASLR, return
-       ret                                     // to __primary_switch()
-0:
-#endif
-       bl      switch_to_vhe                   // Prefer VHE if possible
+       mov     x0, x20
+       bl      finalise_el2                    // Prefer VHE if possible
        ldp     x29, x30, [sp], #16
        bl      start_kernel
        ASM_BUG()
 SYM_FUNC_END(__primary_switched)
 
-       .pushsection ".rodata", "a"
-SYM_DATA_START(kimage_vaddr)
-       .quad           _text
-SYM_DATA_END(kimage_vaddr)
-EXPORT_SYMBOL(kimage_vaddr)
-       .popsection
-
 /*
  * end early head section, begin head code that is also used for
  * hotplug and needs to have the same protections as the text region
@@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr)
  * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
  * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
  *
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
- * booted in EL1 or EL2 respectively.
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
+ * booted in EL1 or EL2 respectively, with the top 32 bits containing
+ * potential context flags. These flags are *not* stored in __boot_cpu_mode.
  */
 SYM_FUNC_START(init_kernel_el)
        mrs     x0, CurrentEL
@@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
        msr     vbar_el2, x0
        isb
 
+       mov_q   x1, INIT_SCTLR_EL1_MMU_OFF
+
        /*
         * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
         * making it impossible to start in nVHE mode. Is that
@@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
        and     x0, x0, #HCR_E2H
        cbz     x0, 1f
 
-       /* Switching to VHE requires a sane SCTLR_EL1 as a start */
-       mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
-       msr_s   SYS_SCTLR_EL12, x0
-
-       /*
-        * Force an eret into a helper "function", and let it return
-        * to our original caller... This makes sure that we have
-        * initialised the basic PSTATE state.
-        */
-       mov     x0, #INIT_PSTATE_EL2
-       msr     spsr_el1, x0
-       adr     x0, __cpu_stick_to_vhe
-       msr     elr_el1, x0
-       eret
+       /* Set a sane SCTLR_EL1, the VHE way */
+       msr_s   SYS_SCTLR_EL12, x1
+       mov     x2, #BOOT_CPU_FLAG_E2H
+       b       2f
 
 1:
-       mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
-       msr     sctlr_el1, x0
-
+       msr     sctlr_el1, x1
+       mov     x2, xzr
+2:
        msr     elr_el2, lr
        mov     w0, #BOOT_CPU_MODE_EL2
+       orr     x0, x0, x2
        eret
-
-__cpu_stick_to_vhe:
-       mov     x0, #HVC_VHE_RESTART
-       hvc     #0
-       mov     x0, #BOOT_CPU_MODE_EL2
-       ret
 SYM_FUNC_END(init_kernel_el)
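
init_kernel_el() now reports the boot mode in the low half of x0 with context flags above it, and callers carry that value in x20 instead of re-reading __boot_cpu_mode from memory. A C view of the check this enables later in idreg-override (mode constants as in asm/virt.h; BOOT_CPU_FLAG_E2H is an assumption based on this series):

    #include <stdbool.h>
    #include <stdint.h>

    #define BOOT_CPU_MODE_EL1  0xe11          /* as in asm/virt.h */
    #define BOOT_CPU_MODE_EL2  0xe12
    #define BOOT_CPU_FLAG_E2H  (1ULL << 32)   /* assumed, per this series */

    /* True if we entered at EL2 with HCR_EL2.E2H stuck at 1. */
    static bool booted_e2h_only(uint64_t boot_status)
    {
            return boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2);
    }
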
 
 /*
@@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
        b.ne    1f
        add     x1, x1, #4
 1:     str     w0, [x1]                        // Save CPU boot mode
-       dmb     sy
-       dc      ivac, x1                        // Invalidate potentially stale cache line
        ret
 SYM_FUNC_END(set_cpu_boot_mode_flag)
 
-/*
- * These values are written with the MMU off, but read with the MMU on.
- * Writers will invalidate the corresponding address, discarding up to a
- * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
- * sufficient alignment that the CWG doesn't overlap another section.
- */
-       .pushsection ".mmuoff.data.write", "aw"
-/*
- * We need to find out the CPU boot mode long after boot, so we need to
- * store it in a writable variable.
- *
- * This is not in .bss, because we set it sufficiently early that the boot-time
- * zeroing of .bss would clobber it.
- */
-SYM_DATA_START(__boot_cpu_mode)
-       .long   BOOT_CPU_MODE_EL2
-       .long   BOOT_CPU_MODE_EL1
-SYM_DATA_END(__boot_cpu_mode)
-/*
- * The booting CPU updates the failed status @__early_cpu_boot_status,
- * with MMU turned off.
- */
-SYM_DATA_START(__early_cpu_boot_status)
-       .quad   0
-SYM_DATA_END(__early_cpu_boot_status)
-
-       .popsection
-
        /*
         * This provides a "holding pen" for platforms to hold all secondary
         * cores until we're ready for them to initialise.
         */
 SYM_FUNC_START(secondary_holding_pen)
        bl      init_kernel_el                  // w0=cpu_boot_mode
-       bl      set_cpu_boot_mode_flag
-       mrs     x0, mpidr_el1
+       mrs     x2, mpidr_el1
        mov_q   x1, MPIDR_HWID_BITMASK
-       and     x0, x0, x1
+       and     x2, x2, x1
        adr_l   x3, secondary_holding_pen_release
 pen:   ldr     x4, [x3]
-       cmp     x4, x0
+       cmp     x4, x2
        b.eq    secondary_startup
        wfe
        b       pen
@@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen)
         */
 SYM_FUNC_START(secondary_entry)
        bl      init_kernel_el                  // w0=cpu_boot_mode
-       bl      set_cpu_boot_mode_flag
        b       secondary_startup
 SYM_FUNC_END(secondary_entry)
 
@@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup)
        /*
         * Common entry point for secondary CPUs.
         */
-       bl      switch_to_vhe
+       mov     x20, x0                         // preserve boot mode
+       bl      finalise_el2
        bl      __cpu_secondary_check52bitva
+#if VA_BITS > 48
+       ldr_l   x0, vabits_actual
+#endif
        bl      __cpu_setup                     // initialise processor
        adrp    x1, swapper_pg_dir
+       adrp    x2, idmap_pg_dir
        bl      __enable_mmu
        ldr     x8, =__secondary_switched
        br      x8
 SYM_FUNC_END(secondary_startup)
 
 SYM_FUNC_START_LOCAL(__secondary_switched)
+       mov     x0, x20
+       bl      set_cpu_boot_mode_flag
+       str_l   xzr, __early_cpu_boot_status, x3
        adr_l   x5, vectors
        msr     vbar_el1, x5
        isb
@@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow)
  *
  *  x0  = SCTLR_EL1 value for turning on the MMU.
  *  x1  = TTBR1_EL1 value
+ *  x2  = ID map root table address
  *
  * Returns to the caller via x30/lr. This requires the caller to be covered
  * by the .idmap.text section.
@@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow)
  * If it isn't, park the CPU
  */
 SYM_FUNC_START(__enable_mmu)
-       mrs     x2, ID_AA64MMFR0_EL1
-       ubfx    x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+       mrs     x3, ID_AA64MMFR0_EL1
+       ubfx    x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+       cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
        b.lt    __no_granule_support
-       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+       cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
        b.gt    __no_granule_support
-       update_early_cpu_boot_status 0, x2, x3
-       adrp    x2, idmap_pg_dir
-       phys_to_ttbr x1, x1
        phys_to_ttbr x2, x2
        msr     ttbr0_el1, x2                   // load TTBR0
-       offset_ttbr1 x1, x3
-       msr     ttbr1_el1, x1                   // load TTBR1
-       isb
+       load_ttbr1 x1, x1, x3
 
        set_sctlr_el1   x0
 
@@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu)
 SYM_FUNC_END(__enable_mmu)
 
 SYM_FUNC_START(__cpu_secondary_check52bitva)
-#ifdef CONFIG_ARM64_VA_BITS_52
+#if VA_BITS > 48
        ldr_l   x0, vabits_actual
        cmp     x0, #52
        b.ne    2f
@@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
         * Iterate over each entry in the relocation table, and apply the
         * relocations in place.
         */
-       ldr     w9, =__rela_offset              // offset to reloc table
-       ldr     w10, =__rela_size               // size of reloc table
-
+       adr_l   x9, __rela_start
+       adr_l   x10, __rela_end
        mov_q   x11, KIMAGE_VADDR               // default virtual offset
        add     x11, x11, x23                   // actual virtual offset
-       add     x9, x9, x11                     // __va(.rela)
-       add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
 
 0:     cmp     x9, x10
        b.hs    1f
@@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
         * entry in x9, the address being relocated by the current address or
         * bitmap entry in x13 and the address being relocated by the current
         * bit in x14.
-        *
-        * Because addends are stored in place in the binary, RELR relocations
-        * cannot be applied idempotently. We use x24 to keep track of the
-        * currently applied displacement so that we can correctly relocate if
-        * __relocate_kernel is called twice with non-zero displacements (i.e.
-        * if there is both a physical misalignment and a KASLR displacement).
         */
-       ldr     w9, =__relr_offset              // offset to reloc table
-       ldr     w10, =__relr_size               // size of reloc table
-       add     x9, x9, x11                     // __va(.relr)
-       add     x10, x9, x10                    // __va(.relr) + sizeof(.relr)
-
-       sub     x15, x23, x24                   // delta from previous offset
-       cbz     x15, 7f                         // nothing to do if unchanged
-       mov     x24, x23                        // save new offset
+       adr_l   x9, __relr_start
+       adr_l   x10, __relr_end
 
 2:     cmp     x9, x10
        b.hs    7f
@@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
        tbnz    x11, #0, 3f                     // branch to handle bitmaps
        add     x13, x11, x23
        ldr     x12, [x13]                      // relocate address entry
-       add     x12, x12, x15
+       add     x12, x12, x23
        str     x12, [x13], #8                  // adjust to start of bitmap
        b       2b
 
@@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
        cbz     x11, 6f
        tbz     x11, #0, 5f                     // skip bit if not set
        ldr     x12, [x14]                      // relocate bit
-       add     x12, x12, x15
+       add     x12, x12, x23
        str     x12, [x14]
 
 5:     add     x14, x14, #8                    // move to next bit's address
@@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel)
 #endif
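
With the kernel mapping now created only after the KASLR offset is final, __relocate_kernel runs at most once, so the old RELR displacement tracking (the x24/x15 bookkeeping) can go and every entry simply adds x23. For reference, a C model of the RELR walk in the loop above (a sketch; even entries name a location and reset the cursor, odd entries are bitmaps covering the next 63 slots):

    #include <stddef.h>
    #include <stdint.h>

    static void apply_relr(const uint64_t *relr, size_t n, uint64_t delta)
    {
            uint64_t *where = 0;

            for (size_t i = 0; i < n; i++) {
                    uint64_t entry = relr[i];

                    if (!(entry & 1)) {                  /* address entry */
                            where = (uint64_t *)(entry + delta);
                            *where++ += delta;
                            continue;
                    }
                    for (int bit = 1; bit < 64; bit++)   /* bitmap entry */
                            if (entry & (1ULL << bit))
                                    where[bit - 1] += delta;
                    where += 63;
            }
    }
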
 
 SYM_FUNC_START_LOCAL(__primary_switch)
+       adrp    x1, reserved_pg_dir
+       adrp    x2, init_idmap_pg_dir
+       bl      __enable_mmu
+#ifdef CONFIG_RELOCATABLE
+       adrp    x23, KERNEL_START
+       and     x23, x23, MIN_KIMG_ALIGN - 1
 #ifdef CONFIG_RANDOMIZE_BASE
-       mov     x19, x0                         // preserve new SCTLR_EL1 value
-       mrs     x20, sctlr_el1                  // preserve old SCTLR_EL1 value
+       mov     x0, x22
+       adrp    x1, init_pg_end
+       mov     sp, x1
+       mov     x29, xzr
+       bl      __pi_kaslr_early_init
+       and     x24, x0, #SZ_2M - 1             // capture memstart offset seed
+       bic     x0, x0, #SZ_2M - 1
+       orr     x23, x23, x0                    // record kernel offset
+#endif
 #endif
+       bl      clear_page_tables
+       bl      create_kernel_mapping
 
        adrp    x1, init_pg_dir
-       bl      __enable_mmu
+       load_ttbr1 x1, x1, x2
 #ifdef CONFIG_RELOCATABLE
-#ifdef CONFIG_RELR
-       mov     x24, #0                         // no RELR displacement yet
-#endif
        bl      __relocate_kernel
-#ifdef CONFIG_RANDOMIZE_BASE
-       ldr     x8, =__primary_switched
-       adrp    x0, __PHYS_OFFSET
-       blr     x8
-
-       /*
-        * If we return here, we have a KASLR displacement in x23 which we need
-        * to take into account by discarding the current kernel mapping and
-        * creating a new one.
-        */
-       pre_disable_mmu_workaround
-       msr     sctlr_el1, x20                  // disable the MMU
-       isb
-       bl      __create_page_tables            // recreate kernel mapping
-
-       tlbi    vmalle1                         // Remove any stale TLB entries
-       dsb     nsh
-       isb
-
-       set_sctlr_el1   x19                     // re-enable the MMU
-
-       bl      __relocate_kernel
-#endif
 #endif
        ldr     x8, =__primary_switched
-       adrp    x0, __PHYS_OFFSET
+       adrp    x0, KERNEL_START                // __pa(KERNEL_START)
        br      x8
 SYM_FUNC_END(__primary_switch)
arch/arm64/kernel/hibernate.c
index 2e24834..af5df48 100644
@@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void)
                unsigned long pfn = xa_state.xa_index;
                struct page *page = pfn_to_online_page(pfn);
 
-               /*
-                * It is not required to invoke page_kasan_tag_reset(page)
-                * at this point since the tags stored in page->flags are
-                * already restored.
-                */
                mte_restore_page_tags(page_address(page), tags);
 
                mte_free_tag_storage(tags);
arch/arm64/kernel/hyp-stub.S
index 43d2126..12c7fad 100644
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
+// Warning, hardcoded register allocation
+// This clobbers x1 and x2, and expects x1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail
+       ubfx    x1, x1, #\fld, #\width
+       cbz     x1, \fail
+
+       adr_l   x1, \idreg\()_override
+       ldr     x2, [x1, FTR_OVR_VAL_OFFSET]
+       ldr     x1, [x1, FTR_OVR_MASK_OFFSET]
+       ubfx    x2, x2, #\fld, #\width
+       ubfx    x1, x1, #\fld, #\width
+       cmp     x1, xzr
+       and     x2, x2, x1
+       csinv   x2, x2, xzr, ne
+       cbnz    x2, \pass
+       b       \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail
+       mrs     x1, \idreg\()_el1
+       __check_override \idreg \fld 4 \pass \fail
+.endm
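
check_override folds the old open-coded VHE test into a reusable macro: fail fast if the sanitised field is zero, otherwise let a boot-time override veto or confirm it. The same decision in C (a sketch; the struct mirrors the val/mask pair of arm64_ftr_override):

    #include <stdbool.h>
    #include <stdint.h>

    struct ftr_override_model {
            uint64_t val;
            uint64_t mask;
    };

    static bool check_override_model(uint64_t idreg,
                                     const struct ftr_override_model *ovr,
                                     unsigned int shift, unsigned int width)
    {
            uint64_t fmask = (1ULL << width) - 1;
            uint64_t val = (idreg >> shift) & fmask;         /* ubfx */
            uint64_t ovr_mask = (ovr->mask >> shift) & fmask;

            if (!val)
                    return false;                            /* cbz x1, \fail */

            if (ovr_mask)                                    /* field overridden? */
                    val = (ovr->val >> shift) & ovr_mask;    /* and x2, x2, x1 */

            return val != 0;                                 /* cbnz x2, \pass */
    }
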
+
        .text
        .pushsection    .hyp.text, "ax"
 
@@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync)
        msr     vbar_el2, x1
        b       9f
 
-1:     cmp     x0, #HVC_VHE_RESTART
-       b.eq    mutate_to_vhe
+1:     cmp     x0, #HVC_FINALISE_EL2
+       b.eq    __finalise_el2
 
 2:     cmp     x0, #HVC_SOFT_RESTART
        b.ne    3f
@@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync)
        eret
 SYM_CODE_END(elx_sync)
 
-// nVHE? No way! Give me the real thing!
-SYM_CODE_START_LOCAL(mutate_to_vhe)
+SYM_CODE_START_LOCAL(__finalise_el2)
+       check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
+
+.Linit_sve:    /* SVE register access */
+       mrs     x0, cptr_el2                    // Disable SVE traps
+       bic     x0, x0, #CPTR_EL2_TZ
+       msr     cptr_el2, x0
+       isb
+       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
+       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
+
+.Lskip_sve:
+       check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
+
+.Linit_sme:    /* SME register access and priority mapping */
+       mrs     x0, cptr_el2                    // Disable SME traps
+       bic     x0, x0, #CPTR_EL2_TSM
+       msr     cptr_el2, x0
+       isb
+
+       mrs     x1, sctlr_el2
+       orr     x1, x1, #SCTLR_ELx_ENTP2        // Disable TPIDR2 traps
+       msr     sctlr_el2, x1
+       isb
+
+       mov     x0, #0                          // SMCR controls
+
+       // Full FP in SM?
+       mrs_s   x1, SYS_ID_AA64SMFR0_EL1
+       __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
+
+.Linit_sme_fa64:
+       orr     x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64:
+
+       orr     x0, x0, #SMCR_ELx_LEN_MASK      // Enable full SME vector
+       msr_s   SYS_SMCR_EL2, x0                // length for EL1.
+
+       mrs_s   x1, SYS_SMIDR_EL1               // Priority mapping supported?
+       ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+       cbz     x1, .Lskip_sme
+
+       msr_s   SYS_SMPRIMAP_EL2, xzr           // Make all priorities equal
+
+       mrs     x1, id_aa64mmfr1_el1            // HCRX_EL2 present?
+       ubfx    x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+       cbz     x1, .Lskip_sme
+
+       mrs_s   x1, SYS_HCRX_EL2
+       orr     x1, x1, #HCRX_EL2_SMPME_MASK    // Enable priority mapping
+       msr_s   SYS_HCRX_EL2, x1
+
+.Lskip_sme:
+
+       // nVHE? No way! Give me the real thing!
        // Sanity check: MMU *must* be off
        mrs     x1, sctlr_el2
        tbnz    x1, #0, 1f
 
        // Needs to be VHE capable, obviously
-       mrs     x1, id_aa64mmfr1_el1
-       ubfx    x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
-       cbz     x1, 1f
-
-       // Check whether VHE is disabled from the command line
-       adr_l   x1, id_aa64mmfr1_override
-       ldr     x2, [x1, FTR_OVR_VAL_OFFSET]
-       ldr     x1, [x1, FTR_OVR_MASK_OFFSET]
-       ubfx    x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
-       ubfx    x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
-       cmp     x1, xzr
-       and     x2, x2, x1
-       csinv   x2, x2, xzr, ne
-       cbnz    x2, 2f
+       check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
 
 1:     mov_q   x0, HVC_STUB_ERR
        eret
@@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
        msr     spsr_el1, x0
 
        b       enter_vhe
-SYM_CODE_END(mutate_to_vhe)
+SYM_CODE_END(__finalise_el2)
 
        // At the point where we reach enter_vhe(), we run with
-       // the MMU off (which is enforced by mutate_to_vhe()).
+       // the MMU off (which is enforced by __finalise_el2()).
        // We thus need to be in the idmap, or everything will
        // explode when enabling the MMU.
 
@@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors)
 SYM_FUNC_END(__hyp_reset_vectors)
 
 /*
- * Entry point to switch to VHE if deemed capable
+ * Entry point to finalise EL2 and switch to VHE if deemed capable
+ *
+ * w0: boot mode, as returned by init_kernel_el()
  */
-SYM_FUNC_START(switch_to_vhe)
+SYM_FUNC_START(finalise_el2)
        // Need to have booted at EL2
-       adr_l   x1, __boot_cpu_mode
-       ldr     w0, [x1]
        cmp     w0, #BOOT_CPU_MODE_EL2
        b.ne    1f
 
@@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe)
        cmp     x0, #CurrentEL_EL1
        b.ne    1f
 
-       // Turn the world upside down
-       mov     x0, #HVC_VHE_RESTART
+       mov     x0, #HVC_FINALISE_EL2
        hvc     #0
 1:
        ret
-SYM_FUNC_END(switch_to_vhe)
+SYM_FUNC_END(finalise_el2)
arch/arm64/kernel/idreg-override.c
index 8a2ceb5..1b0542c 100644
 #define FTR_ALIAS_NAME_LEN     30
 #define FTR_ALIAS_OPTION_LEN   116
 
+static u64 __boot_status __initdata;
+
 struct ftr_set_desc {
        char                            name[FTR_DESC_NAME_LEN];
        struct arm64_ftr_override       *override;
        struct {
                char                    name[FTR_DESC_FIELD_LEN];
                u8                      shift;
+               u8                      width;
                bool                    (*filter)(u64 val);
        }                               fields[];
 };
 
+#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+
 static bool __init mmfr1_vh_filter(u64 val)
 {
        /*
@@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val)
         * the user was trying to force nVHE on us, proceed with
         * attitude adjustment.
         */
-       return !(is_kernel_in_hyp_mode() && val == 0);
+       return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
+                val == 0);
 }
 
 static const struct ftr_set_desc mmfr1 __initconst = {
        .name           = "id_aa64mmfr1",
        .override       = &id_aa64mmfr1_override,
        .fields         = {
-               { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
+               FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
+               {}
+       },
+};
+
+static bool __init pfr0_sve_filter(u64 val)
+{
+       /*
+        * Disabling SVE also means disabling all the features that
+        * are associated with it. The easiest way to do it is just to
+        * override id_aa64zfr0_el1 to be 0.
+        */
+       if (!val) {
+               id_aa64zfr0_override.val = 0;
+               id_aa64zfr0_override.mask = GENMASK(63, 0);
+       }
+
+       return true;
+}
+
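
As a reading aid (a sketch, not code from this patch): an override is a
val/mask pair applied when the ID register is sampled, so zeroing
id_aa64zfr0_override with a full mask makes every SVE-related field read
as zero downstream.

        static u64 apply_override(u64 sampled,
                                  const struct arm64_ftr_override *ovr)
        {
                return (sampled & ~ovr->mask) | (ovr->val & ovr->mask);
        }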
+static const struct ftr_set_desc pfr0 __initconst = {
+       .name           = "id_aa64pfr0",
+       .override       = &id_aa64pfr0_override,
+       .fields         = {
+               FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
                {}
        },
 };
 
+static bool __init pfr1_sme_filter(u64 val)
+{
+       /*
+        * Similarly to SVE, disabling SME also means disabling all
+        * the features that are associated with it. Just set
+        * id_aa64smfr0_el1 to 0 and don't look back.
+        */
+       if (!val) {
+               id_aa64smfr0_override.val = 0;
+               id_aa64smfr0_override.mask = GENMASK(63, 0);
+       }
+
+       return true;
+}
+
 static const struct ftr_set_desc pfr1 __initconst = {
        .name           = "id_aa64pfr1",
        .override       = &id_aa64pfr1_override,
        .fields         = {
-               { "bt", ID_AA64PFR1_BT_SHIFT },
-               { "mte", ID_AA64PFR1_MTE_SHIFT},
+               FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
+               FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
+               FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
                {}
        },
 };
@@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = {
        .name           = "id_aa64isar1",
        .override       = &id_aa64isar1_override,
        .fields         = {
-               { "gpi", ID_AA64ISAR1_GPI_SHIFT },
-               { "gpa", ID_AA64ISAR1_GPA_SHIFT },
-               { "api", ID_AA64ISAR1_API_SHIFT },
-               { "apa", ID_AA64ISAR1_APA_SHIFT },
+               FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
+               FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
+               FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
+               FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
                {}
        },
 };
@@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = {
        .name           = "id_aa64isar2",
        .override       = &id_aa64isar2_override,
        .fields         = {
-               { "gpa3", ID_AA64ISAR2_GPA3_SHIFT },
-               { "apa3", ID_AA64ISAR2_APA3_SHIFT },
+               FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
+               FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+               {}
+       },
+};
+
+static const struct ftr_set_desc smfr0 __initconst = {
+       .name           = "id_aa64smfr0",
+       .override       = &id_aa64smfr0_override,
+       .fields         = {
+               /* FA64 is a one-bit field... :-/ */
+               { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
                {}
        },
 };
@@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = {
        .override       = &kaslr_feature_override,
 #endif
        .fields         = {
-               { "disabled", 0 },
+               FIELD("disabled", 0, NULL),
                {}
        },
 };
 
 static const struct ftr_set_desc * const regs[] __initconst = {
        &mmfr1,
+       &pfr0,
        &pfr1,
        &isar1,
        &isar2,
+       &smfr0,
        &kaslr,
 };
 
@@ -108,6 +166,8 @@ static const struct {
 } aliases[] __initconst = {
        { "kvm-arm.mode=nvhe",          "id_aa64mmfr1.vh=0" },
        { "kvm-arm.mode=protected",     "id_aa64mmfr1.vh=0" },
+       { "arm64.nosve",                "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" },
+       { "arm64.nosme",                "id_aa64pfr1.sme=0" },
        { "arm64.nobti",                "id_aa64pfr1.bt=0" },
        { "arm64.nopauth",
          "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
@@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline)
 
                for (f = 0; strlen(regs[i]->fields[f].name); f++) {
                        u64 shift = regs[i]->fields[f].shift;
-                       u64 mask = 0xfUL << shift;
+                       u64 width = regs[i]->fields[f].width ?: 4;
+                       u64 mask = GENMASK_ULL(shift + width - 1, shift);
                        u64 v;
 
                        if (find_field(cmdline, regs[i], f, &v))
@@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline)
 
                        /*
                         * If an override gets filtered out, advertise
-                        * it by setting the value to 0xf, but
+                        * it by setting the value to all ones while
                         * clearing the mask... Yes, this is fragile.
                         */
                        if (regs[i]->fields[f].filter &&
@@ -234,9 +295,9 @@ static __init void parse_cmdline(void)
 }
 
 /* Keep checkers quiet */
-void init_feature_override(void);
+void init_feature_override(u64 boot_status);
 
-asmlinkage void __init init_feature_override(void)
+asmlinkage void __init init_feature_override(u64 boot_status)
 {
        int i;
 
@@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void)
                }
        }
 
+       __boot_status = boot_status;
+
        parse_cmdline();
 
        for (i = 0; i < ARRAY_SIZE(regs); i++) {
index 241c86b..afa69e0 100644 (file)
 #error This file should only be included in vmlinux.lds.S
 #endif
 
-#ifdef CONFIG_EFI
-
-__efistub_kernel_size          = _edata - _text;
-__efistub_primary_entry_offset = primary_entry - _text;
-
+PROVIDE(__efistub_kernel_size          = _edata - _text);
+PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
 
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -25,31 +22,37 @@ __efistub_primary_entry_offset      = primary_entry - _text;
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp               = __pi_memcmp;
-__efistub_memchr               = __pi_memchr;
-__efistub_memcpy               = __pi_memcpy;
-__efistub_memmove              = __pi_memmove;
-__efistub_memset               = __pi_memset;
-__efistub_strlen               = __pi_strlen;
-__efistub_strnlen              = __pi_strnlen;
-__efistub_strcmp               = __pi_strcmp;
-__efistub_strncmp              = __pi_strncmp;
-__efistub_strrchr              = __pi_strrchr;
-__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-__efistub___memcpy             = __pi_memcpy;
-__efistub___memmove            = __pi_memmove;
-__efistub___memset             = __pi_memset;
-#endif
+PROVIDE(__efistub_memcmp               = __pi_memcmp);
+PROVIDE(__efistub_memchr               = __pi_memchr);
+PROVIDE(__efistub_memcpy               = __pi_memcpy);
+PROVIDE(__efistub_memmove              = __pi_memmove);
+PROVIDE(__efistub_memset               = __pi_memset);
+PROVIDE(__efistub_strlen               = __pi_strlen);
+PROVIDE(__efistub_strnlen              = __pi_strnlen);
+PROVIDE(__efistub_strcmp               = __pi_strcmp);
+PROVIDE(__efistub_strncmp              = __pi_strncmp);
+PROVIDE(__efistub_strrchr              = __pi_strrchr);
+PROVIDE(__efistub_dcache_clean_poc     = __pi_dcache_clean_poc);
+
+PROVIDE(__efistub__text                        = _text);
+PROVIDE(__efistub__end                 = _end);
+PROVIDE(__efistub__edata               = _edata);
+PROVIDE(__efistub_screen_info          = screen_info);
+PROVIDE(__efistub__ctype               = _ctype);
 
-__efistub__text                        = _text;
-__efistub__end                 = _end;
-__efistub__edata               = _edata;
-__efistub_screen_info          = screen_info;
-__efistub__ctype               = _ctype;
+/*
+ * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which
+ * instruments the conventional ones. Therefore, any references from the EFI
+ * stub or other position-independent, low-level C code should be redirected to
+ * the non-instrumented versions as well.
+ */
+PROVIDE(__efistub___memcpy             = __pi_memcpy);
+PROVIDE(__efistub___memmove            = __pi_memmove);
+PROVIDE(__efistub___memset             = __pi_memset);
 
-#endif
+PROVIDE(__pi___memcpy                  = __pi_memcpy);
+PROVIDE(__pi___memmove                 = __pi_memmove);
+PROVIDE(__pi___memset                  = __pi_memset);
 
 #ifdef CONFIG_KVM
 
index 418b2bb..325455d 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/pgtable.h>
 #include <linux/random.h>
 
-#include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 
-enum kaslr_status {
-       KASLR_ENABLED,
-       KASLR_DISABLED_CMDLINE,
-       KASLR_DISABLED_NO_SEED,
-       KASLR_DISABLED_FDT_REMAP,
-};
-
-static enum kaslr_status __initdata kaslr_status;
 u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;
 
-static __init u64 get_kaslr_seed(void *fdt)
-{
-       int node, len;
-       fdt64_t *prop;
-       u64 ret;
-
-       node = fdt_path_offset(fdt, "/chosen");
-       if (node < 0)
-               return 0;
-
-       prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
-       if (!prop || len != sizeof(u64))
-               return 0;
-
-       ret = fdt64_to_cpu(*prop);
-       *prop = 0;
-       return ret;
-}
-
 struct arm64_ftr_override kaslr_feature_override __initdata;
 
-/*
- * This routine will be executed with the kernel mapped at its default virtual
- * address, and if it returns successfully, the kernel will be remapped, and
- * start_kernel() will be executed from a randomized virtual offset. The
- * relocation will result in all absolute references (e.g., static variables
- * containing function pointers) to be reinitialized, and zero-initialized
- * .bss variables will be reset to 0.
- */
-u64 __init kaslr_early_init(void)
+static int __init kaslr_init(void)
 {
-       void *fdt;
-       u64 seed, offset, mask, module_range;
-       unsigned long raw;
+       u64 module_range;
+       u32 seed;
 
        /*
         * Set a reasonable default for module_alloc_base in case
         * we end up running with module randomization disabled.
         */
        module_alloc_base = (u64)_etext - MODULES_VSIZE;
-       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-                           (unsigned long)&module_alloc_base +
-                                   sizeof(module_alloc_base));
-
-       /*
-        * Try to map the FDT early. If this fails, we simply bail,
-        * and proceed with KASLR disabled. We will make another
-        * attempt at mapping the FDT in setup_machine()
-        */
-       fdt = get_early_fdt_ptr();
-       if (!fdt) {
-               kaslr_status = KASLR_DISABLED_FDT_REMAP;
-               return 0;
-       }
 
-       /*
-        * Retrieve (and wipe) the seed from the FDT
-        */
-       seed = get_kaslr_seed(fdt);
-
-       /*
-        * Check if 'nokaslr' appears on the command line, and
-        * return 0 if that is the case.
-        */
        if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
-               kaslr_status = KASLR_DISABLED_CMDLINE;
+               pr_info("KASLR disabled on command line\n");
                return 0;
        }
 
-       /*
-        * Mix in any entropy obtainable architecturally if enabled
-        * and supported.
-        */
-
-       if (arch_get_random_seed_long_early(&raw))
-               seed ^= raw;
-
-       if (!seed) {
-               kaslr_status = KASLR_DISABLED_NO_SEED;
+       if (!kaslr_offset()) {
+               pr_warn("KASLR disabled due to lack of seed\n");
                return 0;
        }
 
+       pr_info("KASLR enabled\n");
+
        /*
-        * OK, so we are proceeding with KASLR enabled. Calculate a suitable
-        * kernel image offset from the seed. Let's place the kernel in the
-        * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
-        * the lower and upper quarters to avoid colliding with other
-        * allocations.
-        * Even if we could randomize at page granularity for 16k and 64k pages,
-        * let's always round to 2 MB so we don't interfere with the ability to
-        * map using contiguous PTEs
+        * KASAN without KASAN_VMALLOC does not expect the module region to
+        * intersect the vmalloc region, since shadow memory is allocated for
+        * each module at load time, whereas the vmalloc region will already be
+        * shadowed by KASAN zero pages.
         */
-       mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
-       offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
+       BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+                     IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
+                    !IS_ENABLED(CONFIG_KASAN_VMALLOC));
 
-       /* use the top 16 bits to randomize the linear region */
-       memstart_offset_seed = seed >> 48;
-
-       if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
-           (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-            IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
-               /*
-                * KASAN without KASAN_VMALLOC does not expect the module region
-                * to intersect the vmalloc region, since shadow memory is
-                * allocated for each module at load time, whereas the vmalloc
-                * region is shadowed by KASAN zero pages. So keep modules
-                * out of the vmalloc region if KASAN is enabled without
-                * KASAN_VMALLOC, and put the kernel well within 4 GB of the
-                * module region.
-                */
-               return offset % SZ_2G;
+       seed = get_random_u32();
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
                /*
@@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void)
                 * resolved normally.)
                 */
                module_range = SZ_2G - (u64)(_end - _stext);
-               module_alloc_base = max((u64)_end + offset - SZ_2G,
-                                       (u64)MODULES_VADDR);
+               module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
        } else {
                /*
                 * Randomize the module region by setting module_alloc_base to
@@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void)
                 * when ARM64_MODULE_PLTS is enabled.
                 */
                module_range = MODULES_VSIZE - (u64)(_etext - _stext);
-               module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
        }
 
        /* use the lower 21 bits to randomize the base of the module region */
        module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
        module_alloc_base &= PAGE_MASK;
 
-       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-                           (unsigned long)&module_alloc_base +
-                                   sizeof(module_alloc_base));
-       dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
-                           (unsigned long)&memstart_offset_seed +
-                                   sizeof(memstart_offset_seed));
-
-       return offset;
-}
-
-static int __init kaslr_init(void)
-{
-       switch (kaslr_status) {
-       case KASLR_ENABLED:
-               pr_info("KASLR enabled\n");
-               break;
-       case KASLR_DISABLED_CMDLINE:
-               pr_info("KASLR disabled on command line\n");
-               break;
-       case KASLR_DISABLED_NO_SEED:
-               pr_warn("KASLR disabled due to lack of seed\n");
-               break;
-       case KASLR_DISABLED_FDT_REMAP:
-               pr_warn("KASLR disabled due to FDT remapping failure\n");
-               break;
-       }
-
        return 0;
 }
-core_initcall(kaslr_init)
+subsys_initcall(kaslr_init)
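
The 21-bit scaling above maps the random value uniformly into
[0, module_range). A worked sketch with hypothetical sizes:

        u64 module_range = SZ_128M - SZ_4M;      /* hypothetical text size */
        u32 seed         = 0x123456;             /* only the low 21 bits are used */
        u64 offset       = (module_range * (seed & ((1 << 21) - 1))) >> 21;
        /* offset < module_range always holds: the maximum 21-bit seed yields
         * module_range * (2^21 - 1) / 2^21, just below the top of the window. */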
index 42bd8c0..692e9d2 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <asm/unistd.h>
 
+       .section .rodata
        .align  5
        .globl  __kuser_helper_start
 __kuser_helper_start:
index f6b0074..b2b7302 100644 (file)
@@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
        if (!pte_is_tagged)
                return;
 
-       page_kasan_tag_reset(page);
-       /*
-        * We need smp_wmb() in between setting the flags and clearing the
-        * tags because if another thread reads page->flags and builds a
-        * tagged address out of it, there is an actual dependency to the
-        * memory access, but on the current thread we do not guarantee that
-        * the new page->flags are visible before the tags were updated.
-        */
-       smp_wmb();
        mte_clear_page_tags(page_address(page));
 }
 
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
new file mode 100644 (file)
index 0000000..8392914
--- /dev/null
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 Google LLC
+
+KBUILD_CFLAGS  := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+                  -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+                  $(call cc-option,-mbranch-protection=none) \
+                  -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+                  -include $(srctree)/include/linux/hidden.h \
+                  -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+                  $(call cc-option,-fno-addrsig)
+
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS  := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS  := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
+GCOV_PROFILE   := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT        := n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+                              --remove-section=.note.gnu.property \
+                              --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+       $(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+       $(call if_changed_rule,cc_o_c)
+
+obj-y          := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y                := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
new file mode 100644 (file)
index 0000000..6c3855e
--- /dev/null
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+       size_t l1, l2;
+
+       l2 = strlen(s2);
+       if (!l2)
+               return (char *)s1;
+       l1 = strlen(s1);
+       while (l1 >= l2) {
+               l1--;
+               if (!memcmp(s1, s2, l2))
+                       return (char *)s1;
+               s1++;
+       }
+       return NULL;
+}
+
+static bool cmdline_contains_nokaslr(const u8 *cmdline)
+{
+       const u8 *str;
+
+       str = __strstr(cmdline, "nokaslr");
+       return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
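
Illustrative calls, with behaviour read off the code above ("nokaslr"
must start the string or follow a space; nothing after it is checked):

        cmdline_contains_nokaslr("nokaslr");                  /* true  */
        cmdline_contains_nokaslr("console=ttyAMA0 nokaslr");  /* true  */
        cmdline_contains_nokaslr("xnokaslr");                 /* false */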
+static bool is_kaslr_disabled_cmdline(void *fdt)
+{
+       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+               int node;
+               const u8 *prop;
+
+               node = fdt_path_offset(fdt, "/chosen");
+               if (node < 0)
+                       goto out;
+
+               prop = fdt_getprop(fdt, node, "bootargs", NULL);
+               if (!prop)
+                       goto out;
+
+               if (cmdline_contains_nokaslr(prop))
+                       return true;
+
+               if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+                       goto out;
+
+               return false;
+       }
+out:
+       return cmdline_contains_nokaslr(CONFIG_CMDLINE);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+       int node, len;
+       fdt64_t *prop;
+       u64 ret;
+
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node < 0)
+               return 0;
+
+       prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+       if (!prop || len != sizeof(u64))
+               return 0;
+
+       ret = fdt64_to_cpu(*prop);
+       *prop = 0;
+       return ret;
+}
+
+asmlinkage u64 kaslr_early_init(void *fdt)
+{
+       u64 seed;
+
+       if (is_kaslr_disabled_cmdline(fdt))
+               return 0;
+
+       seed = get_kaslr_seed(fdt);
+       if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+                if (!__early_cpu_has_rndr() ||
+                    !__arm64_rndr((unsigned long *)&seed))
+#endif
+               return 0;
+       }
+
+       /*
+        * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+        * kernel image offset from the seed. Let's place the kernel in the
+        * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+        * the lower and upper quarters to avoid colliding with other
+        * allocations.
+        */
+       return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
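
For VA_BITS_MIN == 48 the bounds of the returned offset work out as below
(a sketch; constants taken from the expression above):

        u64 lo = BIT(48 - 3);                /* 0x0000200000000000 */
        u64 hi = lo + GENMASK(48 - 3, 0);    /* lo + (1UL << 46) - 1 */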
index b0980fb..3e6d035 100644 (file)
@@ -280,6 +280,9 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
 
                vl = task_get_sme_vl(current);
        } else {
+               if (!system_supports_sve())
+                       return -EINVAL;
+
                vl = task_get_sve_vl(current);
        }
 
@@ -342,9 +345,14 @@ fpsimd_only:
 
 #else /* ! CONFIG_ARM64_SVE */
 
-/* Turn any non-optimised out attempts to use these into a link error: */
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+       WARN_ON_ONCE(1);
+       return -EINVAL;
+}
+
+/* Turn any non-optimised out attempts to use this into a link error: */
 extern int preserve_sve_context(void __user *ctx);
-extern int restore_sve_fpsimd_context(struct user_ctxs *user);
 
 #endif /* ! CONFIG_ARM64_SVE */
 
@@ -649,14 +657,10 @@ static int restore_sigframe(struct pt_regs *regs,
                if (!user.fpsimd)
                        return -EINVAL;
 
-               if (user.sve) {
-                       if (!system_supports_sve())
-                               return -EINVAL;
-
+               if (user.sve)
                        err = restore_sve_fpsimd_context(&user);
-               } else {
+               else
                        err = restore_fpsimd_context(user.fpsimd);
-               }
        }
 
        if (err == 0 && system_supports_sme() && user.za)
index 475d30d..ccbd4aa 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <asm/unistd.h>
 
+       .section .rodata
        .globl __aarch32_sigret_code_start
 __aarch32_sigret_code_start:
 
index 4ea9392..617f78a 100644 (file)
@@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter)
        .pushsection ".idmap.text", "awx"
 SYM_CODE_START(cpu_resume)
        bl      init_kernel_el
-       bl      switch_to_vhe
+       bl      finalise_el2
        bl      __cpu_setup
        /* enable the MMU early - so we can access sleep_save_stash by va */
        adrp    x1, swapper_pg_dir
+       adrp    x2, idmap_pg_dir
        bl      __enable_mmu
        ldr     x8, =_cpu_resume
        br      x8
index 0467cb7..fcaa151 100644 (file)
@@ -38,6 +38,8 @@
  * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
  *               associated with the most recently encountered replacement lr
  *               value.
+ *
+ * @task:        The task being unwound.
  */
 struct unwind_state {
        unsigned long fp;
@@ -48,13 +50,13 @@ struct unwind_state {
 #ifdef CONFIG_KRETPROBES
        struct llist_node *kr_cur;
 #endif
+       struct task_struct *task;
 };
 
-static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
-                               unsigned long pc)
+static void unwind_init_common(struct unwind_state *state,
+                              struct task_struct *task)
 {
-       state->fp = fp;
-       state->pc = pc;
+       state->task = task;
 #ifdef CONFIG_KRETPROBES
        state->kr_cur = NULL;
 #endif
@@ -72,7 +74,57 @@ static notrace void unwind_init(struct unwind_state *state, unsigned long fp,
        state->prev_fp = 0;
        state->prev_type = STACK_TYPE_UNKNOWN;
 }
-NOKPROBE_SYMBOL(unwind_init);
+
+/*
+ * Start an unwind from a pt_regs.
+ *
+ * The unwind will begin at the PC within the regs.
+ *
+ * The regs must be on a stack currently owned by the calling task.
+ */
+static inline void unwind_init_from_regs(struct unwind_state *state,
+                                        struct pt_regs *regs)
+{
+       unwind_init_common(state, current);
+
+       state->fp = regs->regs[29];
+       state->pc = regs->pc;
+}
+
+/*
+ * Start an unwind from a caller.
+ *
+ * The unwind will begin at the caller of whichever function this is inlined
+ * into.
+ *
+ * The function which invokes this must be noinline.
+ */
+static __always_inline void unwind_init_from_caller(struct unwind_state *state)
+{
+       unwind_init_common(state, current);
+
+       state->fp = (unsigned long)__builtin_frame_address(1);
+       state->pc = (unsigned long)__builtin_return_address(0);
+}
+
+/*
+ * Start an unwind from a blocked task.
+ *
+ * The unwind will begin at the blocked task's saved PC (i.e. the caller of
+ * cpu_switch_to()).
+ *
+ * The caller should ensure the task is blocked in cpu_switch_to() for the
+ * duration of the unwind, or the unwind will be bogus. It is never valid to
+ * call this for the current task.
+ */
+static inline void unwind_init_from_task(struct unwind_state *state,
+                                        struct task_struct *task)
+{
+       unwind_init_common(state, task);
+
+       state->fp = thread_saved_fp(task);
+       state->pc = thread_saved_pc(task);
+}
 
 /*
  * Unwind from one frame record (A) to the next frame record (B).
@@ -81,9 +133,9 @@ NOKPROBE_SYMBOL(unwind_init);
  * records (e.g. a cycle), determined based on the location and fp value of A
  * and the location (but not the fp value) of B.
  */
-static int notrace unwind_next(struct task_struct *tsk,
-                              struct unwind_state *state)
+static int notrace unwind_next(struct unwind_state *state)
 {
+       struct task_struct *tsk = state->task;
        unsigned long fp = state->fp;
        struct stack_info info;
 
@@ -117,15 +169,15 @@ static int notrace unwind_next(struct task_struct *tsk,
                if (fp <= state->prev_fp)
                        return -EINVAL;
        } else {
-               set_bit(state->prev_type, state->stacks_done);
+               __set_bit(state->prev_type, state->stacks_done);
        }
 
        /*
         * Record this frame record's values and location. The prev_fp and
         * prev_type are only meaningful to the next unwind_next() invocation.
         */
-       state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
-       state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+       state->fp = READ_ONCE(*(unsigned long *)(fp));
+       state->pc = READ_ONCE(*(unsigned long *)(fp + 8));
        state->prev_fp = fp;
        state->prev_type = info.type;
 
@@ -157,8 +209,7 @@ static int notrace unwind_next(struct task_struct *tsk,
 }
 NOKPROBE_SYMBOL(unwind_next);
 
-static void notrace unwind(struct task_struct *tsk,
-                          struct unwind_state *state,
+static void notrace unwind(struct unwind_state *state,
                           stack_trace_consume_fn consume_entry, void *cookie)
 {
        while (1) {
@@ -166,7 +217,7 @@ static void notrace unwind(struct task_struct *tsk,
 
                if (!consume_entry(cookie, state->pc))
                        break;
-               ret = unwind_next(tsk, state);
+               ret = unwind_next(state);
                if (ret < 0)
                        break;
        }
@@ -212,15 +263,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 {
        struct unwind_state state;
 
-       if (regs)
-               unwind_init(&state, regs->regs[29], regs->pc);
-       else if (task == current)
-               unwind_init(&state,
-                               (unsigned long)__builtin_frame_address(1),
-                               (unsigned long)__builtin_return_address(0));
-       else
-               unwind_init(&state, thread_saved_fp(task),
-                               thread_saved_pc(task));
-
-       unwind(task, &state, consume_entry, cookie);
+       if (regs) {
+               if (task != current)
+                       return;
+               unwind_init_from_regs(&state, regs);
+       } else if (task == current) {
+               unwind_init_from_caller(&state);
+       } else {
+               unwind_init_from_task(&state, task);
+       }
+
+       unwind(&state, consume_entry, cookie);
 }
index 2b0887e..9135fe0 100644 (file)
@@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void)
 
        /* Restore CnP bit in TTBR1_EL1 */
        if (system_supports_cnp())
-               cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+               cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 
        /*
         * PSTATE was not saved over suspend/resume, re-enable any detected
index 9ac7a81..b7fed33 100644 (file)
@@ -579,11 +579,11 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
 
        if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
                /* Hide DIC so that we can trap the unnecessary maintenance...*/
-               val &= ~BIT(CTR_DIC_SHIFT);
+               val &= ~BIT(CTR_EL0_DIC_SHIFT);
 
                /* ... and fake IminLine to reduce the number of traps. */
-               val &= ~CTR_IMINLINE_MASK;
-               val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+               val &= ~CTR_EL0_IminLine_MASK;
+               val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK;
        }
 
        pt_regs_write_reg(regs, rt, val);
index f6e25d7..bafbf78 100644 (file)
@@ -24,7 +24,13 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
 # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
 # preparation in build-time C")).
 ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
-            -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T
+            -Bsymbolic --build-id=sha1 -n $(btildflags-y)
+
+ifdef CONFIG_LD_ORPHAN_WARN
+  ldflags-y += --orphan-handling=warn
+endif
+
+ldflags-y += -T
 
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
index a5e61e0..e69fb4a 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/const.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
 
 OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
 OUTPUT_ARCH(aarch64)
@@ -49,11 +50,24 @@ SECTIONS
 
        .dynamic        : { *(.dynamic) }               :text   :dynamic
 
-       .rodata         : { *(.rodata*) }               :text
+       .rela.dyn       : ALIGN(8) { *(.rela .rela*) }
+
+       .rodata         : {
+               *(.rodata*)
+               *(.got)
+               *(.got.plt)
+               *(.plt)
+               *(.plt.*)
+               *(.iplt)
+               *(.igot .igot.plt)
+       }                                               :text
 
        _end = .;
        PROVIDE(end = .);
 
+       DWARF_DEBUG
+       ELF_DETAILS
+
        /DISCARD/       : {
                *(.data .data.* .gnu.linkonce.d.* .sdata*)
                *(.bss .sbss .dynbss .dynsbss)
index 05ba1aa..36c8f66 100644 (file)
@@ -104,6 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
 VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += --orphan-handling=warn
 
 
 # Borrow vdsomunge.c from the arm vDSO
index 3348ce5..8d95d7d 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/const.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
 
 OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
 OUTPUT_ARCH(arm)
@@ -35,12 +36,30 @@ SECTIONS
 
        .dynamic        : { *(.dynamic) }               :text   :dynamic
 
-       .rodata         : { *(.rodata*) }               :text
+       .rodata         : {
+               *(.rodata*)
+               *(.got)
+               *(.got.plt)
+               *(.plt)
+               *(.rel.iplt)
+               *(.iplt)
+               *(.igot.plt)
+       }                                               :text
 
-       .text           : { *(.text*) }                 :text   =0xe7f001f2
+       .text           : {
+               *(.text*)
+               *(.glue_7)
+               *(.glue_7t)
+               *(.vfp11_veneer)
+               *(.v4_bx)
+       }                                               :text   =0xe7f001f2
 
-       .got            : { *(.got) }
-       .rel.plt        : { *(.rel.plt) }
+       .rel.dyn        : { *(.rel*) }
+
+       .ARM.exidx : { *(.ARM.exidx*) }
+       DWARF_DEBUG
+       ELF_DETAILS
+       .ARM.attributes 0 : { *(.ARM.attributes) }
 
        /DISCARD/       : {
                *(.note.GNU-stack)
index 2d4a8f9..45131e3 100644 (file)
@@ -115,7 +115,8 @@ jiffies = jiffies_64;
        __entry_tramp_text_start = .;                   \
        *(.entry.tramp.text)                            \
        . = ALIGN(PAGE_SIZE);                           \
-       __entry_tramp_text_end = .;
+       __entry_tramp_text_end = .;                     \
+       *(.entry.tramp.rodata)
 #else
 #define TRAMP_TEXT
 #endif
@@ -198,8 +199,7 @@ SECTIONS
        }
 
        idmap_pg_dir = .;
-       . += IDMAP_DIR_SIZE;
-       idmap_pg_end = .;
+       . += PAGE_SIZE;
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
        tramp_pg_dir = .;
@@ -235,6 +235,10 @@ SECTIONS
        __inittext_end = .;
        __initdata_begin = .;
 
+       init_idmap_pg_dir = .;
+       . += INIT_IDMAP_DIR_SIZE;
+       init_idmap_pg_end = .;
+
        .init.data : {
                INIT_DATA
                INIT_SETUP(16)
@@ -253,21 +257,17 @@ SECTIONS
        HYPERVISOR_RELOC_SECTION
 
        .rela.dyn : ALIGN(8) {
+               __rela_start = .;
                *(.rela .rela*)
+               __rela_end = .;
        }
 
-       __rela_offset   = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
-       __rela_size     = SIZEOF(.rela.dyn);
-
-#ifdef CONFIG_RELR
        .relr.dyn : ALIGN(8) {
+               __relr_start = .;
                *(.relr.dyn)
+               __relr_end = .;
        }
 
-       __relr_offset   = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
-       __relr_size     = SIZEOF(.relr.dyn);
-#endif
-
        . = ALIGN(SEGMENT_ALIGN);
        __initdata_end = .;
        __init_end = .;
index fd55014..fa6e466 100644 (file)
        )
 
 #define PVM_ID_AA64ISAR1_ALLOW (\
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
        )
 
 #define PVM_ID_AA64ISAR2_ALLOW (\
-       ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \
-       ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \
+       ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
+       ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
        )
 
 u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
index 35a4331..6b94c3e 100644 (file)
@@ -173,10 +173,10 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
        u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
 
        if (!vcpu_has_ptrauth(vcpu))
-               allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
-                               ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
-                               ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
-                               ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+               allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+                               ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+                               ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+                               ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
 
        return id_aa64isar1_el1_sys_val & allow_mask;
 }
@@ -186,8 +186,8 @@ static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
        u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
 
        if (!vcpu_has_ptrauth(vcpu))
-               allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
-                               ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+               allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+                               ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
 
        return id_aa64isar2_el1_sys_val & allow_mask;
 }
index c06c047..c4fb387 100644 (file)
@@ -1136,17 +1136,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                break;
        case SYS_ID_AA64ISAR1_EL1:
                if (!vcpu_has_ptrauth(vcpu))
-                       val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
-                                ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
-                                ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
-                                ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+                       val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
+                                ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
+                                ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
+                                ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
                break;
        case SYS_ID_AA64ISAR2_EL1:
                if (!vcpu_has_ptrauth(vcpu))
-                       val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) |
-                                ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3));
+                       val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
+                                ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
                if (!cpus_have_final_cap(ARM64_HAS_WFXT))
-                       val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT);
+                       val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
                break;
        case SYS_ID_AA64DFR0_EL1:
                /* Limit debug to ARMv8.0 */
index eeb9e45..1b7c93a 100644 (file)
@@ -18,7 +18,7 @@
  */
        .macro  multitag_transfer_size, reg, tmp
        mrs_s   \reg, SYS_GMID_EL1
-       ubfx    \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE
+       ubfx    \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE
        mov     \tmp, #4
        lsl     \reg, \tmp, \reg
        .endm
index 21c9079..081058d 100644 (file)
@@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop)
        ret
 SYM_FUNC_END(__pi_dcache_clean_pop)
 SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
-
-/*
- *     __dma_flush_area(start, size)
- *
- *     clean & invalidate D / U line
- *
- *     - start   - virtual start address of region
- *     - size    - size in question
- */
-SYM_FUNC_START(__pi___dma_flush_area)
-       add     x1, x0, x1
-       dcache_by_line_op civac, sy, x0, x1, x2, x3
-       ret
-SYM_FUNC_END(__pi___dma_flush_area)
-SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
-
-/*
- *     __dma_map_area(start, size, dir)
- *     - start - kernel virtual start address
- *     - size  - size of region
- *     - dir   - DMA direction
- */
-SYM_FUNC_START(__pi___dma_map_area)
-       add     x1, x0, x1
-       b       __pi_dcache_clean_poc
-SYM_FUNC_END(__pi___dma_map_area)
-SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
-
-/*
- *     __dma_unmap_area(start, size, dir)
- *     - start - kernel virtual start address
- *     - size  - size of region
- *     - dir   - DMA direction
- */
-SYM_FUNC_START(__pi___dma_unmap_area)
-       add     x1, x0, x1
-       cmp     w2, #DMA_TO_DEVICE
-       b.ne    __pi_dcache_inval_poc
-       ret
-SYM_FUNC_END(__pi___dma_unmap_area)
-SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
index 0dea80b..2491327 100644 (file)
@@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from)
 
        if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
                set_bit(PG_mte_tagged, &to->flags);
-               page_kasan_tag_reset(to);
-               /*
-                * We need smp_wmb() in between setting the flags and clearing the
-                * tags because if another thread reads page->flags and builds a
-                * tagged address out of it, there is an actual dependency to the
-                * memory access, but on the current thread we do not guarantee that
-                * the new page->flags are visible before the tags were updated.
-                */
-               smp_wmb();
                mte_copy_page_tags(kto, kfrom);
        }
 }
index 6099c81..599cf81 100644 (file)
 #include <asm/xen/xen-ops.h>
 
 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-               enum dma_data_direction dir)
+                             enum dma_data_direction dir)
 {
-       __dma_map_area(phys_to_virt(paddr), size, dir);
+       unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+       dcache_clean_poc(start, start + size);
 }
 
 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-               enum dma_data_direction dir)
+                          enum dma_data_direction dir)
 {
-       __dma_unmap_area(phys_to_virt(paddr), size, dir);
+       unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+       if (dir == DMA_TO_DEVICE)
+               return;
+
+       dcache_inval_poc(start, start + size);
 }
 
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-       __dma_flush_area(page_address(page), size);
+       unsigned long start = (unsigned long)page_address(page);
+
+       dcache_clean_inval_poc(start, start + size);
 }
 
 #ifdef CONFIG_IOMMU_DMA
index 4894553..228d681 100644 (file)
@@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex)
        return ((unsigned long)&ex->fixup + ex->fixup);
 }
 
-static bool ex_handler_fixup(const struct exception_table_entry *ex,
-                            struct pt_regs *regs)
-{
-       regs->pc = get_ex_fixup(ex);
-       return true;
-}
-
 static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
                                        struct pt_regs *regs)
 {
@@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs)
                return false;
 
        switch (ex->type) {
-       case EX_TYPE_FIXUP:
-               return ex_handler_fixup(ex, regs);
        case EX_TYPE_BPF:
                return ex_handler_bpf(ex, regs);
        case EX_TYPE_UACCESS_ERR_ZERO:
+       case EX_TYPE_KACCESS_ERR_ZERO:
                return ex_handler_uaccess_err_zero(ex, regs);
        case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
                return ex_handler_load_unaligned_zeropad(ex, regs);
index c5e1176..cdf3ffa 100644 (file)
@@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 void tag_clear_highpage(struct page *page)
 {
        mte_zero_clear_page_tags(page_address(page));
-       page_kasan_tag_reset(page);
        set_bit(PG_mte_tagged, &page->flags);
 }
index 3618ef3..5307ffd 100644 (file)
@@ -100,16 +100,6 @@ int pud_huge(pud_t pud)
 #endif
 }
 
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
-{
-       unsigned long pfn = pte_pfn(pte);
-
-       return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-}
-
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
 {
index 339ee84..b6ef26f 100644 (file)
@@ -389,7 +389,7 @@ void __init arm64_memblock_init(void)
 
        early_init_fdt_scan_reserved_mem();
 
-       if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+       if (!defer_reserve_crashkernel())
                reserve_crashkernel();
 
        high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -438,7 +438,7 @@ void __init bootmem_init(void)
         * request_standard_resources() depends on crashkernel's memory being
         * reserved, so do it here.
         */
-       if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+       if (defer_reserve_crashkernel())
                reserve_crashkernel();
 
        memblock_dump_all();
index b21f91c..c5af103 100644 (file)
@@ -1,96 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * Based on arch/arm/mm/ioremap.c
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- * Hacked for ARM by Phil Blundell <philb@gnu.org>
- * Hacked to allow all architectures to build, and various cleanups
- * by Russell King
- * Copyright (C) 2012 ARM Ltd.
- */
 
-#include <linux/export.h>
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/io.h>
 
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-
-static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
-                                     pgprot_t prot, void *caller)
+bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot)
 {
-       unsigned long last_addr;
-       unsigned long offset = phys_addr & ~PAGE_MASK;
-       int err;
-       unsigned long addr;
-       struct vm_struct *area;
+       unsigned long last_addr = phys_addr + size - 1;
 
-       /*
-        * Page align the mapping address and size, taking account of any
-        * offset.
-        */
-       phys_addr &= PAGE_MASK;
-       size = PAGE_ALIGN(size + offset);
+       /* Don't allow outside PHYS_MASK */
+       if (last_addr & ~PHYS_MASK)
+               return false;
 
-       /*
-        * Don't allow wraparound, zero size or outside PHYS_MASK.
-        */
-       last_addr = phys_addr + size - 1;
-       if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
-               return NULL;
-
-       /*
-        * Don't allow RAM to be mapped.
-        */
+       /* Don't allow RAM to be mapped. */
        if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
-               return NULL;
-
-       area = get_vm_area_caller(size, VM_IOREMAP, caller);
-       if (!area)
-               return NULL;
-       addr = (unsigned long)area->addr;
-       area->phys_addr = phys_addr;
-
-       err = ioremap_page_range(addr, addr + size, phys_addr, prot);
-       if (err) {
-               vunmap((void *)addr);
-               return NULL;
-       }
-
-       return (void __iomem *)(offset + addr);
-}
-
-void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
-{
-       return __ioremap_caller(phys_addr, size, prot,
-                               __builtin_return_address(0));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *io_addr)
-{
-       unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
-
-       /*
-        * We could get an address outside vmalloc range in case
-        * of ioremap_cache() reusing a RAM mapping.
-        */
-       if (is_vmalloc_addr((void *)addr))
-               vunmap((void *)addr);
-}
-EXPORT_SYMBOL(iounmap);
-
-void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
-{
-       /* For normal memory we already have a cacheable mapping. */
-       if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
-               return (void __iomem *)__phys_to_virt(phys_addr);
+               return false;
 
-       return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
-                               __builtin_return_address(0));
+       return true;
 }
-EXPORT_SYMBOL(ioremap_cache);
 
 /*
  * Must be called after early_fixmap_init
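
With the GENERIC_IOREMAP conversion, the open-coded mapping above moves
into common code, which is assumed to gate on the arch hook roughly like
this (a sketch of the mm/ioremap.c flow, not part of this patch):

        /* inside the generic ioremap_prot() */
        if (!ioremap_allowed(phys_addr, size, prot))
                return NULL;
        /* ...then page-align, reserve a VM_IOREMAP area and map the range... */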
index c12cd70..e969e68 100644 (file)
@@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
         */
        memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
        dsb(ishst);
-       cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
+       cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
 
        clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
                                PAGE_KERNEL_RO));
 
        memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
-       cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+       cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
 
 static void __init kasan_init_depth(void)
index 626ec32..db7c4e6 100644 (file)
 #define NO_CONT_MAPPINGS       BIT(1)
 #define NO_EXEC_MAPPINGS       BIT(2)  /* assumes FEAT_HPDS is not used */
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
-u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+int idmap_t0sz __ro_after_init;
 
-u64 __section(".mmuoff.data.write") vabits_actual;
+#if VA_BITS > 48
+u64 vabits_actual __ro_after_init = VA_BITS_MIN;
 EXPORT_SYMBOL(vabits_actual);
+#endif
+
+u64 kimage_vaddr __ro_after_init = (u64)&_text;
+EXPORT_SYMBOL(kimage_vaddr);
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
 
+u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with the MMU turned off.
+ */
+long __section(".mmuoff.data.write") __early_cpu_boot_status;
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -388,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
        } while (pgdp++, addr = next, addr != end);
 }
 
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+extern __alias(__create_pgd_mapping)
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+                            phys_addr_t size, pgprot_t prot,
+                            phys_addr_t (*pgtable_alloc)(int), int flags);
+#endif
+
 static phys_addr_t __pgd_pgtable_alloc(int shift)
 {
        void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
@@ -529,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp)
 
 #ifdef CONFIG_KEXEC_CORE
        if (crash_mem_map) {
-               if (IS_ENABLED(CONFIG_ZONE_DMA) ||
-                   IS_ENABLED(CONFIG_ZONE_DMA32))
+               if (defer_reserve_crashkernel())
                        flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
                else if (crashk_res.end)
                        memblock_mark_nomap(crashk_res.start,
@@ -571,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp)
         * through /sys/kernel/kexec_crash_size interface.
         */
 #ifdef CONFIG_KEXEC_CORE
-       if (crash_mem_map &&
-           !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+       if (crash_mem_map && !defer_reserve_crashkernel()) {
                if (crashk_res.end) {
                        __map_memblock(pgdp, crashk_res.start,
                                       crashk_res.end + 1,
@@ -665,13 +682,9 @@ static int __init map_entry_trampoline(void)
                __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
                             pa_start + i * PAGE_SIZE, prot);
 
-       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-               extern char __entry_tramp_data_start[];
-
-               __set_fixmap(FIX_ENTRY_TRAMP_DATA,
-                            __pa_symbol(__entry_tramp_data_start),
-                            PAGE_KERNEL_RO);
-       }
+       if (IS_ENABLED(CONFIG_RELOCATABLE))
+               __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
+                            pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
 
        return 0;
 }
@@ -762,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
        kasan_copy_shadow(pgdp);
 }
 
+static void __init create_idmap(void)
+{
+       u64 start = __pa_symbol(__idmap_text_start);
+       u64 size = __pa_symbol(__idmap_text_end) - start;
+       pgd_t *pgd = idmap_pg_dir;
+       u64 pgd_phys;
+
+       /* check if we need an additional level of translation */
+       if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
+               pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
+               set_pgd(&idmap_pg_dir[start >> VA_BITS],
+                       __pgd(pgd_phys | P4D_TYPE_TABLE));
+               pgd = __va(pgd_phys);
+       }
+       __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
+                            early_pgtable_alloc, 0);
+
+       if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+               extern u32 __idmap_kpti_flag;
+               u64 pa = __pa_symbol(&__idmap_kpti_flag);
+
+               /*
+                * The KPTI G-to-nG conversion code needs a read-write mapping
+                * of its synchronization flag in the ID map.
+                */
+               __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
+                                    early_pgtable_alloc, 0);
+       }
+}
+
 void __init paging_init(void)
 {
        pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+       extern pgd_t init_idmap_pg_dir[];
+
+       idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
 
        map_kernel(pgdp);
        map_mem(pgdp);
 
        pgd_clear_fixmap();
 
-       cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+       cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
        init_mm.pgd = swapper_pg_dir;
 
        memblock_phys_free(__pa_symbol(init_pg_dir),
                           __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
 
        memblock_allow_resize();
+
+       create_idmap();
 }
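
Worked example of the idmap_t0sz computation in paging_init() above,
assuming VA_BITS_MIN == 48 (idmap_t0sz_for() is illustrative, not kernel
code):

        static int idmap_t0sz_for(u64 pa_end)
        {
                return 63 - __fls(pa_end | GENMASK(48 - 1, 0));
        }
        /* idmap_t0sz_for(1UL << 40) == 16   -> 48-bit ID map
         * idmap_t0sz_for(1UL << 51) == 12   -> 52-bit ID map */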
 
 /*
index a9e50e9..4334dec 100644 (file)
@@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
        if (!tags)
                return false;
 
-       page_kasan_tag_reset(page);
-       /*
-        * We need smp_wmb() in between setting the flags and clearing the
-        * tags because if another thread reads page->flags and builds a
-        * tagged address out of it, there is an actual dependency to the
-        * memory access, but on the current thread we do not guarantee that
-        * the new page->flags are visible before the tags were updated.
-        */
-       smp_wmb();
        mte_restore_page_tags(page_address(page), tags);
 
        return true;
index 50bbed9..7837a69 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/asm_pointer_auth.h>
 #include <asm/hwcap.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
@@ -200,34 +201,64 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
        .popsection
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+
+#define KPTI_NG_PTE_FLAGS      (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+
        .pushsection ".idmap.text", "awx"
 
-       .macro  __idmap_kpti_get_pgtable_ent, type
-       dc      cvac, cur_\()\type\()p          // Ensure any existing dirty
-       dmb     sy                              // lines are written back before
-       ldr     \type, [cur_\()\type\()p]       // loading the entry
-       tbz     \type, #0, skip_\()\type        // Skip invalid and
-       tbnz    \type, #11, skip_\()\type       // non-global entries
+       .macro  kpti_mk_tbl_ng, type, num_entries
+       add     end_\type\()p, cur_\type\()p, #\num_entries * 8
+.Ldo_\type:
+       ldr     \type, [cur_\type\()p]          // Load the entry
+       tbz     \type, #0, .Lnext_\type         // Skip invalid and
+       tbnz    \type, #11, .Lnext_\type        // non-global entries
+       orr     \type, \type, #PTE_NG           // Same bit for blocks and pages
+       str     \type, [cur_\type\()p]          // Update the entry
+       .ifnc   \type, pte
+       tbnz    \type, #1, .Lderef_\type
+       .endif
+.Lnext_\type:
+       add     cur_\type\()p, cur_\type\()p, #8
+       cmp     cur_\type\()p, end_\type\()p
+       b.ne    .Ldo_\type
        .endm
 
-       .macro __idmap_kpti_put_pgtable_ent_ng, type
-       orr     \type, \type, #PTE_NG           // Same bit for blocks and pages
-       str     \type, [cur_\()\type\()p]       // Update the entry and ensure
-       dmb     sy                              // that it is visible to all
-       dc      civac, cur_\()\type\()p         // CPUs.
+       /*
+        * Dereference the current table entry and map it into the temporary
+        * fixmap slot associated with the current level.
+        */
+       .macro  kpti_map_pgtbl, type, level
+       str     xzr, [temp_pte, #8 * (\level + 1)]      // break before make
+       dsb     nshst
+       add     pte, temp_pte, #PAGE_SIZE * (\level + 1)
+       lsr     pte, pte, #12
+       tlbi    vaae1, pte
+       dsb     nsh
+       isb
+
+       phys_to_pte pte, cur_\type\()p
+       add     cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
+       orr     pte, pte, pte_flags
+       str     pte, [temp_pte, #8 * (\level + 1)]
+       dsb     nshst
        .endm
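
In C terms, the macro above is a conventional break-before-make update of the per-level fixmap slot; a minimal sketch, with the dsb/tlbi/isb barriers shown as comments and all names illustrative rather than kernel APIs:

#include <stdint.h>

/* Break-before-make remap of the temporary fixmap slot for one level. */
static void kpti_map_pgtbl_sketch(volatile uint64_t *temp_pte, int level,
				  uint64_t table_pte, uint64_t pte_flags)
{
	volatile uint64_t *slot = &temp_pte[level + 1];

	*slot = 0;			/* break: clear the old fixmap entry */
	/* dsb nshst; tlbi vaae1; dsb nsh; isb -- evict the stale TLB entry */
	*slot = table_pte | pte_flags;	/* make: map the next table to walk */
	/* dsb nshst -- new entry visible before the walker dereferences it */
}
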
 
 /*
- * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
+ * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd,
+ *                                unsigned long temp_pte_va)
  *
  * Called exactly once from stop_machine context by each CPU found during boot.
  */
-__idmap_kpti_flag:
-       .long   1
+       .pushsection    ".data", "aw", %progbits
+SYM_DATA(__idmap_kpti_flag, .long 1)
+       .popsection
+
 SYM_FUNC_START(idmap_kpti_install_ng_mappings)
        cpu             .req    w0
+       temp_pte        .req    x0
        num_cpus        .req    w1
-       swapper_pa      .req    x2
+       pte_flags       .req    x1
+       temp_pgd_phys   .req    x2
        swapper_ttb     .req    x3
        flag_ptr        .req    x4
        cur_pgdp        .req    x5
@@ -235,17 +266,16 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
        pgd             .req    x7
        cur_pudp        .req    x8
        end_pudp        .req    x9
-       pud             .req    x10
        cur_pmdp        .req    x11
        end_pmdp        .req    x12
-       pmd             .req    x13
        cur_ptep        .req    x14
        end_ptep        .req    x15
        pte             .req    x16
+       valid           .req    x17
 
+       mov     x5, x3                          // preserve temp_pte arg
        mrs     swapper_ttb, ttbr1_el1
-       restore_ttbr1   swapper_ttb
-       adr     flag_ptr, __idmap_kpti_flag
+       adr_l   flag_ptr, __idmap_kpti_flag
 
        cbnz    cpu, __idmap_kpti_secondary
 
@@ -256,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
        eor     w17, w17, num_cpus
        cbnz    w17, 1b
 
-       /* We need to walk swapper, so turn off the MMU. */
-       pre_disable_mmu_workaround
-       mrs     x17, sctlr_el1
-       bic     x17, x17, #SCTLR_ELx_M
-       msr     sctlr_el1, x17
+       /* Switch to the temporary page tables on this CPU only */
+       __idmap_cpu_set_reserved_ttbr1 x8, x9
+       offset_ttbr1 temp_pgd_phys, x8
+       msr     ttbr1_el1, temp_pgd_phys
        isb
 
+       mov     temp_pte, x5
+       mov     pte_flags, #KPTI_NG_PTE_FLAGS
+
        /* Everybody is enjoying the idmap, so we can rewrite swapper. */
        /* PGD */
-       mov     cur_pgdp, swapper_pa
-       add     end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
-do_pgd:        __idmap_kpti_get_pgtable_ent    pgd
-       tbnz    pgd, #1, walk_puds
-next_pgd:
-       __idmap_kpti_put_pgtable_ent_ng pgd
-skip_pgd:
-       add     cur_pgdp, cur_pgdp, #8
-       cmp     cur_pgdp, end_pgdp
-       b.ne    do_pgd
-
-       /* Publish the updated tables and nuke all the TLBs */
-       dsb     sy
-       tlbi    vmalle1is
-       dsb     ish
-       isb
+       adrp            cur_pgdp, swapper_pg_dir
+       kpti_map_pgtbl  pgd, 0
+       kpti_mk_tbl_ng  pgd, PTRS_PER_PGD
 
-       /* We're done: fire up the MMU again */
-       mrs     x17, sctlr_el1
-       orr     x17, x17, #SCTLR_ELx_M
-       set_sctlr_el1   x17
+       /* Ensure all the updated entries are visible to secondary CPUs */
+       dsb     ishst
+
+       /* We're done: fire up swapper_pg_dir again */
+       __idmap_cpu_set_reserved_ttbr1 x8, x9
+       msr     ttbr1_el1, swapper_ttb
+       isb
 
        /* Set the flag to zero to indicate that we're all done */
        str     wzr, [flag_ptr]
        ret
 
+.Lderef_pgd:
        /* PUD */
-walk_puds:
-       .if CONFIG_PGTABLE_LEVELS > 3
+       .if             CONFIG_PGTABLE_LEVELS > 3
+       pud             .req    x10
        pte_to_phys     cur_pudp, pgd
-       add     end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
-do_pud:        __idmap_kpti_get_pgtable_ent    pud
-       tbnz    pud, #1, walk_pmds
-next_pud:
-       __idmap_kpti_put_pgtable_ent_ng pud
-skip_pud:
-       add     cur_pudp, cur_pudp, 8
-       cmp     cur_pudp, end_pudp
-       b.ne    do_pud
-       b       next_pgd
-       .else /* CONFIG_PGTABLE_LEVELS <= 3 */
-       mov     pud, pgd
-       b       walk_pmds
-next_pud:
-       b       next_pgd
+       kpti_map_pgtbl  pud, 1
+       kpti_mk_tbl_ng  pud, PTRS_PER_PUD
+       b               .Lnext_pgd
+       .else           /* CONFIG_PGTABLE_LEVELS <= 3 */
+       pud             .req    pgd
+       .set            .Lnext_pud, .Lnext_pgd
        .endif
 
+.Lderef_pud:
        /* PMD */
-walk_pmds:
-       .if CONFIG_PGTABLE_LEVELS > 2
+       .if             CONFIG_PGTABLE_LEVELS > 2
+       pmd             .req    x13
        pte_to_phys     cur_pmdp, pud
-       add     end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
-do_pmd:        __idmap_kpti_get_pgtable_ent    pmd
-       tbnz    pmd, #1, walk_ptes
-next_pmd:
-       __idmap_kpti_put_pgtable_ent_ng pmd
-skip_pmd:
-       add     cur_pmdp, cur_pmdp, #8
-       cmp     cur_pmdp, end_pmdp
-       b.ne    do_pmd
-       b       next_pud
-       .else /* CONFIG_PGTABLE_LEVELS <= 2 */
-       mov     pmd, pud
-       b       walk_ptes
-next_pmd:
-       b       next_pud
+       kpti_map_pgtbl  pmd, 2
+       kpti_mk_tbl_ng  pmd, PTRS_PER_PMD
+       b               .Lnext_pud
+       .else           /* CONFIG_PGTABLE_LEVELS <= 2 */
+       pmd             .req    pgd
+       .set            .Lnext_pmd, .Lnext_pgd
        .endif
 
+.Lderef_pmd:
        /* PTE */
-walk_ptes:
        pte_to_phys     cur_ptep, pmd
-       add     end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
-do_pte:        __idmap_kpti_get_pgtable_ent    pte
-       __idmap_kpti_put_pgtable_ent_ng pte
-skip_pte:
-       add     cur_ptep, cur_ptep, #8
-       cmp     cur_ptep, end_ptep
-       b.ne    do_pte
-       b       next_pmd
+       kpti_map_pgtbl  pte, 3
+       kpti_mk_tbl_ng  pte, PTRS_PER_PTE
+       b               .Lnext_pmd
 
        .unreq  cpu
+       .unreq  temp_pte
        .unreq  num_cpus
-       .unreq  swapper_pa
+       .unreq  pte_flags
+       .unreq  temp_pgd_phys
        .unreq  cur_pgdp
        .unreq  end_pgdp
        .unreq  pgd
@@ -360,6 +363,7 @@ skip_pte:
        .unreq  cur_ptep
        .unreq  end_ptep
        .unreq  pte
+       .unreq  valid
 
        /* Secondary CPUs end up here */
 __idmap_kpti_secondary:
@@ -379,7 +383,6 @@ __idmap_kpti_secondary:
        cbnz    w16, 1b
 
        /* All done, act like nothing happened */
-       offset_ttbr1 swapper_ttb, x16
        msr     ttbr1_el1, swapper_ttb
        isb
        ret
@@ -395,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
  *
  *     Initialise the processor for turning the MMU on.
  *
+ * Input:
+ *     x0 - actual number of VA bits (ignored unless VA_BITS > 48)
  * Output:
  *     Return in x0 the value of the SCTLR_EL1 register.
  */
@@ -464,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
        tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
-       ldr_l           x9, vabits_actual
-       sub             x9, xzr, x9
+       sub             x9, xzr, x0
        add             x9, x9, #64
        tcr_set_t1sz    tcr, x9
 #else
-       ldr_l           x9, idmap_t0sz
+       idmap_get_t0sz  x9
 #endif
        tcr_set_t0sz    tcr, x9
 
index 507b203..7796537 100644 (file)
@@ -36,6 +36,7 @@ HAS_RNG
 HAS_SB
 HAS_STAGE2_FWB
 HAS_SYSREG_GIC_CPUIF
+HAS_TIDCP1
 HAS_TLB_RANGE
 HAS_VIRT_HOST_EXTN
 HAS_WFXT
@@ -61,6 +62,7 @@ WORKAROUND_1418040
 WORKAROUND_1463225
 WORKAROUND_1508412
 WORKAROUND_1542419
+WORKAROUND_1742098
 WORKAROUND_1902691
 WORKAROUND_2038923
 WORKAROUND_2064142
index 5c55509..db46192 100755 (executable)
@@ -88,7 +88,7 @@ END {
 
 # skip blank lines and comment lines
 /^$/ { next }
-/^#/ { next }
+/^[\t ]*#/ { next }
 
 /^SysregFields/ {
        change_block("SysregFields", "None", "SysregFields")
index ff5e552..9ae483e 100644 (file)
 # feature that introduces them (e.g., FEAT_LS64_ACCDATA introduces enumeration
 # item ACCDATA) though it may be more tasteful to do something else.
 
+Sysreg ID_AA64ZFR0_EL1 3       0       0       4       4
+Res0   63:60
+Enum   59:56   F64MM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   55:52   F32MM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   51:48
+Enum   47:44   I8MM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   43:40   SM4
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   39:36
+Enum   35:32   SHA3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   31:24
+Enum   23:20   BF16
+       0b0000  NI
+       0b0001  IMP
+       0b0010  EBF16
+EndEnum
+Enum   19:16   BitPerm
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   15:8
+Enum   7:4     AES
+       0b0000  NI
+       0b0001  IMP
+       0b0010  PMULL128
+EndEnum
+Enum   3:0     SVEver
+       0b0000  IMP
+       0b0001  SVE2
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64SMFR0_EL1        3       0       0       4       5
+Enum   63      FA64
+       0b0     NI
+       0b1     IMP
+EndEnum
+Res0   62:60
+Field  59:56   SMEver
+Enum   55:52   I16I64
+       0b0000  NI
+       0b1111  IMP
+EndEnum
+Res0   51:49
+Enum   48      F64F64
+       0b0     NI
+       0b1     IMP
+EndEnum
+Res0   47:40
+Enum   39:36   I8I32
+       0b0000  NI
+       0b1111  IMP
+EndEnum
+Enum   35      F16F32
+       0b0     NI
+       0b1     IMP
+EndEnum
+Enum   34      B16F32
+       0b0     NI
+       0b1     IMP
+EndEnum
+Res0   33
+Enum   32      F32F32
+       0b0     NI
+       0b1     IMP
+EndEnum
+Res0   31:0
+EndSysreg
+
 Sysreg ID_AA64ISAR0_EL1        3       0       0       6       0
 Enum   63:60   RNDR
        0b0000  NI
@@ -114,6 +197,122 @@ EndEnum
 Res0   3:0
 EndSysreg
 
+Sysreg ID_AA64ISAR1_EL1        3       0       0       6       1
+Enum   63:60   LS64
+       0b0000  NI
+       0b0001  LS64
+       0b0010  LS64_V
+       0b0011  LS64_ACCDATA
+EndEnum
+Enum   59:56   XS
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   55:52   I8MM
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   51:48   DGH
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   47:44   BF16
+       0b0000  NI
+       0b0001  IMP
+       0b0010  EBF16
+EndEnum
+Enum   43:40   SPECRES
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   39:36   SB
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   35:32   FRINTTS
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   31:28   GPI
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   27:24   GPA
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   23:20   LRCPC
+       0b0000  NI
+       0b0001  IMP
+       0b0010  LRCPC2
+EndEnum
+Enum   19:16   FCMA
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   15:12   JSCVT
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   11:8    API
+       0b0000  NI
+       0b0001  PAuth
+       0b0010  EPAC
+       0b0011  PAuth2
+       0b0100  FPAC
+       0b0101  FPACCOMBINE
+EndEnum
+Enum   7:4     APA
+       0b0000  NI
+       0b0001  PAuth
+       0b0010  EPAC
+       0b0011  PAuth2
+       0b0100  FPAC
+       0b0101  FPACCOMBINE
+EndEnum
+Enum   3:0     DPB
+       0b0000  NI
+       0b0001  IMP
+       0b0010  DPB2
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64ISAR2_EL1        3       0       0       6       2
+Res0   63:28
+Enum   27:24   PAC_frac
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   23:20   BC
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   19:16   MOPS
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   15:12   APA3
+       0b0000  NI
+       0b0001  PAuth
+       0b0010  EPAC
+       0b0011  PAuth2
+       0b0100  FPAC
+       0b0101  FPACCOMBINE
+EndEnum
+Enum   11:8    GPA3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   7:4     RPRES
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Enum   3:0     WFxT
+       0b0000  NI
+       0b0010  IMP
+EndEnum
+EndSysreg
+
 Sysreg SCTLR_EL1       3       0       1       0       0
 Field  63      TIDCP
 Field  62      SPINMASK
@@ -257,6 +456,11 @@ Field      5:3     Ctype2
 Field  2:0     Ctype1
 EndSysreg
 
+Sysreg GMID_EL1        3       1       0       0       4
+Res0   63:4
+Field  3:0     BS
+EndSysreg
+
 Sysreg SMIDR_EL1       3       1       0       0       6
 Res0   63:32
 Field  31:24   IMPLEMENTER
@@ -273,6 +477,33 @@ Field      3:1     Level
 Field  0       InD
 EndSysreg
 
+Sysreg CTR_EL0 3       3       0       0       1
+Res0   63:38
+Field  37:32   TminLine
+Res1   31
+Res0   30
+Field  29      DIC
+Field  28      IDC
+Field  27:24   CWG
+Field  23:20   ERG
+Field  19:16   DminLine
+Enum   15:14   L1Ip
+       0b00    VPIPT
+       # This is named AIVIVT in the Arm ARM but documented as reserved
+       0b01    RESERVED
+       0b10    VIPT
+       0b11    PIPT
+EndEnum
+Res0   13:4
+Field  3:0     IminLine
+EndSysreg
+
+Sysreg DCZID_EL0       3       3       0       0       7
+Res0   63:5
+Field  4       DZP
+Field  3:0     BS
+EndSysreg
+
 Sysreg SVCR    3       3       4       2       2
 Res0   63:2
 Field  1       ZA
@@ -367,3 +598,36 @@ EndSysreg
 Sysreg TTBR1_EL1       3       0       2       0       1
 Fields TTBRx_EL1
 EndSysreg
+
+Sysreg LORSA_EL1       3       0       10      4       0
+Res0   63:52
+Field  51:16   SA
+Res0   15:1
+Field  0       Valid
+EndSysreg
+
+Sysreg LOREA_EL1       3       0       10      4       1
+Res0   63:52
+Field  51:48   EA_51_48
+Field  47:16   EA_47_16
+Res0   15:0
+EndSysreg
+
+Sysreg LORN_EL1        3       0       10      4       2
+Res0   63:8
+Field  7:0     Num
+EndSysreg
+
+Sysreg LORC_EL1        3       0       10      4       3
+Res0   63:10
+Field  9:2     DS
+Res0   1
+Field  0       EN
+EndSysreg
+
+Sysreg LORID_EL1       3       0       10      4       7
+Res0   63:24
+Field  23:16   LD
+Res0   15:8
+Field  7:0     LR
+EndSysreg
index b2ec655..5aa4c2e 100644 (file)
@@ -278,6 +278,7 @@ config X86
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        select TRACE_IRQFLAGS_SUPPORT
+       select TRACE_IRQFLAGS_NMI_SUPPORT
        select USER_STACKTRACE_SUPPORT
        select VIRT_TO_BUS
        select HAVE_ARCH_KCSAN                  if X86_64
index 340399f..bdfe08f 100644 (file)
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_NMI_SUPPORT
-       def_bool y
-
 config EARLY_PRINTK_USB
        bool
 
index be7f512..747aa53 100644 (file)
@@ -3,7 +3,8 @@
 # ARM CPU Idle drivers
 #
 config ARM_CPUIDLE
-       bool "Generic ARM/ARM64 CPU idle Driver"
+       bool "Generic ARM CPU idle Driver"
+       depends on ARM
        select DT_IDLE_STATES
        select CPU_IDLE_MULTIPLE_DRIVERS
        help
index 96e09fa..03b1309 100644 (file)
@@ -1139,7 +1139,7 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags)
 
        /*
         * To handle interrupt latency, we always reprogram the period
-        * regardlesss of PERF_EF_RELOAD.
+        * regardless of PERF_EF_RELOAD.
         */
        if (pmu_flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
@@ -1261,7 +1261,7 @@ static int validate_group(struct perf_event *event)
                 */
                .used_mask = mask,
        };
-       memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
+       bitmap_zero(mask, cci_pmu->num_cntrs);
 
        if (!validate_event(event->pmu, &fake_pmu, leader))
                return -EINVAL;
@@ -1629,10 +1629,9 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev)
                                             GFP_KERNEL);
        if (!cci_pmu->hw_events.events)
                return ERR_PTR(-ENOMEM);
-       cci_pmu->hw_events.used_mask = devm_kcalloc(dev,
-                                               BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
-                                               sizeof(*cci_pmu->hw_events.used_mask),
-                                               GFP_KERNEL);
+       cci_pmu->hw_events.used_mask = devm_bitmap_zalloc(dev,
+                                                         CCI_PMU_MAX_HW_CNTRS(model),
+                                                         GFP_KERNEL);
        if (!cci_pmu->hw_events.used_mask)
                return ERR_PTR(-ENOMEM);
 
index 40b352e..728d13d 100644 (file)
@@ -1250,7 +1250,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
        ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9);
 
        /* Get a convenient /sys/event_source/devices/ name */
-       ccn->dt.id = ida_simple_get(&arm_ccn_pmu_ida, 0, 0, GFP_KERNEL);
+       ccn->dt.id = ida_alloc(&arm_ccn_pmu_ida, GFP_KERNEL);
        if (ccn->dt.id == 0) {
                name = "ccn";
        } else {
@@ -1312,7 +1312,7 @@ error_pmu_register:
                                            &ccn->dt.node);
 error_set_affinity:
 error_choose_name:
-       ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
+       ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
        for (i = 0; i < ccn->num_xps; i++)
                writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
        writel(0, ccn->dt.base + CCN_DT_PMCR);
@@ -1329,7 +1329,7 @@ static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn)
                writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
        writel(0, ccn->dt.base + CCN_DT_PMCR);
        perf_pmu_unregister(&ccn->dt.pmu);
-       ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
+       ida_free(&arm_ccn_pmu_ida, ccn->dt.id);
 }
 
 static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn,
index db670b2..b65a7d9 100644 (file)
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
 
+/*
+ * Cache whether the event is allowed to trace Context information.
+ * This allows us to perform the check, i.e. perfmon_capable(), once,
+ * in the context of the event owner, during event_init().
+ */
+#define SPE_PMU_HW_FLAGS_CX                    BIT(0)
+
+static void set_spe_event_has_cx(struct perf_event *event)
+{
+       if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+               event->hw.flags |= SPE_PMU_HW_FLAGS_CX;
+}
+
+static bool get_spe_event_has_cx(struct perf_event *event)
+{
+       return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX);
+}
+
 #define ARM_SPE_BUF_PAD_BYTE                   0
 
 struct arm_spe_pmu_buf {
@@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event)
        if (!attr->exclude_kernel)
                reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
 
-       if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable())
+       if (get_spe_event_has_cx(event))
                reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT);
 
        return reg;
@@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event)
            !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT))
                return -EOPNOTSUPP;
 
+       set_spe_event_has_cx(event);
        reg = arm_spe_event_to_pmscr(event);
        if (!perfmon_capable() &&
            (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) |
-                   BIT(SYS_PMSCR_EL1_CX_SHIFT) |
                    BIT(SYS_PMSCR_EL1_PCT_SHIFT))))
                return -EACCES;
 
index b1b2a55..8e058e0 100644 (file)
@@ -611,7 +611,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
                .dev = dev,
        };
 
-       pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+       pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
        return pmu->id;
 }
 
@@ -765,7 +765,7 @@ ddr_perf_err:
 cpuhp_instance_err:
        cpuhp_remove_multi_state(pmu->cpuhp_state);
 cpuhp_state_err:
-       ida_simple_remove(&ddr_ida, pmu->id);
+       ida_free(&ddr_ida, pmu->id);
        dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
        return ret;
 }
@@ -779,7 +779,7 @@ static int ddr_perf_remove(struct platform_device *pdev)
 
        perf_pmu_unregister(&pmu->pmu);
 
-       ida_simple_remove(&ddr_ida, pmu->id);
+       ida_free(&ddr_ida, pmu->id);
        return 0;
 }
 
index 5546218..171bfc1 100644 (file)
@@ -14,3 +14,13 @@ config HISI_PCIE_PMU
          RCiEP devices.
          Adds the PCIe PMU into perf events system for monitoring latency,
          bandwidth etc.
+
+config HNS3_PMU
+       tristate "HNS3 PERF PMU"
+       depends on ARM64 || COMPILE_TEST
+       depends on PCI
+       help
+         Provide support for HNS3 performance monitoring unit (PMU) RCiEP
+         devices.
+         Adds the HNS3 PMU into the perf events system for monitoring latency,
+         bandwidth, etc.
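
Once loaded, each PMU instance registers under /sys/bus/event_source/devices/. A hedged usage sketch (the instance name hns3_pmu_sicl_0 and the global filter field follow the driver's sysfs layout, but are assumptions that may differ per system):

	# count SSU-to-EGU bytes device-wide, sampled every second
	perf stat -a -e hns3_pmu_sicl_0/config=0x00001,global=1/ -I 1000
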
index 6be8351..4d2c9ab 100644 (file)
@@ -4,3 +4,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
                          hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
 
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
+obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
index 62299ab..50d0c0a 100644 (file)
@@ -516,21 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
                                      "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
                                      ddrc_pmu->index_id);
 
-       ddrc_pmu->pmu = (struct pmu) {
-               .name           = name,
-               .module         = THIS_MODULE,
-               .task_ctx_nr    = perf_invalid_context,
-               .event_init     = hisi_uncore_pmu_event_init,
-               .pmu_enable     = hisi_uncore_pmu_enable,
-               .pmu_disable    = hisi_uncore_pmu_disable,
-               .add            = hisi_uncore_pmu_add,
-               .del            = hisi_uncore_pmu_del,
-               .start          = hisi_uncore_pmu_start,
-               .stop           = hisi_uncore_pmu_stop,
-               .read           = hisi_uncore_pmu_read,
-               .attr_groups    = ddrc_pmu->pmu_events.attr_groups,
-               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
-       };
+       hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE);
 
        ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1);
        if (ret) {
index 3935131..13017b3 100644 (file)
@@ -519,21 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
 
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u",
                              hha_pmu->sccl_id, hha_pmu->index_id);
-       hha_pmu->pmu = (struct pmu) {
-               .name           = name,
-               .module         = THIS_MODULE,
-               .task_ctx_nr    = perf_invalid_context,
-               .event_init     = hisi_uncore_pmu_event_init,
-               .pmu_enable     = hisi_uncore_pmu_enable,
-               .pmu_disable    = hisi_uncore_pmu_disable,
-               .add            = hisi_uncore_pmu_add,
-               .del            = hisi_uncore_pmu_del,
-               .start          = hisi_uncore_pmu_start,
-               .stop           = hisi_uncore_pmu_stop,
-               .read           = hisi_uncore_pmu_read,
-               .attr_groups    = hha_pmu->pmu_events.attr_groups,
-               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
-       };
+       hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE);
 
        ret = perf_pmu_register(&hha_pmu->pmu, name, -1);
        if (ret) {
index 560ab96..2995f36 100644 (file)
@@ -557,21 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
         */
        name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
                              l3c_pmu->sccl_id, l3c_pmu->ccl_id);
-       l3c_pmu->pmu = (struct pmu) {
-               .name           = name,
-               .module         = THIS_MODULE,
-               .task_ctx_nr    = perf_invalid_context,
-               .event_init     = hisi_uncore_pmu_event_init,
-               .pmu_enable     = hisi_uncore_pmu_enable,
-               .pmu_disable    = hisi_uncore_pmu_disable,
-               .add            = hisi_uncore_pmu_add,
-               .del            = hisi_uncore_pmu_del,
-               .start          = hisi_uncore_pmu_start,
-               .stop           = hisi_uncore_pmu_stop,
-               .read           = hisi_uncore_pmu_read,
-               .attr_groups    = l3c_pmu->pmu_events.attr_groups,
-               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
-       };
+       hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE);
 
        ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
        if (ret) {
index a0ee84d..47d3cc9 100644 (file)
@@ -412,21 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
                return ret;
        }
 
-       pa_pmu->pmu = (struct pmu) {
-               .module         = THIS_MODULE,
-               .task_ctx_nr    = perf_invalid_context,
-               .event_init     = hisi_uncore_pmu_event_init,
-               .pmu_enable     = hisi_uncore_pmu_enable,
-               .pmu_disable    = hisi_uncore_pmu_disable,
-               .add            = hisi_uncore_pmu_add,
-               .del            = hisi_uncore_pmu_del,
-               .start          = hisi_uncore_pmu_start,
-               .stop           = hisi_uncore_pmu_stop,
-               .read           = hisi_uncore_pmu_read,
-               .attr_groups    = pa_pmu->pmu_events.attr_groups,
-               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
-       };
-
+       hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE);
        ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
        if (ret) {
                dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
index 980b9ee..fbc8a93 100644 (file)
@@ -531,4 +531,22 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 }
 EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
 
+void hisi_pmu_init(struct pmu *pmu, const char *name,
+               const struct attribute_group **attr_groups, struct module *module)
+{
+       pmu->name               = name;
+       pmu->module             = module;
+       pmu->task_ctx_nr        = perf_invalid_context;
+       pmu->event_init         = hisi_uncore_pmu_event_init;
+       pmu->pmu_enable         = hisi_uncore_pmu_enable;
+       pmu->pmu_disable        = hisi_uncore_pmu_disable;
+       pmu->add                = hisi_uncore_pmu_add;
+       pmu->del                = hisi_uncore_pmu_del;
+       pmu->start              = hisi_uncore_pmu_start;
+       pmu->stop               = hisi_uncore_pmu_stop;
+       pmu->read               = hisi_uncore_pmu_read;
+       pmu->attr_groups        = attr_groups;
+}
+EXPORT_SYMBOL_GPL(hisi_pmu_init);
+
 MODULE_LICENSE("GPL v2");
index 96eedda..b59de33 100644 (file)
@@ -121,4 +121,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
                             struct platform_device *pdev);
 
+void hisi_pmu_init(struct pmu *pmu, const char *name,
+               const struct attribute_group **attr_groups, struct module *module);
 #endif /* __HISI_UNCORE_PMU_H__ */
index 6aedc30..b9c79f1 100644 (file)
@@ -445,20 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
                return ret;
        }
 
-       sllc_pmu->pmu = (struct pmu) {
-               .module         = THIS_MODULE,
-               .task_ctx_nr    = perf_invalid_context,
-               .event_init     = hisi_uncore_pmu_event_init,
-               .pmu_enable     = hisi_uncore_pmu_enable,
-               .pmu_disable    = hisi_uncore_pmu_disable,
-               .add            = hisi_uncore_pmu_add,
-               .del            = hisi_uncore_pmu_del,
-               .start          = hisi_uncore_pmu_start,
-               .stop           = hisi_uncore_pmu_stop,
-               .read           = hisi_uncore_pmu_read,
-               .attr_groups    = sllc_pmu->pmu_events.attr_groups,
-               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
-       };
+       hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE);
 
        ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
        if (ret) {
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
new file mode 100644 (file)
index 0000000..e0457d8
--- /dev/null
@@ -0,0 +1,1671 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for the HNS3 PMU RCiEP device. Related perf events
+ * include bandwidth, latency, packet rate, interrupt rate, etc.
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-epf.h>
+#include <linux/perf_event.h>
+#include <linux/smp.h>
+
+/* register offset addresses */
+#define HNS3_PMU_REG_GLOBAL_CTRL               0x0000
+#define HNS3_PMU_REG_CLOCK_FREQ                        0x0020
+#define HNS3_PMU_REG_BDF                       0x0fe0
+#define HNS3_PMU_REG_VERSION                   0x0fe4
+#define HNS3_PMU_REG_DEVICE_ID                 0x0fe8
+
+#define HNS3_PMU_REG_EVENT_OFFSET              0x1000
+#define HNS3_PMU_REG_EVENT_SIZE                        0x1000
+#define HNS3_PMU_REG_EVENT_CTRL_LOW            0x00
+#define HNS3_PMU_REG_EVENT_CTRL_HIGH           0x04
+#define HNS3_PMU_REG_EVENT_INTR_STATUS         0x08
+#define HNS3_PMU_REG_EVENT_INTR_MASK           0x0c
+#define HNS3_PMU_REG_EVENT_COUNTER             0x10
+#define HNS3_PMU_REG_EVENT_EXT_COUNTER         0x18
+#define HNS3_PMU_REG_EVENT_QID_CTRL            0x28
+#define HNS3_PMU_REG_EVENT_QID_PARA            0x2c
+
+#define HNS3_PMU_FILTER_SUPPORT_GLOBAL         BIT(0)
+#define HNS3_PMU_FILTER_SUPPORT_PORT           BIT(1)
+#define HNS3_PMU_FILTER_SUPPORT_PORT_TC                BIT(2)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC           BIT(3)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE     BIT(4)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR      BIT(5)
+
+#define HNS3_PMU_FILTER_ALL_TC                 0xf
+#define HNS3_PMU_FILTER_ALL_QUEUE              0xffff
+
+#define HNS3_PMU_CTRL_SUBEVENT_S               4
+#define HNS3_PMU_CTRL_FILTER_MODE_S            24
+
+#define HNS3_PMU_GLOBAL_START                  BIT(0)
+
+#define HNS3_PMU_EVENT_STATUS_RESET            BIT(11)
+#define HNS3_PMU_EVENT_EN                      BIT(12)
+#define HNS3_PMU_EVENT_OVERFLOW_RESTART                BIT(15)
+
+#define HNS3_PMU_QID_PARA_FUNC_S               0
+#define HNS3_PMU_QID_PARA_QUEUE_S              16
+
+#define HNS3_PMU_QID_CTRL_REQ_ENABLE           BIT(0)
+#define HNS3_PMU_QID_CTRL_DONE                 BIT(1)
+#define HNS3_PMU_QID_CTRL_MISS                 BIT(2)
+
+#define HNS3_PMU_INTR_MASK_OVERFLOW            BIT(1)
+
+#define HNS3_PMU_MAX_HW_EVENTS                 8
+
+/*
+ * Each hardware event contains two registers (counter and ext_counter) for
+ * bandwidth, packet rate, latency and interrupt rate. The two registers start
+ * running at the same time when a hardware event is enabled. The meaning of
+ * counter and ext_counter differs between event types, as follows:
+ *
+ * +----------------+------------------+---------------+
+ * |   event type   |     counter      |  ext_counter  |
+ * +----------------+------------------+---------------+
+ * | bandwidth      | byte number      | cycle number  |
+ * +----------------+------------------+---------------+
+ * | packet rate    | packet number    | cycle number  |
+ * +----------------+------------------+---------------+
+ * | latency        | cycle number     | packet number |
+ * +----------------+------------------+---------------+
+ * | interrupt rate | interrupt number | cycle number  |
+ * +----------------+------------------+---------------+
+ *
+ * The cycle number is the increment of a hardware timer counter; the frequency
+ * of that timer can be read from the hw_clk_freq sysfs file.
+ *
+ * Performance of each hardware event is calculated by: counter / ext_counter.
+ *
+ * Since data processing is best done in userspace, we expose ext_counter as a
+ * separate event and use bit 16 to indicate it. For example, events 0x00001
+ * and 0x10001 are the same event to the hardware because bits 0-15 match; if
+ * bit 16 of an event is 0 the counter register is read, otherwise the
+ * ext_counter register is read.
+ */
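
A hedged userspace sketch of this convention, assuming the raw values of an event pair (e.g. 0x00001 and 0x10001) and the hw_clk_freq sysfs value have already been read out:

#include <stdbool.h>
#include <stdint.h>

#define HNS3_EXT_COUNTER_BIT	(1u << 16)	/* bit 16 selects ext_counter */

/* Two perf configs name the same hardware event iff bits 0-15 match. */
static bool hns3_same_hw_event(uint32_t cfg_a, uint32_t cfg_b)
{
	return (cfg_a & 0xffff) == (cfg_b & 0xffff);
}

/* Bandwidth in bytes/s: byte_num / (cycles / hw_clk_freq). */
static double hns3_bandwidth(uint64_t byte_num, uint64_t cycles,
			     uint64_t hw_clk_freq)
{
	return cycles ? (double)byte_num * hw_clk_freq / cycles : 0.0;
}
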
+/* bandwidth events */
+#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM               0x00001
+#define HNS3_PMU_EVT_BW_SSU_EGU_TIME                   0x10001
+#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM               0x00002
+#define HNS3_PMU_EVT_BW_SSU_RPU_TIME                   0x10002
+#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM              0x00003
+#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME                  0x10003
+#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM              0x00004
+#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME                  0x10004
+#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM               0x00005
+#define HNS3_PMU_EVT_BW_TPU_SSU_TIME                   0x10005
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM             0x00006
+#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME                 0x10006
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM           0x00008
+#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME               0x10008
+#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM                        0x00009
+#define HNS3_PMU_EVT_BW_WR_FBD_TIME                    0x10009
+#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM                        0x0000a
+#define HNS3_PMU_EVT_BW_WR_EBD_TIME                    0x1000a
+#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM                        0x0000b
+#define HNS3_PMU_EVT_BW_RD_FBD_TIME                    0x1000b
+#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM                        0x0000c
+#define HNS3_PMU_EVT_BW_RD_EBD_TIME                    0x1000c
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM             0x0000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME                 0x1000d
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM             0x0000e
+#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME                 0x1000e
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM             0x0000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME                 0x1000f
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM             0x00010
+#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME                 0x10010
+
+/* packet rate events */
+#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM            0x00100
+#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME                  0x10100
+#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM            0x00101
+#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME                  0x10101
+#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM            0x00102
+#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME                  0x10102
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM           0x00103
+#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME                 0x10103
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM           0x00104
+#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME                 0x10104
+#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM            0x00105
+#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME                  0x10105
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM          0x00106
+#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME                        0x10106
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM          0x00107
+#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME                        0x10107
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM                0x00108
+#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME              0x10108
+#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM             0x00109
+#define HNS3_PMU_EVT_PPS_WR_FBD_TIME                   0x10109
+#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM             0x0010a
+#define HNS3_PMU_EVT_PPS_WR_EBD_TIME                   0x1010a
+#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM             0x0010b
+#define HNS3_PMU_EVT_PPS_RD_FBD_TIME                   0x1010b
+#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM             0x0010c
+#define HNS3_PMU_EVT_PPS_RD_EBD_TIME                   0x1010c
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM          0x0010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME                        0x1010d
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM          0x0010e
+#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME                        0x1010e
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM          0x0010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME                        0x1010f
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM          0x00110
+#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME                        0x10110
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM      0x00111
+#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME            0x10111
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM      0x00112
+#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME            0x10112
+
+/* latency events */
+#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME                  0x00202
+#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM            0x10202
+#define HNS3_PMU_EVT_DLY_TX_TIME                       0x00204
+#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM                 0x10204
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME               0x00206
+#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM         0x10206
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME              0x00207
+#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM                0x10207
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME               0x00208
+#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM         0x10208
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME              0x00209
+#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM                0x10209
+#define HNS3_PMU_EVT_DLY_RPU_TIME                      0x0020e
+#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM                        0x1020e
+#define HNS3_PMU_EVT_DLY_TPU_TIME                      0x0020f
+#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM                        0x1020f
+#define HNS3_PMU_EVT_DLY_RPE_TIME                      0x00210
+#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM                        0x10210
+#define HNS3_PMU_EVT_DLY_TPE_TIME                      0x00211
+#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM                        0x10211
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME                 0x00212
+#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM           0x10212
+#define HNS3_PMU_EVT_DLY_WR_FBD_TIME                   0x00213
+#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM             0x10213
+#define HNS3_PMU_EVT_DLY_WR_EBD_TIME                   0x00214
+#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM             0x10214
+#define HNS3_PMU_EVT_DLY_RD_FBD_TIME                   0x00215
+#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM             0x10215
+#define HNS3_PMU_EVT_DLY_RD_EBD_TIME                   0x00216
+#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM             0x10216
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME                        0x00217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM          0x10217
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME                        0x00218
+#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM          0x10218
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME                        0x00219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM          0x10219
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME                        0x0021a
+#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM          0x1021a
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME               0x0021c
+#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM         0x1021c
+
+/* interrupt rate events */
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM             0x00300
+#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME                 0x10300
+
+/* filter mode supported by each bandwidth event */
+#define HNS3_PMU_FILTER_BW_SSU_EGU             0x07
+#define HNS3_PMU_FILTER_BW_SSU_RPU             0x1f
+#define HNS3_PMU_FILTER_BW_SSU_ROCE            0x0f
+#define HNS3_PMU_FILTER_BW_ROCE_SSU            0x0f
+#define HNS3_PMU_FILTER_BW_TPU_SSU             0x1f
+#define HNS3_PMU_FILTER_BW_RPU_RCBRX           0x11
+#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH         0x11
+#define HNS3_PMU_FILTER_BW_WR_FBD              0x1b
+#define HNS3_PMU_FILTER_BW_WR_EBD              0x11
+#define HNS3_PMU_FILTER_BW_RD_FBD              0x01
+#define HNS3_PMU_FILTER_BW_RD_EBD              0x1b
+#define HNS3_PMU_FILTER_BW_RD_PAY_M0           0x01
+#define HNS3_PMU_FILTER_BW_RD_PAY_M1           0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M0           0x01
+#define HNS3_PMU_FILTER_BW_WR_PAY_M1           0x01
+
+/* filter mode supported by each packet rate event */
+#define HNS3_PMU_FILTER_PPS_IGU_SSU            0x07
+#define HNS3_PMU_FILTER_PPS_SSU_EGU            0x07
+#define HNS3_PMU_FILTER_PPS_SSU_RPU            0x1f
+#define HNS3_PMU_FILTER_PPS_SSU_ROCE           0x0f
+#define HNS3_PMU_FILTER_PPS_ROCE_SSU           0x0f
+#define HNS3_PMU_FILTER_PPS_TPU_SSU            0x1f
+#define HNS3_PMU_FILTER_PPS_RPU_RCBRX          0x11
+#define HNS3_PMU_FILTER_PPS_RCBTX_TPU          0x1f
+#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH                0x11
+#define HNS3_PMU_FILTER_PPS_WR_FBD             0x1b
+#define HNS3_PMU_FILTER_PPS_WR_EBD             0x11
+#define HNS3_PMU_FILTER_PPS_RD_FBD             0x01
+#define HNS3_PMU_FILTER_PPS_RD_EBD             0x1b
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M0          0x01
+#define HNS3_PMU_FILTER_PPS_RD_PAY_M1          0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M0          0x01
+#define HNS3_PMU_FILTER_PPS_WR_PAY_M1          0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE      0x01
+#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE      0x01
+
+/* filter mode supported by each latency event */
+#define HNS3_PMU_FILTER_DLY_TX_PUSH            0x01
+#define HNS3_PMU_FILTER_DLY_TX                 0x01
+#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC         0x07
+#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE                0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC         0x07
+#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE                0x07
+#define HNS3_PMU_FILTER_DLY_RPU                        0x11
+#define HNS3_PMU_FILTER_DLY_TPU                        0x1f
+#define HNS3_PMU_FILTER_DLY_RPE                        0x01
+#define HNS3_PMU_FILTER_DLY_TPE                        0x0b
+#define HNS3_PMU_FILTER_DLY_TPE_PUSH           0x1b
+#define HNS3_PMU_FILTER_DLY_WR_FBD             0x1b
+#define HNS3_PMU_FILTER_DLY_WR_EBD             0x11
+#define HNS3_PMU_FILTER_DLY_RD_FBD             0x01
+#define HNS3_PMU_FILTER_DLY_RD_EBD             0x1b
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M0          0x01
+#define HNS3_PMU_FILTER_DLY_RD_PAY_M1          0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M0          0x01
+#define HNS3_PMU_FILTER_DLY_WR_PAY_M1          0x01
+#define HNS3_PMU_FILTER_DLY_MSIX_WRITE         0x01
+
+/* filter mode supported by each interrupt rate event */
+#define HNS3_PMU_FILTER_INTR_MSIX_NIC          0x01
+
+enum hns3_pmu_hw_filter_mode {
+       HNS3_PMU_HW_FILTER_GLOBAL,
+       HNS3_PMU_HW_FILTER_PORT,
+       HNS3_PMU_HW_FILTER_PORT_TC,
+       HNS3_PMU_HW_FILTER_FUNC,
+       HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+       HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+struct hns3_pmu_event_attr {
+       u32 event;
+       u16 filter_support;
+};
+
+struct hns3_pmu {
+       struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS];
+       struct hlist_node node;
+       struct pci_dev *pdev;
+       struct pmu pmu;
+       void __iomem *base;
+       int irq;
+       int on_cpu;
+       u32 identifier;
+       u32 hw_clk_freq; /* hardware clock frequency of PMU */
+       /* minimum and maximum BDF allowed by the PMU */
+       u16 bdf_min;
+       u16 bdf_max;
+};
+
+#define to_hns3_pmu(p)  (container_of((p), struct hns3_pmu, pmu))
+
+#define GET_PCI_DEVFN(bdf)  ((bdf) & 0xff)
+
+#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff)
+#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07))
+#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func))
+
+#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end)               \
+       static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \
+       {                                                                \
+               return FIELD_GET(GENMASK_ULL(_end, _start),              \
+                                event->attr._config);                   \
+       }
+
+HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7);
+HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15);
+HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16);
+HNS3_PMU_FILTER_ATTR(port, config1, 0, 3);
+HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7);
+HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23);
+HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39);
+HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51);
+HNS3_PMU_FILTER_ATTR(global, config1, 52, 52);
+
+#define HNS3_BW_EVT_BYTE_NUM(_name)    (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_BW_##_name##_BYTE_NUM,                             \
+       HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_BW_EVT_TIME(_name)                (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_BW_##_name##_TIME,                                 \
+       HNS3_PMU_FILTER_BW_##_name})
+#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM,                          \
+       HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_PPS_EVT_TIME(_name)       (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_PPS_##_name##_TIME,                                \
+       HNS3_PMU_FILTER_PPS_##_name})
+#define HNS3_DLY_EVT_TIME(_name)       (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_DLY_##_name##_TIME,                                \
+       HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM,                          \
+       HNS3_PMU_FILTER_DLY_##_name})
+#define HNS3_INTR_EVT_INTR_NUM(_name)  (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_PPS_##_name##_INTR_NUM,                            \
+       HNS3_PMU_FILTER_INTR_##_name})
+#define HNS3_INTR_EVT_TIME(_name)      (&(struct hns3_pmu_event_attr) {\
+       HNS3_PMU_EVT_PPS_##_name##_TIME,                                \
+       HNS3_PMU_FILTER_INTR_##_name})
+
+static ssize_t hns3_pmu_format_show(struct device *dev,
+                                   struct device_attribute *attr, char *buf)
+{
+       struct dev_ext_attribute *eattr;
+
+       eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+       return sysfs_emit(buf, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t hns3_pmu_event_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct hns3_pmu_event_attr *event;
+       struct dev_ext_attribute *eattr;
+
+       eattr = container_of(attr, struct dev_ext_attribute, attr);
+       event = eattr->var;
+
+       return sysfs_emit(buf, "config=0x%x\n", event->event);
+}
+
+static ssize_t hns3_pmu_filter_mode_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buf)
+{
+       struct hns3_pmu_event_attr *event;
+       struct dev_ext_attribute *eattr;
+       int len;
+
+       eattr = container_of(attr, struct dev_ext_attribute, attr);
+       event = eattr->var;
+
+       len = sysfs_emit_at(buf, 0, "filter mode supported: ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)
+               len += sysfs_emit_at(buf, len, "global ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)
+               len += sysfs_emit_at(buf, len, "port ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)
+               len += sysfs_emit_at(buf, len, "port-tc ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)
+               len += sysfs_emit_at(buf, len, "func ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)
+               len += sysfs_emit_at(buf, len, "func-queue ");
+       if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)
+               len += sysfs_emit_at(buf, len, "func-intr ");
+
+       len += sysfs_emit_at(buf, len, "\n");
+
+       return len;
+}
+
+#define HNS3_PMU_ATTR(_name, _func, _config)                           \
+       (&((struct dev_ext_attribute[]) {                               \
+               { __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+       })[0].attr.attr)
+
+#define HNS3_PMU_FORMAT_ATTR(_name, _format) \
+       HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format)
+#define HNS3_PMU_EVENT_ATTR(_name, _event) \
+       HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event)
+#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \
+       HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event)
+
+#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \
+       HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+       HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \
+       HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+       HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \
+       HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+       HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \
+       HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+       HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro))
+#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro))
+#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro))
+#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \
+       HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro))
+
+static u8 hns3_pmu_hw_filter_modes[] = {
+       HNS3_PMU_HW_FILTER_GLOBAL,
+       HNS3_PMU_HW_FILTER_PORT,
+       HNS3_PMU_HW_FILTER_PORT_TC,
+       HNS3_PMU_HW_FILTER_FUNC,
+       HNS3_PMU_HW_FILTER_FUNC_QUEUE,
+       HNS3_PMU_HW_FILTER_FUNC_INTR,
+};
+
+#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \
+       ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)])
+
+static ssize_t identifier_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+       return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier);
+}
+static DEVICE_ATTR_RO(identifier);
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+       return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+       u16 bdf = hns3_pmu->bdf_min;
+
+       return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+                         PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_min);
+
+static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+       u16 bdf = hns3_pmu->bdf_max;
+
+       return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf),
+                         PCI_SLOT(bdf), PCI_FUNC(bdf));
+}
+static DEVICE_ATTR_RO(bdf_max);
+
+static ssize_t hw_clk_freq_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev));
+
+       return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq);
+}
+static DEVICE_ATTR_RO(hw_clk_freq);
+
+static struct attribute *hns3_pmu_events_attr[] = {
+       /* bandwidth events */
+       HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU),
+       HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU),
+       HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE),
+       HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU),
+       HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU),
+       HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+       HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+       HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD),
+       HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD),
+       HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD),
+       HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD),
+       HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+       /* packet rate events */
+       HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE),
+       HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+       HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD),
+       HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+       HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+       HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+       /* latency events */
+       HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH),
+       HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX),
+       HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+       HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+       HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+       HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU),
+       HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE),
+       HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE),
+       HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH),
+       HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD),
+       HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+       HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE),
+
+       /* interrupt rate events */
+       HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+       NULL
+};
+
+static struct attribute *hns3_pmu_filter_mode_attr[] = {
+       /* bandwidth events */
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1),
+
+       /* packet rate events */
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE),
+       HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE),
+
+       /* latency events */
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1),
+       HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE),
+
+       /* interrupt rate events */
+       HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC),
+
+       NULL
+};
+
+static struct attribute_group hns3_pmu_events_group = {
+       .name = "events",
+       .attrs = hns3_pmu_events_attr,
+};
+
+static struct attribute_group hns3_pmu_filter_mode_group = {
+       .name = "filtermode",
+       .attrs = hns3_pmu_filter_mode_attr,
+};
+
+static struct attribute *hns3_pmu_format_attr[] = {
+       HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"),
+       HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"),
+       HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"),
+       HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"),
+       HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"),
+       HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"),
+       HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"),
+       HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"),
+       HNS3_PMU_FORMAT_ATTR(global, "config1:52"),
+       NULL
+};
+
+static struct attribute_group hns3_pmu_format_group = {
+       .name = "format",
+       .attrs = hns3_pmu_format_attr,
+};
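+
+/*
+ * Illustrative usage only (the device name and config value below are
+ * placeholders): with the format fields above, a raw event could be
+ * requested as
+ *
+ *   $ perf stat -a -e hns3_pmu_sicl_0/config=0x10102,global=1/ -I 1000
+ *
+ * i.e. subevent 0x02 in config:0-7, event type 0x01 in config:8-15,
+ * the extended counter selected via config:16 and global-mode
+ * filtering via config1:52.
+ */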
+
+static struct attribute *hns3_pmu_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL
+};
+
+static struct attribute_group hns3_pmu_cpumask_attr_group = {
+       .attrs = hns3_pmu_cpumask_attrs,
+};
+
+static struct attribute *hns3_pmu_identifier_attrs[] = {
+       &dev_attr_identifier.attr,
+       NULL
+};
+
+static struct attribute_group hns3_pmu_identifier_attr_group = {
+       .attrs = hns3_pmu_identifier_attrs,
+};
+
+static struct attribute *hns3_pmu_bdf_range_attrs[] = {
+       &dev_attr_bdf_min.attr,
+       &dev_attr_bdf_max.attr,
+       NULL
+};
+
+static struct attribute_group hns3_pmu_bdf_range_attr_group = {
+       .attrs = hns3_pmu_bdf_range_attrs,
+};
+
+static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = {
+       &dev_attr_hw_clk_freq.attr,
+       NULL
+};
+
+static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = {
+       .attrs = hns3_pmu_hw_clk_freq_attrs,
+};
+
+static const struct attribute_group *hns3_pmu_attr_groups[] = {
+       &hns3_pmu_events_group,
+       &hns3_pmu_filter_mode_group,
+       &hns3_pmu_format_group,
+       &hns3_pmu_cpumask_attr_group,
+       &hns3_pmu_identifier_attr_group,
+       &hns3_pmu_bdf_range_attr_group,
+       &hns3_pmu_hw_clk_freq_attr_group,
+       NULL
+};
+
+static u32 hns3_pmu_get_event(struct perf_event *event)
+{
+       return hns3_pmu_get_ext_counter_used(event) << 16 |
+              hns3_pmu_get_event_type(event) << 8 |
+              hns3_pmu_get_subevent(event);
+}
+
+static u32 hns3_pmu_get_real_event(struct perf_event *event)
+{
+       return hns3_pmu_get_event_type(event) << 8 |
+              hns3_pmu_get_subevent(event);
+}
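+
+/*
+ * Worked example (field values are arbitrary): ext_counter_used = 1,
+ * event_type = 0x01 and subevent = 0x02 encode as
+ * (1 << 16) | (0x01 << 8) | 0x02 = 0x10102, while the "real" event
+ * used for matching drops the ext_counter_used bit, leaving 0x0102.
+ */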
+
+static u32 hns3_pmu_get_offset(u32 offset, u32 idx)
+{
+       return offset + HNS3_PMU_REG_EVENT_OFFSET +
+              HNS3_PMU_REG_EVENT_SIZE * idx;
+}
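+
+/*
+ * A sketch of the register layout this arithmetic implies, assuming
+ * purely illustrative values for the constants (the real ones are
+ * defined in the header): with HNS3_PMU_REG_EVENT_OFFSET = 0x1000 and
+ * HNS3_PMU_REG_EVENT_SIZE = 0x4, idx 2 of a register at offset 0x20
+ * would map to 0x20 + 0x1000 + 0x4 * 2 = 0x1028.
+ */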
+
+static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+       u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+       return readl(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+                           u32 val)
+{
+       u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+       writel(val, hns3_pmu->base + offset);
+}
+
+static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+       u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+       return readq(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+                           u64 val)
+{
+       u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+       writeq(val, hns3_pmu->base + offset);
+}
+
+static bool hns3_pmu_cmp_event(struct perf_event *target,
+                              struct perf_event *event)
+{
+       return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event);
+}
+
+static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu,
+                                          struct perf_event *event)
+{
+       struct perf_event *sibling;
+       int hw_event_used = 0;
+       int idx;
+
+       for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+               sibling = hns3_pmu->hw_events[idx];
+               if (!sibling)
+                       continue;
+
+               hw_event_used++;
+
+               if (!hns3_pmu_cmp_event(sibling, event))
+                       continue;
+
+               /* The related event is used in the same group */
+               if (sibling->group_leader == event->group_leader)
+                       return idx;
+       }
+
+       /* No related event and all hardware events are used up */
+       if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS)
+               return -EBUSY;
+
+       /* No related event, but spare hardware events are still available */
+       return -ENOENT;
+}
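+
+/*
+ * Summary of the return contract above: a non-negative index means the
+ * new event can share that already-programmed hardware event; -ENOENT
+ * means no related event was found but a free slot remains; -EBUSY
+ * means all HNS3_PMU_MAX_HW_EVENTS slots hold unrelated events.
+ */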
+
+static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu)
+{
+       int idx;
+
+       for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+               if (!hns3_pmu->hw_events[idx])
+                       return idx;
+       }
+
+       return -EBUSY;
+}
+
+static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf)
+{
+       struct pci_dev *pdev;
+
+       if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) {
+               pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf);
+               return false;
+       }
+
+       pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus),
+                                          PCI_BUS_NUM(bdf),
+                                          GET_PCI_DEVFN(bdf));
+       if (!pdev) {
+               pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf);
+               return false;
+       }
+
+       pci_dev_put(pdev);
+       return true;
+}
+
+static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+                                 u16 queue)
+{
+       u32 val;
+
+       val = GET_PCI_DEVFN(bdf);
+       val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val);
+}
+
+static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+       bool queue_id_valid = false;
+       u32 reg_qid_ctrl, val;
+       int err;
+
+       /* enable queue id request */
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx,
+                       HNS3_PMU_QID_CTRL_REQ_ENABLE);
+
+       reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx);
+       err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val,
+                                val & HNS3_PMU_QID_CTRL_DONE, 1, 1000);
+       if (err == -ETIMEDOUT) {
+               pci_err(hns3_pmu->pdev, "QID request timeout!\n");
+               goto out;
+       }
+
+       queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS);
+
+out:
+       /* disable qid request and clear status */
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0);
+
+       return queue_id_valid;
+}
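+
+/*
+ * Note on the poll above: readl_poll_timeout() re-reads the QID control
+ * register at roughly 1us intervals for up to 1000us waiting for the
+ * DONE bit; once done, a set MISS bit indicates that the requested
+ * queue does not exist for the given function.
+ */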
+
+static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+                                u16 queue)
+{
+       hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue);
+
+       return hns3_pmu_qid_req_start(hns3_pmu, idx);
+}
+
+static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event)
+{
+       struct hns3_pmu_event_attr *pmu_event;
+       struct dev_ext_attribute *eattr;
+       struct device_attribute *dattr;
+       struct attribute *attr;
+       u32 i;
+
+       for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) {
+               attr = hns3_pmu_events_attr[i];
+               dattr = container_of(attr, struct device_attribute, attr);
+               eattr = container_of(dattr, struct dev_ext_attribute, attr);
+               pmu_event = eattr->var;
+
+               if (event == pmu_event->event)
+                       return pmu_event;
+       }
+
+       return NULL;
+}
+
+static int hns3_pmu_set_func_mode(struct perf_event *event,
+                                 struct hns3_pmu *hns3_pmu)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u16 bdf = hns3_pmu_get_bdf(event);
+
+       if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+               return -ENOENT;
+
+       HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC);
+
+       return 0;
+}
+
+static int hns3_pmu_set_func_queue_mode(struct perf_event *event,
+                                       struct hns3_pmu *hns3_pmu)
+{
+       u16 queue_id = hns3_pmu_get_queue(event);
+       struct hw_perf_event *hwc = &event->hw;
+       u16 bdf = hns3_pmu_get_bdf(event);
+
+       if (!hns3_pmu_valid_bdf(hns3_pmu, bdf))
+               return -ENOENT;
+
+       if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) {
+               pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id);
+               return -ENOENT;
+       }
+
+       HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE);
+
+       return 0;
+}
+
+static bool
+hns3_pmu_is_enabled_global_mode(struct perf_event *event,
+                               struct hns3_pmu_event_attr *pmu_event)
+{
+       u8 global = hns3_pmu_get_global(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL))
+               return false;
+
+       return global;
+}
+
+static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event,
+                                         struct hns3_pmu_event_attr *pmu_event)
+{
+       u16 queue_id = hns3_pmu_get_queue(event);
+       u16 bdf = hns3_pmu_get_bdf(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC))
+               return false;
+       else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE)
+               return false;
+
+       return bdf;
+}
+
+static bool
+hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event,
+                                   struct hns3_pmu_event_attr *pmu_event)
+{
+       u16 queue_id = hns3_pmu_get_queue(event);
+       u16 bdf = hns3_pmu_get_bdf(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE))
+               return false;
+       else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE)
+               return false;
+
+       return bdf;
+}
+
+static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event,
+                                         struct hns3_pmu_event_attr *pmu_event)
+{
+       u8 tc_id = hns3_pmu_get_tc(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT))
+               return false;
+
+       return tc_id == HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event,
+                                struct hns3_pmu_event_attr *pmu_event)
+{
+       u8 tc_id = hns3_pmu_get_tc(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC))
+               return false;
+
+       return tc_id != HNS3_PMU_FILTER_ALL_TC;
+}
+
+static bool
+hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event,
+                                  struct hns3_pmu *hns3_pmu,
+                                  struct hns3_pmu_event_attr *pmu_event)
+{
+       u16 bdf = hns3_pmu_get_bdf(event);
+
+       if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR))
+               return false;
+
+       return hns3_pmu_valid_bdf(hns3_pmu, bdf);
+}
+
+static int hns3_pmu_select_filter_mode(struct perf_event *event,
+                                      struct hns3_pmu *hns3_pmu)
+{
+       u32 event_id = hns3_pmu_get_event(event);
+       struct hw_perf_event *hwc = &event->hw;
+       struct hns3_pmu_event_attr *pmu_event;
+
+       pmu_event = hns3_pmu_get_pmu_event(event_id);
+       if (!pmu_event) {
+               pci_err(hns3_pmu->pdev, "Invalid pmu event\n");
+               return -ENOENT;
+       }
+
+       if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) {
+               HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL);
+               return 0;
+       }
+
+       if (hns3_pmu_is_enabled_func_mode(event, pmu_event))
+               return hns3_pmu_set_func_mode(event, hns3_pmu);
+
+       if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event))
+               return hns3_pmu_set_func_queue_mode(event, hns3_pmu);
+
+       if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) {
+               HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT);
+               return 0;
+       }
+
+       if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) {
+               HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC);
+               return 0;
+       }
+
+       if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) {
+               HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR);
+               return 0;
+       }
+
+       return -ENOENT;
+}
+
+static bool hns3_pmu_validate_event_group(struct perf_event *event)
+{
+       struct perf_event *sibling, *leader = event->group_leader;
+       struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS];
+       int counters = 1;
+       int num;
+
+       event_group[0] = leader;
+       if (!is_software_event(leader)) {
+               if (leader->pmu != event->pmu)
+                       return false;
+
+               if (leader != event && !hns3_pmu_cmp_event(leader, event))
+                       event_group[counters++] = event;
+       }
+
+       for_each_sibling_event(sibling, event->group_leader) {
+               if (is_software_event(sibling))
+                       continue;
+
+               if (sibling->pmu != event->pmu)
+                       return false;
+
+               for (num = 0; num < counters; num++) {
+                       if (hns3_pmu_cmp_event(event_group[num], sibling))
+                               break;
+               }
+
+               if (num == counters)
+                       event_group[counters++] = sibling;
+       }
+
+       return counters <= HNS3_PMU_MAX_HW_EVENTS;
+}
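+
+/*
+ * In effect: group members that resolve to the same "real" event
+ * (event type plus subevent) share one hardware slot, so e.g. two
+ * flavours of the same bandwidth event that differ only in
+ * ext_counter_used should consume a single slot; only distinct real
+ * events count toward HNS3_PMU_MAX_HW_EVENTS.
+ */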
+
+static u32 hns3_pmu_get_filter_condition(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u16 intr_id = hns3_pmu_get_intr(event);
+       u8 port_id = hns3_pmu_get_port(event);
+       u16 bdf = hns3_pmu_get_bdf(event);
+       u8 tc_id = hns3_pmu_get_tc(event);
+       u8 filter_mode;
+
+       filter_mode = *(u8 *)hwc->addr_filters;
+       switch (filter_mode) {
+       case HNS3_PMU_HW_FILTER_PORT:
+               return FILTER_CONDITION_PORT(port_id);
+       case HNS3_PMU_HW_FILTER_PORT_TC:
+               return FILTER_CONDITION_PORT_TC(port_id, tc_id);
+       case HNS3_PMU_HW_FILTER_FUNC:
+       case HNS3_PMU_HW_FILTER_FUNC_QUEUE:
+               return GET_PCI_DEVFN(bdf);
+       case HNS3_PMU_HW_FILTER_FUNC_INTR:
+               return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id);
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+static void hns3_pmu_config_filter(struct perf_event *event)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       u8 event_type = hns3_pmu_get_event_type(event);
+       u8 subevent_id = hns3_pmu_get_subevent(event);
+       u16 queue_id = hns3_pmu_get_queue(event);
+       struct hw_perf_event *hwc = &event->hw;
+       u8 filter_mode = *(u8 *)hwc->addr_filters;
+       u16 bdf = hns3_pmu_get_bdf(event);
+       u32 idx = hwc->idx;
+       u32 val;
+
+       val = event_type;
+       val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S;
+       val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S;
+       val |= HNS3_PMU_EVENT_OVERFLOW_RESTART;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+       val = hns3_pmu_get_filter_condition(event);
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val);
+
+       if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE)
+               hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id);
+}
+
+static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu,
+                                   struct hw_perf_event *hwc)
+{
+       u32 idx = hwc->idx;
+       u32 val;
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+       val |= HNS3_PMU_EVENT_EN;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu,
+                                    struct hw_perf_event *hwc)
+{
+       u32 idx = hwc->idx;
+       u32 val;
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+       val &= ~HNS3_PMU_EVENT_EN;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu,
+                                struct hw_perf_event *hwc)
+{
+       u32 idx = hwc->idx;
+       u32 val;
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+       val &= ~HNS3_PMU_INTR_MASK_OVERFLOW;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu,
+                                 struct hw_perf_event *hwc)
+{
+       u32 idx = hwc->idx;
+       u32 val;
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx);
+       val |= HNS3_PMU_INTR_MASK_OVERFLOW;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val);
+}
+
+static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+       u32 val;
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+       val |= HNS3_PMU_EVENT_STATUS_RESET;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+
+       val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx);
+       val &= ~HNS3_PMU_EVENT_STATUS_RESET;
+       hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val);
+}
+
+static u64 hns3_pmu_read_counter(struct perf_event *event)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+
+       return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx);
+}
+
+static void hns3_pmu_write_counter(struct perf_event *event, u64 value)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       u32 idx = event->hw.idx;
+
+       hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value);
+       hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value);
+}
+
+static void hns3_pmu_init_counter(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       local64_set(&hwc->prev_count, 0);
+       hns3_pmu_write_counter(event, 0);
+}
+
+static int hns3_pmu_event_init(struct perf_event *event)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+       int ret;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /* Sampling is not supported */
+       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+               return -EOPNOTSUPP;
+
+       event->cpu = hns3_pmu->on_cpu;
+
+       idx = hns3_pmu_get_event_idx(hns3_pmu);
+       if (idx < 0) {
+               pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n",
+                       HNS3_PMU_MAX_HW_EVENTS);
+               return -EBUSY;
+       }
+
+       hwc->idx = idx;
+
+       ret = hns3_pmu_select_filter_mode(event, hns3_pmu);
+       if (ret) {
+               pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret);
+               return ret;
+       }
+
+       if (!hns3_pmu_validate_event_group(event)) {
+               pci_err(hns3_pmu->pdev, "Invalid event group.\n");
+               return -EINVAL;
+       }
+
+       if (hns3_pmu_get_ext_counter_used(event))
+               hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER;
+       else
+               hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER;
+
+       return 0;
+}
+
+static void hns3_pmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 new_cnt, prev_cnt, delta;
+
+       do {
+               prev_cnt = local64_read(&hwc->prev_count);
+               new_cnt = hns3_pmu_read_counter(event);
+       } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) !=
+                prev_cnt);
+
+       delta = new_cnt - prev_cnt;
+       local64_add(delta, &event->count);
+}
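+
+/*
+ * The cmpxchg loop above is the usual lock-free counter-update pattern:
+ * if prev_count changed between the read and the compare-and-exchange,
+ * the loop retries, so each hardware increment is folded into
+ * event->count exactly once.
+ */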
+
+static void hns3_pmu_start(struct perf_event *event, int flags)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+       hwc->state = 0;
+
+       hns3_pmu_config_filter(event);
+       hns3_pmu_init_counter(event);
+       hns3_pmu_enable_intr(hns3_pmu, hwc);
+       hns3_pmu_enable_counter(hns3_pmu, hwc);
+
+       perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+
+       hns3_pmu_disable_counter(hns3_pmu, hwc);
+       hns3_pmu_disable_intr(hns3_pmu, hwc);
+
+       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+       hwc->state |= PERF_HES_STOPPED;
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       /* Read hardware counter and update the perf counter statistics */
+       hns3_pmu_read(event);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hns3_pmu_add(struct perf_event *event, int flags)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+       /* Check all working events to find a related event. */
+       idx = hns3_pmu_find_related_event_idx(hns3_pmu, event);
+       if (idx < 0 && idx != -ENOENT)
+               return idx;
+
+       /* The current event shares an enabled hardware event with its related event */
+       if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) {
+               hwc->idx = idx;
+               goto start_count;
+       }
+
+       idx = hns3_pmu_get_event_idx(hns3_pmu);
+       if (idx < 0)
+               return idx;
+
+       hwc->idx = idx;
+       hns3_pmu->hw_events[idx] = event;
+
+start_count:
+       if (flags & PERF_EF_START)
+               hns3_pmu_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void hns3_pmu_del(struct perf_event *event, int flags)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu);
+       struct hw_perf_event *hwc = &event->hw;
+
+       hns3_pmu_stop(event, PERF_EF_UPDATE);
+       hns3_pmu->hw_events[hwc->idx] = NULL;
+       perf_event_update_userpage(event);
+}
+
+static void hns3_pmu_enable(struct pmu *pmu)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+       u32 val;
+
+       val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+       val |= HNS3_PMU_GLOBAL_START;
+       writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static void hns3_pmu_disable(struct pmu *pmu)
+{
+       struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu);
+       u32 val;
+
+       val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+       val &= ~HNS3_PMU_GLOBAL_START;
+       writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL);
+}
+
+static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+       u16 device_id;
+       char *name;
+       u32 val;
+
+       hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2];
+       if (!hns3_pmu->base) {
+               pci_err(pdev, "ioremap failed\n");
+               return -ENOMEM;
+       }
+
+       hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ);
+
+       val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF);
+       hns3_pmu->bdf_min = val & 0xffff;
+       hns3_pmu->bdf_max = val >> 16;
+
+       val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID);
+       device_id = val & 0xffff;
+       name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id);
+       if (!name)
+               return -ENOMEM;
+
+       hns3_pmu->pdev = pdev;
+       hns3_pmu->on_cpu = -1;
+       hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION);
+       hns3_pmu->pmu = (struct pmu) {
+               .name           = name,
+               .module         = THIS_MODULE,
+               .event_init     = hns3_pmu_event_init,
+               .pmu_enable     = hns3_pmu_enable,
+               .pmu_disable    = hns3_pmu_disable,
+               .add            = hns3_pmu_add,
+               .del            = hns3_pmu_del,
+               .start          = hns3_pmu_start,
+               .stop           = hns3_pmu_stop,
+               .read           = hns3_pmu_read,
+               .task_ctx_nr    = perf_invalid_context,
+               .attr_groups    = hns3_pmu_attr_groups,
+               .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
+       };
+
+       return 0;
+}
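+
+/*
+ * With the attribute groups wired up above, the registered PMU is
+ * expected to appear under sysfs roughly as follows (layout sketch,
+ * <id> is the SICL ID read from the hardware):
+ *
+ *   /sys/bus/event_source/devices/hns3_pmu_sicl_<id>/
+ *       events/  filtermode/  format/
+ *       cpumask  identifier  bdf_min  bdf_max  hw_clk_freq
+ */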
+
+static irqreturn_t hns3_pmu_irq(int irq, void *data)
+{
+       struct hns3_pmu *hns3_pmu = data;
+       u32 intr_status, idx;
+
+       for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+               intr_status = hns3_pmu_readl(hns3_pmu,
+                                            HNS3_PMU_REG_EVENT_INTR_STATUS,
+                                            idx);
+
+               /*
+                * Since each counter restarts from 0 on overflow, no extra
+                * processing is needed; just clear the interrupt status.
+                */
+               if (intr_status)
+                       hns3_pmu_clear_intr_status(hns3_pmu, idx);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+       struct hns3_pmu *hns3_pmu;
+
+       hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+       if (!hns3_pmu)
+               return -ENODEV;
+
+       if (hns3_pmu->on_cpu == -1) {
+               hns3_pmu->on_cpu = cpu;
+               irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu));
+       }
+
+       return 0;
+}
+
+static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+       struct hns3_pmu *hns3_pmu;
+       unsigned int target;
+
+       hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node);
+       if (!hns3_pmu)
+               return -ENODEV;
+
+       /* Nothing to do if this CPU doesn't own the PMU */
+       if (hns3_pmu->on_cpu != cpu)
+               return 0;
+
+       /* Choose a new CPU from all online CPUs */
+       target = cpumask_any_but(cpu_online_mask, cpu);
+       if (target >= nr_cpu_ids)
+               return 0;
+
+       perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target);
+       hns3_pmu->on_cpu = target;
+       irq_set_affinity(hns3_pmu->irq, cpumask_of(target));
+
+       return 0;
+}
+
+static void hns3_pmu_free_irq(void *data)
+{
+       struct pci_dev *pdev = data;
+
+       pci_free_irq_vectors(pdev);
+}
+
+static int hns3_pmu_irq_register(struct pci_dev *pdev,
+                                struct hns3_pmu *hns3_pmu)
+{
+       int irq, ret;
+
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+       if (ret < 0) {
+               pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret);
+               return ret;
+       }
+
+       ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev);
+       if (ret) {
+               pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret);
+               return ret;
+       }
+
+       irq = pci_irq_vector(pdev, 0);
+       ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0,
+                              hns3_pmu->pmu.name, hns3_pmu);
+       if (ret) {
+               pci_err(pdev, "failed to register irq, ret = %d.\n", ret);
+               return ret;
+       }
+
+       hns3_pmu->irq = irq;
+
+       return 0;
+}
+
+static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
+{
+       int ret;
+
+       ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu);
+       if (ret)
+               return ret;
+
+       ret = hns3_pmu_irq_register(pdev, hns3_pmu);
+       if (ret)
+               return ret;
+
+       ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                      &hns3_pmu->node);
+       if (ret) {
+               pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret);
+               return ret;
+       }
+
+       ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
+       if (ret) {
+               pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
+               cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                           &hns3_pmu->node);
+       }
+
+       return ret;
+}
+
+static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
+{
+       struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
+
+       perf_pmu_unregister(&hns3_pmu->pmu);
+       cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                   &hns3_pmu->node);
+}
+
+static int hns3_pmu_init_dev(struct pci_dev *pdev)
+{
+       int ret;
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret);
+               return ret;
+       }
+
+       ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu");
+       if (ret < 0) {
+               pci_err(pdev, "failed to request pci region, ret = %d.\n", ret);
+               return ret;
+       }
+
+       pci_set_master(pdev);
+
+       return 0;
+}
+
+static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct hns3_pmu *hns3_pmu;
+       int ret;
+
+       hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL);
+       if (!hns3_pmu)
+               return -ENOMEM;
+
+       ret = hns3_pmu_init_dev(pdev);
+       if (ret)
+               return ret;
+
+       ret = hns3_pmu_init_pmu(pdev, hns3_pmu);
+       if (ret) {
+               pci_clear_master(pdev);
+               return ret;
+       }
+
+       pci_set_drvdata(pdev, hns3_pmu);
+
+       return ret;
+}
+
+static void hns3_pmu_remove(struct pci_dev *pdev)
+{
+       hns3_pmu_uninit_pmu(pdev);
+       pci_clear_master(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static const struct pci_device_id hns3_pmu_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, hns3_pmu_ids);
+
+static struct pci_driver hns3_pmu_driver = {
+       .name = "hns3_pmu",
+       .id_table = hns3_pmu_ids,
+       .probe = hns3_pmu_probe,
+       .remove = hns3_pmu_remove,
+};
+
+static int __init hns3_pmu_module_init(void)
+{
+       int ret;
+
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+                                     "AP_PERF_ARM_HNS3_PMU_ONLINE",
+                                     hns3_pmu_online_cpu,
+                                     hns3_pmu_offline_cpu);
+       if (ret) {
+               pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret);
+               return ret;
+       }
+
+       ret = pci_register_driver(&hns3_pmu_driver);
+       if (ret) {
+               pr_err("failed to register pci driver, ret = %d.\n", ret);
+               cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+       }
+
+       return ret;
+}
+module_init(hns3_pmu_module_init);
+
+static void __exit hns3_pmu_module_exit(void)
+{
+       pci_unregister_driver(&hns3_pmu_driver);
+       cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE);
+}
+module_exit(hns3_pmu_module_exit);
+
+MODULE_DESCRIPTION("HNS3 PMU driver");
+MODULE_LICENSE("GPL v2");
index 282d3a0..69c3050 100644
@@ -2,10 +2,6 @@
 /* Marvell CN10K LLC-TAD perf driver
  *
  * Copyright (C) 2021 Marvell
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) "tad_pmu: " fmt
@@ -18,9 +14,9 @@
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
-#define TAD_PFC_OFFSET         0x0
+#define TAD_PFC_OFFSET         0x800
 #define TAD_PFC(counter)       (TAD_PFC_OFFSET | (counter << 3))
-#define TAD_PRF_OFFSET         0x100
+#define TAD_PRF_OFFSET         0x900
 #define TAD_PRF(counter)       (TAD_PRF_OFFSET | (counter << 3))
 #define TAD_PRF_CNTSEL_MASK    0xFF
 #define TAD_MAX_COUNTERS       8
@@ -100,9 +96,7 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
         * which sets TAD()_PRF()[CNTSEL] != 0
         */
        for (i = 0; i < tad_pmu->region_cnt; i++) {
-               reg_val = readq_relaxed(tad_pmu->regions[i].base +
-                                       TAD_PRF(counter_idx));
-               reg_val |= (event_idx & 0xFF);
+               reg_val = event_idx & 0xFF;
                writeq_relaxed(reg_val, tad_pmu->regions[i].base +
                               TAD_PRF(counter_idx));
        }
index b2b8d20..2c96183 100644
@@ -121,7 +121,7 @@ u64 riscv_pmu_event_update(struct perf_event *event)
        return delta;
 }
 
-static void riscv_pmu_stop(struct perf_event *event, int flags)
+void riscv_pmu_stop(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
@@ -175,7 +175,7 @@ int riscv_pmu_event_set_period(struct perf_event *event)
        return overflow;
 }
 
-static void riscv_pmu_start(struct perf_event *event, int flags)
+void riscv_pmu_start(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
        struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
index dca3537..79a3de5 100644
 #include <linux/irqdomain.h>
 #include <linux/of_irq.h>
 #include <linux/of.h>
+#include <linux/cpu_pm.h>
 
 #include <asm/sbi.h>
 #include <asm/hwcap.h>
 
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(firmware, "config:63");
+
+static struct attribute *riscv_arch_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_firmware.attr,
+       NULL,
+};
+
+static struct attribute_group riscv_pmu_format_group = {
+       .name = "format",
+       .attrs = riscv_arch_formats_attr,
+};
+
+static const struct attribute_group *riscv_pmu_attr_groups[] = {
+       &riscv_pmu_format_group,
+       NULL,
+};
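+
+/*
+ * These format attributes allow raw events to be specified from
+ * userspace; a hypothetical invocation (the event value is a
+ * placeholder) might look like
+ *
+ *   $ perf stat -e cpu/event=0x10019,firmware=0/ ...
+ *
+ * with the event code in config:0-47 and the firmware-event flag in
+ * config:63.
+ */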
+
 union sbi_pmu_ctr_info {
        unsigned long value;
        struct {
@@ -666,12 +686,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
                child = of_get_compatible_child(cpu, "riscv,cpu-intc");
                if (!child) {
                        pr_err("Failed to find INTC node\n");
+                       of_node_put(cpu);
                        return -ENODEV;
                }
                domain = irq_find_host(child);
                of_node_put(child);
-               if (domain)
+               if (domain) {
+                       of_node_put(cpu);
                        break;
+               }
        }
        if (!domain) {
                pr_err("Failed to find INTC IRQ root domain\n");
@@ -693,6 +716,73 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
        return 0;
 }
 
+#ifdef CONFIG_CPU_PM
+static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+                               void *v)
+{
+       struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb);
+       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+       int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS);
+       struct perf_event *event;
+       int idx;
+
+       if (!enabled)
+               return NOTIFY_OK;
+
+       for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) {
+               event = cpuc->events[idx];
+               if (!event)
+                       continue;
+
+               switch (cmd) {
+               case CPU_PM_ENTER:
+                       /*
+                        * Stop and update the counter
+                        */
+                       riscv_pmu_stop(event, PERF_EF_UPDATE);
+                       break;
+               case CPU_PM_EXIT:
+               case CPU_PM_ENTER_FAILED:
+                       /*
+                        * Restore and enable the counter.
+                        *
+                        * This requires RCU read locking to be
+                        * functional, so wrap the call in RCU_NONIDLE to
+                        * make the RCU subsystem aware that this CPU is
+                        * not idle from an RCU perspective for the
+                        * duration of the riscv_pmu_start() call.
+                        */
+                       RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return NOTIFY_OK;
+}
+
+static int riscv_pm_pmu_register(struct riscv_pmu *pmu)
+{
+       pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify;
+       return cpu_pm_register_notifier(&pmu->riscv_pm_nb);
+}
+
+static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
+{
+       cpu_pm_unregister_notifier(&pmu->riscv_pm_nb);
+}
+#else
+static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; }
+static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
+#endif
+
+static void riscv_pmu_destroy(struct riscv_pmu *pmu)
+{
+       riscv_pm_pmu_unregister(pmu);
+       cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+}
+
 static int pmu_sbi_device_probe(struct platform_device *pdev)
 {
        struct riscv_pmu *pmu = NULL;
@@ -720,6 +810,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
        }
+       pmu->pmu.attr_groups = riscv_pmu_attr_groups;
        pmu->num_counters = num_counters;
        pmu->ctr_start = pmu_sbi_ctr_start;
        pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -733,14 +824,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       ret = riscv_pm_pmu_register(pmu);
+       if (ret)
+               goto out_unregister;
+
        ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
-       if (ret) {
-               cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
-               return ret;
-       }
+       if (ret)
+               goto out_unregister;
 
        return 0;
 
+out_unregister:
+       riscv_pmu_destroy(pmu);
+
 out_free:
        kfree(pmu);
        return ret;
index fd7e8fb..961f4d8 100644
 #define wmb()  do { kcsan_wmb(); __wmb(); } while (0)
 #endif
 
+#ifdef __dma_mb
+#define dma_mb()       do { kcsan_mb(); __dma_mb(); } while (0)
+#endif
+
 #ifdef __dma_rmb
 #define dma_rmb()      do { kcsan_rmb(); __dma_rmb(); } while (0)
 #endif
 #define wmb()  mb()
 #endif
 
+#ifndef dma_mb
+#define dma_mb()       mb()
+#endif
+
 #ifndef dma_rmb
 #define dma_rmb()      rmb()
 #endif
index 98954dd..72974cb 100644
@@ -964,7 +964,34 @@ static inline void iounmap(volatile void __iomem *addr)
 #elif defined(CONFIG_GENERIC_IOREMAP)
 #include <linux/pgtable.h>
 
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
+/*
+ * Arch code can implement the following two hooks when using GENERIC_IOREMAP.
+ *
+ * ioremap_allowed() returns a bool:
+ *   - true means continue to remap
+ *   - false means skip remap and return directly
+ *
+ * iounmap_allowed() returns a bool:
+ *   - true means continue to vunmap
+ *   - false means skip vunmap and return directly
+ */
+#ifndef ioremap_allowed
+#define ioremap_allowed ioremap_allowed
+static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size,
+                                  unsigned long prot)
+{
+       return true;
+}
+#endif
+
+#ifndef iounmap_allowed
+#define iounmap_allowed iounmap_allowed
+static inline bool iounmap_allowed(void *addr)
+{
+       return true;
+}
+#endif
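+
+/*
+ * A minimal sketch of an arch override, assuming a hypothetical
+ * arch_range_is_ram() helper (not a real API):
+ *
+ *   #define ioremap_allowed ioremap_allowed
+ *   static inline bool ioremap_allowed(phys_addr_t phys_addr,
+ *                                      size_t size, unsigned long prot)
+ *   {
+ *           return !arch_range_is_ram(phys_addr, size);
+ *   }
+ */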
+
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+                          unsigned long prot);
 void iounmap(volatile void __iomem *addr);
 
 static inline void __iomem *ioremap(phys_addr_t addr, size_t size)
index b66c5f3..154daff 100644
@@ -229,6 +229,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
        CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
        CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
+       CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
        CPUHP_AP_PERF_ARM_L2X0_ONLINE,
        CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
        CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
index 2d2ccae..0ace775 100644
@@ -348,7 +348,7 @@ struct vm_area_struct;
 #define GFP_DMA32      __GFP_DMA32
 #define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE   (GFP_HIGHUSER | __GFP_MOVABLE | \
-                        __GFP_SKIP_KASAN_POISON)
+                        __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON)
 #define GFP_TRANSHUGE_LIGHT    ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
                         __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE  (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
index de29821..4ddaf6a 100644
@@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio,
        return split_huge_page_to_list(&folio->page, list);
 }
 
+/*
+ * Architectures that select ARCH_WANTS_THP_SWAP but cannot support THP
+ * swap due to implementation limitations (such as arm64 with MTE) can
+ * override this to false.
+ */
+#ifndef arch_thp_swp_supported
+static inline bool arch_thp_swp_supported(void)
+{
+       return true;
+}
+#endif
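+
+/*
+ * Elsewhere in this series, arm64 overrides this to return false while
+ * MTE is in use, since tag state for a transparent huge page is not
+ * preserved across swap-out in that configuration.
+ */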
+
 #endif /* _LINUX_HUGE_MM_H */
index 46f9b6f..bf66fe0 100644
@@ -56,9 +56,13 @@ struct riscv_pmu {
 
        struct cpu_hw_events    __percpu *hw_events;
        struct hlist_node       node;
+       struct notifier_block   riscv_pm_nb;
 };
 
 #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu))
+
+void riscv_pmu_start(struct perf_event *event, int flags);
+void riscv_pmu_stop(struct perf_event *event, int flags);
 unsigned long riscv_pmu_ctr_read_csr(unsigned long csr);
 int riscv_pmu_event_set_period(struct perf_event *event);
 uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event);
index 5fe598e..8652426 100644
 #include <linux/io.h>
 #include <linux/export.h>
 
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+                          unsigned long prot)
 {
        unsigned long offset, vaddr;
        phys_addr_t last_addr;
        struct vm_struct *area;
 
        /* Disallow wrap-around or zero size */
-       last_addr = addr + size - 1;
-       if (!size || last_addr < addr)
+       last_addr = phys_addr + size - 1;
+       if (!size || last_addr < phys_addr)
                return NULL;
 
        /* Page-align mappings */
-       offset = addr & (~PAGE_MASK);
-       addr -= offset;
+       offset = phys_addr & (~PAGE_MASK);
+       phys_addr -= offset;
        size = PAGE_ALIGN(size + offset);
 
+       if (!ioremap_allowed(phys_addr, size, prot))
+               return NULL;
+
        area = get_vm_area_caller(size, VM_IOREMAP,
                        __builtin_return_address(0));
        if (!area)
                return NULL;
        vaddr = (unsigned long)area->addr;
+       area->phys_addr = phys_addr;
 
-       if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) {
+       if (ioremap_page_range(vaddr, vaddr + size, phys_addr,
+                              __pgprot(prot))) {
                free_vm_area(area);
                return NULL;
        }
@@ -44,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot);
 
 void iounmap(volatile void __iomem *addr)
 {
-       vunmap((void *)((unsigned long)addr & PAGE_MASK));
+       void *vaddr = (void *)((unsigned long)addr & PAGE_MASK);
+
+       if (!iounmap_allowed(vaddr))
+               return;
+
+       if (is_vmalloc_addr(vaddr))
+               vunmap(vaddr);
 }
 EXPORT_SYMBOL(iounmap);
index c40c0e7..78be2be 100644
@@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init)
                return;
 
        tag = kasan_random_tag();
+       kasan_unpoison(set_tag(page_address(page), tag),
+                      PAGE_SIZE << order, init);
        for (i = 0; i < (1 << order); i++)
                page_kasan_tag_set(page + i, tag);
-       kasan_unpoison(page_address(page), PAGE_SIZE << order, init);
 }
 
 void __kasan_poison_pages(struct page *page, unsigned int order, bool init)
index b5b14b7..b0bcab5 100644
@@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order)
 }
 #endif /* CONFIG_DEBUG_VM */
 
-static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
+static inline bool should_skip_kasan_unpoison(gfp_t flags)
 {
        /* Don't skip if a software KASAN mode is enabled. */
        if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
@@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
                return true;
 
        /*
-        * With hardware tag-based KASAN enabled, skip if either:
-        *
-        * 1. Memory tags have already been cleared via tag_clear_highpage().
-        * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON.
+        * With hardware tag-based KASAN enabled, skip if this has been
+        * requested via __GFP_SKIP_KASAN_UNPOISON.
         */
-       return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON);
+       return flags & __GFP_SKIP_KASAN_UNPOISON;
 }
 
 static inline bool should_skip_init(gfp_t flags)
@@ -2397,6 +2395,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
        bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
                        !should_skip_init(gfp_flags);
        bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
+       int i;
 
        set_page_private(page, 0);
        set_page_refcounted(page);
@@ -2422,8 +2421,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
         * should be initialized as well).
         */
        if (init_tags) {
-               int i;
-
                /* Initialize both memory and tags. */
                for (i = 0; i != 1 << order; ++i)
                        tag_clear_highpage(page + i);
@@ -2431,13 +2428,17 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
                /* Note that memory is already initialized by the loop above. */
                init = false;
        }
-       if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) {
+       if (!should_skip_kasan_unpoison(gfp_flags)) {
                /* Unpoison shadow memory or set memory tags. */
                kasan_unpoison_pages(page, order, init);
 
                /* Note that memory is already initialized by KASAN. */
                if (kasan_has_integrated_init())
                        init = false;
+       } else {
+               /* Ensure page_address() dereferencing does not fault. */
+               for (i = 0; i != 1 << order; ++i)
+                       page_kasan_tag_reset(page + i);
        }
        /* If memory is still not initialized, do it now. */
        if (init)
index 2a65a89..10b94d6 100644
@@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
        entry.val = 0;
 
        if (folio_test_large(folio)) {
-               if (IS_ENABLED(CONFIG_THP_SWAP))
+               if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
                        get_swap_pages(1, &entry, folio_nr_pages(folio));
                goto out;
        }