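Merge branches 'for-next/kvm-build-fix', 'for-next/va-refactor', 'for-next/lto', 'for-next/mem-hotplug', 'for-next/cppc-ffh', 'for-next/pad-image-header', 'for-next/zone-dma-default-32-bit', 'for-next/signal-tag-bits' and 'for-next/cmdline-extended'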
author: Catalin Marinas <catalin.marinas@arm.com>
Wed, 9 Dec 2020 18:04:35 +0000 (18:04 +0000)
committer: Catalin Marinas <catalin.marinas@arm.com>
Wed, 9 Dec 2020 18:04:35 +0000 (18:04 +0000)
* for-next/kvm-build-fix:
  : Fix KVM build issues with 64K pages
  KVM: arm64: Fix build error in user_mem_abort()

* for-next/va-refactor:
  : VA layout changes
  arm64: mm: don't assume struct page is always 64 bytes
  Documentation/arm64: fix RST layout of memory.rst
  arm64: mm: tidy up top of kernel VA space
  arm64: mm: make vmemmap region a projection of the linear region
  arm64: mm: extend linear region for 52-bit VA configurations

* for-next/lto:
  : Upgrade READ_ONCE() to RCpc acquire on arm64 with LTO
  arm64: lto: Strengthen READ_ONCE() to acquire when CONFIG_LTO=y
  arm64: alternatives: Remove READ_ONCE() usage during patch operation
  arm64: cpufeatures: Add capability for LDAPR instruction
  arm64: alternatives: Split up alternative.h
  arm64: uaccess: move uao_* alternatives to asm-uaccess.h

* for-next/mem-hotplug:
  : Memory hotplug improvements
  arm64/mm/hotplug: Ensure early memory sections are all online
  arm64/mm/hotplug: Enable MEM_OFFLINE event handling
  arm64/mm/hotplug: Register boot memory hot remove notifier earlier
  arm64: mm: account for hotplug memory when randomizing the linear region

* for-next/cppc-ffh:
  : Add CPPC FFH support using arm64 AMU counters
  arm64: abort counter_read_on_cpu() when irqs_disabled()
  arm64: implement CPPC FFH support using AMUs
  arm64: split counter validation function
  arm64: wrap and generalise counter read functions

* for-next/pad-image-header:
  : Pad Image header to 64KB and unmap it
  arm64: head: tidy up the Image header definition
  arm64/head: avoid symbol names pointing into first 64 KB of kernel image
  arm64: omit [_text, _stext) from permanent kernel mapping

* for-next/zone-dma-default-32-bit:
  : Default to 32-bit wide ZONE_DMA (previously reduced to 1GB for RPi4)
  of: unittest: Fix build on architectures without CONFIG_OF_ADDRESS
  mm: Remove examples from enum zone_type comment
  arm64: mm: Set ZONE_DMA size based on early IORT scan
  arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges
  of: unittest: Add test for of_dma_get_max_cpu_address()
  of/address: Introduce of_dma_get_max_cpu_address()
  arm64: mm: Move zone_dma_bits initialization into zone_sizes_init()
  arm64: mm: Move reserve_crashkernel() into mem_init()
  arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required
  arm64: Ignore any DMA offsets in the max_zone_phys() calculation

* for-next/signal-tag-bits:
  : Expose the FAR_EL1 tag bits in siginfo
  arm64: expose FAR_EL1 tag bits in siginfo
  signal: define the SA_EXPOSE_TAGBITS bit in sa_flags
  signal: define the SA_UNSUPPORTED bit in sa_flags
  arch: provide better documentation for the arch-specific SA_* flags
  signal: clear non-uapi flag bits when passing/returning sa_flags
  arch: move SA_* definitions to generic headers
  parisc: start using signal-defs.h
  parisc: Drop parisc special case for __sighandler_t

* for-next/cmdline-extended:
  : Add support for CONFIG_CMDLINE_EXTENDED
  arm64: Extend the kernel command line from the bootloader
  arm64: kaslr: Refactor early init command line parsing

64 files changed:
Documentation/arm64/kasan-offsets.sh
Documentation/arm64/memory.rst
Documentation/arm64/tagged-pointers.rst
arch/alpha/include/uapi/asm/signal.h
arch/arm/include/asm/signal.h
arch/arm/include/uapi/asm/signal.h
arch/arm64/Kconfig
arch/arm64/include/asm/alternative-macros.h [new file with mode: 0644]
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/exception.h
arch/arm64/include/asm/insn.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/rwonce.h [new file with mode: 0644]
arch/arm64/include/asm/signal.h [new file with mode: 0644]
arch/arm64/include/asm/system_misc.h
arch/arm64/include/asm/topology.h
arch/arm64/include/asm/traps.h
arch/arm64/kernel/alternative.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/efi-header.S
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/head.S
arch/arm64/kernel/proton-pack.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/sys_compat.c
arch/arm64/kernel/topology.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kernel/vdso32/Makefile
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/mmu.c
arch/arm64/lib/mte.S
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/mmu.c
arch/h8300/include/uapi/asm/signal.h
arch/ia64/include/uapi/asm/signal.h
arch/m68k/include/uapi/asm/signal.h
arch/mips/include/uapi/asm/signal.h
arch/parisc/include/asm/signal.h
arch/parisc/include/uapi/asm/signal.h
arch/powerpc/include/uapi/asm/signal.h
arch/s390/include/uapi/asm/signal.h
arch/sparc/include/uapi/asm/signal.h
arch/x86/include/uapi/asm/signal.h
arch/x86/kernel/signal_compat.c
arch/xtensa/include/uapi/asm/signal.h
drivers/acpi/arm64/iort.c
drivers/of/address.c
drivers/of/unittest.c
include/linux/acpi_iort.h
include/linux/mmzone.h
include/linux/of.h
include/linux/signal.h
include/linux/signal_types.h
include/uapi/asm-generic/signal-defs.h
include/uapi/asm-generic/signal.h
kernel/signal.c

diff --git a/Documentation/arm64/kasan-offsets.sh b/Documentation/arm64/kasan-offsets.sh
index 2b7a021..2dc5f9e 100644 (file)
@@ -1,12 +1,11 @@
 #!/bin/sh
 
 # Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW
-# start address at the mid-point of the kernel VA space
+# start address at the top of the linear region
 
 print_kasan_offset () {
        printf "%02d\t" $1
        printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \
-                       + (1 << ($1 - 32 - $2)) \
                        - (1 << (64 - 32 - $2)) ))
 }
 
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index cf03b32..e7522e5 100644 (file)
@@ -32,17 +32,16 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   -----------------------------------------------------------------------
   0000000000000000     0000ffffffffffff         256TB          user
   ffff000000000000     ffff7fffffffffff         128TB          kernel logical memory map
-  ffff800000000000     ffff9fffffffffff          32TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffffdffbffeffff         ~93TB          vmalloc
-  fffffdffbfff0000     fffffdfffe5f8fff        ~998MB          [guard region]
-  fffffdfffe5f9000     fffffdfffe9fffff        4124KB          fixed mappings
-  fffffdfffea00000     fffffdfffebfffff           2MB          [guard region]
-  fffffdfffec00000     fffffdffffbfffff          16MB          PCI I/O space
-  fffffdffffc00000     fffffdffffdfffff           2MB          [guard region]
-  fffffdffffe00000     ffffffffffdfffff           2TB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+ [ffff600000000000     ffff7fffffffffff]         32TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     fffffdffffffffff           2TB          vmemmap
+  fffffe0000000000     ffffffffffffffff           2TB          [guard region]
 
 
 AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
@@ -50,19 +49,17 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   Start                        End                     Size            Use
   -----------------------------------------------------------------------
   0000000000000000     000fffffffffffff           4PB          user
-  fff0000000000000     fff7ffffffffffff           2PB          kernel logical memory map
-  fff8000000000000     fffd9fffffffffff        1440TB          [gap]
-  fffda00000000000     ffff9fffffffffff         512TB          kasan shadow region
-  ffffa00000000000     ffffa00007ffffff         128MB          bpf jit region
-  ffffa00008000000     ffffa0000fffffff         128MB          modules
-  ffffa00010000000     fffff81ffffeffff         ~88TB          vmalloc
-  fffff81fffff0000     fffffc1ffe58ffff          ~3TB          [guard region]
-  fffffc1ffe590000     fffffc1ffe9fffff        4544KB          fixed mappings
-  fffffc1ffea00000     fffffc1ffebfffff           2MB          [guard region]
-  fffffc1ffec00000     fffffc1fffbfffff          16MB          PCI I/O space
-  fffffc1fffc00000     fffffc1fffdfffff           2MB          [guard region]
-  fffffc1fffe00000     ffffffffffdfffff        3968GB          vmemmap
-  ffffffffffe00000     ffffffffffffffff           2MB          [guard region]
+  fff0000000000000     ffff7fffffffffff          ~4PB          kernel logical memory map
+ [fffd800000000000     ffff7fffffffffff]        512TB          [kasan shadow region]
+  ffff800000000000     ffff800007ffffff         128MB          bpf jit region
+  ffff800008000000     ffff80000fffffff         128MB          modules
+  ffff800010000000     fffffbffefffffff         124TB          vmalloc
+  fffffbfff0000000     fffffbfffdffffff         224MB          fixed mappings (top down)
+  fffffbfffe000000     fffffbfffe7fffff           8MB          [guard region]
+  fffffbfffe800000     fffffbffff7fffff          16MB          PCI I/O space
+  fffffbffff800000     fffffbffffffffff           8MB          [guard region]
+  fffffc0000000000     ffffffdfffffffff          ~4TB          vmemmap
+  ffffffe000000000     ffffffffffffffff         128GB          [guard region]
 
 
 Translation table lookup with 4KB pages::
diff --git a/Documentation/arm64/tagged-pointers.rst b/Documentation/arm64/tagged-pointers.rst
index eab4323..19d284b 100644 (file)
@@ -53,12 +53,25 @@ visibility.
 Preserving tags
 ---------------
 
-Non-zero tags are not preserved when delivering signals. This means that
-signal handlers in applications making use of tags cannot rely on the
-tag information for user virtual addresses being maintained for fields
-inside siginfo_t. One exception to this rule is for signals raised in
-response to watchpoint debug exceptions, where the tag information will
-be preserved.
+When delivering signals, non-zero tags are not preserved in
+siginfo.si_addr unless the flag SA_EXPOSE_TAGBITS was set in
+sigaction.sa_flags when the signal handler was installed. This means
+that signal handlers in applications making use of tags cannot rely
+on the tag information for user virtual addresses being maintained
+in this field unless the flag was set.
+
+Due to architecture limitations, bits 63:60 of the fault address
+are not preserved in response to synchronous tag check faults
+(SEGV_MTESERR) even if SA_EXPOSE_TAGBITS was set. Applications should
+treat the values of these bits as undefined in order to accommodate
+future architecture revisions which may preserve the bits.
+
+For signals raised in response to watchpoint debug exceptions, the
+tag information will be preserved regardless of the SA_EXPOSE_TAGBITS
+flag setting.
+
+Non-zero tags are never preserved in sigcontext.fault_address
+regardless of the SA_EXPOSE_TAGBITS flag setting.
 
 The architecture prevents the use of a tagged PC, so the upper byte will
 be set to a sign-extension of bit 55 on exception return.
diff --git a/arch/alpha/include/uapi/asm/signal.h b/arch/alpha/include/uapi/asm/signal.h
index 74c750b..a69dd8d 100644 (file)
@@ -60,20 +60,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-
 #define SA_ONSTACK     0x00000001
 #define SA_RESTART     0x00000002
 #define SA_NOCLDSTOP   0x00000004
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 65530a0..430be77 100644 (file)
@@ -17,6 +17,8 @@ typedef struct {
        unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#define __ARCH_UAPI_SA_FLAGS   (SA_THIRTYTWO | SA_RESTORER)
+
 #define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h
index 9b4185b..c9a3ea1 100644 (file)
@@ -60,33 +60,12 @@ typedef unsigned long sigset_t;
 #define SIGSWI         32
 
 /*
- * SA_FLAGS values:
- *
- * SA_NOCLDSTOP                flag to turn off SIGCHLD when children stop.
- * SA_NOCLDWAIT                flag on SIGCHLD to inhibit zombies.
- * SA_SIGINFO          deliver the signal with SIGINFO structs
- * SA_THIRTYTWO                delivers the signal in 32-bit mode, even if the task 
- *                     is running in 26-bit.
- * SA_ONSTACK          allows alternate signal stacks (see sigaltstack(2)).
- * SA_RESTART          flag to get restarting signals (which were the default long ago)
- * SA_NODEFER          prevents the current signal from being masked in the handler.
- * SA_RESETHAND                clears the handler when the signal is delivered.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
+ * SA_THIRTYTWO historically meant deliver the signal in 32-bit mode, even if
+ * the task is running in 26-bit. But since the kernel no longer supports
+ * 26-bit mode, the flag has no effect.
  */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
 #define SA_THIRTYTWO   0x02000000
 #define SA_RESTORER    0x04000000
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
 
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5f5a9f..2272a95 100644 (file)
@@ -331,16 +331,16 @@ config BROKEN_GAS_INST
 config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN
-       default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
-       default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
-       default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
-       default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
-       default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
-       default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
-       default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
-       default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
-       default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
-       default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
+       default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
+       default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
+       default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
+       default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
+       default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
+       default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
+       default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
+       default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
+       default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
+       default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
        default 0xffffffffffffffff
 
 source "arch/arm64/Kconfig.platforms"
@@ -1388,6 +1388,9 @@ config ARM64_PAN
         The feature is detected at runtime, and will remain as a 'nop'
         instruction if the cpu does not implement the feature.
 
+config AS_HAS_LDAPR
+       def_bool $(as-instr,.arch_extension rcpc)
+
 config ARM64_LSE_ATOMICS
        bool
        default ARM64_USE_LSE_ATOMICS
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644 (file)
index 0000000..5df500d
--- /dev/null
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <asm/cpucaps.h>
+
+#define ARM64_CB_PATCH ARM64_NCAPS
+
+/* A64 instructions are always 32 bits. */
+#define        AARCH64_INSN_SIZE               4
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(feature)                                                      \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word 663f - .\n"                             /* new instruction */ \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
+       " .word 661b - .\n"                             /* label           */ \
+       " .word " __stringify(cb) "- .\n"               /* callback */        \
+       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
+       " .byte 662b-661b\n"                            /* source len      */ \
+       " .byte 664f-663f\n"                            /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directives fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ *     .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 has a .inst. This should
+ * be fixed in any binutils release later than 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY(feature)                                         \
+       ".popsection\n"                                                 \
+       ".subsection 1\n"                                               \
+       "663:\n\t"                                                      \
+       newinstr "\n"                                                   \
+       "664:\n\t"                                                      \
+       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
+       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
+       ".previous\n"                                                   \
+       ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
+       ".if "__stringify(cfg_enabled)" == 1\n"                         \
+       "661:\n\t"                                                      \
+       oldinstr "\n"                                                   \
+       "662:\n"                                                        \
+       ".pushsection .altinstructions,\"a\"\n"                         \
+       ALTINSTR_ENTRY_CB(feature, cb)                                  \
+       ".popsection\n"                                                 \
+       "663:\n\t"                                                      \
+       "664:\n\t"                                                      \
+       ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
+       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cb) \
+       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+       .word \orig_offset - .
+       .word \alt_offset - .
+       .hword \feature
+       .byte \orig_len
+       .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+       .if \enable
+661:   \insn1
+662:   .pushsection .altinstructions, "a"
+       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+       .popsection
+       .subsection 1
+663:   \insn2
+664:   .previous
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+       .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ *    sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ *    alternative sequence it is defined in (branches into an
+ *    alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+       .popsection
+661:
+.endm
+
+.macro alternative_if cap
+       .set .Lasm_alt_mode, 1
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+       .popsection
+       .subsection 1
+       .align 2        /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cb
+       .set .Lasm_alt_mode, 0
+       .pushsection .altinstructions, "a"
+       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+       .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+       .if .Lasm_alt_mode==0
+       .subsection 1
+       .else
+       .previous
+       .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+       .if .Lasm_alt_mode==0
+       .previous
+       .endif
+       .org    . - (664b-663b) + (662b-661b)
+       .org    . - (662b-661b) + (664b-663b)
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+       nops    (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
+       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+.macro user_alt, label, oldinstr, newinstr, cond
+9999:  alternative_insn "\oldinstr", "\newinstr", \cond
+       _asm_extable 9999b, \label
+.endm
+
+#endif  /*  __ASSEMBLY__  */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ *      will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...)   \
+       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 619db9b..a38b92e 100644 (file)
@@ -2,17 +2,13 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
 
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
-#include <linux/stringify.h>
 
 struct alt_instr {
        s32 orig_offset;        /* offset to original instruction */
@@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length);
 static inline void apply_alternatives_module(void *start, size_t length) { }
 #endif
 
-#define ALTINSTR_ENTRY(feature)                                                      \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word 663f - .\n"                             /* new instruction */ \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-#define ALTINSTR_ENTRY_CB(feature, cb)                                       \
-       " .word 661b - .\n"                             /* label           */ \
-       " .word " __stringify(cb) "- .\n"               /* callback */        \
-       " .hword " __stringify(feature) "\n"            /* feature bit     */ \
-       " .byte 662b-661b\n"                            /* source len      */ \
-       " .byte 664f-663f\n"                            /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- *     .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)    \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY(feature)                                         \
-       ".popsection\n"                                                 \
-       ".subsection 1\n"                                               \
-       "663:\n\t"                                                      \
-       newinstr "\n"                                                   \
-       "664:\n\t"                                                      \
-       ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
-       ".org   . - (662b-661b) + (664b-663b)\n\t"                      \
-       ".previous\n"                                                   \
-       ".endif\n"
-
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)       \
-       ".if "__stringify(cfg_enabled)" == 1\n"                         \
-       "661:\n\t"                                                      \
-       oldinstr "\n"                                                   \
-       "662:\n"                                                        \
-       ".pushsection .altinstructions,\"a\"\n"                         \
-       ALTINSTR_ENTRY_CB(feature, cb)                                  \
-       ".popsection\n"                                                 \
-       "663:\n\t"                                                      \
-       "664:\n\t"                                                      \
-       ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)        \
-       __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
-       __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
-       .word \orig_offset - .
-       .word \alt_offset - .
-       .hword \feature
-       .byte \orig_len
-       .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
-       .if \enable
-661:   \insn1
-662:   .pushsection .altinstructions, "a"
-       altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
-       .popsection
-       .subsection 1
-663:   \insn2
-664:   .previous
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-       .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- *    sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- *    alternative sequence it is defined in (branches into an
- *    alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
-       .popsection
-661:
-.endm
-
-.macro alternative_if cap
-       .set .Lasm_alt_mode, 1
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
-       .popsection
-       .subsection 1
-       .align 2        /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
-       .set .Lasm_alt_mode, 0
-       .pushsection .altinstructions, "a"
-       altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
-       .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
-       .if .Lasm_alt_mode==0
-       .subsection 1
-       .else
-       .previous
-       .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
-       .if .Lasm_alt_mode==0
-       .previous
-       .endif
-       .org    . - (664b-663b) + (662b-661b)
-       .org    . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
-       nops    (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
-       alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:  alternative_insn "\oldinstr", "\newinstr", \cond
-       _asm_extable 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       ldtr    \reg1, [\addr];
-                       ldtr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
-8889:                  nop;
-                       nop;
-               alternative_else
-                       sttr    \reg1, [\addr];
-                       sttr    \reg2, [\addr, #8];
-                       add     \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-               _asm_extable    8889b,\l;
-       .endm
-
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               alternative_if_not ARM64_HAS_UAO
-8888:                  \inst   \reg, [\addr], \post_inc;
-                       nop;
-               alternative_else
-                       \alt_inst       \reg, [\addr];
-                       add             \addr, \addr, \post_inc;
-               alternative_endif
-
-               _asm_extable    8888b,\l;
-       .endm
-#else
-       .macro uao_ldp l, reg1, reg2, addr, post_inc
-               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_stp l, reg1, reg2, addr, post_inc
-               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
-       .endm
-       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
-               USER(\l, \inst \reg, [\addr], \post_inc)
-       .endm
-#endif
-
-#endif  /*  __ASSEMBLY__  */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- *      will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...)   \
-       _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
-
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f68a0e6..2c26ca5 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __ASM_ASM_UACCESS_H
 #define __ASM_ASM_UACCESS_H
 
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
@@ -58,4 +58,63 @@ alternative_else_nop_endif
        .endm
 #endif
 
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there are no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  ldp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       ldtr    \reg1, [\addr];
+                       ldtr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  stp     \reg1, \reg2, [\addr], \post_inc;
+8889:                  nop;
+                       nop;
+               alternative_else
+                       sttr    \reg1, [\addr];
+                       sttr    \reg2, [\addr, #8];
+                       add     \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+               _asm_extable    8889b,\l;
+       .endm
+
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               alternative_if_not ARM64_HAS_UAO
+8888:                  \inst   \reg, [\addr], \post_inc;
+                       nop;
+               alternative_else
+                       \alt_inst       \reg, [\addr];
+                       add             \addr, \addr, \post_inc;
+               alternative_endif
+
+               _asm_extable    8888b,\l;
+       .endm
+#else
+       .macro uao_ldp l, reg1, reg2, addr, post_inc
+               USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_stp l, reg1, reg2, addr, post_inc
+               USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+       .endm
+       .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+               USER(\l, \inst \reg, [\addr], \post_inc)
+       .endm
+#endif
+
 #endif
index e7d9899..64ea0bb 100644 (file)
@@ -66,7 +66,8 @@
 #define ARM64_HAS_TLB_RANGE                    56
 #define ARM64_MTE                              57
 #define ARM64_WORKAROUND_1508412               58
+#define ARM64_HAS_LDAPR                                59
 
-#define ARM64_NCAPS                            59
+#define ARM64_NCAPS                            60
 
 #endif /* __ASM_CPUCAPS_H */
index 97244d4..f5b44ac 100644 (file)
@@ -765,8 +765,16 @@ static inline bool cpu_has_hw_af(void)
 #ifdef CONFIG_ARM64_AMU_EXTN
 /* Check whether the cpu supports the Activity Monitors Unit (AMU) */
 extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+       return false;
+}
 #endif
 
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
 static inline unsigned int get_vmid_bits(u64 mmfr1)
 {
        int vmid_bits;
index 99b9383..2a8aa18 100644 (file)
@@ -32,7 +32,7 @@ static inline u32 disr_to_esr(u64 disr)
 }
 
 asmlinkage void enter_from_user_mode(void);
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
 asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
index 4b39293..4ebb9c0 100644 (file)
@@ -10,8 +10,7 @@
 #include <linux/build_bug.h>
 #include <linux/types.h>
 
-/* A64 instructions are always 32 bits. */
-#define        AARCH64_INSN_SIZE               4
+#include <asm/alternative.h>
 
 #ifndef __ASSEMBLY__
 /*
index cd61239..556cb2d 100644 (file)
@@ -30,8 +30,8 @@
  * keep a constant PAGE_OFFSET and "fallback" to using the higher end
  * of the VMEMMAP where 52-bit support is not available in hardware.
  */
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
-                       >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT  (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE   ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
 
 /*
  * PAGE_OFFSET - the virtual address of the start of the linear map, at the
 #define _PAGE_OFFSET(va)       (-(UL(1) << (va)))
 #define PAGE_OFFSET            (_PAGE_OFFSET(VA_BITS))
 #define KIMAGE_VADDR           (MODULES_END)
-#define BPF_JIT_REGION_START   (KASAN_SHADOW_END)
+#define BPF_JIT_REGION_START   (_PAGE_END(VA_BITS_MIN))
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
 #define BPF_JIT_REGION_END     (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
 #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR          (BPF_JIT_REGION_END)
 #define MODULES_VSIZE          (SZ_128M)
-#define VMEMMAP_START          (-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END             (VMEMMAP_START - SZ_2M)
+#define PCI_IO_END             (VMEMMAP_START - SZ_8M)
 #define PCI_IO_START           (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP            (PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP            (VMEMMAP_START - SZ_32M)
 
 #if VA_BITS > 48
 #define VA_BITS_MIN            (48)
 #define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 #define KASAN_SHADOW_END       ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
                                        + KASAN_SHADOW_OFFSET)
+#define PAGE_END               (KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
 #define KASAN_THREAD_SHIFT     1
 #else
 #define KASAN_THREAD_SHIFT     0
-#define KASAN_SHADOW_END       (_PAGE_END(VA_BITS_MIN))
+#define PAGE_END               (_PAGE_END(VA_BITS_MIN))
 #endif /* CONFIG_KASAN */
 
 #define MIN_THREAD_SHIFT       (14 + KASAN_THREAD_SHIFT)
 #include <asm/bug.h>
 
 extern u64                     vabits_actual;
-#define PAGE_END               (_PAGE_END(vabits_actual))
 
 extern s64                     memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
@@ -238,11 +238,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 
 
 /*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * The linear kernel range starts at the bottom of the virtual address space.
  */
-#define __is_lm_address(addr)  (!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr)  (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
 
 #define __lm_to_phys(addr)     (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
 #define __kimg_to_phys(addr)   ((addr) - kimage_voffset)
index 4ff12a7..ec307b8 100644 (file)
@@ -22,7 +22,7 @@
  *     and fixed mappings
  */
 #define VMALLOC_START          (MODULES_END)
-#define VMALLOC_END            (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END            (VMEMMAP_START - SZ_256M)
 
 #define vmemmap                        ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644 (file)
index 0000000..1bce62f
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...)                                      \
+       ALTERNATIVE(                                                    \
+               "ldar"  #sfx "\t" #regs,                                \
+               ".arch_extension rcpc\n"                                \
+               "ldapr" #sfx "\t" #regs,                                \
+       ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...)      "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x)                                                 \
+({                                                                     \
+       typeof(&(x)) __x = &(x);                                        \
+       int atomic = 1;                                                 \
+       union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+       switch (sizeof(x)) {                                            \
+       case 1:                                                         \
+               asm volatile(__LOAD_RCPC(b, %w0, %1)                    \
+                       : "=r" (*(__u8 *)__u.__c)                       \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 2:                                                         \
+               asm volatile(__LOAD_RCPC(h, %w0, %1)                    \
+                       : "=r" (*(__u16 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 4:                                                         \
+               asm volatile(__LOAD_RCPC(, %w0, %1)                     \
+                       : "=r" (*(__u32 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       case 8:                                                         \
+               asm volatile(__LOAD_RCPC(, %0, %1)                      \
+                       : "=r" (*(__u64 *)__u.__c)                      \
+                       : "Q" (*__x) : "memory");                       \
+               break;                                                  \
+       default:                                                        \
+               atomic = 0;                                             \
+       }                                                               \
+       atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
new file mode 100644 (file)
index 0000000..ef449f5
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_ASM_SIGNAL_H
+#define __ARM64_ASM_SIGNAL_H
+
+#include <asm/memory.h>
+#include <uapi/asm/signal.h>
+#include <uapi/asm/siginfo.h>
+
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+                                                unsigned long sig,
+                                                unsigned long si_code)
+{
+       /*
+        * For historical reasons, all bits of the fault address are exposed as
+        * address bits for watchpoint exceptions. New architectures should
+        * handle the tag bits consistently.
+        */
+       if (sig == SIGTRAP && si_code == TRAP_BRKPT)
+               return addr;
+
+       return untagged_addr(addr);
+}
+#define arch_untagged_si_addr arch_untagged_si_addr
+
+#endif
index 1ab63cf..673be2d 100644 (file)
@@ -22,7 +22,7 @@ void die(const char *msg, struct pt_regs *regs, int err);
 
 struct siginfo;
 void arm64_notify_die(const char *str, struct pt_regs *regs,
-                     int signo, int sicode, void __user *addr,
+                     int signo, int sicode, unsigned long far,
                      int err);
 
 void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
index 11a4652..3b8dca4 100644 (file)
@@ -16,12 +16,14 @@ int pcibus_to_node(struct pci_bus *bus);
 
 #include <linux/arch_topology.h>
 
+void update_freq_counters_refs(void);
+void topology_scale_freq_tick(void);
+
 #ifdef CONFIG_ARM64_AMU_EXTN
 /*
  * Replace task scheduler's default counter-based
  * frequency-invariance scale factor setting.
  */
-void topology_scale_freq_tick(void);
 #define arch_scale_freq_tick topology_scale_freq_tick
 #endif /* CONFIG_ARM64_AMU_EXTN */
 
index d96dc2c..54f32a0 100644 (file)
@@ -26,9 +26,9 @@ void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
 void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
 void arm64_notify_segfault(unsigned long addr);
-void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str);
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str);
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, const char *str);
+void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
 
 /*
  * Move regs->pc to next instruction and do necessary setup before it
index 7303994..a57cffb 100644 (file)
@@ -21,7 +21,8 @@
 #define ALT_ORIG_PTR(a)                __ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)                __ALT_PTR(a, alt_offset)
 
-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
 
 static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
 
@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 
        /* We always have a CPU 0 at this point (__init) */
        if (smp_processor_id()) {
-               while (!READ_ONCE(all_alternatives_applied))
+               while (!all_alternatives_applied)
                        cpu_relax();
                isb();
        } else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
                BUG_ON(all_alternatives_applied);
                __apply_alternatives(&region, false, remaining_capabilities);
                /* Barriers provided by the cache flushing */
-               WRITE_ONCE(all_alternatives_applied, 1);
+               all_alternatives_applied = 1;
        }
 
        return 0;
index dcc165b..bffcd55 100644 (file)
@@ -1526,8 +1526,10 @@ bool cpu_has_amu_feat(int cpu)
        return cpumask_test_cpu(cpu, &amu_cpus);
 }
 
-/* Initialize the use of AMU counters for frequency invariance */
-extern void init_cpu_freq_invariance_counters(void);
+int get_cpu_with_amu_feat(void)
+{
+       return cpumask_any(&amu_cpus);
+}
 
 static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
 {
@@ -1535,7 +1537,7 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
                pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
                        smp_processor_id());
                cpumask_set_cpu(smp_processor_id(), &amu_cpus);
-               init_cpu_freq_invariance_counters();
+               update_freq_counters_refs();
        }
 }
 
@@ -1557,6 +1559,11 @@ static bool has_amu(const struct arm64_cpu_capabilities *cap,
 
        return true;
 }
+#else
+int get_cpu_with_amu_feat(void)
+{
+       return nr_cpu_ids;
+}
 #endif
 
 #ifdef CONFIG_ARM64_VHE
@@ -2136,6 +2143,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .cpu_enable = cpu_enable_mte,
        },
 #endif /* CONFIG_ARM64_MTE */
+       {
+               .desc = "RCpc load-acquire (LDAPR)",
+               .capability = ARM64_HAS_LDAPR,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .sys_reg = SYS_ID_AA64ISAR1_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+               .matches = has_cpuid_feature,
+               .min_field_value = 1,
+       },
        {},
 };
 
index fa76151..4f3661e 100644 (file)
@@ -234,9 +234,8 @@ static void send_user_sigtrap(int si_code)
        if (interrupts_enabled(regs))
                local_irq_enable();
 
-       arm64_force_sig_fault(SIGTRAP, si_code,
-                            (void __user *)instruction_pointer(regs),
-                            "User debug trap");
+       arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+                             "User debug trap");
 }
 
 static int single_step_handler(unsigned long unused, unsigned int esr,
index a71844f..28d8a5d 100644 (file)
@@ -7,30 +7,48 @@
 #include <linux/pe.h>
 #include <linux/sizes.h>
 
+       .macro  efi_signature_nop
+#ifdef CONFIG_EFI
+.L_head:
+       /*
+        * This ccmp instruction has no meaningful effect except that
+        * its opcode forms the magic "MZ" signature required by UEFI.
+        */
+       ccmp    x18, #0, #0xd, pl
+#else
+       /*
+        * Bootloaders may inspect the opcode at the start of the kernel
+        * image to decide if the kernel is capable of booting via UEFI.
+        * So put an ordinary NOP here, not the "MZ.." pseudo-nop above.
+        */
+       nop
+#endif
+       .endm
+
        .macro  __EFI_PE_HEADER
+#ifdef CONFIG_EFI
+       .set    .Lpe_header_offset, . - .L_head
        .long   PE_MAGIC
-coff_header:
        .short  IMAGE_FILE_MACHINE_ARM64                // Machine
-       .short  section_count                           // NumberOfSections
+       .short  .Lsection_count                         // NumberOfSections
        .long   0                                       // TimeDateStamp
        .long   0                                       // PointerToSymbolTable
        .long   0                                       // NumberOfSymbols
-       .short  section_table - optional_header         // SizeOfOptionalHeader
+       .short  .Lsection_table - .Loptional_header     // SizeOfOptionalHeader
        .short  IMAGE_FILE_DEBUG_STRIPPED | \
                IMAGE_FILE_EXECUTABLE_IMAGE | \
                IMAGE_FILE_LINE_NUMS_STRIPPED           // Characteristics
 
-optional_header:
+.Loptional_header:
        .short  PE_OPT_MAGIC_PE32PLUS                   // PE32+ format
        .byte   0x02                                    // MajorLinkerVersion
        .byte   0x14                                    // MinorLinkerVersion
-       .long   __initdata_begin - efi_header_end       // SizeOfCode
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfCode
        .long   __pecoff_data_size                      // SizeOfInitializedData
        .long   0                                       // SizeOfUninitializedData
-       .long   __efistub_efi_pe_entry - _head          // AddressOfEntryPoint
-       .long   efi_header_end - _head                  // BaseOfCode
+       .long   __efistub_efi_pe_entry - .L_head        // AddressOfEntryPoint
+       .long   .Lefi_header_end - .L_head              // BaseOfCode
 
-extra_header_fields:
        .quad   0                                       // ImageBase
        .long   SEGMENT_ALIGN                           // SectionAlignment
        .long   PECOFF_FILE_ALIGNMENT                   // FileAlignment
@@ -42,10 +60,10 @@ extra_header_fields:
        .short  0                                       // MinorSubsystemVersion
        .long   0                                       // Win32VersionValue
 
-       .long   _end - _head                            // SizeOfImage
+       .long   _end - .L_head                          // SizeOfImage
 
        // Everything before the kernel image is considered part of the header
-       .long   efi_header_end - _head                  // SizeOfHeaders
+       .long   .Lefi_header_end - .L_head              // SizeOfHeaders
        .long   0                                       // CheckSum
        .short  IMAGE_SUBSYSTEM_EFI_APPLICATION         // Subsystem
        .short  0                                       // DllCharacteristics
@@ -54,7 +72,7 @@ extra_header_fields:
        .quad   0                                       // SizeOfHeapReserve
        .quad   0                                       // SizeOfHeapCommit
        .long   0                                       // LoaderFlags
-       .long   (section_table - .) / 8                 // NumberOfRvaAndSizes
+       .long   (.Lsection_table - .) / 8               // NumberOfRvaAndSizes
 
        .quad   0                                       // ExportTable
        .quad   0                                       // ImportTable
@@ -64,17 +82,17 @@ extra_header_fields:
        .quad   0                                       // BaseRelocationTable
 
 #ifdef CONFIG_DEBUG_EFI
-       .long   efi_debug_table - _head                 // DebugTable
-       .long   efi_debug_table_size
+       .long   .Lefi_debug_table - .L_head             // DebugTable
+       .long   .Lefi_debug_table_size
 #endif
 
        // Section table
-section_table:
+.Lsection_table:
        .ascii  ".text\0\0\0"
-       .long   __initdata_begin - efi_header_end       // VirtualSize
-       .long   efi_header_end - _head                  // VirtualAddress
-       .long   __initdata_begin - efi_header_end       // SizeOfRawData
-       .long   efi_header_end - _head                  // PointerToRawData
+       .long   __initdata_begin - .Lefi_header_end     // VirtualSize
+       .long   .Lefi_header_end - .L_head              // VirtualAddress
+       .long   __initdata_begin - .Lefi_header_end     // SizeOfRawData
+       .long   .Lefi_header_end - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -86,9 +104,9 @@ section_table:
 
        .ascii  ".data\0\0\0"
        .long   __pecoff_data_size                      // VirtualSize
-       .long   __initdata_begin - _head                // VirtualAddress
+       .long   __initdata_begin - .L_head              // VirtualAddress
        .long   __pecoff_data_rawsize                   // SizeOfRawData
-       .long   __initdata_begin - _head                // PointerToRawData
+       .long   __initdata_begin - .L_head              // PointerToRawData
 
        .long   0                                       // PointerToRelocations
        .long   0                                       // PointerToLineNumbers
@@ -98,7 +116,7 @@ section_table:
                IMAGE_SCN_MEM_READ | \
                IMAGE_SCN_MEM_WRITE                     // Characteristics
 
-       .set    section_count, (. - section_table) / 40
+       .set    .Lsection_count, (. - .Lsection_table) / 40
 
 #ifdef CONFIG_DEBUG_EFI
        /*
@@ -114,21 +132,21 @@ section_table:
        __INITRODATA
 
        .align  2
-efi_debug_table:
+.Lefi_debug_table:
        // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
        .long   0                                       // Characteristics
        .long   0                                       // TimeDateStamp
        .short  0                                       // MajorVersion
        .short  0                                       // MinorVersion
        .long   IMAGE_DEBUG_TYPE_CODEVIEW               // Type
-       .long   efi_debug_entry_size                    // SizeOfData
+       .long   .Lefi_debug_entry_size                  // SizeOfData
        .long   0                                       // RVA
-       .long   efi_debug_entry - _head                 // FileOffset
+       .long   .Lefi_debug_entry - .L_head             // FileOffset
 
-       .set    efi_debug_table_size, . - efi_debug_table
+       .set    .Lefi_debug_table_size, . - .Lefi_debug_table
        .previous
 
-efi_debug_entry:
+.Lefi_debug_entry:
        // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
        .ascii  "NB10"                                  // Signature
        .long   0                                       // Unknown
@@ -137,16 +155,12 @@ efi_debug_entry:
 
        .asciz  VMLINUX_PATH
 
-       .set    efi_debug_entry_size, . - efi_debug_entry
+       .set    .Lefi_debug_entry_size, . - .Lefi_debug_entry
 #endif
 
-       /*
-        * EFI will load .text onwards at the 4k section alignment
-        * described in the PE/COFF header. To ensure that instruction
-        * sequences using an adrp and a :lo12: immediate will function
-        * correctly at this alignment, we must ensure that .text is
-        * placed at a 4k boundary in the Image to begin with.
-        */
        .balign SEGMENT_ALIGN
-efi_header_end:
+.Lefi_header_end:
+#else
+       .set    .Lpe_header_offset, 0x0
+#endif
        .endm
index 43d4c32..dbbddfb 100644 (file)
@@ -22,7 +22,6 @@ static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
        unsigned long far = read_sysreg(far_el1);
 
        local_daif_inherit(regs);
-       far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
 }
 NOKPROBE_SYMBOL(el1_abort);
@@ -114,7 +113,6 @@ static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
 
        user_exit_irqoff();
        local_daif_restore(DAIF_PROCCTX);
-       far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
 }
 NOKPROBE_SYMBOL(el0_da);
index d8d9caf..c1f8f2c 100644 (file)
  * in the entry routines.
  */
        __HEAD
-_head:
        /*
         * DO NOT MODIFY. Image header expected by Linux boot-loaders.
         */
-#ifdef CONFIG_EFI
-       /*
-        * This add instruction has no meaningful effect except that
-        * its opcode forms the magic "MZ" signature required by UEFI.
-        */
-       add     x13, x18, #0x16
-       b       primary_entry
-#else
+       efi_signature_nop                       // special NOP to identify as PE/COFF executable
        b       primary_entry                   // branch to kernel start, magic
-       .long   0                               // reserved
-#endif
        .quad   0                               // Image load offset from start of RAM, little-endian
        le64sym _kernel_size_le                 // Effective size of kernel image, little-endian
        le64sym _kernel_flags_le                // Informative flags, little-endian
@@ -80,14 +70,9 @@ _head:
        .quad   0                               // reserved
        .quad   0                               // reserved
        .ascii  ARM64_IMAGE_MAGIC               // Magic number
-#ifdef CONFIG_EFI
-       .long   pe_header - _head               // Offset to the PE header.
+       .long   .Lpe_header_offset              // Offset to the PE header.
 
-pe_header:
        __EFI_PE_HEADER
-#else
-       .long   0                               // reserved
-#endif
 
        __INIT
 
index c18eb7d..4b202e4 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/prctl.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm/insn.h>
 #include <asm/spectre.h>
 #include <asm/traps.h>
 
index f49b349..8ac487c 100644 (file)
@@ -192,14 +192,11 @@ static void ptrace_hbptriggered(struct perf_event *bp,
                                break;
                        }
                }
-               arm64_force_sig_ptrace_errno_trap(si_errno,
-                                                 (void __user *)bkpt->trigger,
+               arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
                                                  desc);
        }
 #endif
-       arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT,
-                             (void __user *)(bkpt->trigger),
-                             desc);
+       arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
 }
 
 /*
index 133257f..fe1cf52 100644 (file)
@@ -206,7 +206,7 @@ static void __init request_standard_resources(void)
        unsigned long i = 0;
        size_t res_size;
 
-       kernel_code.start   = __pa_symbol(_text);
+       kernel_code.start   = __pa_symbol(_stext);
        kernel_code.end     = __pa_symbol(__init_begin - 1);
        kernel_data.start   = __pa_symbol(_sdata);
        kernel_data.end     = __pa_symbol(_end - 1);
@@ -283,7 +283,7 @@ u64 cpu_logical_map(int cpu)
 
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
-       init_mm.start_code = (unsigned long) _text;
+       init_mm.start_code = (unsigned long) _stext;
        init_mm.end_code   = (unsigned long) _etext;
        init_mm.end_data   = (unsigned long) _edata;
        init_mm.brk        = (unsigned long) _end;
index 3c18c24..265fe3e 100644 (file)
@@ -68,7 +68,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
  */
 long compat_arm_syscall(struct pt_regs *regs, int scno)
 {
-       void __user *addr;
+       unsigned long addr;
 
        switch (scno) {
        /*
@@ -111,8 +111,7 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
                break;
        }
 
-       addr  = (void __user *)instruction_pointer(regs) -
-               (compat_thumb_mode(regs) ? 2 : 4);
+       addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
 
        arm64_notify_die("Oops - bad compat syscall(2)", regs,
                         SIGILL, ILL_ILLTRP, addr, scno);
index 543c67c..b8026ec 100644 (file)
@@ -124,6 +124,12 @@ int __init parse_acpi_topology(void)
 #endif
 
 #ifdef CONFIG_ARM64_AMU_EXTN
+#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
+#define read_constcnt()        read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
+#else
+#define read_corecnt() (0UL)
+#define read_constcnt()        (0UL)
+#endif
 
 #undef pr_fmt
 #define pr_fmt(fmt) "AMU: " fmt
@@ -133,54 +139,58 @@ static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
 static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
 static cpumask_var_t amu_fie_cpus;
 
-/* Initialize counter reference per-cpu variables for the current CPU */
-void init_cpu_freq_invariance_counters(void)
+void update_freq_counters_refs(void)
 {
-       this_cpu_write(arch_core_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
-       this_cpu_write(arch_const_cycles_prev,
-                      read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+       this_cpu_write(arch_core_cycles_prev, read_corecnt());
+       this_cpu_write(arch_const_cycles_prev, read_constcnt());
 }
 
-static int validate_cpu_freq_invariance_counters(int cpu)
+static inline bool freq_counters_valid(int cpu)
 {
-       u64 max_freq_hz, ratio;
+       if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+               return false;
 
        if (!cpu_has_amu_feat(cpu)) {
                pr_debug("CPU%d: counters are not supported.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
        if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
                     !per_cpu(arch_core_cycles_prev, cpu))) {
                pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
-               return -EINVAL;
+               return false;
        }
 
-       /* Convert maximum frequency from KHz to Hz and validate */
-       max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
-       if (unlikely(!max_freq_hz)) {
-               pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+       return true;
+}
+
+static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+{
+       u64 ratio;
+
+       if (unlikely(!max_rate || !ref_rate)) {
+               pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+                        cpu);
                return -EINVAL;
        }
 
        /*
         * Pre-compute the fixed ratio between the frequency of the constant
-        * counter and the maximum frequency of the CPU.
+        * reference counter and the maximum frequency of the CPU.
         *
-        *                            const_freq
-        * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
-        *                         cpuinfo_max_freq
+        *                          ref_rate
+        * arch_max_freq_scale =   ---------- * SCHED_CAPACITY_SCALE²
+        *                          max_rate
         *
         * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
         * in order to ensure a good resolution for arch_max_freq_scale for
-        * very low arch timer frequencies (down to the KHz range which should
+        * very low reference frequencies (down to the KHz range which should
         * be unlikely).
         */
-       ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
-       ratio = div64_u64(ratio, max_freq_hz);
+       ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
+       ratio = div64_u64(ratio, max_rate);
        if (!ratio) {
-               WARN_ONCE(1, "System timer frequency too low.\n");
+               WARN_ONCE(1, "Reference frequency too low.\n");
                return -EINVAL;
        }
 
@@ -227,8 +237,12 @@ static int __init init_amu_fie(void)
        }
 
        for_each_present_cpu(cpu) {
-               if (validate_cpu_freq_invariance_counters(cpu))
+               if (!freq_counters_valid(cpu) ||
+                   freq_inv_set_max_ratio(cpu,
+                                          cpufreq_get_hw_max_freq(cpu) * 1000,
+                                          arch_timer_get_rate()))
                        continue;
+
                cpumask_set_cpu(cpu, valid_cpus);
                have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
        }
@@ -280,11 +294,14 @@ void topology_scale_freq_tick(void)
        if (!cpumask_test_cpu(cpu, amu_fie_cpus))
                return;
 
-       const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
-       core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
        prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
        prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
 
+       update_freq_counters_refs();
+
+       const_cnt = this_cpu_read(arch_const_cycles_prev);
+       core_cnt = this_cpu_read(arch_core_cycles_prev);
+
        if (unlikely(core_cnt <= prev_core_cnt ||
                     const_cnt <= prev_const_cnt))
                goto store_and_exit;
@@ -309,4 +326,71 @@ store_and_exit:
        this_cpu_write(arch_core_cycles_prev, core_cnt);
        this_cpu_write(arch_const_cycles_prev, const_cnt);
 }
-#endif /* CONFIG_ARM64_AMU_EXTN */
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+
+static void cpu_read_corecnt(void *val)
+{
+       *(u64 *)val = read_corecnt();
+}
+
+static void cpu_read_constcnt(void *val)
+{
+       *(u64 *)val = read_constcnt();
+}
+
+static inline
+int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+{
+       /*
+        * Abort call on counterless CPU or when interrupts are disabled,
+        * as the latter can lead to deadlock in the smp sync call.
+        */
+       if (!cpu_has_amu_feat(cpu))
+               return -EOPNOTSUPP;
+
+       if (WARN_ON_ONCE(irqs_disabled()))
+               return -EPERM;
+
+       smp_call_function_single(cpu, func, val, 1);
+
+       return 0;
+}
+
+/*
+ * Refer to drivers/acpi/cppc_acpi.c for the description of the functions
+ * below.
+ */
+bool cpc_ffh_supported(void)
+{
+       return freq_counters_valid(get_cpu_with_amu_feat());
+}
+
+int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+{
+       int ret = -EOPNOTSUPP;
+
+       switch ((u64)reg->address) {
+       case 0x0:
+               ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val);
+               break;
+       case 0x1:
+               ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val);
+               break;
+       }
+
+       if (!ret) {
+               *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+                                   reg->bit_offset);
+               *val >>= reg->bit_offset;
+       }
+
+       return ret;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+       return -EOPNOTSUPP;
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
index 8af4e0e..f4ddbe9 100644 (file)
@@ -170,32 +170,32 @@ static void arm64_show_signal(int signo, const char *str)
        __show_regs(regs);
 }
 
-void arm64_force_sig_fault(int signo, int code, void __user *addr,
+void arm64_force_sig_fault(int signo, int code, unsigned long far,
                           const char *str)
 {
        arm64_show_signal(signo, str);
        if (signo == SIGKILL)
                force_sig(SIGKILL);
        else
-               force_sig_fault(signo, code, addr);
+               force_sig_fault(signo, code, (void __user *)far);
 }
 
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb,
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
                            const char *str)
 {
        arm64_show_signal(SIGBUS, str);
-       force_sig_mceerr(code, addr, lsb);
+       force_sig_mceerr(code, (void __user *)far, lsb);
 }
 
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr,
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
                                       const char *str)
 {
        arm64_show_signal(SIGTRAP, str);
-       force_sig_ptrace_errno_trap(errno, addr);
+       force_sig_ptrace_errno_trap(errno, (void __user *)far);
 }
 
 void arm64_notify_die(const char *str, struct pt_regs *regs,
-                     int signo, int sicode, void __user *addr,
+                     int signo, int sicode, unsigned long far,
                      int err)
 {
        if (user_mode(regs)) {
@@ -203,7 +203,7 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
                current->thread.fault_address = 0;
                current->thread.fault_code = err;
 
-               arm64_force_sig_fault(signo, sicode, addr, str);
+               arm64_force_sig_fault(signo, sicode, far, str);
        } else {
                die(str, regs, err);
        }
@@ -374,7 +374,7 @@ void force_signal_inject(int signal, int code, unsigned long address, unsigned i
                signal = SIGKILL;
        }
 
-       arm64_notify_die(desc, regs, signal, code, (void __user *)address, err);
+       arm64_notify_die(desc, regs, signal, code, address, err);
 }
 
 /*
@@ -385,7 +385,7 @@ void arm64_notify_segfault(unsigned long addr)
        int code;
 
        mmap_read_lock(current->mm);
-       if (find_vma(current->mm, addr) == NULL)
+       if (find_vma(current->mm, untagged_addr(addr)) == NULL)
                code = SEGV_MAPERR;
        else
                code = SEGV_ACCERR;
@@ -448,12 +448,13 @@ NOKPROBE_SYMBOL(do_ptrauth_fault);
 
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
-       unsigned long address;
+       unsigned long tagged_address, address;
        int rt = ESR_ELx_SYS64_ISS_RT(esr);
        int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
        int ret = 0;
 
-       address = untagged_addr(pt_regs_read_reg(regs, rt));
+       tagged_address = pt_regs_read_reg(regs, rt);
+       address = untagged_addr(tagged_address);
 
        switch (crm) {
        case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:     /* DC CVAU, gets promoted */
@@ -480,7 +481,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
        }
 
        if (ret)
-               arm64_notify_segfault(address);
+               arm64_notify_segfault(tagged_address);
        else
                arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
@@ -772,7 +773,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
  */
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 {
-       void __user *pc = (void __user *)instruction_pointer(regs);
+       unsigned long pc = instruction_pointer(regs);
 
        current->thread.fault_address = 0;
        current->thread.fault_code = esr;
index d65f522..a8f8e40 100644 (file)
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv      \
             $(btildflags-y) -T
 
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
 KASAN_SANITIZE                 := n
index 79280c5..a1e0f91 100644 (file)
@@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\
 # As a result we set our own flags here.
 
 # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
 VDSO_CPPFLAGS += $(LINUXINCLUDE)
 
 # Common C and assembly flags
index 1bda604..94a08e3 100644 (file)
@@ -121,7 +121,7 @@ SECTIONS
                _text = .;
                HEAD_TEXT
        }
-       .text : {                       /* Real text segment            */
+       .text : ALIGN(SEGMENT_ALIGN) {  /* Real text segment            */
                _stext = .;             /* Text and read-only data      */
                        IRQENTRY_TEXT
                        SOFTIRQENTRY_TEXT
@@ -201,7 +201,7 @@ SECTIONS
                INIT_CALLS
                CON_INITCALL
                INIT_RAM_FS
-               *(.init.rodata.* .init.bss)     /* from the EFI stub */
+               *(.init.altinstructions .init.rodata.* .init.bss)       /* from the EFI stub */
        }
        .exit.data : {
                EXIT_DATA
index 57972bd..1a01da9 100644 (file)
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        }
 
        switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SHIFT:
                if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
                        break;
                fallthrough;
+#endif
        case CONT_PMD_SHIFT:
                vma_shift = PMD_SHIFT;
                fallthrough;
index 03ca6d8..cceed41 100644 (file)
@@ -4,7 +4,7 @@
  */
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
+#include <asm/asm-uaccess.h>
 #include <asm/assembler.h>
 #include <asm/mte.h>
 #include <asm/page.h>
index 1ee9400..29a6b8c 100644 (file)
@@ -40,7 +40,7 @@
 #include <asm/traps.h>
 
 struct fault_info {
-       int     (*fn)(unsigned long addr, unsigned int esr,
+       int     (*fn)(unsigned long far, unsigned int esr,
                      struct pt_regs *regs);
        int     sig;
        int     code;
@@ -385,8 +385,11 @@ static void set_thread_esr(unsigned long address, unsigned int esr)
        current->thread.fault_code = esr;
 }
 
-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned int esr,
+                       struct pt_regs *regs)
 {
+       unsigned long addr = untagged_addr(far);
+
        /*
         * If we are in kernel mode at this point, we have no context to
         * handle this fault with.
@@ -395,8 +398,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
                const struct fault_info *inf = esr_to_fault_info(esr);
 
                set_thread_esr(addr, esr);
-               arm64_force_sig_fault(inf->sig, inf->code, (void __user *)addr,
-                                     inf->name);
+               arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
        } else {
                __do_kernel_fault(addr, esr, regs);
        }
@@ -448,7 +450,7 @@ static bool is_write_abort(unsigned int esr)
        return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
                                   struct pt_regs *regs)
 {
        const struct fault_info *inf;
@@ -456,6 +458,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        vm_fault_t fault;
        unsigned long vm_flags = VM_ACCESS_FLAGS;
        unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+       unsigned long addr = untagged_addr(far);
 
        if (kprobe_page_fault(regs, esr))
                return 0;
@@ -567,8 +570,7 @@ retry:
                 * We had some memory, but were unable to successfully fix up
                 * this page fault.
                 */
-               arm64_force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr,
-                                     inf->name);
+               arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
        } else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
                unsigned int lsb;
 
@@ -576,8 +578,7 @@ retry:
                if (fault & VM_FAULT_HWPOISON_LARGE)
                        lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
 
-               arm64_force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr, lsb,
-                                      inf->name);
+               arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
        } else {
                /*
                 * Something tried to access memory that isn't in our memory
@@ -585,8 +586,7 @@ retry:
                 */
                arm64_force_sig_fault(SIGSEGV,
                                      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
-                                     (void __user *)addr,
-                                     inf->name);
+                                     far, inf->name);
        }
 
        return 0;
@@ -596,33 +596,35 @@ no_context:
        return 0;
 }
 
-static int __kprobes do_translation_fault(unsigned long addr,
+static int __kprobes do_translation_fault(unsigned long far,
                                          unsigned int esr,
                                          struct pt_regs *regs)
 {
+       unsigned long addr = untagged_addr(far);
+
        if (is_ttbr0_addr(addr))
-               return do_page_fault(addr, esr, regs);
+               return do_page_fault(far, esr, regs);
 
-       do_bad_area(addr, esr, regs);
+       do_bad_area(far, esr, regs);
        return 0;
 }
 
-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+static int do_alignment_fault(unsigned long far, unsigned int esr,
                              struct pt_regs *regs)
 {
-       do_bad_area(addr, esr, regs);
+       do_bad_area(far, esr, regs);
        return 0;
 }
 
-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        return 1; /* "fault" */
 }
 
-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        const struct fault_info *inf;
-       void __user *siaddr;
+       unsigned long siaddr;
 
        inf = esr_to_fault_info(esr);
 
@@ -634,19 +636,30 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                return 0;
        }
 
-       if (esr & ESR_ELx_FnV)
-               siaddr = NULL;
-       else
-               siaddr  = (void __user *)addr;
+       if (esr & ESR_ELx_FnV) {
+               siaddr = 0;
+       } else {
+               /*
+                * The architecture specifies that the tag bits of FAR_EL1 are
+                * UNKNOWN for synchronous external aborts. Mask them out now
+                * so that userspace doesn't see them.
+                */
+               siaddr  = untagged_addr(far);
+       }
        arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
 
        return 0;
 }
 
-static int do_tag_check_fault(unsigned long addr, unsigned int esr,
+static int do_tag_check_fault(unsigned long far, unsigned int esr,
                              struct pt_regs *regs)
 {
-       do_bad_area(addr, esr, regs);
+       /*
+        * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN for tag
+        * check faults. Mask them out now so that userspace doesn't see them.
+        */
+       far &= (1UL << 60) - 1;
+       do_bad_area(far, esr, regs);
        return 0;
 }
 
@@ -717,11 +730,12 @@ static const struct fault_info fault_info[] = {
        { do_bad,               SIGKILL, SI_KERNEL,     "unknown 63"                    },
 };
 
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
        const struct fault_info *inf = esr_to_fault_info(esr);
+       unsigned long addr = untagged_addr(far);
 
-       if (!inf->fn(addr, esr, regs))
+       if (!inf->fn(far, esr, regs))
                return;
 
        if (!user_mode(regs)) {
@@ -730,8 +744,12 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                show_pte(addr);
        }
 
-       arm64_notify_die(inf->name, regs,
-                        inf->sig, inf->code, (void __user *)addr, esr);
+       /*
+        * At this point we have an unrecognized fault type whose tag bits may
+        * have been defined as UNKNOWN. Therefore we only expose the untagged
+        * address to the signal handler.
+        */
+       arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
 }
 NOKPROBE_SYMBOL(do_mem_abort);
 
@@ -744,8 +762,8 @@ NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
 
 void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-       arm64_notify_die("SP/PC alignment exception", regs,
-                        SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
+       arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+                        addr, esr);
 }
 NOKPROBE_SYMBOL(do_sp_pc_abort);
 
@@ -871,8 +889,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
                arm64_apply_bp_hardening();
 
        if (inf->fn(addr_if_watchpoint, esr, regs)) {
-               arm64_notify_die(inf->name, regs,
-                                inf->sig, inf->code, (void __user *)pc, esr);
+               arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
        }
 
        debug_exception_exit(regs);
index 0955406..fbd452e 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
+#include <linux/acpi_iort.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -42,8 +43,6 @@
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
-#define ARM64_ZONE_DMA_BITS    30
-
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -175,21 +174,34 @@ static void __init reserve_elfcorehdr(void)
 #endif /* CONFIG_CRASH_DUMP */
 
 /*
- * Return the maximum physical address for a zone with a given address size
- * limit. It currently assumes that for memory starting above 4G, 32-bit
- * devices will use a DMA offset.
+ * Return the maximum physical address for a zone limited to zone_bits address
+ * bits. If DRAM starts above 32-bit, expand the zone to the maximum available
+ * memory; if it starts above the zone_bits limit, widen the zone to 32-bit.
  */
 static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
 {
-       phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
-       return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
+       phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
+       phys_addr_t phys_start = memblock_start_of_DRAM();
+
+       if (phys_start > U32_MAX)
+               zone_mask = PHYS_ADDR_MAX;
+       else if (phys_start > zone_mask)
+               zone_mask = U32_MAX;
+
+       return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
 }
 
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
+       unsigned int __maybe_unused acpi_zone_dma_bits;
+       unsigned int __maybe_unused dt_zone_dma_bits;
 
 #ifdef CONFIG_ZONE_DMA
+       acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
+       dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
+       zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
+       arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
        max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
@@ -269,7 +281,7 @@ static void __init fdt_enforce_memory_region(void)
 
 void __init arm64_memblock_init(void)
 {
-       const s64 linear_region_size = BIT(vabits_actual - 1);
+       const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
 
        /* Handle linux,usable-memory-range property */
        fdt_enforce_memory_region();
@@ -348,15 +360,18 @@ void __init arm64_memblock_init(void)
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
                extern u16 memstart_offset_seed;
-               u64 range = linear_region_size -
-                           (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+               u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+               int parange = cpuid_feature_extract_unsigned_field(
+                                       mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+               s64 range = linear_region_size -
+                           BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
 
                /*
                 * If the size of the linear region exceeds, by a sufficient
-                * margin, the size of the region that the available physical
-                * memory spans, randomize the linear region as well.
+                * margin, the size of the region that the physical memory can
+                * span, randomize the linear region as well.
                 */
-               if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+               if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
                        range /= ARM64_MEMSTART_ALIGN;
                        memstart_addr -= ARM64_MEMSTART_ALIGN *
                                         ((range * memstart_offset_seed) >> 16);
@@ -367,7 +382,7 @@ void __init arm64_memblock_init(void)
         * Register the kernel text, kernel data, initrd, and initial
         * pagetables with memblock.
         */
-       memblock_reserve(__pa_symbol(_text), _end - _text);
+       memblock_reserve(__pa_symbol(_stext), _end - _stext);
        if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
                /* the generic initrd code expects virtual addresses */
                initrd_start = __phys_to_virt(phys_initrd_start);
@@ -376,18 +391,11 @@ void __init arm64_memblock_init(void)
 
        early_init_fdt_scan_reserved_mem();
 
-       if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-               zone_dma_bits = ARM64_ZONE_DMA_BITS;
-               arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
-       }
-
        if (IS_ENABLED(CONFIG_ZONE_DMA32))
                arm64_dma32_phys_limit = max_zone_phys(32);
        else
                arm64_dma32_phys_limit = PHYS_MASK + 1;
 
-       reserve_crashkernel();
-
        reserve_elfcorehdr();
 
        high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -427,6 +435,12 @@ void __init bootmem_init(void)
        sparse_init();
        zone_sizes_init(min, max);
 
+       /*
+        * request_standard_resources() depends on crashkernel's memory being
+        * reserved, so do it here.
+        */
+       reserve_crashkernel();
+
        memblock_dump_all();
 }
 
index 1c0f3e0..fe0721a 100644 (file)
@@ -464,20 +464,35 @@ void __init mark_linear_text_alias_ro(void)
        /*
         * Remove the write permissions from the linear alias of .text/.rodata
         */
-       update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
-                           (unsigned long)__init_begin - (unsigned long)_text,
+       update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+                           (unsigned long)__init_begin - (unsigned long)_stext,
                            PAGE_KERNEL_RO);
 }
 
+static bool crash_mem_map __initdata;
+
+static int __init enable_crash_mem_map(char *arg)
+{
+       /*
+        * Proper parameter parsing is done by reserve_crashkernel(). We only
+        * need to know if the linear map has to avoid block mappings so that
+        * the crashkernel reservations can be unmapped later.
+        */
+       crash_mem_map = true;
+
+       return 0;
+}
+early_param("crashkernel", enable_crash_mem_map);
+
 static void __init map_mem(pgd_t *pgdp)
 {
-       phys_addr_t kernel_start = __pa_symbol(_text);
+       phys_addr_t kernel_start = __pa_symbol(_stext);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
        phys_addr_t start, end;
        int flags = 0;
        u64 i;
 
-       if (rodata_full || debug_pagealloc_enabled())
+       if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
        /*
@@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp)
         * the following for-loop
         */
        memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
-#ifdef CONFIG_KEXEC_CORE
-       if (crashk_res.end)
-               memblock_mark_nomap(crashk_res.start,
-                                   resource_size(&crashk_res));
-#endif
 
        /* map all the memory banks */
        for_each_mem_range(i, &start, &end) {
@@ -506,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
        }
 
        /*
-        * Map the linear alias of the [_text, __init_begin) interval
+        * Map the linear alias of the [_stext, __init_begin) interval
         * as non-executable now, and remove the write permission in
         * mark_linear_text_alias_ro() below (which will be called after
         * alternative patching has completed). This makes the contents
@@ -518,21 +528,6 @@ static void __init map_mem(pgd_t *pgdp)
        __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
-
-#ifdef CONFIG_KEXEC_CORE
-       /*
-        * Use page-level mappings here so that we can shrink the region
-        * in page granularity and put back unused memory to buddy system
-        * through /sys/kernel/kexec_crash_size interface.
-        */
-       if (crashk_res.end) {
-               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
-                              PAGE_KERNEL,
-                              NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
-               memblock_clear_nomap(crashk_res.start,
-                                    resource_size(&crashk_res));
-       }
-#endif
 }
 
 void mark_rodata_ro(void)
@@ -665,7 +660,7 @@ static void __init map_kernel(pgd_t *pgdp)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
        map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
@@ -1493,13 +1488,43 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
        unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
        unsigned long pfn = arg->start_pfn;
 
-       if (action != MEM_GOING_OFFLINE)
+       if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
                return NOTIFY_OK;
 
        for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               unsigned long start = PFN_PHYS(pfn);
+               unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+
                ms = __pfn_to_section(pfn);
-               if (early_section(ms))
+               if (!early_section(ms))
+                       continue;
+
+               if (action == MEM_GOING_OFFLINE) {
+                       /*
+                        * Boot memory removal is not supported. Prevent
+                        * it via blocking any attempted offline request
+                        * for the boot memory and just report it.
+                        */
+                       pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
                        return NOTIFY_BAD;
+               } else if (action == MEM_OFFLINE) {
+                       /*
+                        * This should never have happened. Boot memory
+                        * offlining should have been prevented by this
+                        * very notifier. Some memory removal procedure
+                        * has probably changed, which would then
+                        * require further debugging.
+                        */
+                       pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+
+                       /*
+                        * Core memory hotplug does not process a return
+                        * code from the notifier for MEM_OFFLINE events.
+                        * The error condition has been reported. Return
+                        * from here as if ignored.
+                        */
+                       return NOTIFY_DONE;
+               }
        }
        return NOTIFY_OK;
 }
@@ -1508,9 +1533,66 @@ static struct notifier_block prevent_bootmem_remove_nb = {
        .notifier_call = prevent_bootmem_remove_notifier,
 };
 
+/*
+ * Check that every boot memory section on the platform is online at
+ * early boot, and report any that is not. This only reports problems
+ * (WARN_ON() and pr_err()); it does not change any section state.
+ *
+ * The memory event notifier above prevents boot memory removal by
+ * rejecting its offlining. A boot memory section that is not online
+ * to begin with would be overlooked by that notifier and could then
+ * be removed completely, so that basic assumption is validated here.
+ */
+static void validate_bootmem_online(void)
+{
+       phys_addr_t start, end, addr;
+       struct mem_section *ms;
+       u64 i;
+
+       /*
+        * Scanning across all memblock might be expensive
+        * on some big memory systems. Hence enable this
+        * validation only with DEBUG_VM.
+        */
+       if (!IS_ENABLED(CONFIG_DEBUG_VM))
+               return;
+
+       /*
+        * Visit each memory range in steps of one section size
+        * (1UL << PA_SECTION_SHIFT bytes), beginning at the range start.
+        * NOTE(review): addr is not aligned to a section boundary first;
+        * if a range can start mid-section, the last section it touches
+        * may not be visited - confirm ranges are section aligned here.
+        */
+       for_each_mem_range(i, &start, &end) {
+               for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+                       ms = __pfn_to_section(PHYS_PFN(addr));
+
+                       /*
+                        * All memory ranges in the system at this point
+                        * should have been marked as early sections.
+                        */
+                       WARN_ON(!early_section(ms));
+
+                       /*
+                        * Memory notifier mechanism here to prevent boot
+                        * memory offlining depends on the fact that each
+                        * early section memory on the system is initially
+                        * online. Otherwise a given memory section which
+                        * is already offline will be overlooked and can
+                        * be removed completely. Call out such sections.
+                        */
+                       if (!online_section(ms))
+                               pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+                                       addr, addr + (1UL << PA_SECTION_SHIFT));
+               }
+       }
+}
+
+/*
+ * Validate that boot memory is online, then register the notifier that
+ * blocks boot memory offlining. Nothing is done, and 0 is returned, when
+ * CONFIG_MEMORY_HOTREMOVE is not enabled. A failed registration is logged
+ * and its error code is returned.
+ */
 static int __init prevent_bootmem_remove_init(void)
 {
-       return register_memory_notifier(&prevent_bootmem_remove_nb);
+       int ret = 0;
+
+       if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+               return ret;
+
+       validate_bootmem_online();
+       ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+       if (ret)
+               pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+
+       return ret;
 }
-device_initcall(prevent_bootmem_remove_init);
+/* Registered as an early initcall (this change moves it from device_initcall). */
+early_initcall(prevent_bootmem_remove_init);
 #endif
index e155210..2cd0dce 100644 (file)
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index aa98ff1..38166a8 100644 (file)
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 /*
index 915cc75..4619291 100644 (file)
@@ -57,30 +57,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
 
index 53104b1..e6c78a1 100644 (file)
@@ -62,18 +62,6 @@ typedef unsigned long old_sigset_t;          /* at least 32 bits */
 #define SIGRTMAX       _NSIG
 
 /*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- *
  * SA_RESTORER used to be defined as 0x04000000 but only the O32 ABI ever
  * supported its use and no libc was using it, so the entire sa-restorer
  * functionality was removed with lmo commit 39bffc12c3580ab for 2.5.48
index 715c96b..30dd1e4 100644 (file)
@@ -21,6 +21,8 @@ typedef struct {
        unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#define __ARCH_UAPI_SA_FLAGS   _SA_SIGGFAULT
+
 #include <asm/sigcontext.h>
 
 #endif /* !__ASSEMBLY */
index e605197..e5a2657 100644 (file)
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
 #define SA_ONSTACK     0x00000001
 #define SA_RESETHAND   0x00000004
 #define SA_NOCLDSTOP   0x00000008
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
 
-
-#define SIG_BLOCK          0   /* for blocking signals */
-#define SIG_UNBLOCK        1   /* for unblocking signals */
-#define SIG_SETMASK        2   /* for setting the signal mask */
-
-#define SIG_DFL        ((__sighandler_t)0)     /* default signal handling */
-#define SIG_IGN        ((__sighandler_t)1)     /* ignore signal */
-#define SIG_ERR        ((__sighandler_t)-1)    /* error return from signal */
+#include <asm-generic/signal-defs.h>
 
 # ifndef __ASSEMBLY__
 
 /* Avoid too many header ordering problems.  */
 struct siginfo;
 
-/* Type of a signal handler.  */
-#if defined(__LP64__)
-/* function pointers on 64-bit parisc are pointers to little structs and the
- * compiler doesn't support code which changes or tests the address of
- * the function in the little struct.  This is really ugly -PB
- */
-typedef char __user *__sighandler_t;
-#else
-typedef void __signalfn_t(int);
-typedef __signalfn_t __user *__sighandler_t;
-#endif
-
 typedef struct sigaltstack {
        void __user *ss_sp;
        int ss_flags;
index 85b0a7a..04873dd 100644 (file)
@@ -60,30 +60,6 @@ typedef struct {
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK is not currently supported, but will allow sigaltstack(2).
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001U
-#define SA_NOCLDWAIT   0x00000002U
-#define SA_SIGINFO     0x00000004U
-#define SA_ONSTACK     0x08000000U
-#define SA_RESTART     0x10000000U
-#define SA_NODEFER     0x40000000U
-#define SA_RESETHAND   0x80000000U
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000U
 
 #define MINSIGSTKSZ    2048
index 9a14a61..0189f32 100644 (file)
@@ -65,30 +65,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN        32
 #define SIGRTMAX        _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP    0x00000001
-#define SA_NOCLDWAIT    0x00000002
-#define SA_SIGINFO      0x00000004
-#define SA_ONSTACK      0x08000000
-#define SA_RESTART      0x10000000
-#define SA_NODEFER      0x40000000
-#define SA_RESETHAND    0x80000000
-
-#define SA_NOMASK       SA_NODEFER
-#define SA_ONESHOT      SA_RESETHAND
-
 #define SA_RESTORER     0x04000000
 
 #define MINSIGSTKSZ     2048
index ff95059..53758d5 100644 (file)
@@ -137,13 +137,11 @@ struct sigstack {
 #define SA_STACK       _SV_SSTACK
 #define SA_ONSTACK     _SV_SSTACK
 #define SA_RESTART     _SV_INTR
-#define SA_ONESHOT     _SV_RESET
+#define SA_RESETHAND   _SV_RESET
 #define SA_NODEFER     0x20u
 #define SA_NOCLDWAIT    0x100u
 #define SA_SIGINFO      0x200u
 
-#define SA_NOMASK      SA_NODEFER
-
 #define SIG_BLOCK          0x01        /* for blocking signals */
 #define SIG_UNBLOCK        0x02        /* for unblocking signals */
 #define SIG_SETMASK        0x04        /* for setting the signal mask */
index e5745d5..164a22a 100644 (file)
@@ -62,30 +62,6 @@ typedef unsigned long sigset_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001u
-#define SA_NOCLDWAIT   0x00000002u
-#define SA_SIGINFO     0x00000004u
-#define SA_ONSTACK     0x08000000u
-#define SA_RESTART     0x10000000u
-#define SA_NODEFER     0x40000000u
-#define SA_RESETHAND   0x80000000u
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index a7f3e12..ddfd919 100644 (file)
@@ -165,16 +165,9 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 {
        signal_compat_build_tests();
 
-       /* Don't leak in-kernel non-uapi flags to user-space */
-       if (oact)
-               oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
-
        if (!act)
                return;
 
-       /* Don't let flags to be set from userspace */
-       act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
-
        if (in_ia32_syscall())
                act->sa.sa_flags |= SA_IA32_ABI;
        if (in_x32_syscall())
index 005dec5..79ddaba 100644 (file)
@@ -72,30 +72,6 @@ typedef struct {
 #define SIGRTMIN       32
 #define SIGRTMAX       (_NSIG-1)
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
 #define SA_RESTORER    0x04000000
 
 #define MINSIGSTKSZ    2048
index 9929ff5..1787406 100644 (file)
@@ -1718,3 +1718,58 @@ void __init acpi_iort_init(void)
 
        iort_init_platform_devices();
 }
+
+#ifdef CONFIG_ZONE_DMA
+/*
+ * Extract the highest CPU physical address accessible to all DMA masters in
+ * the system. PHYS_ADDR_MAX is returned when no constrained device is found.
+ *
+ * PHYS_ADDR_MAX is also returned when ACPI is disabled or the IORT table
+ * cannot be obtained. Only named component nodes, and PCI root complex nodes
+ * with revision >= 1, contribute a limit.
+ */
+phys_addr_t __init acpi_iort_dma_get_max_cpu_address(void)
+{
+       phys_addr_t limit = PHYS_ADDR_MAX;
+       struct acpi_iort_node *node, *end;
+       struct acpi_table_iort *iort;
+       acpi_status status;
+       int i;
+
+       if (acpi_disabled)
+               return limit;
+
+       status = acpi_get_table(ACPI_SIG_IORT, 0,
+                               (struct acpi_table_header **)&iort);
+       if (ACPI_FAILURE(status))
+               return limit;
+
+       /* First node, and the table end as given by header.length. */
+       node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset);
+       end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length);
+
+       for (i = 0; i < iort->node_count; i++) {
+               /* Stop if node_count would take the walk past the table end. */
+               if (node >= end)
+                       break;
+
+               switch (node->type) {
+                       /* Declarations only; each case assigns what it uses. */
+                       struct acpi_iort_named_component *ncomp;
+                       struct acpi_iort_root_complex *rc;
+                       phys_addr_t local_limit;
+
+               case ACPI_IORT_NODE_NAMED_COMPONENT:
+                       ncomp = (struct acpi_iort_named_component *)node->node_data;
+                       local_limit = DMA_BIT_MASK(ncomp->memory_address_limit);
+                       /*
+                        * NOTE(review): min_not_zero() presumably keeps the
+                        * smaller value while ignoring a zero local_limit -
+                        * confirm against its definition.
+                        */
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+
+               case ACPI_IORT_NODE_PCI_ROOT_COMPLEX:
+                       /*
+                        * Revision 0 root complex nodes are skipped; presumably
+                        * they carry no memory_address_limit - TODO confirm.
+                        */
+                       if (node->revision < 1)
+                               break;
+
+                       rc = (struct acpi_iort_root_complex *)node->node_data;
+                       local_limit = DMA_BIT_MASK(rc->memory_address_limit);
+                       limit = min_not_zero(limit, local_limit);
+                       break;
+               }
+               /* Nodes are variable length: advance by this node's own length. */
+               node = ACPI_ADD_PTR(struct acpi_iort_node, node, node->length);
+       }
+       /* Release the table obtained with acpi_get_table() above. */
+       acpi_put_table(&iort->header);
+       return limit;
+}
+#endif
index eb9ab4f..09c0af7 100644 (file)
@@ -1025,6 +1025,48 @@ out:
 #endif /* CONFIG_HAS_DMA */
 
 /**
+ * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * @np: The node to start searching from or NULL to start from the root
+ *
+ * Gets the highest CPU physical address that is addressable by all DMA masters
+ * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no
+ * DMA constrained device is found, it returns PHYS_ADDR_MAX.
+ *
+ * Only "dma-ranges" properties are consulted. The function recurses into every
+ * available child of @np and keeps the smallest limit found.
+ */
+phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+{
+       phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
+       struct of_range_parser parser;
+       phys_addr_t subtree_max_addr;
+       struct device_node *child;
+       struct of_range range;
+       const __be32 *ranges;
+       u64 cpu_end = 0;
+       int len;
+
+       if (!np)
+               np = of_root;
+
+       /* A missing or zero-length "dma-ranges" does not constrain this node. */
+       ranges = of_get_property(np, "dma-ranges", &len);
+       if (ranges && len) {
+               of_dma_range_parser_init(&parser, np);
+               /*
+                * cpu_end ends up as the highest inclusive end address
+                * (cpu_addr + size - 1) among this node's ranges.
+                */
+               for_each_of_range(&parser, &range)
+                       if (range.cpu_addr + range.size > cpu_end)
+                               cpu_end = range.cpu_addr + range.size - 1;
+
+               /*
+                * NOTE(review): if the parser yields no range, cpu_end is
+                * still 0 here and max_cpu_addr is clamped to 0 - confirm
+                * that this is the intended result.
+                */
+               if (max_cpu_addr > cpu_end)
+                       max_cpu_addr = cpu_end;
+       }
+
+       /* The result is the minimum over this node and all its subtrees. */
+       for_each_available_child_of_node(np, child) {
+               subtree_max_addr = of_dma_get_max_cpu_address(child);
+               if (max_cpu_addr > subtree_max_addr)
+                       max_cpu_addr = subtree_max_addr;
+       }
+
+       return max_cpu_addr;
+}
+
+/**
  * of_dma_is_coherent - Check if device is coherent
  * @np:        device node
  *
index 06cc988..eb51bc1 100644 (file)
@@ -869,6 +869,26 @@ static void __init of_unittest_changeset(void)
 #endif
 }
 
+/*
+ * Check that of_dma_get_max_cpu_address() reports 0x4fffffff as the highest
+ * DMA-addressable CPU address for the /testcase-data/address-tests subtree.
+ * The test is skipped when CONFIG_OF_ADDRESS is not enabled, and bails out
+ * with an error message when the test data node is missing.
+ */
+static void __init of_unittest_dma_get_max_cpu_address(void)
+{
+       struct device_node *np;
+       phys_addr_t cpu_addr;
+
+       if (!IS_ENABLED(CONFIG_OF_ADDRESS))
+               return;
+
+       np = of_find_node_by_path("/testcase-data/address-tests");
+       if (!np) {
+               pr_err("missing testcase data\n");
+               return;
+       }
+
+       cpu_addr = of_dma_get_max_cpu_address(np);
+       /* cpu_addr is a phys_addr_t: print it by reference with %pa, not %pad. */
+       unittest(cpu_addr == 0x4fffffff,
+                "of_dma_get_max_cpu_address: wrong CPU addr %pa (expecting %x)\n",
+                &cpu_addr, 0x4fffffff);
+
+       /* Drop the node reference taken by of_find_node_by_path(). */
+       of_node_put(np);
+}
+
 static void __init of_unittest_dma_ranges_one(const char *path,
                u64 expect_dma_addr, u64 expect_paddr)
 {
@@ -3266,6 +3286,7 @@ static int __init of_unittest(void)
        of_unittest_changeset();
        of_unittest_parse_interrupts();
        of_unittest_parse_interrupts_extended();
+       of_unittest_dma_get_max_cpu_address();
        of_unittest_parse_dma_ranges();
        of_unittest_pci_dma_ranges();
        of_unittest_match_node();
index 20a3212..1a12baa 100644 (file)
@@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                                                const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
+phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_id(struct device *dev, u32 id)
@@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id(
 static inline
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
+
+static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void)
+{ return PHYS_ADDR_MAX; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
index fb3bf69..9d0c454 100644 (file)
@@ -354,26 +354,6 @@ enum zone_type {
         * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
         * platforms may need both zones as they support peripherals with
         * different DMA addressing limitations.
-        *
-        * Some examples:
-        *
-        *  - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
-        *    rest of the lower 4G.
-        *
-        *  - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
-        *    the specific device.
-        *
-        *  - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
-        *    lower 4G.
-        *
-        *  - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
-        *    depending on the specific device.
-        *
-        *  - s390 uses ZONE_DMA fixed to the lower 2G.
-        *
-        *  - ia64 and riscv only use ZONE_DMA32.
-        *
-        *  - parisc uses neither.
         */
 #ifdef CONFIG_ZONE_DMA
        ZONE_DMA,
index 5d51891..9ed5b85 100644 (file)
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
               const char *map_name, const char *map_mask_name,
               struct device_node **target, u32 *id_out);
 
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
        return -EINVAL;
 }
 
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+       return PHYS_ADDR_MAX;
+}
+
 #define of_match_ptr(_ptr)     NULL
 #define of_match_node(_matches, _node) NULL
 #endif /* CONFIG_OF */
index b256f9c..205526c 100644 (file)
@@ -469,4 +469,18 @@ struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
 #endif
 
+#ifndef arch_untagged_si_addr
+/*
+ * Fallback used only when arch_untagged_si_addr is not already defined
+ * (presumably by an architecture header - TODO confirm).
+ *
+ * Given a fault address and a signal and si_code which correspond to the
+ * _sigfault union member, returns the address that must appear in si_addr if
+ * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags.
+ *
+ * This version returns addr unchanged; sig and si_code are not used.
+ */
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+                                                unsigned long sig,
+                                                unsigned long si_code)
+{
+       return addr;
+}
+#endif
+
 #endif /* _LINUX_SIGNAL_H */
index f8a90ae..68e06c7 100644 (file)
@@ -68,4 +68,16 @@ struct ksignal {
        int sig;
 };
 
+/*
+ * Architecture-specific sa_flags bits that are part of the uapi. If
+ * __ARCH_UAPI_SA_FLAGS is not already defined, it defaults to SA_RESTORER
+ * where that flag exists, and to no extra bits otherwise.
+ */
+#ifndef __ARCH_UAPI_SA_FLAGS
+#ifdef SA_RESTORER
+#define __ARCH_UAPI_SA_FLAGS   SA_RESTORER
+#else
+#define __ARCH_UAPI_SA_FLAGS   0
+#endif
+#endif
+
+/*
+ * Every sa_flags bit treated as uapi: the generic flags plus the
+ * architecture-specific ones. SA_UNSUPPORTED is deliberately not listed.
+ */
+#define UAPI_SA_FLAGS                                                          \
+       (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART |  \
+        SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS)
+
 #endif /* _LINUX_SIGNAL_TYPES_H */
index e9304c9..fe929e7 100644 (file)
@@ -4,6 +4,69 @@
 
 #include <linux/compiler.h>
 
+/*
+ * SA_FLAGS values:
+ *
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_SIGINFO delivers the signal with SIGINFO structs.
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_UNSUPPORTED is a flag bit that will never be supported. Kernels from
+ * before the introduction of SA_UNSUPPORTED did not clear unknown bits from
+ * sa_flags when read using the oldact argument to sigaction and rt_sigaction,
+ * so this bit allows flag bit support to be detected from userspace while
+ * allowing an old kernel to be distinguished from a kernel that supports every
+ * flag bit.
+ * SA_EXPOSE_TAGBITS exposes an architecture-defined set of tag bits in
+ * siginfo.si_addr.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#ifndef SA_NOCLDSTOP
+#define SA_NOCLDSTOP   0x00000001
+#endif
+#ifndef SA_NOCLDWAIT
+#define SA_NOCLDWAIT   0x00000002
+#endif
+#ifndef SA_SIGINFO
+#define SA_SIGINFO     0x00000004
+#endif
+/* 0x00000008 used on alpha, mips, parisc */
+/* 0x00000010 used on alpha, parisc */
+/* 0x00000020 used on alpha, parisc, sparc */
+/* 0x00000040 used on alpha, parisc */
+/* 0x00000080 used on parisc */
+/* 0x00000100 used on sparc */
+/* 0x00000200 used on sparc */
+#define SA_UNSUPPORTED 0x00000400
+#define SA_EXPOSE_TAGBITS      0x00000800
+/* 0x00010000 used on mips */
+/* 0x01000000 used on x86 */
+/* 0x02000000 used on x86 */
+/*
+ * New architectures should not define the obsolete
+ *     SA_RESTORER     0x04000000
+ */
+#ifndef SA_ONSTACK
+#define SA_ONSTACK     0x08000000
+#endif
+#ifndef SA_RESTART
+#define SA_RESTART     0x10000000
+#endif
+#ifndef SA_NODEFER
+#define SA_NODEFER     0x40000000
+#endif
+#ifndef SA_RESETHAND
+#define SA_RESETHAND   0x80000000
+#endif
+
+#define SA_NOMASK      SA_NODEFER
+#define SA_ONESHOT     SA_RESETHAND
+
 #ifndef SIG_BLOCK
 #define SIG_BLOCK          0   /* for blocking signals */
 #endif
index 5c716a9..f634822 100644 (file)
 #define SIGRTMAX       _NSIG
 #endif
 
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP   0x00000001
-#define SA_NOCLDWAIT   0x00000002
-#define SA_SIGINFO     0x00000004
-#define SA_ONSTACK     0x08000000
-#define SA_RESTART     0x10000000
-#define SA_NODEFER     0x40000000
-#define SA_RESETHAND   0x80000000
-
-#define SA_NOMASK      SA_NODEFER
-#define SA_ONESHOT     SA_RESETHAND
-
-/*
- * New architectures should not define the obsolete
- *     SA_RESTORER     0x04000000
- */
-
 #if !defined MINSIGSTKSZ || !defined SIGSTKSZ
 #define MINSIGSTKSZ    2048
 #define SIGSTKSZ       8192
index ef8f2a2..26018c5 100644 (file)
@@ -2524,6 +2524,26 @@ static int ptrace_signal(int signr, kernel_siginfo_t *info)
        return signr;
 }
 
+/*
+ * Replace si_addr in @ksig with the value arch_untagged_si_addr() returns
+ * for it. This is done only when the siginfo layout, derived from the signal
+ * number and si_code, is one of the fault layouts; every other layout is
+ * left untouched.
+ */
+static void hide_si_addr_tag_bits(struct ksignal *ksig)
+{
+       switch (siginfo_layout(ksig->sig, ksig->info.si_code)) {
+       case SIL_FAULT:
+       case SIL_FAULT_MCEERR:
+       case SIL_FAULT_BNDERR:
+       case SIL_FAULT_PKUERR:
+               ksig->info.si_addr = arch_untagged_si_addr(
+                       ksig->info.si_addr, ksig->sig, ksig->info.si_code);
+               break;
+       /* The remaining layouts are listed explicitly and need no change. */
+       case SIL_KILL:
+       case SIL_TIMER:
+       case SIL_POLL:
+       case SIL_CHLD:
+       case SIL_RT:
+       case SIL_SYS:
+               break;
+       }
+}
+
 bool get_signal(struct ksignal *ksig)
 {
        struct sighand_struct *sighand = current->sighand;
@@ -2761,6 +2781,10 @@ relock:
        spin_unlock_irq(&sighand->siglock);
 
        ksig->sig = signr;
+
+       if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
+               hide_si_addr_tag_bits(ksig);
+
        return ksig->sig > 0;
 }
 
@@ -3985,6 +4009,22 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
        if (oact)
                *oact = *k;
 
+       /*
+        * Make sure that we never accidentally claim to support SA_UNSUPPORTED,
+        * e.g. by having an architecture use the bit in their uapi.
+        */
+       BUILD_BUG_ON(UAPI_SA_FLAGS & SA_UNSUPPORTED);
+
+       /*
+        * Clear unknown flag bits in order to allow userspace to detect missing
+        * support for flag bits and to allow the kernel to use non-uapi bits
+        * internally.
+        */
+       if (act)
+               act->sa.sa_flags &= UAPI_SA_FLAGS;
+       if (oact)
+               oact->sa.sa_flags &= UAPI_SA_FLAGS;
+
        sigaction_compat_abi(act, oact);
 
        if (act) {