Merge tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst

index b7f9893..1bd687b 100644 (file)
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39
                                                                | Kernel-space virtual memory, shared between all processes:
    ____________________________________________________________|___________________________________________________________
                      |            |                  |         |
-   ffffffc000000000 | -256    GB | ffffffc7ffffffff |   32 GB | kasan
-   ffffffcefee00000 | -196    GB | ffffffcefeffffff |    2 MB | fixmap
-   ffffffceff000000 | -196    GB | ffffffceffffffff |   16 MB | PCI io
-   ffffffcf00000000 | -196    GB | ffffffcfffffffff |    4 GB | vmemmap
-   ffffffd000000000 | -192    GB | ffffffdfffffffff |   64 GB | vmalloc/ioremap space
-   ffffffe000000000 | -128    GB | ffffffff7fffffff |  124 GB | direct mapping of all physical memory
+   ffffffc6fee00000 | -228    GB | ffffffc6feffffff |    2 MB | fixmap
+   ffffffc6ff000000 | -228    GB | ffffffc6ffffffff |   16 MB | PCI io
+   ffffffc700000000 | -228    GB | ffffffc7ffffffff |    4 GB | vmemmap
+   ffffffc800000000 | -224    GB | ffffffd7ffffffff |   64 GB | vmalloc/ioremap space
+   ffffffd800000000 | -160    GB | fffffff6ffffffff |  124 GB | direct mapping of all physical memory
+   fffffff700000000 |  -36    GB | fffffffeffffffff |   32 GB | kasan
    __________________|____________|__________________|_________|____________________________________________________________
                                                                |
                                                                |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig

index 171ecc6..5adcbd9 100644 (file)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -147,27 +147,16 @@ config MMU
           Select if you want MMU-based virtualised addressing space
           support by paged memory management. If unsure, say 'Y'.
  
-config VA_BITS
-       int
-       default 32 if 32BIT
-       default 39 if 64BIT
-
-config PA_BITS
-       int
-       default 34 if 32BIT
-       default 56 if 64BIT
-
  config PAGE_OFFSET
         hex
-       default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
+       default 0xC0000000 if 32BIT
         default 0x80000000 if 64BIT && !MMU
-       default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
-       default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+       default 0xffffaf8000000000 if 64BIT
  
  config KASAN_SHADOW_OFFSET
         hex
         depends on KASAN_GENERIC
-       default 0xdfffffc800000000 if 64BIT
+       default 0xdfffffff00000000 if 64BIT
         default 0xffffffff if 32BIT
  
  config ARCH_FLATMEM_ENABLE
@@ -213,7 +202,7 @@ config FIX_EARLYCON_MEM
  
  config PGTABLE_LEVELS
         int
-       default 3 if 64BIT
+       default 4 if 64BIT
         default 2
  
  config LOCKDEP_SUPPORT
@@ -271,24 +260,6 @@ config MODULE_SECTIONS
         bool
         select HAVE_MOD_ARCH_SPECIFIC
  
-choice
-       prompt "Maximum Physical Memory"
-       default MAXPHYSMEM_1GB if 32BIT
-       default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
-       default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
-
-       config MAXPHYSMEM_1GB
-               depends on 32BIT
-               bool "1GiB"
-       config MAXPHYSMEM_2GB
-               depends on 64BIT
-               bool "2GiB"
-       config MAXPHYSMEM_128GB
-               depends on 64BIT && CMODEL_MEDANY
-               bool "128GiB"
-endchoice
-
-
  config SMP
         bool "Symmetric Multi-Processing"
         help
@@ -392,12 +363,25 @@ source "kernel/Kconfig.hz"
  
  config RISCV_SBI_V01
         bool "SBI v0.1 support"
-       default y
         depends on RISCV_SBI
         help
           This config allows kernel to use SBI v0.1 APIs. This will be
           deprecated in future once legacy M-mode software are no longer in use.
  
+config RISCV_BOOT_SPINWAIT
+       bool "Spinwait booting method"
+       depends on SMP
+       default y
+       help
+         This enables support for booting Linux via the spinwait method. In
+         the spinwait method, all cores randomly jump to Linux. One of the
+         cores gets chosen via lottery and all the others keep spinning on a
+         percpu variable. This method cannot support CPU hotplug or a sparse
+         hartid scheme. It should only be enabled for M-mode Linux or platforms
+         relying on older firmware without the SBI HSM extension. All other
+         platforms should rely on ordered booting via the SBI HSM extension,
+         which gets chosen dynamically at runtime if the firmware supports it.
+
  config KEXEC
         bool "Kexec system call"
         select KEXEC_CORE
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts

index 6bfa1f2..c4ed9ef 100644 (file)
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -39,6 +39,11 @@
                 clock-frequency = <RTCCLK_FREQ>;
                 clock-output-names = "rtcclk";
         };
+
+       gpio-poweroff {
+               compatible = "gpio-poweroff";
+               gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+       };
  };
  
  &uart0 {
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig

index e8ceab6..3f42ed8 100644 (file)
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -29,7 +29,6 @@ CONFIG_EMBEDDED=y
  CONFIG_SLOB=y
  # CONFIG_MMU is not set
  CONFIG_SOC_CANAAN=y
-CONFIG_MAXPHYSMEM_2GB=y
  CONFIG_SMP=y
  CONFIG_NR_CPUS=2
  CONFIG_CMDLINE="earlycon console=ttySIF0"
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig

index 46aa387..2a82a3b 100644 (file)
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -21,7 +21,6 @@ CONFIG_EMBEDDED=y
  CONFIG_SLOB=y
  # CONFIG_MMU is not set
  CONFIG_SOC_CANAAN=y
-CONFIG_MAXPHYSMEM_2GB=y
  CONFIG_SMP=y
  CONFIG_NR_CPUS=2
  CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig

index 385cca7..e1c9864 100644 (file)
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -24,10 +24,8 @@ CONFIG_EXPERT=y
  # CONFIG_VM_EVENT_COUNTERS is not set
  # CONFIG_COMPAT_BRK is not set
  CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
  # CONFIG_MMU is not set
  CONFIG_SOC_VIRT=y
-CONFIG_MAXPHYSMEM_2GB=y
  CONFIG_SMP=y
  CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
  CONFIG_CMDLINE_FORCE=y
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h

index a8ec3c5..134590f 100644 (file)
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -40,7 +40,5 @@ struct cpu_operations {
  
  extern const struct cpu_operations *cpu_ops[NR_CPUS];
  void __init cpu_set_ops(int cpu);
-void cpu_update_secondary_bootdata(unsigned int cpuid,
-                                  struct task_struct *tidle);
  
  #endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h

new file mode 100644 (file)

index 0000000..56e4b76
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops_sbi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 by Rivos Inc.
+ */
+#ifndef __ASM_CPU_OPS_SBI_H
+#define __ASM_CPU_OPS_SBI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/**
+ * struct sbi_hart_boot_data - Hart specific boot data used during booting
+ *                            and cpu hotplug.
+ * @task_ptr: A pointer to the hart specific tp
+ * @stack_ptr: A pointer to the hart specific sp
+ */
+struct sbi_hart_boot_data {
+       void *task_ptr;
+       void *stack_ptr;
+};
+#endif
+
+#endif /* ifndef __ASM_CPU_OPS_SBI_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h

index 5046f43..ae71169 100644 (file)
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,14 +40,13 @@
  #ifndef CONFIG_64BIT
  #define SATP_PPN       _AC(0x003FFFFF, UL)
  #define SATP_MODE_32   _AC(0x80000000, UL)
-#define SATP_MODE      SATP_MODE_32
  #define SATP_ASID_BITS 9
  #define SATP_ASID_SHIFT        22
  #define SATP_ASID_MASK _AC(0x1FF, UL)
  #else
  #define SATP_PPN       _AC(0x00000FFFFFFFFFFF, UL)
  #define SATP_MODE_39   _AC(0x8000000000000000, UL)
-#define SATP_MODE      SATP_MODE_39
+#define SATP_MODE_48   _AC(0x9000000000000000, UL)
  #define SATP_ASID_BITS 16
  #define SATP_ASID_SHIFT        44
  #define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h

index 54cbf07..58a7185 100644 (file)
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -24,6 +24,7 @@ enum fixed_addresses {
         FIX_HOLE,
         FIX_PTE,
         FIX_PMD,
+       FIX_PUD,
         FIX_TEXT_POKE1,
         FIX_TEXT_POKE0,
         FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h

index b00f503..0b85e36 100644 (file)
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -27,13 +27,18 @@
   */
  #define KASAN_SHADOW_SCALE_SHIFT       3
  
-#define KASAN_SHADOW_SIZE      (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START     KERN_VIRT_START
-#define KASAN_SHADOW_END       (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+#define KASAN_SHADOW_SIZE      (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START     ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END       MODULES_LOWEST_VADDR
  #define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
  
  void kasan_init(void);
  asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
  
  #endif
  #endif
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h

index b3e5ff0..160e3a1 100644 (file)
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,9 +31,20 @@
   * When not using MMU this corresponds to the first free page in
   * physical memory (aligned on a page boundary).
   */
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET            kernel_map.page_offset
+#else
  #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3         _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
  
  #ifndef __ASSEMBLY__
  
@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
  #endif /* CONFIG_MMU */
  
  struct kernel_mapping {
+       unsigned long page_offset;
         unsigned long virt_addr;
         uintptr_t phys_addr;
         uintptr_t size;
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h

index 0af6933..1182300 100644 (file)
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
  #include <asm/tlb.h>
  
  #ifdef CONFIG_MMU
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
  #include <asm-generic/pgalloc.h>
  
  static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
  
         set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
  }
+
+/*
+ * Install @pud as the table referenced by @p4d. Only acts when
+ * pgtable_l4_enabled is set; otherwise the entry is left untouched.
+ */
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+       unsigned long pud_pfn;
+
+       if (!pgtable_l4_enabled)
+               return;
+
+       pud_pfn = virt_to_pfn(pud);
+       set_p4d(p4d, __p4d((pud_pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+}
+
+/*
+ * Same as p4d_populate() but stores the entry through set_p4d_safe().
+ * Does nothing unless pgtable_l4_enabled is set.
+ */
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+                                    pud_t *pud)
+{
+       unsigned long pud_pfn;
+
+       if (!pgtable_l4_enabled)
+               return;
+
+       pud_pfn = virt_to_pfn(pud);
+       set_p4d_safe(p4d,
+                    __p4d((pud_pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+}
+
+#define pud_alloc_one pud_alloc_one
+/*
+ * Allocate a pud table via __pud_alloc_one() when pgtable_l4_enabled is set;
+ * otherwise NULL is returned and nothing is allocated.
+ */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+       if (!pgtable_l4_enabled)
+               return NULL;
+
+       return __pud_alloc_one(mm, addr);
+}
+
+#define pud_free pud_free
+/* Release a pud table; a no-op unless pgtable_l4_enabled is set. */
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+       if (!pgtable_l4_enabled)
+               return;
+
+       __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
  #endif /* __PAGETABLE_PMD_FOLDED */
  
  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h

index 228261a..bbbdd66 100644 (file)
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -8,16 +8,36 @@
  
  #include <linux/const.h>
  
-#define PGDIR_SHIFT     30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT_L3  30
+#define PGDIR_SHIFT_L4  39
+#define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
  /* Size of region mapped by a page global directory */
  #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
  #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
  
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT      30
+#define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK       (~(PUD_SIZE - 1))
+
  #define PMD_SHIFT       21
  /* Size of region mapped by a page middle directory */
  #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
  #define PMD_MASK        (~(PMD_SIZE - 1))
  
+/* Page Upper Directory entry */
+typedef struct {
+       unsigned long pud;
+} pud_t;
+
+#define pud_val(x)      ((x).pud)
+#define __pud(x)        ((pud_t) { (x) })
+#define PTRS_PER_PUD    (PAGE_SIZE / sizeof(pud_t))
+
  /* Page Middle Directory entry */
  typedef struct {
         unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
         set_pud(pudp, __pud(0));
  }
  
+/* Build a pud entry from a page frame number and protection bits. */
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+       pud_t entry = __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+
+       return entry;
+}
+
+/* Extract the page frame number stored in a pud entry. */
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+       unsigned long raw = pud_val(pud);
+
+       return raw >> _PAGE_PFN_SHIFT;
+}
+
  static inline pmd_t *pud_pgtable(pud_t pud)
  {
         return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
         return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
  }
  
+#define mm_pud_folded  mm_pud_folded
+/* Reports the pud level as folded whenever pgtable_l4_enabled is not set. */
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+       return !pgtable_l4_enabled;
+}
+
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
  static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
  {
         return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
  #define pmd_ERROR(e) \
         pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
  
+#define pud_ERROR(e)   \
+       pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+/*
+ * Store a p4d entry. Without pgtable_l4_enabled the value is written through
+ * set_pud() on the same slot reinterpreted as a pud.
+ */
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+       if (!pgtable_l4_enabled) {
+               set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+               return;
+       }
+
+       *p4dp = p4d;
+}
+
+/* True only for an all-zero entry; always 0 unless pgtable_l4_enabled. */
+static inline int p4d_none(p4d_t p4d)
+{
+       if (!pgtable_l4_enabled)
+               return 0;
+
+       return (p4d_val(p4d) == 0);
+}
+
+/* Tests _PAGE_PRESENT in the entry; always 1 unless pgtable_l4_enabled. */
+static inline int p4d_present(p4d_t p4d)
+{
+       if (!pgtable_l4_enabled)
+               return 1;
+
+       return (p4d_val(p4d) & _PAGE_PRESENT);
+}
+
+/* An entry is bad when it is not present; never bad unless pgtable_l4_enabled. */
+static inline int p4d_bad(p4d_t p4d)
+{
+       if (!pgtable_l4_enabled)
+               return 0;
+
+       return !p4d_present(p4d);
+}
+
+/* Zero the entry; a no-op unless pgtable_l4_enabled is set. */
+static inline void p4d_clear(p4d_t *p4d)
+{
+       if (!pgtable_l4_enabled)
+               return;
+
+       set_p4d(p4d, __p4d(0));
+}
+
+/*
+ * Return the table a p4d entry points to. Without pgtable_l4_enabled the
+ * entry is treated as a pud and resolved through pud_pgtable().
+ */
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+       if (!pgtable_l4_enabled)
+               return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+
+       return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+/* Return the struct page for the frame number held in a p4d entry. */
+static inline struct page *p4d_page(p4d_t p4d)
+{
+       struct page *page = pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+       return page;
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+/*
+ * Locate the pud entry for @address. Without pgtable_l4_enabled the p4d slot
+ * itself is returned, reinterpreted as a pud.
+ */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+       if (!pgtable_l4_enabled)
+               return (pud_t *)p4d;
+
+       return p4d_pgtable(*p4d) + pud_index(address);
+}
+
  #endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h

index 67f687a..7e949f2 100644 (file)
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,6 +24,17 @@
  #define KERNEL_LINK_ADDR       PAGE_OFFSET
  #endif
  
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD    (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE    (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE          ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
  #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
  #define VMALLOC_END      PAGE_OFFSET
  #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
@@ -39,8 +50,10 @@
  
  /* Modules always live before the kernel */
  #ifdef CONFIG_64BIT
-#define MODULES_VADDR  (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
-#define MODULES_END    (PFN_ALIGN((unsigned long)&_start))
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR   (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR          (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END            (PFN_ALIGN((unsigned long)&_start))
  #endif
  
  /*
@@ -48,8 +61,14 @@
   * struct pages to map half the virtual address space. Then
   * position vmemmap directly below the VMALLOC region.
   */
+#ifdef CONFIG_64BIT
+#define VA_BITS                (pgtable_l4_enabled ? 48 : 39)
+#else
+#define VA_BITS                32
+#endif
+
  #define VMEMMAP_SHIFT \
-       (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+       (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
  #define VMEMMAP_SIZE   BIT(VMEMMAP_SHIFT)
  #define VMEMMAP_END    VMALLOC_START
  #define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
@@ -83,8 +102,7 @@
  
  #ifndef __ASSEMBLY__
  
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
  #include <asm/page.h>
  #include <asm/tlbflush.h>
  #include <linux/mm_types.h>
@@ -107,12 +125,20 @@
  #define XIP_FIXUP(addr)                (addr)
  #endif /* CONFIG_XIP_KERNEL */
  
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD    (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE    (PAGE_SIZE / sizeof(pte_t))
+struct pt_alloc_ops {  /* per-level page-table hooks: pte, pmd, pud */
+       pte_t *(*get_pte_virt)(phys_addr_t pa);
+       phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED /* pmd and pud hooks exist only when pmd is not folded */
+       pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+       phys_addr_t (*alloc_pmd)(uintptr_t va);
+       pud_t *(*get_pud_virt)(phys_addr_t pa);
+       phys_addr_t (*alloc_pud)(uintptr_t va);
+#endif
+};
+
+extern struct pt_alloc_ops pt_ops __initdata;
  
+#ifdef CONFIG_MMU
  /* Number of PGD entries that a user-mode program can use */
  #define USER_PTRS_PER_PGD   (TASK_SIZE / PGDIR_SIZE)
  
@@ -659,7 +685,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
   * and give the kernel the other (upper) half.
   */
  #ifdef CONFIG_64BIT
-#define KERN_VIRT_START        (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START        (-(BIT(VA_BITS)) + TASK_SIZE)
  #else
  #define KERN_VIRT_START        FIXADDR_START
  #endif
@@ -667,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
  /*
   * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
   * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * -     0x9fc00000 (~2.5GB) for RV32.
+ * -   0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
   */
  #ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE      (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN  (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
  #else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE      FIXADDR_START
+#define TASK_SIZE_MIN  TASK_SIZE
  #endif
  
  #else /* CONFIG_MMU */
@@ -697,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
  #define dtb_early_va   _dtb_early_va
  #define dtb_early_pa   _dtb_early_pa
  #endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
  
  void paging_init(void);
  void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h

index 26ba6f2..d1c3747 100644 (file)
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
  #define _ASM_RISCV_SBI_H
  
  #include <linux/types.h>
+#include <linux/cpumask.h>
  
  #ifdef CONFIG_RISCV_SBI
  enum sbi_ext_id {
@@ -128,27 +129,27 @@ long sbi_get_mimpid(void);
  void sbi_set_timer(uint64_t stime_value);
  void sbi_shutdown(void);
  void sbi_clear_ipi(void);
-int sbi_send_ipi(const unsigned long *hart_mask);
-int sbi_remote_fence_i(const unsigned long *hart_mask);
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const struct cpumask *cpu_mask);
+int sbi_remote_fence_i(const struct cpumask *cpu_mask);
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size);
  
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long asid);
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size);
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long vmid);
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size);
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long asid);
@@ -183,7 +184,7 @@ static inline unsigned long sbi_mk_version(unsigned long major,
  
  int sbi_err_map_linux_errno(int err);
  #else /* CONFIG_RISCV_SBI */
-static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
  static inline void sbi_init(void) {}
  #endif /* CONFIG_RISCV_SBI */
  #endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h

index 6ad749f..23170c9 100644 (file)
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -92,8 +92,6 @@ static inline void riscv_clear_ipi(void)
  
  #endif /* CONFIG_SMP */
  
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
-
  #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
  bool cpu_has_hotplug(unsigned int cpu);
  #else
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h

index 45a7018..63acaec 100644 (file)
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
  #define _ASM_RISCV_SPARSEMEM_H
  
  #ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS       CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS       56
+#else
+#define MAX_PHYSMEM_BITS       34
+#endif /* CONFIG_64BIT */
  #define SECTION_SIZE_BITS      27
  #endif /* CONFIG_SPARSEMEM */
  
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile

index 3397dda..612556f 100644 (file)
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -43,7 +43,8 @@ obj-$(CONFIG_FPU)             += fpu.o
  obj-$(CONFIG_SMP)              += smpboot.o
  obj-$(CONFIG_SMP)              += smp.o
  obj-$(CONFIG_SMP)              += cpu_ops.o
-obj-$(CONFIG_SMP)              += cpu_ops_spinwait.o
+
+obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
  obj-$(CONFIG_MODULES)          += module.o
  obj-$(CONFIG_MODULE_SECTIONS)  += module-sections.o
  
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c

index 253126e..df0519a 100644 (file)
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
  #include <asm/kvm_host.h>
  #include <asm/thread_info.h>
  #include <asm/ptrace.h>
+#include <asm/cpu_ops_sbi.h>
  
  void asm_offsets(void);
  
@@ -468,4 +469,6 @@ void asm_offsets(void)
         DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
  
         OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
+       OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
+       OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
  }
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c

index f13b2c9..ad0a7e9 100644 (file)
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -7,6 +7,7 @@
  #include <linux/seq_file.h>
  #include <linux/of.h>
  #include <asm/smp.h>
+#include <asm/pgtable.h>
  
  /*
   * Returns the hart ID of the given device tree node, or -ENODEV if the node
@@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa)
         seq_puts(f, "\n");
  }
  
+/*
+ * Print the MMU mode the kernel is running with: sv32 on 32-bit builds,
+ * otherwise sv48 or sv39 depending on pgtable_l4_enabled.
+ */
-static void print_mmu(struct seq_file *f, const char *mmu_type)
+static void print_mmu(struct seq_file *f)
  {
+       /* Points at a string literal: no stack buffer or strncpy() needed */
+       const char *sv_type;
+
  #if defined(CONFIG_32BIT)
-       if (strcmp(mmu_type, "riscv,sv32") != 0)
-               return;
+       sv_type = "sv32";
  #elif defined(CONFIG_64BIT)
-       if (strcmp(mmu_type, "riscv,sv39") != 0 &&
-           strcmp(mmu_type, "riscv,sv48") != 0)
-               return;
+       if (pgtable_l4_enabled)
+               sv_type = "sv48";
+       else
+               sv_type = "sv39";
  #endif
-
-       seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+       seq_printf(f, "mmu\t\t: %s\n", sv_type);
  }
  
  static void *c_start(struct seq_file *m, loff_t *pos)
@@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v)
  {
         unsigned long cpu_id = (unsigned long)v - 1;
         struct device_node *node = of_get_cpu_node(cpu_id, NULL);
-       const char *compat, *isa, *mmu;
+       const char *compat, *isa;
  
         seq_printf(m, "processor\t: %lu\n", cpu_id);
         seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
         if (!of_property_read_string(node, "riscv,isa", &isa))
                 print_isa(m, isa);
-       if (!of_property_read_string(node, "mmu-type", &mmu))
-               print_mmu(m, mmu);
+       print_mmu(m);
         if (!of_property_read_string(node, "compatible", &compat)
             && strcmp(compat, "riscv"))
                 seq_printf(m, "uarch\t\t: %s\n", compat);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c

index 1985884..170d07e 100644 (file)
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -8,37 +8,29 @@
  #include <linux/of.h>
  #include <linux/string.h>
  #include <linux/sched.h>
-#include <linux/sched/task_stack.h>
  #include <asm/cpu_ops.h>
  #include <asm/sbi.h>
  #include <asm/smp.h>
  
  const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
  
-void *__cpu_up_stack_pointer[NR_CPUS] __section(".data");
-void *__cpu_up_task_pointer[NR_CPUS] __section(".data");
-
  extern const struct cpu_operations cpu_ops_sbi;
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
  extern const struct cpu_operations cpu_ops_spinwait;
-
-void cpu_update_secondary_bootdata(unsigned int cpuid,
-                                  struct task_struct *tidle)
-{
-       int hartid = cpuid_to_hartid_map(cpuid);
-
-       /* Make sure tidle is updated */
-       smp_mb();
-       WRITE_ONCE(__cpu_up_stack_pointer[hartid],
-                  task_stack_page(tidle) + THREAD_SIZE);
-       WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-}
+#else
+const struct cpu_operations cpu_ops_spinwait = {  /* stub used when CONFIG_RISCV_BOOT_SPINWAIT is off */
+       .name           = "",
+       .cpu_prepare    = NULL,
+       .cpu_start      = NULL,
+};
+#endif
  
  void __init cpu_set_ops(int cpuid)
  {
  #if IS_ENABLED(CONFIG_RISCV_SBI)
         if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
                 if (!cpuid)
-                       pr_info("SBI v0.2 HSM extension detected\n");
+                       pr_info("SBI HSM extension detected\n");
                 cpu_ops[cpuid] = &cpu_ops_sbi;
         } else
  #endif
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c

index 685fae7..dae29cb 100644 (file)
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -7,13 +7,22 @@
  
  #include <linux/init.h>
  #include <linux/mm.h>
+#include <linux/sched/task_stack.h>
  #include <asm/cpu_ops.h>
+#include <asm/cpu_ops_sbi.h>
  #include <asm/sbi.h>
  #include <asm/smp.h>
  
  extern char secondary_start_sbi[];
  const struct cpu_operations cpu_ops_sbi;
  
+/*
+ * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
+ * be invoked from multiple threads in parallel. Define a per cpu data
+ * to handle that.
+ */
+DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+
  static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
                               unsigned long priv)
  {
@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
  
  static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
  {
-       int rc;
         unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
         int hartid = cpuid_to_hartid_map(cpuid);
-
-       cpu_update_secondary_bootdata(cpuid, tidle);
-       rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
-
-       return rc;
+       unsigned long hsm_data;
+       struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+
+       /* Make sure tidle is updated */
+       smp_mb();
+       bdata->task_ptr = tidle;
+       bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+       /* Make sure boot data is updated */
+       smp_mb();
+       hsm_data = __pa(bdata);
+       return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
  }
  
  static int sbi_cpu_prepare(unsigned int cpuid)
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c

index b2c957b..346847f 100644 (file)
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -6,11 +6,36 @@
  #include <linux/errno.h>
  #include <linux/of.h>
  #include <linux/string.h>
+#include <linux/sched/task_stack.h>
  #include <asm/cpu_ops.h>
  #include <asm/sbi.h>
  #include <asm/smp.h>
  
  const struct cpu_operations cpu_ops_spinwait;
+void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
+void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
+
+static void cpu_update_secondary_bootdata(unsigned int cpuid,
+                                  struct task_struct *tidle)
+{
+       unsigned long hartid = cpuid_to_hartid_map(cpuid);
+
+       /*
+        * The hartid must be less than NR_CPUS to avoid out-of-bound access
+        * errors for __cpu_spinwait_stack/task_pointer. That is not always possible
+        * for platforms with discontiguous hartid numbering scheme. That's why
+        * spinwait booting is not the recommended approach for any platforms
+        * booting Linux in S-mode and can be disabled in the future.
+        */
+       if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+               return;
+
+       /* Make sure tidle is updated */
+       smp_mb();
+       WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+                  task_stack_page(tidle) + THREAD_SIZE);
+       WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+}
  
  static int spinwait_cpu_prepare(unsigned int cpuid)
  {
@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
          * selects the first cpu to boot the kernel and causes the remainder
          * of the cpus to spin in a loop waiting for their stack pointer to be
          * setup by that main cpu.  Writing to bootdata
-        * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
+        * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
          * can continue the boot process.
          */
         cpu_update_secondary_bootdata(cpuid, tidle);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S

index 604d602..2363b43 100644 (file)
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/csr.h>
+#include <asm/cpu_ops_sbi.h>
  #include <asm/hwcap.h>
  #include <asm/image.h>
  #include "efi-header.S"
@@ -105,7 +106,8 @@ relocate:
  
         /* Compute satp for kernel page tables, but don't load it yet */
         srl a2, a0, PAGE_SHIFT
-       li a1, SATP_MODE
+       la a1, satp_mode
+       REG_L a1, 0(a1)
         or a2, a2, a1
  
         /*
@@ -167,15 +169,15 @@ secondary_start_sbi:
         la a3, .Lsecondary_park
         csrw CSR_TVEC, a3
  
-       slli a3, a0, LGREG
-       la a4, __cpu_up_stack_pointer
-       XIP_FIXUP_OFFSET a4
-       la a5, __cpu_up_task_pointer
-       XIP_FIXUP_OFFSET a5
-       add a4, a3, a4
-       add a5, a3, a5
-       REG_L sp, (a4)
-       REG_L tp, (a5)
+       /* a0 contains the hartid & a1 contains boot data */
+       li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
+       XIP_FIXUP_OFFSET a2
+       add a2, a2, a1
+       REG_L tp, (a2)
+       li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
+       XIP_FIXUP_OFFSET a3
+       add a3, a3, a1
+       REG_L sp, (a3)
  
  .Lsecondary_start_common:
  
@@ -257,13 +259,13 @@ pmp_done:
         li t0, SR_FS
         csrc CSR_STATUS, t0
  
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
         li t0, CONFIG_NR_CPUS
         blt a0, t0, .Lgood_cores
         tail .Lsecondary_park
  .Lgood_cores:
-#endif
  
+       /* The lottery system is only required for spinwait booting method */
  #ifndef CONFIG_XIP_KERNEL
         /* Pick one hart to run the main boot sequence */
         la a3, hart_lottery
@@ -282,6 +284,10 @@ pmp_done:
         /* first time here if hart_lottery in RAM is not set */
         beq t0, t1, .Lsecondary_start
  
+#endif /* CONFIG_XIP */
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
+
+#ifdef CONFIG_XIP_KERNEL
         la sp, _end + THREAD_SIZE
         XIP_FIXUP_OFFSET sp
         mv s0, a0
@@ -338,16 +344,16 @@ clear_bss_done:
         call soc_early_init
         tail start_kernel
  
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
  .Lsecondary_start:
-#ifdef CONFIG_SMP
         /* Set trap vector to spin forever to help debug */
         la a3, .Lsecondary_park
         csrw CSR_TVEC, a3
  
         slli a3, a0, LGREG
-       la a1, __cpu_up_stack_pointer
+       la a1, __cpu_spinwait_stack_pointer
         XIP_FIXUP_OFFSET a1
-       la a2, __cpu_up_task_pointer
+       la a2, __cpu_spinwait_task_pointer
         XIP_FIXUP_OFFSET a2
         add a1, a3, a1
         add a2, a3, a2
@@ -365,7 +371,7 @@ clear_bss_done:
         fence
  
         tail .Lsecondary_start_common
-#endif
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
  
  END(_start_kernel)
  
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h

index aabbc3a..726731a 100644 (file)
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
  asmlinkage void __init __copy_data(void);
  #endif
  
-extern void *__cpu_up_stack_pointer[];
-extern void *__cpu_up_task_pointer[];
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
+extern void *__cpu_spinwait_stack_pointer[];
+extern void *__cpu_spinwait_task_pointer[];
+#endif
  
  #endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c

index 9c05111..a892437 100644 (file)
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target,
                          unsigned int pos, unsigned int count,
                          const void *kbuf, const void __user *ubuf)
  {
-       int ret;
         struct pt_regs *regs;
  
         regs = task_pt_regs(target);
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
-       return ret;
+       return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
  }
  
  #ifdef CONFIG_FPU
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c

index 9a84f0c..f72527f 100644 (file)
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -16,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
  EXPORT_SYMBOL(sbi_spec_version);
  
  static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
-static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
+static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
                            unsigned long start, unsigned long size,
                            unsigned long arg4, unsigned long arg5) __ro_after_init;
  
@@ -67,6 +67,30 @@ int sbi_err_map_linux_errno(int err)
  EXPORT_SYMBOL(sbi_err_map_linux_errno);
  
  #ifdef CONFIG_RISCV_SBI_V01
+static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
+{
+       unsigned long cpuid, hartid;
+       unsigned long hmask = 0;
+
+       /*
+        * There is no maximum hartid concept in RISC-V and NR_CPUS must not be
+        * associated with hartid. As SBI v0.1 is only kept for backward compatibility
+        * and will be removed in the future, there is no point in supporting hartid
+        * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
+        * should be used for platforms with hartid greater than BITS_PER_LONG.
+        */
+       for_each_cpu(cpuid, cpu_mask) {
+               hartid = cpuid_to_hartid_map(cpuid);
+               if (hartid >= BITS_PER_LONG) {
+                       pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
+                       break;
+               }
+               hmask |= 1UL << hartid;
+       }
+
+       return hmask;
+}
+
  /**
   * sbi_console_putchar() - Writes given character to the console device.
   * @ch: The data to be written to the console.
@@ -132,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
  #endif
  }
  
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
  {
-       sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+       unsigned long hart_mask;
+
+       if (!cpu_mask)
+               cpu_mask = cpu_online_mask;
+       hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
+
+       sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
                   0, 0, 0, 0, 0);
         return 0;
  }
  
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
                             unsigned long start, unsigned long size,
                             unsigned long arg4, unsigned long arg5)
  {
         int result = 0;
+       unsigned long hart_mask;
+
+       if (!cpu_mask)
+               cpu_mask = cpu_online_mask;
+       hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
  
         /* v0.2 function IDs are equivalent to v0.1 extension IDs */
         switch (fid) {
         case SBI_EXT_RFENCE_REMOTE_FENCE_I:
                 sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
-                         (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+                         (unsigned long)&hart_mask, 0, 0, 0, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
                 sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
-                         (unsigned long)hart_mask, start, size,
+                         (unsigned long)&hart_mask, start, size,
                           0, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
                 sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
-                         (unsigned long)hart_mask, start, size,
+                         (unsigned long)&hart_mask, start, size,
                           arg4, 0, 0);
                 break;
         default:
@@ -180,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
                 sbi_major_version(), sbi_minor_version());
  }
  
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
  {
         pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
                 sbi_major_version(), sbi_minor_version());
@@ -188,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
         return 0;
  }
  
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
                             unsigned long start, unsigned long size,
                             unsigned long arg4, unsigned long arg5)
  {
@@ -212,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
  #endif
  }
  
-static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
  {
-       unsigned long hartid, hmask_val, hbase;
-       struct cpumask tmask;
+       unsigned long hartid, cpuid, hmask = 0, hbase = 0;
         struct sbiret ret = {0};
         int result;
  
-       if (!hart_mask || !(*hart_mask)) {
-               riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
-               hart_mask = cpumask_bits(&tmask);
-       }
+       if (!cpu_mask || cpumask_empty(cpu_mask))
+               cpu_mask = cpu_online_mask;
  
-       hmask_val = 0;
-       hbase = 0;
-       for_each_set_bit(hartid, hart_mask, NR_CPUS) {
-               if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
+       for_each_cpu(cpuid, cpu_mask) {
+               hartid = cpuid_to_hartid_map(cpuid);
+               if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
                         ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
-                                       hmask_val, hbase, 0, 0, 0, 0);
+                                       hmask, hbase, 0, 0, 0, 0);
                         if (ret.error)
                                 goto ecall_failed;
-                       hmask_val = 0;
+                       hmask = 0;
                         hbase = 0;
                 }
-               if (!hmask_val)
+               if (!hmask)
                         hbase = hartid;
-               hmask_val |= 1UL << (hartid - hbase);
+               hmask |= 1UL << (hartid - hbase);
         }
  
-       if (hmask_val) {
+       if (hmask) {
                 ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
-                               hmask_val, hbase, 0, 0, 0, 0);
+                               hmask, hbase, 0, 0, 0, 0);
                 if (ret.error)
                         goto ecall_failed;
         }
@@ -252,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
  ecall_failed:
         result = sbi_err_map_linux_errno(ret.error);
         pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
-              __func__, hbase, hmask_val, result);
+              __func__, hbase, hmask, result);
         return result;
  }
  
-static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
+static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
                                  unsigned long hbase, unsigned long start,
                                  unsigned long size, unsigned long arg4,
                                  unsigned long arg5)
@@ -267,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
  
         switch (fid) {
         case SBI_EXT_RFENCE_REMOTE_FENCE_I:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+               ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, arg4, 0);
                 break;
  
         case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, arg4, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, 0, 0);
                 break;
         case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
-               ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+               ret = sbi_ecall(ext, fid, hmask, hbase, start,
                                 size, arg4, 0);
                 break;
         default:
@@ -303,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
         if (ret.error) {
                 result = sbi_err_map_linux_errno(ret.error);
                 pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
-                      __func__, hbase, hmask_val, result);
+                      __func__, hbase, hmask, result);
         }
  
         return result;
  }
  
-static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
                             unsigned long start, unsigned long size,
                             unsigned long arg4, unsigned long arg5)
  {
-       unsigned long hmask_val, hartid, hbase;
-       struct cpumask tmask;
+       unsigned long hartid, cpuid, hmask = 0, hbase = 0;
         int result;
  
-       if (!hart_mask || !(*hart_mask)) {
-               riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
-               hart_mask = cpumask_bits(&tmask);
-       }
+       if (!cpu_mask || cpumask_empty(cpu_mask))
+               cpu_mask = cpu_online_mask;
  
-       hmask_val = 0;
-       hbase = 0;
-       for_each_set_bit(hartid, hart_mask, NR_CPUS) {
-               if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
-                       result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+       for_each_cpu(cpuid, cpu_mask) {
+               hartid = cpuid_to_hartid_map(cpuid);
+               if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
+                       result = __sbi_rfence_v02_call(fid, hmask, hbase,
                                                        start, size, arg4, arg5);
                         if (result)
                                 return result;
-                       hmask_val = 0;
+                       hmask = 0;
                         hbase = 0;
                 }
-               if (!hmask_val)
+               if (!hmask)
                         hbase = hartid;
-               hmask_val |= 1UL << (hartid - hbase);
+               hmask |= 1UL << (hartid - hbase);
         }
  
-       if (hmask_val) {
-               result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+       if (hmask) {
+               result = __sbi_rfence_v02_call(fid, hmask, hbase,
                                                start, size, arg4, arg5);
                 if (result)
                         return result;
@@ -361,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value)
  
  /**
   * sbi_send_ipi() - Send an IPI to any hart.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   *
   * Return: 0 on success, appropriate linux error code otherwise.
   */
-int sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const struct cpumask *cpu_mask)
  {
-       return __sbi_send_ipi(hart_mask);
+       return __sbi_send_ipi(cpu_mask);
  }
  EXPORT_SYMBOL(sbi_send_ipi);
  
  /**
   * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   *
   * Return: 0 on success, appropriate linux error code otherwise.
   */
-int sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const struct cpumask *cpu_mask)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
-                           hart_mask, 0, 0, 0, 0);
+                           cpu_mask, 0, 0, 0, 0);
  }
  EXPORT_SYMBOL(sbi_remote_fence_i);
  
  /**
   * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
   *                          harts for the specified virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the virtual address
   * @size: Total size of the virtual address range.
   *
   * Return: 0 on success, appropriate linux error code otherwise.
   */
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
-                           hart_mask, start, size, 0, 0);
+                           cpu_mask, start, size, 0, 0);
  }
  EXPORT_SYMBOL(sbi_remote_sfence_vma);
  
@@ -406,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
   * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
   * remote harts for a virtual address range belonging to a specific ASID.
   *
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the virtual address
   * @size: Total size of the virtual address range.
   * @asid: The value of address space identifier (ASID).
   *
   * Return: 0 on success, appropriate linux error code otherwise.
   */
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long asid)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
-                           hart_mask, start, size, asid, 0);
+                           cpu_mask, start, size, asid, 0);
  }
  EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
  
  /**
   * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
   *                        harts for the specified guest physical address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the guest physical address
   * @size: Total size of the guest physical address range.
   *
   * Return: None
   */
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
-                           hart_mask, start, size, 0, 0);
+                           cpu_mask, start, size, 0, 0);
  }
  EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
  
@@ -445,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
   * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
   * remote harts for a guest physical address range belonging to a specific VMID.
   *
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the guest physical address
   * @size: Total size of the guest physical address range.
   * @vmid: The value of guest ID (VMID).
   *
   * Return: 0 if success, Error otherwise.
   */
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long vmid)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
-                           hart_mask, start, size, vmid, 0);
+                           cpu_mask, start, size, vmid, 0);
  }
  EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
  
  /**
   * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
   *                          harts for the current guest virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the current guest virtual address
   * @size: Total size of the current guest virtual address range.
   *
   * Return: None
   */
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
                            unsigned long start,
                            unsigned long size)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
-                           hart_mask, start, size, 0, 0);
+                           cpu_mask, start, size, 0, 0);
  }
  EXPORT_SYMBOL(sbi_remote_hfence_vvma);
  
@@ -485,20 +512,20 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
   * remote harts for current guest virtual address range belonging to a specific
   * ASID.
   *
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
   * @start: Start of the current guest virtual address
   * @size: Total size of the current guest virtual address range.
   * @asid: The value of address space identifier (ASID).
   *
   * Return: None
   */
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
                                 unsigned long start,
                                 unsigned long size,
                                 unsigned long asid)
  {
         return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
-                           hart_mask, start, size, asid, 0);
+                           cpu_mask, start, size, asid, 0);
  }
  EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
  
@@ -591,11 +618,7 @@ long sbi_get_mimpid(void)
  
  static void sbi_send_cpumask_ipi(const struct cpumask *target)
  {
-       struct cpumask hartid_mask;
-
-       riscv_cpuid_to_hartid_mask(target, &hartid_mask);
-
-       sbi_send_ipi(cpumask_bits(&hartid_mask));
+       sbi_send_ipi(target);
  }
  
  static const struct riscv_ipi_ops sbi_ipi_ops = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c

index 63241ab..b42bfdc 100644 (file)
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -59,16 +59,6 @@ atomic_t hart_lottery __section(".sdata")
  unsigned long boot_cpu_hartid;
  static DEFINE_PER_CPU(struct cpu, cpu_devices);
  
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
-{
-       int cpu;
-
-       cpumask_clear(out);
-       for_each_cpu(cpu, in)
-               cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
-}
-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
-
  /*
   * Place kernel memory regions on the resource tree so that
   * kexec-tools can retrieve them from /proc/iomem. While there
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c

index bd82375..622f226 100644 (file)
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -96,7 +96,7 @@ void __init setup_smp(void)
                 if (cpuid >= NR_CPUS) {
                         pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
                                 cpuid, hart);
-                       break;
+                       continue;
                 }
  
                 cpuid_to_hartid_map(cpuid) = hart;
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c

index 9af67db..f80a34f 100644 (file)
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -114,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
  
  static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
  {
-       struct cpumask hmask;
         unsigned long size = PAGE_SIZE;
         struct kvm_vmid *vmid = &kvm->arch.vmid;
  
@@ -127,8 +126,7 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
          * where the Guest/VM is running.
          */
         preempt_disable();
-       riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
-       sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
+       sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
                                     READ_ONCE(vmid->vmid));
         preempt_enable();
  }
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c

index 00036b7..1bc0608 100644 (file)
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -82,7 +82,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
  {
         int ret = 0;
         unsigned long i;
-       struct cpumask cm, hm;
+       struct cpumask cm;
         struct kvm_vcpu *tmp;
         struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
         unsigned long hmask = cp->a0;
@@ -90,7 +90,6 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
         unsigned long funcid = cp->a6;
  
         cpumask_clear(&cm);
-       cpumask_clear(&hm);
         kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
                 if (hbase != -1UL) {
                         if (tmp->vcpu_id < hbase)
@@ -103,17 +102,15 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
                 cpumask_set_cpu(tmp->cpu, &cm);
         }
  
-       riscv_cpuid_to_hartid_mask(&cm, &hm);
-
         switch (funcid) {
         case SBI_EXT_RFENCE_REMOTE_FENCE_I:
-               ret = sbi_remote_fence_i(cpumask_bits(&hm));
+               ret = sbi_remote_fence_i(&cm);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
-               ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3);
+               ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
                 break;
         case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
-               ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2,
+               ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
                                                   cp->a3, cp->a4);
                 break;
         case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c

index 4c7e13e..07e2de1 100644 (file)
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -38,7 +38,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
         int i, ret = 0;
         u64 next_cycle;
         struct kvm_vcpu *rvcpu;
-       struct cpumask cm, hm;
+       struct cpumask cm;
         struct kvm *kvm = vcpu->kvm;
         struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
  
@@ -101,15 +101,12 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                                 continue;
                         cpumask_set_cpu(rvcpu->cpu, &cm);
                 }
-               riscv_cpuid_to_hartid_mask(&cm, &hm);
                 if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
-                       ret = sbi_remote_fence_i(cpumask_bits(&hm));
+                       ret = sbi_remote_fence_i(&cm);
                 else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
-                       ret = sbi_remote_hfence_vvma(cpumask_bits(&hm),
-                                               cp->a1, cp->a2);
+                       ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
                 else
-                       ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
-                                               cp->a1, cp->a2, cp->a3);
+                       ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
                 break;
         default:
                 ret = -EINVAL;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c

index 807228f..2fa4f7b 100644 (file)
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -67,7 +67,6 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
  {
         unsigned long i;
         struct kvm_vcpu *v;
-       struct cpumask hmask;
         struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
  
         if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
@@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
                  * running, we force VM exits on all host CPUs using IPI and
                  * flush all Guest TLBs.
                  */
-               riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
-               sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+               sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
         }
  
         vmid->vmid = vmid_next;
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c

index 89f8106..6cb7d96 100644 (file)
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
                  */
                 smp_mb();
         } else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
-               cpumask_t hartid_mask;
-
-               riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
-               sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+               sbi_remote_fence_i(&others);
         } else {
                 on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
         }
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c

index ea54cc0..7acbfbd 100644 (file)
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
  switch_mm_fast:
         csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
                   ((cntx & asid_mask) << SATP_ASID_SHIFT) |
-                 SATP_MODE);
+                 satp_mode);
  
         if (need_flush_tlb)
                 local_flush_tlb_all();
@@ -201,7 +201,7 @@ switch_mm_fast:
  static void set_mm_noasid(struct mm_struct *mm)
  {
         /* Switch the page table and blindly nuke entire local TLB */
-       csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+       csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
         local_flush_tlb_all();
  }
  
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c

index 0624c68..cf4d018 100644 (file)
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map);
  #define kernel_map     (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
  #endif
  
+#ifdef CONFIG_64BIT
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+#else
+u64 satp_mode = SATP_MODE_32;
+#endif
+EXPORT_SYMBOL(satp_mode);
+
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+EXPORT_SYMBOL(pgtable_l4_enabled);
+
  phys_addr_t phys_ram_base __ro_after_init;
  EXPORT_SYMBOL(phys_ram_base);
  
-#ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[], __data_loc;
-#endif
-
  unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
                                                         __page_aligned_bss;
  EXPORT_SYMBOL(empty_zero_page);
@@ -53,15 +59,6 @@ extern char _start[];
  void *_dtb_early_va __initdata;
  uintptr_t _dtb_early_pa __initdata;
  
-struct pt_alloc_ops {
-       pte_t *(*get_pte_virt)(phys_addr_t pa);
-       phys_addr_t (*alloc_pte)(uintptr_t va);
-#ifndef __PAGETABLE_PMD_FOLDED
-       pmd_t *(*get_pmd_virt)(phys_addr_t pa);
-       phys_addr_t (*alloc_pmd)(uintptr_t va);
-#endif
-};
-
  static phys_addr_t dma32_phys_limit __initdata;
  
  static void __init zone_sizes_init(void)
@@ -102,10 +99,14 @@ static void __init print_vm_layout(void)
                   (unsigned long)VMALLOC_END);
         print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
                   (unsigned long)high_memory);
-#ifdef CONFIG_64BIT
-       print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
-                 (unsigned long)ADDRESS_SPACE_END);
+       if (IS_ENABLED(CONFIG_64BIT)) {
+#ifdef CONFIG_KASAN
+               print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
  #endif
+
+               print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+                         (unsigned long)ADDRESS_SPACE_END);
+       }
  }
  #else
  static void print_vm_layout(void) { }
@@ -130,18 +131,8 @@ void __init mem_init(void)
         print_vm_layout();
  }
  
-/*
- * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
- * whereas for 64-bit kernel, the end of the virtual address space is occupied
- * by the modules/BPF/kernel mappings which reduces the available size of the
- * linear mapping.
- * Limit the memory size via mem.
- */
-#ifdef CONFIG_64BIT
-static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
-#else
-static phys_addr_t memory_limit = -PAGE_OFFSET;
-#endif
+/* Limit the memory size via mem. */
+static phys_addr_t memory_limit;
  
  static int __init early_mem(char *p)
  {
@@ -162,35 +153,31 @@ early_param("mem", early_mem);
  static void __init setup_bootmem(void)
  {
         phys_addr_t vmlinux_end = __pa_symbol(&_end);
-       phys_addr_t vmlinux_start = __pa_symbol(&_start);
-       phys_addr_t __maybe_unused max_mapped_addr;
-       phys_addr_t phys_ram_end;
+       phys_addr_t max_mapped_addr;
+       phys_addr_t phys_ram_end, vmlinux_start;
  
-#ifdef CONFIG_XIP_KERNEL
-       vmlinux_start = __pa_symbol(&_sdata);
-#endif
+       if (IS_ENABLED(CONFIG_XIP_KERNEL))
+               vmlinux_start = __pa_symbol(&_sdata);
+       else
+               vmlinux_start = __pa_symbol(&_start);
  
         memblock_enforce_memory_limit(memory_limit);
  
         /*
-        * Reserve from the start of the kernel to the end of the kernel
-        */
-#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
-       /*
          * Make sure we align the reservation on PMD_SIZE since we will
          * map the kernel in the linear mapping as read-only: we do not want
          * any allocation to happen between _end and the next pmd aligned page.
          */
-       vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
-#endif
+       if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+               vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+       /*
+        * Reserve from the start of the kernel to the end of the kernel
+        */
         memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
  
-
         phys_ram_end = memblock_end_of_DRAM();
-#ifndef CONFIG_XIP_KERNEL
-       phys_ram_base = memblock_start_of_DRAM();
-#endif
-#ifndef CONFIG_64BIT
+       if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+               phys_ram_base = memblock_start_of_DRAM();
         /*
          * memblock allocator is not aware of the fact that last 4K bytes of
          * the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -200,10 +187,11 @@ static void __init setup_bootmem(void)
          * address space is occupied by the kernel mapping then this check must
          * be done as soon as the kernel mapping base address is determined.
          */
-       max_mapped_addr = __pa(~(ulong)0);
-       if (max_mapped_addr == (phys_ram_end - 1))
-               memblock_set_current_limit(max_mapped_addr - 4096);
-#endif
+       if (!IS_ENABLED(CONFIG_64BIT)) {
+               max_mapped_addr = __pa(~(ulong)0);
+               if (max_mapped_addr == (phys_ram_end - 1))
+                       memblock_set_current_limit(max_mapped_addr - 4096);
+       }
  
         min_low_pfn = PFN_UP(phys_ram_base);
         max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
@@ -229,13 +217,7 @@ static void __init setup_bootmem(void)
  }
  
  #ifdef CONFIG_MMU
-static struct pt_alloc_ops _pt_ops __initdata;
-
-#ifdef CONFIG_XIP_KERNEL
-#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
-#else
-#define pt_ops _pt_ops
-#endif
+struct pt_alloc_ops pt_ops __initdata;
  
  unsigned long riscv_pfn_base __ro_after_init;
  EXPORT_SYMBOL(riscv_pfn_base);
@@ -245,9 +227,11 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
  static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
  
  pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
  static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
  
  #ifdef CONFIG_XIP_KERNEL
+#define pt_ops                 (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
  #define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
  #define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
  #define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -333,6 +317,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
  #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
  #endif /* CONFIG_XIP_KERNEL */
  
+static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
+#define fixmap_pud     ((pud_t *)XIP_FIXUP(fixmap_pud))
+#define early_pud      ((pud_t *)XIP_FIXUP(early_pud))
+#endif /* CONFIG_XIP_KERNEL */
+
  static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
  {
         /* Before MMU is enabled */
@@ -352,7 +346,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
  
  static phys_addr_t __init alloc_pmd_early(uintptr_t va)
  {
-       BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+       BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
  
         return (uintptr_t)early_pmd;
  }
@@ -399,21 +393,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
         create_pte_mapping(ptep, va, pa, sz, prot);
  }
  
-#define pgd_next_t             pmd_t
-#define alloc_pgd_next(__va)   pt_ops.alloc_pmd(__va)
-#define get_pgd_next_virt(__pa)        pt_ops.get_pmd_virt(__pa)
+/*
+ * Early (MMU not enabled) lookup: the physical address of a PUD table is used
+ * as the pointer to it, unchanged.
+ */
+static pud_t *__init get_pud_virt_early(phys_addr_t pa)
+{
+       uintptr_t addr = (uintptr_t)pa;
+
+       return (pud_t *)addr;
+}
+
+/*
+ * Fixmap-stage lookup: clear whatever currently occupies the FIX_PUD slot,
+ * then map the PUD table located at @pa there and return the pointer produced
+ * by set_fixmap_offset().
+ */
+static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
+{
+       pud_t *pudp;
+
+       clear_fixmap(FIX_PUD);
+       pudp = (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+
+       return pudp;
+}
+
+/* Late lookup: translate the PUD table's physical address with __va(). */
+static pud_t *__init get_pud_virt_late(phys_addr_t pa)
+{
+       pud_t *pudp = (pud_t *)__va(pa);
+
+       return pudp;
+}
+
+/*
+ * Early "allocator" for PUD tables: always returns the address of the single
+ * static early_pud table.
+ */
+static phys_addr_t __init alloc_pud_early(uintptr_t va)
+{
+       uintptr_t offset = va - kernel_map.virt_addr;
+
+       /*
+        * Only one PUD is available for early mapping, so @va has to fall
+        * within PGDIR_SIZE of kernel_map.virt_addr.
+        */
+       BUG_ON(offset >> PGDIR_SHIFT);
+
+       return (uintptr_t)early_pud;
+}
+
+/*
+ * Fixmap-stage allocator: take one page-aligned page from memblock for a new
+ * PUD table and return its physical address. @va is not consulted.
+ */
+static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
+{
+       phys_addr_t pud_pa = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+
+       return pud_pa;
+}
+
+/*
+ * Late allocator for PUD tables: takes one page from the page allocator
+ * (GFP_KERNEL), BUGs if none could be obtained, and returns the physical
+ * address of that page. @va is not consulted.
+ *
+ * Marked __init like the other pud helpers: it is only ever installed into
+ * pt_ops (itself __initdata) by pt_ops_set_late(), so it does not need to
+ * stay in kernel text once init memory is released.
+ */
+static phys_addr_t __init alloc_pud_late(uintptr_t va)
+{
+       unsigned long vaddr;
+
+       vaddr = __get_free_page(GFP_KERNEL);
+       BUG_ON(!vaddr);
+       return __pa(vaddr);
+}
+
+/*
+ * Install a mapping for @va in the PUD table @pudp.
+ *
+ * When @sz is PUD_SIZE the entry for @va is written directly from @pa and
+ * @prot, unless that entry is already non-zero, in which case it is left
+ * alone. For any other size the entry must point to a PMD table: one is
+ * obtained through pt_ops (and zeroed) if the entry is empty, otherwise the
+ * existing one is looked up, and the request is passed on to
+ * create_pmd_mapping().
+ */
+static void __init create_pud_mapping(pud_t *pudp,
+                                     uintptr_t va, phys_addr_t pa,
+                                     phys_addr_t sz, pgprot_t prot)
+{
+       uintptr_t idx = pud_index(va);
+       pud_t *entry = &pudp[idx];
+       phys_addr_t pmd_pa;
+       pmd_t *pmdp;
+
+       if (sz == PUD_SIZE) {
+               /* Never overwrite an entry that is already populated */
+               if (!pud_val(*entry))
+                       *entry = pfn_pud(PFN_DOWN(pa), prot);
+               return;
+       }
+
+       if (pud_val(*entry)) {
+               /* Reuse the PMD table this entry already points to */
+               pmd_pa = PFN_PHYS(_pud_pfn(*entry));
+               pmdp = pt_ops.get_pmd_virt(pmd_pa);
+       } else {
+               /* Hook up a fresh, zeroed PMD table */
+               pmd_pa = pt_ops.alloc_pmd(va);
+               *entry = pfn_pud(PFN_DOWN(pmd_pa), PAGE_TABLE);
+               pmdp = pt_ops.get_pmd_virt(pmd_pa);
+               memset(pmdp, 0, PAGE_SIZE);
+       }
+
+       create_pmd_mapping(pmdp, va, pa, sz, prot);
+}
+
+#define pgd_next_t             pud_t
+#define alloc_pgd_next(__va)   (pgtable_l4_enabled ?                   \
+               pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
+#define get_pgd_next_virt(__pa)        (pgtable_l4_enabled ?                   \
+               pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
  #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)     \
-       create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next                fixmap_pmd
+                               (pgtable_l4_enabled ?                   \
+               create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
+               create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
+#define fixmap_pgd_next                (pgtable_l4_enabled ?                   \
+               (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
+#define trampoline_pgd_next    (pgtable_l4_enabled ?                   \
+               (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
+#define early_dtb_pgd_next     (pgtable_l4_enabled ?                   \
+               (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
  #else
  #define pgd_next_t             pte_t
  #define alloc_pgd_next(__va)   pt_ops.alloc_pte(__va)
  #define get_pgd_next_virt(__pa)        pt_ops.get_pte_virt(__pa)
  #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)     \
         create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next                fixmap_pte
+#define fixmap_pgd_next                ((uintptr_t)fixmap_pte)
+#define early_dtb_pgd_next     ((uintptr_t)early_dtb_pmd)
+#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
  #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
  
  void __init create_pgd_mapping(pgd_t *pgdp,
                                       uintptr_t va, phys_addr_t pa,
@@ -452,6 +522,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
  }
  
  #ifdef CONFIG_XIP_KERNEL
+extern char _xiprom[], _exiprom[], __data_loc;
+
  /* called from head.S with MMU off */
  asmlinkage void __init __copy_data(void)
  {
@@ -500,6 +572,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
  }
  #endif /* CONFIG_STRICT_KERNEL_RWX */
  
+#ifdef CONFIG_64BIT
+/*
+ * Switch the boot-time defaults away from sv48: select SATP_MODE_39, clear
+ * pgtable_l4_enabled and set kernel_map.page_offset to PAGE_OFFSET_L3.
+ */
+static void __init disable_pgtable_l4(void)
+{
+       satp_mode = SATP_MODE_39;
+       pgtable_l4_enabled = false;
+       kernel_map.page_offset = PAGE_OFFSET_L3;
+}
+
+/*
+ * There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ *
+ * Steps, as performed below:
+ *  1. Identity-map (va == pa, PAGE_KERNEL_EXEC) the PMD_SIZE-aligned region
+ *     that holds set_satp_mode() and the region right after it, going
+ *     through early_pg_dir -> early_pud -> early_pmd.
+ *  2. Write CSR_SATP with the PFN of early_pg_dir or'ed with satp_mode, then
+ *     csr_swap() it with 0 (presumably this returns the value the register
+ *     held while writing 0 back - confirm against the csr_swap() definition).
+ *  3. If the value read back differs from the one written, call
+ *     disable_pgtable_l4().
+ *  4. Zero early_pg_dir, early_pud and early_pmd again so the probe leaves
+ *     nothing behind in them.
+ *
+ * setup_vm() only calls this when CONFIG_64BIT is set and CONFIG_XIP_KERNEL
+ * is not.
+ */
+static __init void set_satp_mode(void)
+{
+       u64 identity_satp, hw_satp;
+       uintptr_t set_satp_mode_pmd;
+
+       set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+       create_pgd_mapping(early_pg_dir,
+                          set_satp_mode_pmd, (uintptr_t)early_pud,
+                          PGDIR_SIZE, PAGE_TABLE);
+       create_pud_mapping(early_pud,
+                          set_satp_mode_pmd, (uintptr_t)early_pmd,
+                          PUD_SIZE, PAGE_TABLE);
+       /*
+        * Map two consecutive PMD_SIZE regions to handle the case where
+        * set_satp_mode straddles 2 PMDs.
+        */
+       create_pmd_mapping(early_pmd,
+                          set_satp_mode_pmd, set_satp_mode_pmd,
+                          PMD_SIZE, PAGE_KERNEL_EXEC);
+       create_pmd_mapping(early_pmd,
+                          set_satp_mode_pmd + PMD_SIZE,
+                          set_satp_mode_pmd + PMD_SIZE,
+                          PMD_SIZE, PAGE_KERNEL_EXEC);
+
+       identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+
+       local_flush_tlb_all();
+       csr_write(CSR_SATP, identity_satp);
+       hw_satp = csr_swap(CSR_SATP, 0ULL);
+       local_flush_tlb_all();
+
+       if (hw_satp != identity_satp)
+               disable_pgtable_l4();
+
+       memset(early_pg_dir, 0, PAGE_SIZE);
+       memset(early_pud, 0, PAGE_SIZE);
+       memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
+
  /*
   * setup_vm() is called from head.S with MMU-off.
   *
@@ -564,10 +687,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
         uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
  
         create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
-                          IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+                          IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
                            PGDIR_SIZE,
                            IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
  
+       if (pgtable_l4_enabled) {
+               create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+                                  (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+       }
+
         if (IS_ENABLED(CONFIG_64BIT)) {
                 create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
                                    pa, PMD_SIZE, PAGE_KERNEL);
@@ -589,11 +717,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
         dtb_early_pa = dtb_pa;
  }
  
+/*
+ * MMU is not enabled: install the *_early page table helpers, which work
+ * with physical addresses and hand out the static early_pmd / early_pud
+ * tables.
+ */
+void __init pt_ops_set_early(void)
+{
+       /* PTE level */
+       pt_ops.get_pte_virt = get_pte_virt_early;
+       pt_ops.alloc_pte = alloc_pte_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+       /* PMD level */
+       pt_ops.get_pmd_virt = get_pmd_virt_early;
+       pt_ops.alloc_pmd = alloc_pmd_early;
+       /* PUD level */
+       pt_ops.get_pud_virt = get_pud_virt_early;
+       pt_ops.alloc_pud = alloc_pud_early;
+#endif
+}
+
+/*
+ * Install the *_fixmap page table helpers. They are meant for the phase where
+ * the MMU is enabled but page table setup is not complete yet: since the
+ * linear mapping does not exist at that point, freshly allocated physical
+ * pages are temporarily mapped through the fixmap.
+ *
+ * Note that this function itself is called with MMU disabled, which is why
+ * each helper address goes through kernel_mapping_pa_to_va() before being
+ * stored; the helpers are only used later, as described above.
+ */
+void __init pt_ops_set_fixmap(void)
+{
+       /* PTE level */
+       pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+       pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+#ifndef __PAGETABLE_PMD_FOLDED
+       /* PMD level */
+       pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+       pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+       /* PUD level */
+       pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+       pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+#endif
+}
+
+/*
+ * MMU is enabled and page table setup is complete: from now on the *_late
+ * helpers, built on the generic page allocation functions, are used to set
+ * up page tables.
+ */
+void __init pt_ops_set_late(void)
+{
+       /* PTE level */
+       pt_ops.get_pte_virt = get_pte_virt_late;
+       pt_ops.alloc_pte = alloc_pte_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+       /* PMD level */
+       pt_ops.get_pmd_virt = get_pmd_virt_late;
+       pt_ops.alloc_pmd = alloc_pmd_late;
+       /* PUD level */
+       pt_ops.get_pud_virt = get_pud_virt_late;
+       pt_ops.alloc_pud = alloc_pud_late;
+#endif
+}
+
  asmlinkage void __init setup_vm(uintptr_t dtb_pa)
  {
         pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
  
         kernel_map.virt_addr = KERNEL_LINK_ADDR;
+       kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
  
  #ifdef CONFIG_XIP_KERNEL
         kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
@@ -608,11 +789,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
         kernel_map.phys_addr = (uintptr_t)(&_start);
         kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
  #endif
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+       set_satp_mode();
+#endif
+
         kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
         kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
  
         riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
  
+       /*
+        * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
+        * kernel, whereas for 64-bit kernel, the end of the virtual address
+        * space is occupied by the modules/BPF/kernel mappings which reduces
+        * the available size of the linear mapping.
+        */
+       memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
+
         /* Sanity check alignment and size */
         BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
         BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
@@ -625,23 +819,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
         BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
  #endif
  
-       pt_ops.alloc_pte = alloc_pte_early;
-       pt_ops.get_pte_virt = get_pte_virt_early;
-#ifndef __PAGETABLE_PMD_FOLDED
-       pt_ops.alloc_pmd = alloc_pmd_early;
-       pt_ops.get_pmd_virt = get_pmd_virt_early;
-#endif
+       pt_ops_set_early();
+
         /* Setup early PGD for fixmap */
         create_pgd_mapping(early_pg_dir, FIXADDR_START,
-                          (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+                          fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
  
  #ifndef __PAGETABLE_PMD_FOLDED
-       /* Setup fixmap PMD */
+       /* Setup fixmap PUD and PMD */
+       if (pgtable_l4_enabled)
+               create_pud_mapping(fixmap_pud, FIXADDR_START,
+                                  (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
         create_pmd_mapping(fixmap_pmd, FIXADDR_START,
                            (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
         /* Setup trampoline PGD and PMD */
         create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
-                          (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+                          trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+       if (pgtable_l4_enabled)
+               create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
+                                  (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
  #ifdef CONFIG_XIP_KERNEL
         create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
                            kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
@@ -669,7 +865,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
          * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
          * range can not span multiple pmds.
          */
-       BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+       BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                      != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
  
  #ifndef __PAGETABLE_PMD_FOLDED
@@ -694,6 +890,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
                 pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
         }
  #endif
+
+       pt_ops_set_fixmap();
  }
  
  static void __init setup_vm_final(void)
@@ -702,16 +900,6 @@ static void __init setup_vm_final(void)
         phys_addr_t pa, start, end;
         u64 i;
  
-       /**
-        * MMU is enabled at this point. But page table setup is not complete yet.
-        * fixmap page table alloc functions should be used at this point
-        */
-       pt_ops.alloc_pte = alloc_pte_fixmap;
-       pt_ops.get_pte_virt = get_pte_virt_fixmap;
-#ifndef __PAGETABLE_PMD_FOLDED
-       pt_ops.alloc_pmd = alloc_pmd_fixmap;
-       pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
-#endif
         /* Setup swapper PGD for fixmap */
         create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
                            __pa_symbol(fixmap_pgd_next),
@@ -736,26 +924,24 @@ static void __init setup_vm_final(void)
                 }
         }
  
-#ifdef CONFIG_64BIT
         /* Map the kernel */
-       create_kernel_page_table(swapper_pg_dir, false);
+       if (IS_ENABLED(CONFIG_64BIT))
+               create_kernel_page_table(swapper_pg_dir, false);
+
+#ifdef CONFIG_KASAN
+       kasan_swapper_init();
  #endif
  
         /* Clear fixmap PTE and PMD mappings */
         clear_fixmap(FIX_PTE);
         clear_fixmap(FIX_PMD);
+       clear_fixmap(FIX_PUD);
  
         /* Move to swapper page table */
-       csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+       csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
         local_flush_tlb_all();
  
-       /* generic page allocation functions must be used to setup page table */
-       pt_ops.alloc_pte = alloc_pte_late;
-       pt_ops.get_pte_virt = get_pte_virt_late;
-#ifndef __PAGETABLE_PMD_FOLDED
-       pt_ops.alloc_pmd = alloc_pmd_late;
-       pt_ops.get_pmd_virt = get_pmd_virt_late;
-#endif
+       pt_ops_set_late();
  }
  #else
  asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -791,12 +977,10 @@ static void __init reserve_crashkernel(void)
          * since it doesn't make much sense and we have limited memory
          * resources.
          */
-#ifdef CONFIG_CRASH_DUMP
         if (is_kdump_kernel()) {
                 pr_info("crashkernel: ignoring reservation request\n");
                 return;
         }
-#endif
  
         ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
                                 &crash_size, &crash_base);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c

index 54294f8..f61f7ca 100644 (file)
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -11,45 +11,27 @@
  #include <asm/fixmap.h>
  #include <asm/pgalloc.h>
  
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
-asmlinkage void __init kasan_early_init(void)
-{
-       uintptr_t i;
-       pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
-
-       BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
-               KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-
-       for (i = 0; i < PTRS_PER_PTE; ++i)
-               set_pte(kasan_early_shadow_pte + i,
-                       mk_pte(virt_to_page(kasan_early_shadow_page),
-                              PAGE_KERNEL));
-
-       for (i = 0; i < PTRS_PER_PMD; ++i)
-               set_pmd(kasan_early_shadow_pmd + i,
-                       pfn_pmd(PFN_DOWN
-                               (__pa((uintptr_t) kasan_early_shadow_pte)),
-                               __pgprot(_PAGE_TABLE)));
-
-       for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
-            i += PGDIR_SIZE, ++pgd)
-               set_pgd(pgd,
-                       pfn_pgd(PFN_DOWN
-                               (__pa(((uintptr_t) kasan_early_shadow_pmd))),
-                               __pgprot(_PAGE_TABLE)));
-
-       /* init for swapper_pg_dir */
-       pgd = pgd_offset_k(KASAN_SHADOW_START);
-
-       for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
-            i += PGDIR_SIZE, ++pgd)
-               set_pgd(pgd,
-                       pfn_pgd(PFN_DOWN
-                               (__pa(((uintptr_t) kasan_early_shadow_pmd))),
-                               __pgprot(_PAGE_TABLE)));
+/*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ *   kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ *   lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
  
-       local_flush_tlb_all();
-}
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
  
  static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
  {
@@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
         set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
  }
  
-static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
  {
         phys_addr_t phys_addr;
         pmd_t *pmdp, *base_pmd;
         unsigned long next;
  
-       base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
-       if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+       if (pud_none(*pud)) {
                 base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+       } else {
+               base_pmd = (pmd_t *)pud_pgtable(*pud);
+               if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+                       base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+       }
  
         pmdp = base_pmd + pmd_index(vaddr);
  
@@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned
          * it entirely, memblock could allocate a page at a physical address
          * where KASAN is not populated yet and then we'd get a page fault.
          */
-       set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+       set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pud(pgd_t *pgd,
+                                     unsigned long vaddr, unsigned long end,
+                                     bool early)
+{
+       phys_addr_t phys_addr;
+       pud_t *pudp, *base_pud;
+       unsigned long next;
+
+       if (early) {
+               /*
+                * We can't use pgd_page_vaddr here: it would return a linear
+                * mapping address, which is not mapped yet. When populating
+                * early_pg_dir we need the physical address, and when populating
+                * swapper_pg_dir we need the kernel virtual address, so use the
+                * pt_ops facility.
+                */
+               base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+       } else {
+               base_pud = (pud_t *)pgd_page_vaddr(*pgd);
+               if (base_pud == lm_alias(kasan_early_shadow_pud))
+                       base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+       }
+
+       pudp = base_pud + pud_index(vaddr);
+
+       do {
+               next = pud_addr_end(vaddr, end);
+
+               if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+                       if (early) {
+                               phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
+                               set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+                               continue;
+                       } else {
+                               phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+                               if (phys_addr) {
+                                       set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+                                       continue;
+                               }
+                       }
+               }
+
+               kasan_populate_pmd(pudp, vaddr, next);
+       } while (pudp++, vaddr = next, vaddr != end);
+
+       /*
+        * Wait for the whole PGD to be populated before setting the PGD in
+        * the page table, otherwise, if we did set the PGD before populating
+        * it entirely, memblock could allocate a page at a physical address
+        * where KASAN is not populated yet and then we'd get a page fault.
+        */
+       if (!early)
+               set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
  }
  
-static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+#define kasan_early_shadow_pgd_next                    (pgtable_l4_enabled ?   \
+                               (uintptr_t)kasan_early_shadow_pud :             \
+                               (uintptr_t)kasan_early_shadow_pmd)
+#define kasan_populate_pgd_next(pgdp, vaddr, next, early)                      \
+               (pgtable_l4_enabled ?                                           \
+                       kasan_populate_pud(pgdp, vaddr, next, early) :          \
+                       kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
+
+static void __init kasan_populate_pgd(pgd_t *pgdp,
+                                     unsigned long vaddr, unsigned long end,
+                                     bool early)
  {
         phys_addr_t phys_addr;
-       pgd_t *pgdp = pgd_offset_k(vaddr);
         unsigned long next;
  
         do {
                 next = pgd_addr_end(vaddr, end);
  
-               /*
-                * pgdp can't be none since kasan_early_init initialized all KASAN
-                * shadow region with kasan_early_shadow_pmd: if this is stillthe case,
-                * that means we can try to allocate a hugepage as a replacement.
-                */
-               if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
-                   IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
-                       phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
-                       if (phys_addr) {
-                               set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+               if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+                       if (early) {
+                               phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
+                               set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
                                 continue;
+                       } else if (pgd_page_vaddr(*pgdp) ==
+                                  (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+                               /*
+                                * pgdp can't be none since kasan_early_init
+                                * initialized the whole KASAN shadow region with
+                                * kasan_early_shadow_pgd_next: if this is still
+                                * the case, that means we can try to allocate a
+                                * hugepage as a replacement.
+                                */
+                               phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+                               if (phys_addr) {
+                                       set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+                                       continue;
+                               }
                         }
                 }
  
-               kasan_populate_pmd(pgdp, vaddr, next);
+               kasan_populate_pgd_next(pgdp, vaddr, next, early);
         } while (pgdp++, vaddr = next, vaddr != end);
  }
  
+asmlinkage void __init kasan_early_init(void)
+{
+       uintptr_t i;
+
+       BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+               KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+       for (i = 0; i < PTRS_PER_PTE; ++i)
+               set_pte(kasan_early_shadow_pte + i,
+                       mk_pte(virt_to_page(kasan_early_shadow_page),
+                              PAGE_KERNEL));
+
+       for (i = 0; i < PTRS_PER_PMD; ++i)
+               set_pmd(kasan_early_shadow_pmd + i,
+                       pfn_pmd(PFN_DOWN
+                               (__pa((uintptr_t)kasan_early_shadow_pte)),
+                               PAGE_TABLE));
+
+       if (pgtable_l4_enabled) {
+               for (i = 0; i < PTRS_PER_PUD; ++i)
+                       set_pud(kasan_early_shadow_pud + i,
+                               pfn_pud(PFN_DOWN
+                                       (__pa(((uintptr_t)kasan_early_shadow_pmd))),
+                                       PAGE_TABLE));
+       }
+
+       kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+                          KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+       local_flush_tlb_all();
+}
+
+void __init kasan_swapper_init(void)
+{
+       kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+                          KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+       local_flush_tlb_all();
+}
+
  static void __init kasan_populate(void *start, void *end)
  {
         unsigned long vaddr = (unsigned long)start & PAGE_MASK;
         unsigned long vend = PAGE_ALIGN((unsigned long)end);
  
-       kasan_populate_pgd(vaddr, vend);
+       kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
  
         local_flush_tlb_all();
         memset(start, KASAN_SHADOW_INIT, end - start);
  }
  
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+                                             unsigned long vaddr, unsigned long end,
+                                             bool kasan_populate)
+{
+       unsigned long next;
+       pud_t *pudp, *base_pud;
+       pmd_t *base_pmd;
+       bool is_kasan_pmd;
+
+       base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+       pudp = base_pud + pud_index(vaddr);
+
+       if (kasan_populate)
+               memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
+                      sizeof(pud_t) * PTRS_PER_PUD);
+
+       do {
+               next = pud_addr_end(vaddr, end);
+               is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+               if (is_kasan_pmd) {
+                       base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+                       set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+               }
+       } while (pudp++, vaddr = next, vaddr != end);
+}
+
  static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
  {
         unsigned long next;
         void *p;
         pgd_t *pgd_k = pgd_offset_k(vaddr);
+       bool is_kasan_pgd_next;
  
         do {
                 next = pgd_addr_end(vaddr, end);
-               if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+               is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+                                    (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+               if (is_kasan_pgd_next) {
                         p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
                         set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
                 }
+
+               if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+                       continue;
+
+               kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
         } while (pgd_k++, vaddr = next, vaddr != end);
  }
  
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c

index 64f8201..37ed760 100644 (file)
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
                                   unsigned long size, unsigned long stride)
  {
         struct cpumask *cmask = mm_cpumask(mm);
-       struct cpumask hmask;
         unsigned int cpuid;
         bool broadcast;
  
@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
                 unsigned long asid = atomic_long_read(&mm->context.id);
  
                 if (broadcast) {
-                       riscv_cpuid_to_hartid_mask(cmask, &hmask);
-                       sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
-                                                  start, size, asid);
+                       sbi_remote_sfence_vma_asid(cmask, start, size, asid);
                 } else if (size <= stride) {
                         local_flush_tlb_page_asid(start, asid);
                 } else {
@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
                 }
         } else {
                 if (broadcast) {
-                       riscv_cpuid_to_hartid_mask(cmask, &hmask);
-                       sbi_remote_sfence_vma(cpumask_bits(&hmask),
-                                             start, size);
+                       sbi_remote_sfence_vma(cmask, start, size);
                 } else if (size <= stride) {
                         local_flush_tlb_page(start);
                 } else {
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c

index 293dd6e..0bcda99 100644 (file)
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -497,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
         offset = pc - (long)&ex->insn;
         if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
                 return -ERANGE;
-       ex->insn = pc;
+       ex->insn = offset;
  
         /*
          * Since the extable follows the program, the fixup offset is always
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c

index e87e7f1..da93864 100644 (file)
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -40,6 +40,8 @@
  
  #ifdef CONFIG_ARM64
  # define EFI_RT_VIRTUAL_LIMIT  DEFAULT_MAP_WINDOW_64
+#elif defined(CONFIG_RISCV)
+# define EFI_RT_VIRTUAL_LIMIT  TASK_SIZE_MIN
  #else
  # define EFI_RT_VIRTUAL_LIMIT  TASK_SIZE
  #endif
diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig

index 853096b..2527cf5 100644 (file)
--- a/drivers/soc/canaan/Kconfig
+++ b/drivers/soc/canaan/Kconfig
@@ -5,7 +5,6 @@ config SOC_K210_SYSCTL
         depends on RISCV && SOC_CANAAN && OF
         default SOC_CANAAN
          select PM
-        select SYSCON
          select MFD_SYSCON
         help
           Canaan Kendryte K210 SoC system controller driver.
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h

index 02932ef..977bea1 100644 (file)
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
  
  #if CONFIG_PGTABLE_LEVELS > 3
  
+static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+       gfp_t gfp = GFP_PGTABLE_USER;
+
+       if (mm == &init_mm)
+               gfp = GFP_PGTABLE_KERNEL;
+       return (pud_t *)get_zeroed_page(gfp);
+}
+
  #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
  /**
   * pud_alloc_one - allocate a page for PUD-level page table
@@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
   */
  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
  {
-       gfp_t gfp = GFP_PGTABLE_USER;
-
-       if (mm == &init_mm)
-               gfp = GFP_PGTABLE_KERNEL;
-       return (pud_t *)get_zeroed_page(gfp);
+       return __pud_alloc_one(mm, addr);
  }
  #endif
  
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
  {
         BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
         free_page((unsigned long)pud);
  }
  
+#ifndef __HAVE_ARCH_PUD_FREE
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+       __pud_free(mm, pud);
+}
+#endif
+
  #endif /* CONFIG_PGTABLE_LEVELS > 3 */
  
  #ifndef __HAVE_ARCH_PGD_FREE
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 22 Jan 2022 07:34:49 +0000 (09:34 +0200)
Documentation/riscv/vm-layout.rst		patch \| blob \| history
arch/riscv/Kconfig		patch \| blob \| history
arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts		patch \| blob \| history
arch/riscv/configs/nommu_k210_defconfig		patch \| blob \| history
arch/riscv/configs/nommu_k210_sdcard_defconfig		patch \| blob \| history
arch/riscv/configs/nommu_virt_defconfig		patch \| blob \| history
arch/riscv/include/asm/cpu_ops.h		patch \| blob \| history
arch/riscv/include/asm/cpu_ops_sbi.h	[new file with mode: 0644]	patch \| blob
arch/riscv/include/asm/csr.h		patch \| blob \| history
arch/riscv/include/asm/fixmap.h		patch \| blob \| history
arch/riscv/include/asm/kasan.h		patch \| blob \| history
arch/riscv/include/asm/page.h		patch \| blob \| history
arch/riscv/include/asm/pgalloc.h		patch \| blob \| history
arch/riscv/include/asm/pgtable-64.h		patch \| blob \| history
arch/riscv/include/asm/pgtable.h		patch \| blob \| history
arch/riscv/include/asm/sbi.h		patch \| blob \| history
arch/riscv/include/asm/smp.h		patch \| blob \| history
arch/riscv/include/asm/sparsemem.h		patch \| blob \| history
arch/riscv/kernel/Makefile		patch \| blob \| history
arch/riscv/kernel/asm-offsets.c		patch \| blob \| history
arch/riscv/kernel/cpu.c		patch \| blob \| history
arch/riscv/kernel/cpu_ops.c		patch \| blob \| history
arch/riscv/kernel/cpu_ops_sbi.c		patch \| blob \| history
arch/riscv/kernel/cpu_ops_spinwait.c		patch \| blob \| history
arch/riscv/kernel/head.S		patch \| blob \| history
arch/riscv/kernel/head.h		patch \| blob \| history
arch/riscv/kernel/ptrace.c		patch \| blob \| history
arch/riscv/kernel/sbi.c		patch \| blob \| history
arch/riscv/kernel/setup.c		patch \| blob \| history
arch/riscv/kernel/smpboot.c		patch \| blob \| history
arch/riscv/kvm/mmu.c		patch \| blob \| history
arch/riscv/kvm/vcpu_sbi_replace.c		patch \| blob \| history
arch/riscv/kvm/vcpu_sbi_v01.c		patch \| blob \| history
arch/riscv/kvm/vmid.c		patch \| blob \| history
arch/riscv/mm/cacheflush.c		patch \| blob \| history
arch/riscv/mm/context.c		patch \| blob \| history
arch/riscv/mm/init.c		patch \| blob \| history
arch/riscv/mm/kasan_init.c		patch \| blob \| history
arch/riscv/mm/tlbflush.c		patch \| blob \| history
arch/riscv/net/bpf_jit_comp64.c		patch \| blob \| history
drivers/firmware/efi/libstub/efi-stub.c		patch \| blob \| history
drivers/soc/canaan/Kconfig		patch \| blob \| history
include/asm-generic/pgalloc.h		patch \| blob \| history