From: Palmer Dabbelt
Date: Thu, 20 Jan 2022 03:23:41 +0000 (-0800)
Subject: RISC-V: Introduce sv48 support without relocatable kernel
X-Git-Tag: v6.6.17~8338^2~12
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0c34e79e52bb13881c08f1a2c2390b7b88ff10c7;p=platform%2Fkernel%2Flinux-rpi.git

RISC-V: Introduce sv48 support without relocatable kernel

This patchset makes it possible to have a single kernel for sv39 and
sv48 without it being relocatable.

The idea comes from Arnd Bergmann, who suggested doing the same as x86,
that is, mapping the kernel to the end of the address space. This allows
the kernel to be linked at the same address for both sv39 and sv48 and
therefore does not require relocation at runtime.

This implements sv48 support at runtime: the kernel will try to boot
with a 4-level page table and will fall back to a 3-level one if the HW
does not support it. Folding the 4th level into a 3-level page table has
almost no cost at runtime.

Note that the KASAN region had to be moved to the end of the address
space since its location must be known at compile time and must then be
valid for both sv39 and sv48 (and sv57, which is coming).

* riscv-sv48-v3:
  riscv: Explicit comment about user virtual address space size
  riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo
  riscv: Implement sv48 support
  asm-generic: Prepare for riscv use of pud_alloc_one and pud_free
  riscv: Allow to dynamically define VA_BITS
  riscv: Introduce functions to switch pt_ops
  riscv: Split early kasan mapping to prepare sv48 introduction
  riscv: Move KASAN mapping next to the kernel mapping
  riscv: Get rid of MAXPHYSMEM configs

Signed-off-by: Palmer Dabbelt
---

0c34e79e52bb13881c08f1a2c2390b7b88ff10c7

diff --cc arch/riscv/include/asm/pgtable.h
index 67f687a,d0a96b5..7e949f2
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@@ -24,8 -24,19 +24,19 @@@
  #define KERNEL_LINK_ADDR PAGE_OFFSET
  #endif
+ /* Number of entries in the page global directory */
+ #define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+ /* Number of entries in the page table */
+ #define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+ /*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+ #define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
  #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
 -#define VMALLOC_END (PAGE_OFFSET - 1)
 +#define VMALLOC_END PAGE_OFFSET
  #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
  #define BPF_JIT_REGION_SIZE (SZ_128M)
@@@ -48,10 -61,16 +61,16 @@@
  * struct pages to map half the virtual address space. Then
  * position vmemmap directly below the VMALLOC region.
  */
+ #ifdef CONFIG_64BIT
+ #define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+ #else
+ #define VA_BITS 32
+ #endif
+
+
  #define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
  #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
 -#define VMEMMAP_END (VMALLOC_START - 1)
 +#define VMEMMAP_END VMALLOC_START
  #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
  /*
@@@ -107,12 -125,18 +125,20 @@@
  #define XIP_FIXUP(addr) (addr)
  #endif /* CONFIG_XIP_KERNEL */
- #ifdef CONFIG_MMU
- /* Number of entries in the page global directory */
- #define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
- /* Number of entries in the page table */
- #define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+ struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+ #ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+ #endif
+ };
+
++extern struct pt_alloc_ops pt_ops __initdata;
+
+ #ifdef CONFIG_MMU
  /* Number of PGD entries that a user-mode program can use */
  #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
diff --cc arch/riscv/mm/init.c
index c1fffec,7ba9158..8def6f8
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@@ -98,9 -104,13 +100,14 @@@ static void __init print_vm_layout(void
  (unsigned long)VMALLOC_END);
  print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
  (unsigned long)high_memory);
- if (IS_ENABLED(CONFIG_64BIT))
 -#ifdef CONFIG_64BIT
++ if (IS_ENABLED(CONFIG_64BIT)) {
+ #ifdef CONFIG_KASAN
- print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
 -#endif
- print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
- (unsigned long)ADDRESS_SPACE_END);
++ print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
+ #endif
++
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
++ }
  }
  #else
  static void print_vm_layout(void) { }
@@@ -221,7 -224,13 +218,7 @@@ static void __init setup_bootmem(void
  }
  #ifdef CONFIG_MMU
- static struct pt_alloc_ops pt_ops __initdata;
 -struct pt_alloc_ops _pt_ops __initdata;
 -
 -#ifdef CONFIG_XIP_KERNEL
 -#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
 -#else
 -#define pt_ops _pt_ops
 -#endif
++struct pt_alloc_ops pt_ops __initdata;
  unsigned long riscv_pfn_base __ro_after_init;
  EXPORT_SYMBOL(riscv_pfn_base);
@@@ -578,6 -726,58 +718,58 @@@ static void __init create_fdt_early_pag
  dtb_early_pa = dtb_pa;
  }
+ /*
+ * MMU is not enabled, the page tables are allocated directly using
+ * early_pmd/pud/p4d and the address returned is the physical one.
+ */
 -void pt_ops_set_early(void)
++void __init pt_ops_set_early(void)
+ {
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+ #ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+ pt_ops.alloc_pud = alloc_pud_early;
+ pt_ops.get_pud_virt = get_pud_virt_early;
+ #endif
+ }
+
+ /*
+ * MMU is enabled but page table setup is not complete yet.
+ * fixmap page table alloc functions must be used as a means to temporarily
+ * map the allocated physical pages since the linear mapping does not exist yet.
+ *
+ * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
+ * but it will be used as described above.
+ */
 -void pt_ops_set_fixmap(void)
++void __init pt_ops_set_fixmap(void)
+ {
+ pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+ pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+ #ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+ pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+ pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+ pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+ #endif
+ }
+
+ /*
+ * MMU is enabled and page table setup is complete, so from now, we can use
+ * generic page allocation functions to setup page table.
+ */
 -void pt_ops_set_late(void)
++void __init pt_ops_set_late(void)
+ {
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+ #ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+ pt_ops.alloc_pud = alloc_pud_late;
+ pt_ops.get_pud_virt = get_pud_virt_late;
+ #endif
+ }
+
  asmlinkage void __init setup_vm(uintptr_t dtb_pa)
  {
  pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
@@@ -717,10 -933,15 +925,14 @@@ static void __init setup_vm_final(void
  }
  }
 -#ifdef CONFIG_64BIT
  /* Map the kernel */
- create_kernel_page_table(swapper_pg_dir, false);
 -#endif
+ if (IS_ENABLED(CONFIG_64BIT))
+ create_kernel_page_table(swapper_pg_dir, false);
+ #ifdef CONFIG_KASAN
+ kasan_swapper_init();
+ #endif
+
  /* Clear fixmap PTE and PMD mappings */
  clear_fixmap(FIX_PTE);
  clear_fixmap(FIX_PMD);
diff --cc arch/riscv/mm/kasan_init.c
index 54294f8,993f505..f61f7ca
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@@ -11,45 -11,29 +11,27 @@@
  #include
  #include
- extern pgd_t early_pg_dir[PTRS_PER_PGD];
- asmlinkage void __init kasan_early_init(void)
- {
- uintptr_t i;
- pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+ /*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ * kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ * lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
- BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
- KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-
- for (i = 0; i < PTRS_PER_PTE; ++i)
- set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
-
- for (i = 0; i < PTRS_PER_PMD; ++i)
- set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN
- (__pa((uintptr_t) kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- /* init for swapper_pg_dir */
- pgd = pgd_offset_k(KASAN_SHADOW_START);
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- local_flush_tlb_all();
- }
+ extern pgd_t early_pg_dir[PTRS_PER_PGD];
 -extern struct pt_alloc_ops _pt_ops __initdata;
 -#define pt_ops _pt_ops
  static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
  {
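
The merge message above says the kernel first tries to boot with a 4-level
(sv48) page table and falls back to 3 levels (sv39) when the hardware refuses
it, but the probing code itself is not part of the hunks quoted here. The
sketch below only illustrates how such a probe can be done on RISC-V, in the
spirit of this series: write an sv48 satp value that points at a throwaway
root page table, read satp back, and clear pgtable_l4_enabled if the mode did
not stick. The helper name set_satp_mode_probe() and its probe_pgd_pa argument
are illustrative assumptions, not identifiers from this merge; csr_read(),
csr_write(), CSR_SATP, SATP_MODE_48, PFN_DOWN() and local_flush_tlb_all() are
existing kernel interfaces.

/*
 * Illustrative sketch only (not the code added by this merge): probe for
 * sv48 by trying to enable it and checking whether the write to satp took
 * effect.  Assumes probe_pgd_pa is the physical address of a temporary
 * top-level page table that identity-maps the code doing the probe, so
 * turning the MMU on here is safe.
 */
static void __init set_satp_mode_probe(uintptr_t probe_pgd_pa)
{
	unsigned long probe_satp = PFN_DOWN(probe_pgd_pa) | SATP_MODE_48;

	local_flush_tlb_all();
	csr_write(CSR_SATP, probe_satp);

	/*
	 * The privileged spec allows an implementation to ignore a write of
	 * an unsupported MODE, so reading back a different value means the
	 * HW only implements sv39: fold the 4th level away.
	 */
	if (csr_read(CSR_SATP) != probe_satp)
		pgtable_l4_enabled = false;

	/* Turn translation back off and flush the stale entries. */
	csr_write(CSR_SATP, 0);
	local_flush_tlb_all();
}

In the series itself such a probe necessarily runs very early (setup_vm(),
shown being adjusted above, is the natural place), before the first real satp
switch, since everything from VA_BITS to the KASAN layout depends on whether
pgtable_l4_enabled ends up set.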