From 671f9a3e2e24cdeb2d2856abee7422f093e23e29 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 28 Jun 2019 13:36:21 -0700 Subject: [PATCH] RISC-V: Setup initial page tables in two stages Currently, the setup_vm() does initial page table setup in one-shot very early before enabling MMU. Due to this, the setup_vm() has to map all possible kernel virtual addresses since it does not know size and location of RAM. This means we have kernel mappings for non-existent RAM and any buggy driver (or kernel) code doing out-of-bound access to RAM will not fault and cause underterministic behaviour. Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e. MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e. 516 KB) of memory for initial page tables which is never freed. The memory required for initial page tables will further increase if we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000) This patch implements two-staged initial page table setup, as follows: 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in a early page table (i.e. early_pg_dir). The early_pg_dir will be used only by boot HART so it can be freed as-part of init memory free-up. 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM banks in the final page table (i.e. swapper_pg_dir). The boot HART will start using swapper_pg_dir at the end of setup_vm_final(). All non-boot HARTs directly use the swapper_pg_dir created by boot HART. We have following advantages with this new approach: 1. Kernel mappings for non-existent RAM don't exists anymore. 2. Memory consumed by initial page tables is now indpendent of the chosen PAGE_OFFSET. 3. Memory consumed by initial page tables on RV64 system is 2 pages (i.e. 8 KB) which has significantly reduced and these pages will be freed as-part of the init memory free-up. The patch also provides a foundation for implementing strict kernel mappings where we protect kernel text and rodata using PTE permissions. Suggested-by: Mike Rapoport Signed-off-by: Anup Patel [paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/fixmap.h | 5 + arch/riscv/include/asm/pgtable-64.h | 5 + arch/riscv/include/asm/pgtable.h | 8 + arch/riscv/kernel/head.S | 17 +- arch/riscv/kernel/setup.c | 6 +- arch/riscv/mm/init.c | 307 ++++++++++++++++++++++++++++++------ 6 files changed, 284 insertions(+), 64 deletions(-) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index c207f66..9c66033 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -21,6 +21,11 @@ */ enum fixed_addresses { FIX_HOLE, +#define FIX_FDT_SIZE SZ_1M + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_PTE, + FIX_PMD, FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 45dfac2a..7463098 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -70,6 +70,11 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } +static inline unsigned long _pmd_pfn(pmd_t pmd) +{ + return pmd_val(pmd) >> _PAGE_PFN_SHIFT; +} + #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 18ea56f..a364aba 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -59,6 +59,8 @@ #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) +#define PAGE_TABLE __pgprot(_PAGE_TABLE) + extern pgd_t swapper_pg_dir[]; /* MAP_PRIVATE permissions: xwr (copy-on-write) */ @@ -118,6 +120,11 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } +static inline unsigned long _pgd_pfn(pgd_t pgd) +{ + return pgd_val(pgd) >> _PAGE_PFN_SHIFT; +} + #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) /* Locate an entry in the page global directory */ @@ -400,6 +407,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) /* FIXME */ #endif +extern void *dtb_early_va; extern void setup_bootmem(void); extern void paging_init(void); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 4e46f31..e368106 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -55,7 +55,9 @@ clear_bss_done: /* Initialize page tables and relocate to virtual addresses */ la sp, init_thread_union + THREAD_SIZE + mv a0, s1 call setup_vm + la a0, early_pg_dir call relocate /* Restore C environment */ @@ -64,25 +66,23 @@ clear_bss_done: la sp, init_thread_union + THREAD_SIZE /* Start the kernel */ - mv a0, s1 call parse_dtb tail start_kernel relocate: /* Relocate return address */ li a1, PAGE_OFFSET - la a0, _start - sub a1, a1, a0 + la a2, _start + sub a1, a1, a2 add ra, ra, a1 /* Point stvec to virtual address of intruction after satp write */ - la a0, 1f - add a0, a0, a1 - csrw CSR_STVEC, a0 + la a2, 1f + add a2, a2, a1 + csrw CSR_STVEC, a2 /* Compute satp for kernel page tables, but don't load it yet */ - la a2, swapper_pg_dir - srl a2, a2, PAGE_SHIFT + srl a2, a0, PAGE_SHIFT li a1, SATP_MODE or a2, a2, a1 @@ -148,6 +148,7 @@ relocate: fence /* Enable virtual memory and relocate to virtual address */ + la a0, swapper_pg_dir call relocate tail smp_callin diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b92e683..a990a6c 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -39,11 +39,9 @@ struct screen_info screen_info = { atomic_t hart_lottery; unsigned long boot_cpu_hartid; -void __init parse_dtb(phys_addr_t dtb_phys) +void __init parse_dtb(void) { - void *dtb = __va(dtb_phys); - - if (early_init_dt_scan(dtb)) + if (early_init_dt_scan(dtb_early_va)) return; pr_err("No DTB passed to the kernel\n"); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b1ca386..42bf939 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2019 Western Digital Corporation or its affiliates. */ #include @@ -41,13 +42,6 @@ void setup_zero_page(void) memset((void *)empty_zero_page, 0, PAGE_SIZE); } -void __init paging_init(void) -{ - setup_zero_page(); - local_flush_tlb_all(); - zone_sizes_init(); -} - void __init mem_init(void) { #ifdef CONFIG_FLATMEM @@ -143,17 +137,15 @@ EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); +void *dtb_early_va; pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; -pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; +pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +static bool mmu_enabled; -#ifndef __PAGETABLE_PMD_FOLDED -#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT) -pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss; -pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif +#define MAX_EARLY_MAPPING_SIZE SZ_128M -pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { @@ -172,6 +164,156 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } +static pte_t *__init get_pte_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PTE); + return (pte_t *)set_fixmap_offset(FIX_PTE, pa); + } else { + return (pte_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pte(uintptr_t va) +{ + /* + * We only create PMD or PGD early mappings so we + * should never reach here with MMU disabled. + */ + BUG_ON(!mmu_enabled); + + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init create_pte_mapping(pte_t *ptep, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + uintptr_t pte_index = pte_index(va); + + BUG_ON(sz != PAGE_SIZE); + + if (pte_none(ptep[pte_index])) + ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot); +} + +#ifndef __PAGETABLE_PMD_FOLDED + +pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; +pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; + +#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE +#define NUM_EARLY_PMDS 1UL +#else +#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) +#endif +pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); + +static pmd_t *__init get_pmd_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); + } else { + return (pmd_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pmd(uintptr_t va) +{ + uintptr_t pmd_num; + + if (mmu_enabled) + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + + pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; + BUG_ON(pmd_num >= NUM_EARLY_PMDS); + return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; +} + +static void __init create_pmd_mapping(pmd_t *pmdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pte_t *ptep; + phys_addr_t pte_phys; + uintptr_t pmd_index = pmd_index(va); + + if (sz == PMD_SIZE) { + if (pmd_none(pmdp[pmd_index])) + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot); + return; + } + + if (pmd_none(pmdp[pmd_index])) { + pte_phys = alloc_pte(va); + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); + ptep = get_pte_virt(pte_phys); + memset(ptep, 0, PAGE_SIZE); + } else { + pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index])); + ptep = get_pte_virt(pte_phys); + } + + create_pte_mapping(ptep, va, pa, sz, prot); +} + +#define pgd_next_t pmd_t +#define alloc_pgd_next(__va) alloc_pmd(__va) +#define get_pgd_next_virt(__pa) get_pmd_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PMD_SIZE +#define fixmap_pgd_next fixmap_pmd +#else +#define pgd_next_t pte_t +#define alloc_pgd_next(__va) alloc_pte(__va) +#define get_pgd_next_virt(__pa) get_pte_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pte_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PGDIR_SIZE +#define fixmap_pgd_next fixmap_pte +#endif + +static void __init create_pgd_mapping(pgd_t *pgdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pgd_next_t *nextp; + phys_addr_t next_phys; + uintptr_t pgd_index = pgd_index(va); + + if (sz == PGDIR_SIZE) { + if (pgd_val(pgdp[pgd_index]) == 0) + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot); + return; + } + + if (pgd_val(pgdp[pgd_index]) == 0) { + next_phys = alloc_pgd_next(va); + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = get_pgd_next_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index])); + nextp = get_pgd_next_virt(next_phys); + } + + create_pgd_next_mapping(nextp, va, pa, sz, prot); +} + +static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +{ + uintptr_t map_size = PAGE_SIZE; + + /* Upgrade to PMD/PGDIR mappings whenever possible */ + if (!(base & (PTE_PARENT_SIZE - 1)) && + !(size & (PTE_PARENT_SIZE - 1))) + map_size = PTE_PARENT_SIZE; + + return map_size; +} + /* * setup_vm() is called from head.S with MMU-off. * @@ -191,54 +333,115 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) "not use absolute addressing." #endif -asmlinkage void __init setup_vm(void) +asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t i; - uintptr_t pa = (uintptr_t) &_start; - pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC); + uintptr_t va, end_va; + uintptr_t load_pa = (uintptr_t)(&_start); + uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; + uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); + + va_pa_offset = PAGE_OFFSET - load_pa; + pfn_base = PFN_DOWN(load_pa); - va_pa_offset = PAGE_OFFSET - pa; - pfn_base = PFN_DOWN(pa); + /* + * Enforce boot alignment requirements of RV32 and + * RV64 by only allowing PMD or PGD mappings. + */ + BUG_ON(map_size == PAGE_SIZE); /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0); + BUG_ON((load_pa % map_size) != 0); + BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); + + /* Setup early PGD for fixmap */ + create_pgd_mapping(early_pg_dir, FIXADDR_START, + (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)trampoline_pmd), - __pgprot(_PAGE_TABLE)); - trampoline_pmd[0] = pfn_pmd(PFN_DOWN(pa), prot); + /* Setup fixmap PMD */ + create_pmd_mapping(fixmap_pmd, FIXADDR_START, + (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); + /* Setup trampoline PGD and PMD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + create_pmd_mapping(trampoline_pmd, PAGE_OFFSET, + load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); +#else + /* Setup trampoline PGD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC); +#endif - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; + /* + * Setup early PGD covering entire kernel which will allows + * us to reach paging_init(). We map all memory banks later + * in setup_vm_final() below. + */ + end_va = PAGE_OFFSET + load_sz; + for (va = PAGE_OFFSET; va < end_va; va += map_size) + create_pgd_mapping(early_pg_dir, va, + load_pa + (va - PAGE_OFFSET), + map_size, PAGE_KERNEL_EXEC); + + /* Create fixed mapping for early FDT parsing */ + end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE; + for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE) + create_pte_mapping(fixmap_pte, va, + dtb_pa + (va - __fix_to_virt(FIX_FDT)), + PAGE_SIZE, PAGE_KERNEL); + + /* Save pointer to DTB for early FDT parsing */ + dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); +} - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN((uintptr_t)swapper_pmd) + i, - __pgprot(_PAGE_TABLE)); - } - for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++) - swapper_pmd[i] = pfn_pmd(PFN_DOWN(pa + i * PMD_SIZE), prot); - - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pmd), - __pgprot(_PAGE_TABLE)); - fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] = - pfn_pmd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); -#else - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN(pa), prot); +static void __init setup_vm_final(void) +{ + uintptr_t va, map_size; + phys_addr_t pa, start, end; + struct memblock_region *reg; + + /* Set mmu_enabled flag */ + mmu_enabled = true; - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; + /* Setup swapper PGD for fixmap */ + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN(pa + i * PGDIR_SIZE), prot); + /* Map all memory banks */ + for_each_memblock(memory, reg) { + start = reg->base; + end = start + reg->size; + + if (start >= end) + break; + if (memblock_is_nomap(reg)) + continue; + if (start <= __pa(PAGE_OFFSET) && + __pa(PAGE_OFFSET) < end) + start = __pa(PAGE_OFFSET); + + map_size = best_map_size(start, end - start); + for (pa = start; pa < end; pa += map_size) { + va = (uintptr_t)__va(pa); + create_pgd_mapping(swapper_pg_dir, va, pa, + map_size, PAGE_KERNEL_EXEC); + } } - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); -#endif + /* Clear fixmap PTE and PMD mappings */ + clear_fixmap(FIX_PTE); + clear_fixmap(FIX_PMD); + + /* Move to swapper page table */ + csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE); + local_flush_tlb_all(); +} + +void __init paging_init(void) +{ + setup_vm_final(); + setup_zero_page(); + zone_sizes_init(); } -- 2.7.4