From: tao zeng Date: Fri, 30 Nov 2018 10:01:31 +0000 (+0800) Subject: mm: optimize thread stack usage on ARMv7 [1/1] X-Git-Tag: khadas-vims-v0.9.6-release~860 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8021e13c8e78350cf345d6fdb5593211c523b229;p=platform%2Fkernel%2Flinux-amlogic.git mm: optimize thread stack usage on ARMv7 [1/1] PD#SWPL-2681 Problem: Kernel stack usage is large when running many tasks. Solution: Map kernel stacks into module space and handle the resulting page faults on stack accesses on demand. This can save about 50% of kernel stack memory. Verify: p212 Change-Id: Ie894bc8f00cb525ddf8ac63c6d99d9c6e937fdc0 Signed-off-by: tao zeng --- diff --git a/arch/arm/configs/meson64_a32_defconfig b/arch/arm/configs/meson64_a32_defconfig index f40ccea..7b04042 100644 --- a/arch/arm/configs/meson64_a32_defconfig +++ b/arch/arm/configs/meson64_a32_defconfig @@ -30,7 +30,6 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_JUMP_LABEL=y -CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 3aed449..28438b87 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -208,6 +208,10 @@ THUMB( mov \rd, sp ) THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT ) mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT +#ifdef CONFIG_AMLOGIC_VMAP + add \rd, \rd, #TI_THREAD_SIZE + sub \rd, \rd, #TI_THREAD_INFO_SIZE +#endif .endm /* diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index bde40c4..508cee7 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -33,18 +33,34 @@ #ifdef CONFIG_MMU +#ifdef CONFIG_AMLOGIC_VMAP +/* + * TASK_SIZE - the maximum size of a user space task. + * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area + */ +#define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_64M)) +#define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) +#else /* CONFIG_AMLOGIC_VMAP */ /* * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) +#endif /* CONFIG_AMLOGIC_VMAP */ /* * The maximum size of a 26-bit user space task. */ #define TASK_SIZE_26 (UL(1) << 26) +#ifdef CONFIG_AMLOGIC_VMAP +#ifndef CONFIG_THUMB2_KERNEL +#define MODULES_VADDR (PAGE_OFFSET - SZ_64M) +#else +#define MODULES_VADDR (PAGE_OFFSET - SZ_8M) +#endif +#else /* CONFIG_AMLOGIC_VMAP */ /* * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 32MB of the kernel text.
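
The memory.h hunk above is where the address-space carve-out happens: TASK_SIZE shrinks by 64 MB instead of 16 MB, and MODULES_VADDR moves down to match, leaving a window below PAGE_OFFSET that later hunks (vmap_stack.h, vmap_stack.c) use for vmap'd kernel stacks. The stand-alone sketch below is only an illustration of that arithmetic, not code from this patch; the 0xC0000000 value for CONFIG_PAGE_OFFSET is an assumed default (board configs may differ), while SZ_64M - SZ_16M and the resulting 6144-task limit come from VM_STACK_AREA_SIZE and the comment in vmap_stack.h further down.

/*
 * Stand-alone sketch of the 32-bit address-space carve-out used by
 * CONFIG_AMLOGIC_VMAP (illustration only, not kernel code).
 * Build with: gcc -o layout layout.c
 */
#include <stdio.h>

#define SZ_16M            (16UL << 20)
#define SZ_64M            (64UL << 20)
#define PAGE_SIZE         4096UL
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE       (PAGE_SIZE << THREAD_SIZE_ORDER)   /* 8 KB per stack */

#define PAGE_OFFSET       0xC0000000UL            /* assumed kernel/user split */
#define TASK_SIZE_OLD     (PAGE_OFFSET - SZ_16M)  /* before the patch */
#define TASK_SIZE_NEW     (PAGE_OFFSET - SZ_64M)  /* with CONFIG_AMLOGIC_VMAP */
#define MODULES_VADDR_NEW (PAGE_OFFSET - SZ_64M)
#define VM_STACK_AREA_SIZE (SZ_64M - SZ_16M)      /* 48 MB for stacks inside the module window */

int main(void)
{
        printf("old TASK_SIZE   : %#lx\n", TASK_SIZE_OLD);
        printf("new TASK_SIZE   : %#lx\n", TASK_SIZE_NEW);
        printf("MODULES_VADDR   : %#lx\n", MODULES_VADDR_NEW);
        printf("stack area      : %lu MB\n", VM_STACK_AREA_SIZE >> 20);
        /* 48 MB / 8 KB per stack slot = 6144, matching the
         * "max 6144 tasks on 32bit" comment in vmap_stack.h */
        printf("max vmap stacks : %lu\n", VM_STACK_AREA_SIZE / THREAD_SIZE);
        return 0;
}
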
@@ -55,6 +71,7 @@ /* smaller range for Thumb-2 symbols relocation (2^24)*/ #define MODULES_VADDR (PAGE_OFFSET - SZ_8M) #endif +#endif /* CONFIG_AMLOGIC_VMAP */ #if TASK_SIZE > MODULES_VADDR #error Top of user space clashes with start of module space diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index e9c9a11..61bdfc3 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -164,9 +164,16 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->ARM_sp; } +#ifdef CONFIG_AMLOGIC_VMAP +#define current_pt_regs(void) ({ (struct pt_regs *) \ + ((current_stack_pointer | (THREAD_SIZE - 1)) - 7 - \ + THREAD_INFO_SIZE) - 1; \ +}) +#else #define current_pt_regs(void) ({ (struct pt_regs *) \ ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) +#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 776757d..fa89009 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -18,7 +18,15 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#ifdef CONFIG_AMLOGIC_VMAP +#define THREAD_INFO_SIZE (sizeof(struct thread_info)) +#define THREAD_INFO_OFFSET (THREAD_SIZE - THREAD_INFO_SIZE) +#define THREAD_START_SP (THREAD_SIZE - 8 - THREAD_INFO_SIZE) +#define VMAP_RESERVE_SIZE (8 + 4 * 4) +#define VMAP_BACK_SP 12 +#else #define THREAD_START_SP (THREAD_SIZE - 8) +#endif #ifndef __ASSEMBLY__ @@ -88,11 +96,20 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +#ifdef CONFIG_AMLOGIC_VMAP +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) + (((current_stack_pointer & ~(THREAD_SIZE - 1)) + + THREAD_INFO_OFFSET)); +} +#else static inline struct thread_info *current_thread_info(void) { return (struct thread_info *) (current_stack_pointer & ~(THREAD_SIZE - 1)); } +#endif #define thread_saved_pc(tsk) \ ((unsigned long)(task_thread_info(tsk)->cpu_context.pc)) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 6080082..365842d 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -183,5 +183,14 @@ int main(void) #ifdef CONFIG_VDSO DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); #endif + +#ifdef CONFIG_AMLOGIC_VMAP + DEFINE(TI_THREAD_START_SP, THREAD_START_SP); + DEFINE(TI_VMAP_BACK_SP, VMAP_BACK_SP); + DEFINE(TI_VMAP_RESERVE_LEN, VMAP_RESERVE_SIZE); + DEFINE(TI_THREAD_SIZE, THREAD_SIZE); + DEFINE(TI_THREAD_INFO_SIZE, sizeof(struct thread_info)); +#endif + return 0; } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 9f157e7..bd77d9c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -39,6 +39,12 @@ * Interrupt handling. */ .macro irq_handler +#ifdef CONFIG_AMLOGIC_VMAP + mov r8, sp /* back up sp */ + mov r0, sp + bl irq_stack_entry /* switch IRQ stack */ + mov sp, r0 +#endif #ifdef CONFIG_MULTI_IRQ_HANDLER ldr r1, =handle_arch_irq mov r0, sp @@ -48,6 +54,9 @@ arch_irq_handler_default #endif 9997: +#ifdef CONFIG_AMLOGIC_VMAP + mov sp, r8 /* switch stack back to task stack */ +#endif .endm .macro pabt_helper @@ -149,10 +158,24 @@ ENDPROC(__und_invalid) #define SPFIX(code...) 
#endif +#ifdef CONFIG_AMLOGIC_VMAP + .macro svc_entry, stack_hole=0, trace=1, uaccess=1, vmap=0 +#else .macro svc_entry, stack_hole=0, trace=1, uaccess=1 +#endif UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) +#ifdef CONFIG_AMLOGIC_VMAP + .if \vmap + /* keep using stack of abt mode */ + str sp, [r0, #TI_VMAP_BACK_SP] + sub sp, r0, #(SVC_REGS_SIZE + \stack_hole - 4) + .else + sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) + .endif +#else /* !CONFIG_AMLOGIC_VMAP */ sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) +#endif /* CONFIG_AMLOGIC_VMAP */ #ifdef CONFIG_THUMB2_KERNEL SPFIX( str r0, [sp] ) @ temporarily saved SPFIX( mov r0, sp ) @@ -167,7 +190,15 @@ ENDPROC(__und_invalid) ldmia r0, {r3 - r5} add r7, sp, #S_SP - 4 @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" +#ifdef CONFIG_AMLOGIC_VMAP + .if \vmap + ldr r2, [sp, #(TI_VMAP_BACK_SP + SVC_REGS_SIZE - 4)] + .else add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) + .endif +#else + add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) +#endif SPFIX( addeq r2, r2, #4 ) str r3, [sp, #-4]! @ save the "real" r0 copied @ from the exception stack @@ -185,7 +216,44 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} +#ifdef CONFIG_AMLOGIC_VMAP + .if \vmap + /* + * get fault task thread info + */ + ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)] + mrc p15, 0, r1, c6, c0, 0 @ get FAR + bl pmd_check + mov tsk, r0 + mov tsk, tsk, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT + mov tsk, tsk, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT + add tsk, tsk, #TI_THREAD_SIZE + sub tsk, tsk, #TI_THREAD_INFO_SIZE + + /* + * copy some important member of thread_info from current + * task to vmap stack + */ + ldr r0, [tsk, #TI_FLAGS] + ldr r1, [tsk, #TI_PREEMPT] + str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_FLAGS)] + str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_PREEMPT)] + + ldr r0, [tsk, #TI_ADDR_LIMIT] + ldr r1, [tsk, #TI_TASK] + str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_ADDR_LIMIT)] + str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_TASK)] + + ldr r0, [tsk, #TI_CPU] + ldr r1, [tsk, #TI_CPU_DOMAIN] + str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_CPU)] + str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_CPU_DOMAIN)] + .else + get_thread_info tsk + .endif +#else get_thread_info tsk +#endif ldr r0, [tsk, #TI_ADDR_LIMIT] mov r1, #TASK_SIZE str r1, [tsk, #TI_ADDR_LIMIT] @@ -205,7 +273,28 @@ ENDPROC(__und_invalid) .align 5 __dabt_svc: +#ifdef CONFIG_AMLOGIC_VMAP + svc_entry uaccess=0, vmap=1 + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + mov r2, sp + uaccess_disable ip @ disable userspace access + bl handle_vmap_fault + cmp r0, #0 + bne .L__dabt_svc_next + /* handled by vmap fault handler */ + svc_exit r5, vmap=1 @ return from exception +.L__dabt_svc_next: + /* re-build context for normal abort handler */ + ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)] + sub r0, #SVC_REGS_SIZE + mov r1, sp + mov r2, #SVC_REGS_SIZE + bl memcpy /* copy back sp */ + mov sp, r0 +#else svc_entry uaccess=0 +#endif mov r2, sp dabt_helper THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index e056c9a..c4c792f 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -197,7 +197,11 @@ .endm +#ifdef CONFIG_AMLOGIC_VMAP + .macro svc_exit, rpsr, irq = 0, vmap = 0 +#else .macro svc_exit, rpsr, irq = 0 +#endif /* CONFIG_AMLOGIC_VMAP */ .if \irq != 0 @ IRQs already off #ifdef CONFIG_TRACE_IRQFLAGS @@ -224,7 +228,16 @@ msr 
spsr_cxsf, \rpsr #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 +#ifdef CONFIG_AMLOGIC_VMAP + .if \vmap + ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)] + sub r0, r0, #4 @ uninhabited address + .else + sub r0, sp, #4 @ uninhabited address + .endif +#else sub r0, sp, #4 @ uninhabited address +#endif /* CONFIG_AMLOGIC_VMAP */ strex r1, r2, [r0] @ clear the exclusive monitor #endif ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012..f823604 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -119,7 +119,11 @@ __mmap_switched_data: #else .long 0 @ r7 #endif +#ifdef CONFIG_AMLOGIC_VMAP + .long init_thread_union + TI_THREAD_START_SP @ sp +#else .long init_thread_union + THREAD_START_SP @ sp +#endif .size __mmap_switched_data, . - __mmap_switched_data /* diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 20b5ae9..2b5ddfa 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -62,6 +62,9 @@ #include #include #include +#ifdef CONFIG_AMLOGIC_VMAP +#include +#endif #ifdef CONFIG_AMLOGIC_CPU_INFO #include #endif @@ -515,6 +518,17 @@ static void __init elf_hwcap_fixup(void) elf_hwcap &= ~HWCAP_SWP; } +#ifdef CONFIG_AMLOGIC_VMAP +static void __init fixup_init_thread_union(void) +{ + void *p; + + p = (void *)((unsigned long)&init_thread_union + THREAD_INFO_OFFSET); + memcpy(p, &init_thread_union, THREAD_INFO_SIZE); + memset(&init_thread_union, 0, THREAD_INFO_SIZE); +} +#endif + /* * cpu_init - initialise one CPU. * @@ -578,6 +592,9 @@ void notrace cpu_init(void) "I" (offsetof(struct stack, fiq[0])), PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); +#ifdef CONFIG_AMLOGIC_VMAP + __setup_vmap_stack(cpu); +#endif #endif } @@ -600,6 +617,9 @@ void __init smp_setup_processor_id(void) */ set_my_cpu_offset(0); +#ifdef CONFIG_AMLOGIC_VMAP + fixup_init_thread_union(); +#endif pr_info("Booting Linux on physical CPU 0x%x\n", mpidr); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8..3f9ce5a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -326,11 +326,20 @@ void arch_cpu_idle_dead(void) * cpu initialisation. There's some initialisation which needs * to be repeated to undo the effects of taking the CPU offline. 
*/ +#ifdef CONFIG_AMLOGIC_VMAP + __asm__("mov sp, %0\n" + " mov fp, #0\n" + " b secondary_start_kernel" + : + : "r" (task_stack_page(current) + THREAD_SIZE - 8 - + THREAD_INFO_SIZE)); +#else __asm__("mov sp, %0\n" " mov fp, #0\n" " b secondary_start_kernel" : : "r" (task_stack_page(current) + THREAD_SIZE - 8)); +#endif } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 9a2f882..f616202 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_AMLOGIC_VMAP +#include +#endif + extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); extern void cpu_resume_mmu(void); @@ -47,6 +51,27 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) #define idmap_pgd NULL #endif +#ifdef CONFIG_AMLOGIC_VMAP +void copy_pgd(void) +{ + unsigned long index; + pgd_t *pgd_c = NULL, *pgd_k, *pgd_i; + unsigned long size; + + /* + * sync pgd of current task and idmap_pgd from init mm + */ + index = pgd_index(TASK_SIZE); + pgd_c = cpu_get_pgd() + index; + pgd_i = idmap_pgd + index; + pgd_k = init_mm.pgd + index; + size = (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t); + pr_debug("pgd:%p, pgd_k:%p, pdg_i:%p\n", + pgd_c, pgd_k, pgd_i); + memcpy(pgd_c, pgd_k, size); + memcpy(pgd_i, pgd_k, size); +} +#endif /* * This is called by __cpu_suspend() to save the state, and do whatever * flushing is required to ensure that when the CPU goes to sleep we have @@ -56,7 +81,21 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) { u32 *ctx = ptr; +#ifdef CONFIG_AMLOGIC_VMAP + if (likely(is_vmap_addr((unsigned long)ptr))) { + struct page *page = vmalloc_to_page(ptr); + unsigned long offset; + + offset = (unsigned long)ptr & (PAGE_SIZE - 1); + *save_ptr = (page_to_phys(page) + offset); + pr_debug("%s, ptr:%p, page:%lx, save_ptr:%x\n", + __func__, ptr, page_to_pfn(page), *save_ptr); + copy_pgd(); + } else + *save_ptr = virt_to_phys(ptr); +#else *save_ptr = virt_to_phys(ptr); +#endif /* This must correspond to the LDM in cpu_resume() assembly */ *ptr++ = virt_to_phys(idmap_pgd); diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 0bee233..f7b320f 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -44,6 +44,9 @@ #include #include #include +#ifdef CONFIG_AMLOGIC_VMAP +#include +#endif #include #include @@ -468,6 +471,20 @@ int unwind_frame(struct stackframe *frame) return URC_OK; } +#ifdef CONFIG_AMLOGIC_VMAP +static void dump_backtrace_entry_fp(unsigned long where, unsigned long fp, + unsigned long sp) +{ + signed long fp_size = 0; + + fp_size = fp - sp + 4; + if (fp_size < 0 || !fp) + fp_size = 0; + pr_info("[%08lx+%4ld][<%08lx>] %pS\n", + fp, fp_size, where, (void *)where); +} +#endif + void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; @@ -504,9 +521,33 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long where = frame.pc; urc = unwind_frame(&frame); + #ifdef CONFIG_AMLOGIC_VMAP + if (urc < 0) { + int keep = 0; + + /* continue search for irq stack */ + if (on_irq_stack(frame.sp, raw_smp_processor_id())) { + unsigned long *prev_fp; + + prev_fp = (unsigned long *)(frame.fp - 12); + if (frame.fp >= TASK_SIZE) { + keep = 1; + frame.fp = prev_fp[0]; + frame.sp = prev_fp[1]; + frame.lr = prev_fp[2]; + frame.pc = prev_fp[3]; + } + } + if (!keep) + break; + } + where = frame.lr; + dump_backtrace_entry_fp(where, frame.fp, frame.sp); + #else if (urc < 0) 
break; dump_backtrace_entry(where, frame.pc, frame.sp - 4); + #endif } } diff --git a/drivers/amlogic/memory_ext/Kconfig b/drivers/amlogic/memory_ext/Kconfig index fc41173..e5a1f2d 100644 --- a/drivers/amlogic/memory_ext/Kconfig +++ b/drivers/amlogic/memory_ext/Kconfig @@ -43,7 +43,6 @@ config AMLOGIC_VMAP bool "Amlogic kernel stack" depends on AMLOGIC_MEMORY_EXTEND depends on !KASAN - depends on 64BIT default y help This config is used to enable amlogic kernel stack diff --git a/drivers/amlogic/memory_ext/vmap_stack.c b/drivers/amlogic/memory_ext/vmap_stack.c index 511cfc7..ea3dacb 100644 --- a/drivers/amlogic/memory_ext/vmap_stack.c +++ b/drivers/amlogic/memory_ext/vmap_stack.c @@ -50,8 +50,21 @@ static unsigned char vmap_shrink_enable; static atomic_t vmap_stack_size; static struct aml_vmap *avmap; +#ifdef CONFIG_ARM64 DEFINE_PER_CPU(unsigned long [THREAD_SIZE/sizeof(long)], vmap_stack) __aligned(16); +#else +static unsigned long irq_stack1[(THREAD_SIZE/sizeof(long))] + __aligned(THREAD_SIZE); +static void *irq_stack[NR_CPUS] = { + irq_stack1, /* only assign 1st irq stack ,other need alloc */ +}; +static unsigned long vmap_stack1[(THREAD_SIZE/sizeof(long))] + __aligned(THREAD_SIZE); +static void *vmap_stack[NR_CPUS] = { + vmap_stack1, /* only assign 1st vmap stack ,other need alloc */ +}; +#endif void update_vmap_stack(int diff) { @@ -65,7 +78,146 @@ int get_vmap_stack_size(void) } EXPORT_SYMBOL(get_vmap_stack_size); -static int is_vmap_addr(unsigned long addr) +#ifdef CONFIG_ARM64 +bool on_vmap_stack(unsigned long sp, int cpu) +{ + /* variable names the same as kernel/stacktrace.c */ + unsigned long low = (unsigned long)per_cpu(vmap_stack, cpu); + unsigned long high = low + THREAD_START_SP; + + return (low <= sp && sp <= high); +} +#endif + +#ifdef CONFIG_ARM +void notrace __setup_vmap_stack(unsigned long cpu) +{ + void *stack; + +#ifdef CONFIG_THUMB2_KERNEL +#define TAG "r" +#else +#define TAG "I" +#endif + stack = vmap_stack[cpu]; + if (!stack) { + stack = kmalloc(THREAD_SIZE, GFP_ATOMIC | __GFP_ZERO); + WARN_ON(!stack); + vmap_stack[cpu] = stack; + irq_stack[cpu] = kmalloc(THREAD_SIZE, GFP_ATOMIC | __GFP_ZERO); + WARN_ON(!irq_stack[cpu]); + } + + pr_info("cpu %ld, vmap stack:[%lx-%lx]\n", + cpu, (unsigned long)stack, + (unsigned long)stack + THREAD_START_SP); + pr_info("cpu %ld, irq stack:[%lx-%lx]\n", + cpu, (unsigned long)irq_stack[cpu], + (unsigned long)irq_stack[cpu] + THREAD_START_SP); + stack += THREAD_SIZE; + stack -= sizeof(struct thread_info); + /* + * reserve 24 byte for r0, lr, spsr, sp_svc and 8 bytes gap + */ + stack -= (24); + asm volatile ( + "msr cpsr_c, %1 \n" + "mov sp, %0 \n" + "msr cpsr_c, %2 \n" + : + : "r" (stack), + TAG(PSR_F_BIT | PSR_I_BIT | ABT_MODE), + TAG(PSR_F_BIT | PSR_I_BIT | SVC_MODE) + : "memory", "cc" + ); +} + +int on_irq_stack(unsigned long sp, int cpu) +{ + unsigned long sp_irq; + + sp_irq = (unsigned long)irq_stack[cpu]; + if ((sp & ~(THREAD_SIZE - 1)) == (sp_irq & ~(THREAD_SIZE - 1))) + return 1; + return 0; +} + +unsigned long notrace irq_stack_entry(unsigned long sp_irq) +{ + int cpu = raw_smp_processor_id(); + + if (!on_irq_stack(sp_irq, cpu)) { + unsigned long sp = (unsigned long)irq_stack[cpu]; + void *src, *dst; + + /* + * copy some data to irq stack + */ + src = current_thread_info(); + dst = (void *)(sp + THREAD_INFO_OFFSET); + memcpy(dst, src, offsetof(struct thread_info, cpu_context)); + sp_irq = (unsigned long)dst - 8; + } + return sp_irq; +} + +unsigned long notrace pmd_check(unsigned long addr, unsigned long far) +{ + unsigned int 
index; + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + if (addr < TASK_SIZE) + return addr; + + index = pgd_index(addr); + + pgd = cpu_get_pgd() + index; + pgd_k = init_mm.pgd + index; + + if (pgd_none(*pgd_k)) + goto bad_area; + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); + + pud = pud_offset(pgd, addr); + pud_k = pud_offset(pgd_k, addr); + + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + +#ifdef CONFIG_ARM_LPAE + /* + * Only one hardware entry per PMD with LPAE. + */ + index = 0; +#else + /* + * On ARM one Linux PGD entry contains two hardware entries (see page + * tables layout in pgtable.h). We normally guarantee that we always + * fill both L1 entries. But create_mapping() doesn't follow the rule. + * It can create inidividual L1 entries, so here we have to call + * pmd_none() check for the entry really corresponded to address, not + * for the first of pair. + */ + index = (addr >> SECTION_SHIFT) & 1; +#endif + if (pmd_none(pmd_k[index])) + goto bad_area; + + copy_pmd(pmd, pmd_k); +bad_area: + return addr; +} +#endif + +int is_vmap_addr(unsigned long addr) { unsigned long start, end; @@ -96,6 +248,50 @@ static struct page *get_vmap_cached_page(int *remain) return page; } +static struct page *check_pte_exist(unsigned long addr) +{ + struct mm_struct *mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + mm = &init_mm; + + pgd = pgd_offset(mm, addr); + + if (pgd_none(*pgd)) + return NULL; + + if (pgd_bad(*pgd)) + return NULL; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return NULL; + + if (pud_bad(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + + if (pmd_bad(*pmd)) + return NULL; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return NULL; +#ifdef CONFIG_ARM64 + return pte_page(*pte); +#elif defined(CONFIG_ARM) + return pte_page(pte_val(*pte)); +#else + return NULL; /* not supported */ +#endif +} + static int vmap_mmu_set(struct page *page, unsigned long addr, int set) { pgd_t *pgd = NULL; @@ -121,16 +317,22 @@ static int vmap_mmu_set(struct page *page, unsigned long addr, int set) goto nomem; } - pte = pte_offset_map(pmd, addr); + pte = pte_offset_kernel(pmd, addr); if (set) set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL)); else pte_clear(&init_mm, addr, pte); - pte_unmap(pte); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +#ifdef CONFIG_ARM64 D("add:%lx, pgd:%p %llx, pmd:%p %llx, pte:%p %llx\n", addr, pgd, pgd_val(*pgd), pmd, pmd_val(*pmd), pte, pte_val(*pte)); +#elif defined(CONFIG_ARM) + D("add:%lx, pgd:%p %x, pmd:%p %x, pte:%p %x\n", + addr, pgd, (unsigned int)pgd_val(*pgd), + pmd, (unsigned int)pmd_val(*pmd), + pte, pte_val(*pte)); +#endif return 0; nomem: E("allocation page talbe failed, G:%p, U:%p, M:%p, T:%p", @@ -140,10 +342,16 @@ nomem: static int stack_floor_page(unsigned long addr) { + unsigned long pos; + + pos = addr & (THREAD_SIZE - 1); /* * stack address must align to THREAD_SIZE */ - return ((addr & (THREAD_SIZE - 1)) < PAGE_SIZE); + if (THREAD_SIZE_ORDER > 1) + return pos < PAGE_SIZE; + else + return pos < (PAGE_SIZE / 4); } static int check_addr_up_flow(unsigned long addr) @@ -153,17 +361,17 @@ static int check_addr_up_flow(unsigned long addr) * rage(aligned to THREAD_SIZE) but next page of this * addr is not mapped */ - if (stack_floor_page(addr) && - !vmalloc_to_page((const void *)(addr + PAGE_SIZE))) + if (stack_floor_page(addr) && 
!check_pte_exist(addr + PAGE_SIZE)) return 1; return 0; } -#if DEBUG -static void dump_backtrace_entry(unsigned long ip, unsigned long fp) +static void dump_backtrace_entry(unsigned long ip, unsigned long fp, + unsigned long sp) { unsigned long fp_size = 0; +#ifdef CONFIG_ARM64 if (fp >= VMALLOC_START) { fp_size = *((unsigned long *)fp) - fp; /* fp cross IRQ or vmap stack */ @@ -172,29 +380,94 @@ static void dump_backtrace_entry(unsigned long ip, unsigned long fp) } pr_info("[%016lx+%4ld][<%p>] %pS\n", fp, fp_size, (void *) ip, (void *) ip); +#elif defined(CONFIG_ARM) + if (fp >= TASK_SIZE) { + fp_size = fp - sp + 4; + /* fp cross IRQ or vmap stack */ + if (fp_size >= THREAD_SIZE) + fp_size = 0; + } + pr_info("[%08lx+%4ld][<%p>] %pS\n", + fp, fp_size, (void *) ip, (void *) ip); +#endif } -static void show_fault_stack(unsigned long addr, struct pt_regs *regs) +static noinline void show_fault_stack(unsigned long addr, struct pt_regs *regs) { struct stackframe frame; +#ifdef CONFIG_ARM64 frame.fp = regs->regs[29]; frame.sp = addr; frame.pc = (unsigned long)regs->regs[30]; +#elif defined(CONFIG_ARM) + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.pc = (unsigned long)regs->uregs[15]; +#endif - pr_info("Call trace:\n"); + pr_info("Addr:%lx, Call trace:\n", addr); +#ifdef CONFIG_ARM64 pr_info("[%016lx+%4ld][<%p>] %pS\n", addr, frame.fp - addr, (void *)regs->pc, (void *) regs->pc); +#elif defined(CONFIG_ARM) + pr_info("[%08lx+%4ld][<%p>] %pS\n", + addr, frame.fp - addr, (void *)regs->uregs[15], + (void *) regs->uregs[15]); +#endif while (1) { int ret; - dump_backtrace_entry(frame.pc, frame.fp); + dump_backtrace_entry(frame.pc, frame.fp, frame.sp); + #ifdef CONFIG_ARM64 ret = unwind_frame(current, &frame); + #elif defined(CONFIG_ARM) + ret = unwind_frame(&frame); + #endif if (ret < 0) break; } } + +static void check_sp_fault_again(struct pt_regs *regs) +{ + unsigned long sp = 0, addr; + struct page *page; + int cache; + +#ifdef CONFIG_ARM + sp = regs->ARM_sp; +#elif defined(CONFIG_ARM64) + sp = regs->sp; #endif + addr = sp - sizeof(*regs); + + if (sp && ((addr & PAGE_MASK) != (sp & PAGE_MASK))) { + /* + * will fault when we copy back context, so handle + * it first + */ + E("fault again, sp:%lx, addr:%lx\n", sp, addr); + page = get_vmap_cached_page(&cache); + WARN_ON(!page); + vmap_mmu_set(page, addr, 1); + update_vmap_stack(1); + if ((THREAD_SIZE_ORDER > 1) && stack_floor_page(addr)) { + E("task:%d %s, stack near overflow, addr:%lx\n", + current->pid, current->comm, addr); + show_fault_stack(addr, regs); + } + + /* cache is not enough */ + if (cache <= (VMAP_CACHE_PAGE / 2)) + mod_delayed_work(system_highpri_wq, &avmap->mwork, 0); + + D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr); + #if DEBUG + show_fault_stack(addr, regs); + #endif + } +} /* * IRQ should *NEVER* been opened in this handler @@ -205,28 +478,54 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr, struct page *page; int cache = 0; - if (!is_vmap_addr(addr)) + if (!is_vmap_addr(addr)) { + check_sp_fault_again(regs); return -EINVAL; + } D("addr:%lx, esr:%x, task:%5d %s\n", addr, esr, current->pid, current->comm); +#ifdef CONFIG_ARM64 D("pc:%pf, %llx, lr:%pf, %llx, sp:%llx, %lx\n", (void *)regs->pc, regs->pc, (void *)regs->regs[30], regs->regs[30], regs->sp, current_stack_pointer); +#elif defined(CONFIG_ARM) + D("pc:%pf, %lx, lr:%pf, %lx, sp:%lx, %lx\n", + (void *)regs->uregs[15], regs->uregs[15], + (void *)regs->uregs[14], regs->uregs[14], regs->uregs[13], + current_stack_pointer); +#endif if 
(check_addr_up_flow(addr)) { E("address %lx out of range\n", addr); + #ifdef CONFIG_ARM64 E("PC is:%llx, %pf, LR is:%llx %pf\n", regs->pc, (void *)regs->pc, regs->regs[30], (void *)regs->regs[30]); + #elif defined(CONFIG_ARM) + E("PC is:%lx, %pf, LR is:%lx %pf\n", + regs->uregs[15], (void *)regs->uregs[15], + regs->uregs[14], (void *)regs->uregs[14]); + #endif E("task:%d %s, stack:%p, %lx\n", current->pid, current->comm, current->stack, current_stack_pointer); - dump_stack(); + show_fault_stack(addr, regs); + check_sp_fault_again(regs); return -ERANGE; } +#ifdef CONFIG_ARM + page = check_pte_exist(addr); + if (page) { + D("task:%d %s, page:%lx mapped for addr:%lx\n", + current->pid, current->comm, page_to_pfn(page), addr); + check_sp_fault_again(regs); + return -EINVAL; + } +#endif + /* * allocate a new page for vmap */ @@ -234,10 +533,10 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr, WARN_ON(!page); vmap_mmu_set(page, addr, 1); update_vmap_stack(1); - if ((THREAD_SIZE_ORDER > 1) && stack_floor_page(addr)) { + if ((THREAD_SIZE_ORDER > 1) && stack_floor_page(addr)) { E("task:%d %s, stack near overflow, addr:%lx\n", current->pid, current->comm, addr); - dump_stack(); + show_fault_stack(addr, regs); } /* cache is not enough */ @@ -248,7 +547,6 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr, #if DEBUG show_fault_stack(addr, regs); #endif - return 0; } EXPORT_SYMBOL(handle_vmap_fault); @@ -344,6 +642,17 @@ void aml_account_task_stack(struct task_struct *tsk, int account) unsigned long stack = (unsigned long)task_stack_page(tsk); struct page *first_page; + if (unlikely(!is_vmap_addr(stack))) { + /* stack get from kmalloc */ + first_page = virt_to_page((void *)stack); + mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, + THREAD_SIZE / 1024 * account); + + memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); + update_vmap_stack(account * (THREAD_SIZE / PAGE_SIZE)); + return; + } stack += STACK_TOP_PAGE_OFF; first_page = vmalloc_to_page((void *)stack); mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, @@ -379,13 +688,18 @@ void *aml_stack_alloc(int node, struct task_struct *tsk) avmap->start_bit = bitmap_no + 1; /* next idle address space */ if (bitmap_no >= MAX_TASKS) { spin_unlock_irqrestore(&avmap->vmap_lock, flags); - E("BITMAP FULL!!!\n"); - return NULL; + /* + * if vmap address space is full, we still need to try + * to get stack from kmalloc + */ + addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); + E("BITMAP FULL, kmalloc task stack:%lx\n", addr); + return (void *)addr; } bitmap_set(avmap->bitmap, bitmap_no, 1); spin_unlock_irqrestore(&avmap->vmap_lock, flags); - page = alloc_page(THREADINFO_GFP | __GFP_ZERO); + page = alloc_page(THREADINFO_GFP | __GFP_ZERO | __GFP_HIGHMEM); if (!page) { spin_lock_irqsave(&avmap->vmap_lock, flags); bitmap_clear(avmap->bitmap, bitmap_no, 1); @@ -400,6 +714,7 @@ void *aml_stack_alloc(int node, struct task_struct *tsk) map_addr = addr + STACK_TOP_PAGE_OFF; vmap_mmu_set(page, map_addr, 1); update_vmap_stack(1); + D("bit idx:%5ld, start:%5ld, addr:%lx, page:%lx\n", bitmap_no, raw_start, addr, page_to_pfn(page)); @@ -413,6 +728,12 @@ void aml_stack_free(struct task_struct *tsk) struct page *page; unsigned long flags; + if (unlikely(!is_vmap_addr(stack))) { + /* stack get from kmalloc */ + kfree((void *)stack); + return; + } + addr = stack + STACK_TOP_PAGE_OFF; for (; addr >= stack; addr -= PAGE_SIZE) { page = vmalloc_to_page((const void *)addr); @@ -458,7 
+779,7 @@ static void page_cache_maintain_work(struct work_struct *work) INIT_LIST_HEAD(&head); for (i = 0; i < VMAP_CACHE_PAGE - cnt; i++) { - page = alloc_page(GFP_KERNEL | __GFP_HIGH); + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (!page) { E("get page failed, allocated:%d, cnt:%d\n", i, cnt); break; @@ -483,10 +804,9 @@ arch_initcall(start_thread_work); void __init thread_stack_cache_init(void) { int i; - unsigned long addr; struct page *page; - page = alloc_pages(GFP_KERNEL, VMAP_CACHE_PAGE_ORDER); + page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, VMAP_CACHE_PAGE_ORDER); if (!page) return; @@ -505,8 +825,8 @@ void __init thread_stack_cache_init(void) pr_info("%s, vmap:%p, bitmap:%p, cache page:%lx\n", __func__, avmap, avmap->bitmap, page_to_pfn(page)); avmap->root_vm = __get_vm_area_node(VM_STACK_AREA_SIZE, - VM_STACK_AREA_SIZE, - 0, VMALLOC_START, VMALLOC_END, + VMAP_ALIGN, + 0, VMAP_ADDR_START, VMAP_ADDR_END, NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); if (!avmap->root_vm) { @@ -530,7 +850,9 @@ void __init thread_stack_cache_init(void) avmap->cached_pages = VMAP_CACHE_PAGE; INIT_DELAYED_WORK(&avmap->mwork, page_cache_maintain_work); +#ifdef CONFIG_ARM64 for_each_possible_cpu(i) { + unsigned long addr; addr = (unsigned long)per_cpu_ptr(vmap_stack, i); pr_info("cpu %d, vmap_stack:[%lx-%lx]\n", i, addr, addr + THREAD_START_SP); @@ -538,5 +860,6 @@ void __init thread_stack_cache_init(void) pr_info("cpu %d, irq_stack: [%lx-%lx]\n", i, addr, addr + THREAD_START_SP); } +#endif register_shrinker(&vmap_shrinker); } diff --git a/fs/namespace.c b/fs/namespace.c index 4628d08c..97c9f82 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2698,6 +2698,17 @@ static long exact_copy_from_user(void *to, const void __user * from, if (!access_ok(VERIFY_READ, from, n)) return n; +#ifdef CONFIG_AMLOGIC_VMAP + /* addr from kernel space and in vmalloc range, avoid overflow */ + if (is_vmalloc_or_module_addr((void *)from)) { + unsigned long old = n; + + n = strlen(from) + 1; + pr_info("addr:%p is in kernel, size fix %ld->%ld, data:%s\n", + from, old, n, (char *)from); + } +#endif + while (n) { if (__get_user(c, f)) { memset(t, 0, n); diff --git a/include/linux/amlogic/vmap_stack.h b/include/linux/amlogic/vmap_stack.h index 1b4081e..a5e001a 100644 --- a/include/linux/amlogic/vmap_stack.h +++ b/include/linux/amlogic/vmap_stack.h @@ -20,7 +20,18 @@ #define STACK_SHRINK_THRESHOLD (PAGE_SIZE + 1024) #define STACK_SHRINK_SLEEP (HZ) +#ifdef CONFIG_64BIT #define VM_STACK_AREA_SIZE SZ_512M +#define VMAP_ADDR_START VMALLOC_START +#define VMAP_ADDR_END VMALLOC_END +#define VMAP_ALIGN VM_STACK_AREA_SIZE +#else +/* currently support max 6144 tasks on 32bit */ +#define VM_STACK_AREA_SIZE (SZ_64M - SZ_16M) +#define VMAP_ADDR_START MODULES_VADDR +#define VMAP_ADDR_END MODULES_END +#define VMAP_ALIGN SZ_64M +#endif #define STACK_TOP_PAGE_OFF (THREAD_SIZE - PAGE_SIZE) @@ -34,33 +45,28 @@ #define CACHE_MAINTAIN_DELAY (HZ) struct aml_vmap { + spinlock_t vmap_lock; unsigned int start_bit; int cached_pages; struct vm_struct *root_vm; unsigned long *bitmap; struct list_head list; - spinlock_t vmap_lock; - spinlock_t page_lock; struct delayed_work mwork; + spinlock_t page_lock; }; extern int handle_vmap_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); -extern DEFINE_PER_CPU(unsigned long [THREAD_SIZE/sizeof(long)], vmap_stack); -static inline bool on_vmap_stack(unsigned long sp, int cpu) -{ - /* variable names the same as kernel/stacktrace.c */ - unsigned long low = (unsigned 
long)per_cpu(vmap_stack, cpu); - unsigned long high = low + THREAD_START_SP; - - return (low <= sp && sp <= high); -} - +extern bool on_vmap_stack(unsigned long sp, int cpu); extern void __setup_vmap_stack(unsigned long off); extern void update_vmap_stack(int diff); extern int get_vmap_stack_size(void); +extern int is_vmap_addr(unsigned long addr); extern void aml_stack_free(struct task_struct *tsk); extern void *aml_stack_alloc(int node, struct task_struct *tsk); extern void aml_account_task_stack(struct task_struct *tsk, int account); +#ifdef CONFIG_ARM +extern int on_irq_stack(unsigned long sp, int cpu); +#endif #endif /* __VMAP_STACK_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 9ab2bf1..e33a057 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3331,7 +3331,13 @@ static inline unsigned long *end_of_stack(const struct task_struct *task) #elif !defined(__HAVE_THREAD_FUNCTIONS) +#ifdef CONFIG_AMLOGIC_VMAP +#define task_thread_info(task) \ + ((struct thread_info *)(((unsigned long)(task)->stack) + \ + THREAD_INFO_OFFSET)) +#else #define task_thread_info(task) ((struct thread_info *)(task)->stack) +#endif #define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) @@ -3351,11 +3357,15 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct */ static inline unsigned long *end_of_stack(struct task_struct *p) { +#ifdef CONFIG_AMLOGIC_VMAP + return p->stack; +#else /* CONFIG_AMLOGIC_VMAP */ #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; #else return (unsigned long *)(task_thread_info(p) + 1); #endif +#endif /* CONFIG_AMLOGIC_VMAP */ } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 2322446..ec3312d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -481,21 +481,12 @@ int __weak arch_dup_task_struct(struct task_struct *dst, return 0; } -#ifdef CONFIG_AMLOGIC_VMAP -static bool first_magic __read_mostly; -#endif - void set_task_stack_end_magic(struct task_struct *tsk) { unsigned long *stackend; stackend = end_of_stack(tsk); -#ifdef CONFIG_AMLOGIC_VMAP - if (unlikely(!first_magic)) { - *stackend = STACK_END_MAGIC; /* for overflow detection */ - first_magic = 1; - } -#else +#ifndef CONFIG_AMLOGIC_VMAP *stackend = STACK_END_MAGIC; /* for overflow detection */ #endif }
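
Taken together, the thread_info.h, sched.h and fork.c hunks relocate thread_info from the bottom of the 8 KB stack to its top (THREAD_INFO_OFFSET), start the initial SP just below it (THREAD_START_SP = THREAD_SIZE - 8 - THREAD_INFO_SIZE), and map only the top stack page at clone time (STACK_TOP_PAGE_OFF); the lower page is populated lazily by handle_vmap_fault() on first touch, which is where the roughly 50% stack-memory saving comes from. The sketch below is a stand-alone illustration of that layout arithmetic, not code from the patch; the 48-byte thread_info size and the 0xBC000000 stack base are made-up example values.

/*
 * Stand-alone sketch of the CONFIG_AMLOGIC_VMAP stack layout
 * (illustration only, not kernel code).
 */
#include <stdio.h>

#define PAGE_SIZE          4096UL
#define THREAD_SIZE_ORDER  1
#define THREAD_SIZE        (PAGE_SIZE << THREAD_SIZE_ORDER)       /* 8 KB */
#define THREAD_INFO_SIZE   48UL  /* example; really sizeof(struct thread_info) */
#define THREAD_INFO_OFFSET (THREAD_SIZE - THREAD_INFO_SIZE)
#define THREAD_START_SP    (THREAD_SIZE - 8 - THREAD_INFO_SIZE)
#define STACK_TOP_PAGE_OFF (THREAD_SIZE - PAGE_SIZE)

int main(void)
{
        unsigned long stack = 0xBC000000UL;  /* imaginary stack base in the vmap area */
        unsigned long sp    = stack + THREAD_START_SP - 200;  /* some call depth */

        /* old scheme: thread_info at the stack base, start SP at the very top */
        printf("old thread_info : %#lx\n", stack);
        printf("old start sp    : %#lx\n", stack + THREAD_SIZE - 8);

        /* new scheme: thread_info at the top, start SP just below it */
        printf("new thread_info : %#lx\n", stack + THREAD_INFO_OFFSET);
        printf("new start sp    : %#lx\n", stack + THREAD_START_SP);

        /* current_thread_info() as rewritten for CONFIG_AMLOGIC_VMAP */
        printf("from sp %#lx    : %#lx\n", sp,
               (sp & ~(THREAD_SIZE - 1)) + THREAD_INFO_OFFSET);

        /* only the top page is mapped at clone time; the page below it is
         * faulted in by handle_vmap_fault() the first time SP crosses into
         * it, so shallow tasks use half the stack memory (about 50%) */
        printf("eagerly mapped  : [%#lx-%#lx)\n",
               stack + STACK_TOP_PAGE_OFF, stack + THREAD_SIZE);
        return 0;
}
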