/* Per-processor data structure. %gs points to it while the kernel runs */
struct x8664_pda {
- struct task_struct *pcurrent; /* 0 Current process */
- unsigned long data_offset; /* 8 Per cpu data offset from linker
- address */
- unsigned long kernelstack; /* 16 top of kernel stack for current */
- unsigned long oldrsp; /* 24 user rsp for system call */
- int irqcount; /* 32 Irq nesting counter. Starts -1 */
- unsigned int cpunumber; /* 36 Logical CPU number */
+ unsigned long unused1;
+ unsigned long unused2;
+ unsigned long unused3;
+ unsigned long unused4;
+ int unused5;
+ unsigned int unused6; /* 36 was cpunumber */
-#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
/* gcc-ABI: this canary MUST be at
offset 40!!! */
-#endif
- char *irqstackptr;
- short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
- unsigned int __softirq_pending;
- unsigned int __nmi_count; /* number of NMI on this CPUs */
- short mmu_state;
- short isidle;
- struct mm_struct *active_mm;
- unsigned apic_timer_irqs;
- unsigned irq0_irqs;
- unsigned irq_resched_count;
- unsigned irq_call_count;
- unsigned irq_tlb_count;
- unsigned irq_thermal_count;
- unsigned irq_threshold_count;
- unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
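
The removed fields become unusedN placeholders instead of disappearing because stack_canary has to stay at byte offset 40: gcc's stack protector, combined with -mcmodel=kernel, hard-codes the canary access as a %gs:40 load in the prologue and epilogue of every protected function. The six placeholders add up to 8+8+8+8+4+4 = 40 bytes, so the slot stays put. A build-time assertion along these lines would document the constraint (sketch only; the helper name is made up and it assumes BUILD_BUG_ON from linux/kernel.h is available):

	/* not part of the patch: documents the gcc-ABI requirement */
	static inline void pda_layout_check(void)
	{
		/* gcc emits "movq %gs:40, <reg>" for the canary */
		BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
	}
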
- extern struct x8664_pda **_cpu_pda;
+ DECLARE_PER_CPU(struct x8664_pda, __pda);
extern void pda_init(int);
- #define cpu_pda(i) (_cpu_pda[i])
+ #define cpu_pda(cpu) (&per_cpu(__pda, cpu))
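
With the PDA now an ordinary per-cpu variable, cpu_pda() no longer chases the _cpu_pda pointer array that boot code had to allocate and fill; it simply takes the address of the named CPU's copy in the per-cpu area. A small sketch of the equivalence (illustration only; the function name is invented and BUG_ON is assumed to be available):

	static void pda_is_percpu_sketch(void)
	{
		struct x8664_pda *pda0 = cpu_pda(0);

		/* cpu_pda() is now plain per_cpu() on __pda */
		BUG_ON(pda0 != &per_cpu(__pda, 0));
		BUG_ON(pda0->stack_canary != per_cpu(__pda, 0).stack_canary);
	}
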
- /*
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
- */
- extern void __bad_pda_field(void) __attribute__((noreturn));
-
- /*
- * proxy_pda doesn't actually exist, but tell gcc it is accessed for
- * all PDA accesses so it gets read/write dependencies right.
- */
- extern struct x8664_pda _proxy_pda;
-
- #define pda_offset(field) offsetof(struct x8664_pda, field)
-
- #define pda_to_op(op, field, val) \
- do { \
- typedef typeof(_proxy_pda.field) T__; \
- if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- case 4: \
- asm(op "l %1,%%gs:%c2" : \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i" (pda_offset(field))); \
- break; \
- case 8: \
- asm(op "q %1,%%gs:%c2": \
- "+m" (_proxy_pda.field) : \
- "ri" ((T__)val), \
- "i"(pda_offset(field))); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
- } while (0)
-
- #define pda_from_op(op, field) \
- ({ \
- typeof(_proxy_pda.field) ret__; \
- switch (sizeof(_proxy_pda.field)) { \
- case 2: \
- asm(op "w %%gs:%c1,%0" : \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 4: \
- asm(op "l %%gs:%c1,%0": \
- "=r" (ret__): \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- case 8: \
- asm(op "q %%gs:%c1,%0": \
- "=r" (ret__) : \
- "i" (pda_offset(field)), \
- "m" (_proxy_pda.field)); \
- break; \
- default: \
- __bad_pda_field(); \
- } \
- ret__; \
- })
-
- #define read_pda(field) pda_from_op("mov", field)
- #define write_pda(field, val) pda_to_op("mov", field, val)
- #define add_pda(field, val) pda_to_op("add", field, val)
- #define sub_pda(field, val) pda_to_op("sub", field, val)
- #define or_pda(field, val) pda_to_op("or", field, val)
+ #define read_pda(field) percpu_read(__pda.field)
+ #define write_pda(field, val) percpu_write(__pda.field, val)
+ #define add_pda(field, val) percpu_add(__pda.field, val)
+ #define sub_pda(field, val) percpu_sub(__pda.field, val)
+ #define or_pda(field, val) percpu_or(__pda.field, val)
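
The hand-rolled pda_from_op()/pda_to_op() machinery above existed only to pick a mov of the right width and to give gcc a dummy memory operand (_proxy_pda) for dependency tracking; the generic x86 per-cpu accessors perform the same size dispatch and %gs-relative addressing, so the pda wrappers shrink to one-liners and callers are untouched. A sketch of what that means at a call site (illustration only, function name invented; the write is what refresh_stack_canary(), added further down, expands to):

	static void canary_refresh_sketch(void)
	{
		unsigned long c = read_pda(stack_canary);	/* one %gs-relative load  */

		write_pda(stack_canary, current->stack_canary);	/* one %gs-relative store */
		(void)c;
	}
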
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
#define test_and_clear_bit_pda(bit, field) \
	x86_test_and_clear_bit_percpu(bit, __pda.field)
#endif
- #define PDA_STACKOFFSET (5*8)
-
+#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
+
#endif /* _ASM_X86_PDA_H */
--- /dev/null
++#ifndef _ASM_STACKPROTECTOR_H
++#define _ASM_STACKPROTECTOR_H 1
++
++#include <asm/tsc.h>
++#include <asm/pda.h>
++
++/*
++ * Initialize the stackprotector canary value.
++ *
++ * NOTE: this must only be called from functions that never return,
++ * and it must always be inlined.
++ */
++static __always_inline void boot_init_stack_canary(void)
++{
++ u64 canary;
++ u64 tsc;
++
++ /*
++ * If we're the non-boot CPU, nothing set the PDA stack
++ * canary up for us - and if we are the boot CPU we have
++ * a zero stack canary. This is a good place for updating
++ * it, as we won't ever return from this function (so the
++ * invalid canaries already on the stack won't ever
++ * trigger).
++ *
++ * We use both the random pool and the current TSC as a source
++ * of randomness. The TSC only matters for very early init,
++ * where it already has some randomness on most systems. Later
++ * on during bootup the random pool gains true entropy too.
++ */
++ get_random_bytes(&canary, sizeof(canary));
++ tsc = __native_read_tsc();
++ canary += tsc + (tsc << 32UL);
++
++ current->stack_canary = canary;
++ write_pda(stack_canary, canary);
++}
++
++#endif
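
boot_init_stack_canary() must be inlined into a caller that never returns: it changes the value every live stack frame was protected with, so those older frames must never reach their epilogue checks again. A sketch of the kind of call site this implies (the function name here is invented; in the kernel the real callers are the early boot path and each CPU's idle loop):

	static void __cpuinit secondary_idle_sketch(void)
	{
		/*
		 * Install this CPU's canary before running any
		 * stack-protected C code.  This function never
		 * returns, so frames created earlier with the old
		 * (or zero) canary are never re-checked.
		 */
		boot_init_stack_canary();

		for (;;)
			cpu_relax();	/* idle forever */
	}
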
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
+ "movq %P[task_canary](%%rsi),%%r8\n\t" \
+ "movq %%r8,%%gs:%P[pda_canary]\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (per_cpu_var(current_task)) \
+ [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \
++ [current_task] "m" (per_cpu_var(current_task)), \
+ [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
: "memory", "cc" __EXTRA_CLOBBER)
#endif
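
The two added movq instructions copy the incoming task's canary from its task_struct into the fixed %gs:40 PDA slot during the context switch itself, so the value is already correct when the first stack-protected C function runs on the new task's stack. Expressed in C it amounts to the following (sketch with an invented helper name; the real work has to stay inside the switch_to() asm because %rsp already belongs to the new task at that point):

	static inline void switch_canary_sketch(void)
	{
		struct task_struct *next = percpu_read(current_task);

		/* what the added "movq %P[task_canary](%%rsi),%%r8" and
		 * "movq %%r8,%%gs:%P[pda_canary]" accomplish */
		write_pda(stack_canary, next->stack_canary);
	}
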