[PATCH] x86_64: Move int 3 handler to debug stack and allow to increase it.
author Jan Beulich <jbeulich@novell.com>
Wed, 11 Jan 2006 21:43:00 +0000 (22:43 +0100)
committer Linus Torvalds <torvalds@g5.osdl.org>
Thu, 12 Jan 2006 03:01:13 +0000 (19:01 -0800)
This
- switches the INT3 handler to run on an IST stack (to cope with
  breakpoints set by a kernel debugger in places where the kernel's
  %gs base hasn't been set up yet); the IST stack used is shared with
  the INT1 handler's
[AK: this also allows setting a kprobe on the interrupt/exception entry
points]
- allows nesting of INT1/INT3 handlers so that one can, with a kernel
  debugger, debug (at least) the user-mode portions of the INT1/INT3
  handling; the nesting isn't actively enabled here since a kernel-
  debugger-free kernel doesn't need it

Signed-Off-By: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
arch/x86_64/kernel/asm-offsets.c
arch/x86_64/kernel/entry.S
arch/x86_64/kernel/setup64.c
arch/x86_64/kernel/traps.c
include/asm-x86_64/desc.h
include/asm-x86_64/page.h
include/asm-x86_64/pda.h
include/asm-x86_64/processor.h

index aaa6d38..00a08d1 100644 (file)
@@ -64,5 +64,9 @@ int main(void)
        DEFINE(pbe_address, offsetof(struct pbe, address));
        DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
        DEFINE(pbe_next, offsetof(struct pbe, next));
+       BLANK();
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+       DEFINE(DEBUG_IST, DEBUG_STACK);
+#endif
        return 0;
 }
index bd21eba..28b3c8e 100644 (file)
@@ -673,7 +673,10 @@ ENTRY(spurious_interrupt)
 
        /* error code is on the stack already */
        /* handle NMI like exceptions that can happen everywhere */
-       .macro paranoidentry sym
+#ifndef DEBUG_IST
+# define DEBUG_IST 0
+#endif
+       .macro paranoidentry sym, ist=0
        SAVE_ALL
        cld
        movl $1,%ebx
@@ -683,10 +686,20 @@ ENTRY(spurious_interrupt)
        js    1f
        swapgs
        xorl  %ebx,%ebx
-1:     movq %rsp,%rdi
+1:
+       .if \ist
+       movq    %gs:pda_data_offset, %rbp
+       .endif
+       movq %rsp,%rdi
        movq ORIG_RAX(%rsp),%rsi
        movq $-1,ORIG_RAX(%rsp)
+       .if \ist
+       subq    $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       .endif
        call \sym
+       .if \ist
+       addq    $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       .endif
        cli
        .endm
        
@@ -904,7 +917,7 @@ KPROBE_ENTRY(debug)
        INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8         
-       paranoidentry do_debug
+       paranoidentry do_debug, DEBUG_IST
        jmp paranoid_exit
        CFI_ENDPROC
        .previous .text
@@ -959,7 +972,12 @@ paranoid_schedule:
        CFI_ENDPROC
 
 KPROBE_ENTRY(int3)
-       zeroentry do_int3       
+       INTR_FRAME
+       pushq $0
+       CFI_ADJUST_CFA_OFFSET 8
+       paranoidentry do_int3, DEBUG_IST
+       jmp paranoid_exit
+       CFI_ENDPROC
        .previous .text
 
 ENTRY(overflow)
index 7b7131d..39e728c 100644 (file)
@@ -145,7 +145,7 @@ void pda_init(int cpu)
        pda->irqstackptr += IRQSTACKSIZE-64;
 } 
 
-char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] 
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ]
 __attribute__((section(".bss.page_aligned")));
 
 /* May not be marked __init: used by software suspend */
@@ -236,13 +236,27 @@ void __cpuinit cpu_init (void)
         */
        for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                if (cpu) {
-                       estacks = (char *)__get_free_pages(GFP_ATOMIC, 
-                                                  EXCEPTION_STACK_ORDER);
+                       static const unsigned int order[N_EXCEPTION_STACKS] = {
+                               [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+                               [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+                       };
+
+                       estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
                        if (!estacks)
                                panic("Cannot allocate exception stack %ld %d\n",
                                      v, cpu); 
                }
-               estacks += EXCEPTION_STKSZ;
+               switch (v + 1) {
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+               case DEBUG_STACK:
+                       cpu_pda[cpu].debugstack = (unsigned long)estacks;
+                       estacks += DEBUG_STKSZ;
+                       break;
+#endif
+               default:
+                       estacks += EXCEPTION_STKSZ;
+                       break;
+               }
                t->ist[v] = (unsigned long)estacks;
        }
 
index bd71dda..1a9094d 100644 (file)
@@ -121,19 +121,31 @@ int printk_address(unsigned long address)
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                                        unsigned *usedp, const char **idp)
 {
-       static const char ids[N_EXCEPTION_STACKS][8] = {
+       static char ids[][8] = {
                [DEBUG_STACK - 1] = "#DB",
                [NMI_STACK - 1] = "NMI",
                [DOUBLEFAULT_STACK - 1] = "#DF",
                [STACKFAULT_STACK - 1] = "#SS",
                [MCE_STACK - 1] = "#MC",
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+               [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+#endif
        };
        unsigned k;
 
        for (k = 0; k < N_EXCEPTION_STACKS; k++) {
                unsigned long end;
 
-               end = per_cpu(init_tss, cpu).ist[k];
+               switch (k + 1) {
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+               case DEBUG_STACK:
+                       end = cpu_pda[cpu].debugstack + DEBUG_STKSZ;
+                       break;
+#endif
+               default:
+                       end = per_cpu(init_tss, cpu).ist[k];
+                       break;
+               }
                if (stack >= end)
                        continue;
                if (stack >= end - EXCEPTION_STKSZ) {
@@ -143,6 +155,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                        *idp = ids[k];
                        return (unsigned long *)end;
                }
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+               if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
+                       unsigned j = N_EXCEPTION_STACKS - 1;
+
+                       do {
+                               ++j;
+                               end -= EXCEPTION_STKSZ;
+                               ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+                       } while (stack < end - EXCEPTION_STKSZ);
+                       if (*usedp & (1U << j))
+                               break;
+                       *usedp |= 1U << j;
+                       *idp = ids[j];
+                       return (unsigned long *)end;
+               }
+#endif
        }
        return NULL;
 }
@@ -613,6 +641,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
                io_check_error(reason, regs);
 }
 
+/* runs on IST stack. */
 asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
 {
        if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
@@ -894,7 +923,7 @@ void __init trap_init(void)
        set_intr_gate(0,&divide_error);
        set_intr_gate_ist(1,&debug,DEBUG_STACK);
        set_intr_gate_ist(2,&nmi,NMI_STACK);
-       set_system_gate(3,&int3);
+       set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
        set_system_gate(4,&overflow);   /* int4 can be called from all */
        set_intr_gate(5,&bounds);
        set_intr_gate(6,&invalid_op);
index 3376486..5ce0e34 100644 (file)
@@ -114,6 +114,11 @@ static inline void set_system_gate(int nr, void *func)
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
 } 
 
+static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
+{
+       _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
+}
+
 static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, 
                                         unsigned size) 
 { 
index 06e489f..dcbb4fc 100644 (file)
 #define PHYSICAL_PAGE_MASK     (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
 
 #define THREAD_ORDER 1 
-#ifdef __ASSEMBLY__
-#define THREAD_SIZE  (1 << (PAGE_SHIFT + THREAD_ORDER))
-#else
-#define THREAD_SIZE  (1UL << (PAGE_SHIFT + THREAD_ORDER))
-#endif
+#define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
 #define CURRENT_MASK (~(THREAD_SIZE-1))
 
+#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
+#define DEBUG_STACK_ORDER EXCEPTION_STACK_ORDER
+#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
+
+#define IRQSTACK_ORDER 2
+#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
 
index 8733ccf..431a909 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/cache.h>
+#include <asm/page.h>
 
 /* Per processor datastructure. %gs points to it while the kernel runs */ 
 struct x8664_pda {
@@ -12,6 +13,9 @@ struct x8664_pda {
        unsigned long data_offset;      /* Per cpu data offset from linker address */
        unsigned long kernelstack;  /* top of kernel stack for current */ 
        unsigned long oldrsp;       /* user rsp for system call */
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+       unsigned long debugstack;   /* #DB/#BP stack. */
+#endif
         int irqcount;              /* Irq nesting counter. Starts with -1 */   
        int cpunumber;              /* Logical CPU number */
        char *irqstackptr;      /* top of irqstack */
@@ -23,10 +27,6 @@ struct x8664_pda {
        unsigned apic_timer_irqs;
 } ____cacheline_aligned_in_smp;
 
-
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 
-
 extern struct x8664_pda cpu_pda[];
 
 /* 
index 4861246..5cb1515 100644 (file)
@@ -273,8 +273,6 @@ struct thread_struct {
 #define DEBUG_STACK 4 
 #define MCE_STACK 5
 #define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
-#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-#define EXCEPTION_STACK_ORDER 0 
 
 #define start_thread(regs,new_rip,new_rsp) do { \
        asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));      \