Merge commit 'v2.6.27-rc7' into x86/debug
author Ingo Molnar <mingo@elte.hu>
Mon, 22 Sep 2008 11:08:57 +0000 (13:08 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 22 Sep 2008 11:08:57 +0000 (13:08 +0200)
1  2 
Documentation/kernel-parameters.txt
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process_32.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/traps_64.c
include/asm-x86/msr.h

@@@ -87,7 -87,8 +87,8 @@@ parameter is applicable
        SH      SuperH architecture is enabled.
        SMP     The kernel is an SMP kernel.
        SPARC   Sparc architecture is enabled.
-       SWSUSP  Software suspend is enabled.
+       SWSUSP  Software suspend (hibernation) is enabled.
+       SUSPEND System suspend states are enabled.
        TS      Appropriate touchscreen support is enabled.
        USB     USB support is enabled.
        USBHID  USB Human Interface Device support is enabled.
@@@ -147,10 -148,12 +148,12 @@@ and is between 256 and 4096 characters
                        default: 0
  
        acpi_sleep=     [HW,ACPI] Sleep options
-                       Format: { s3_bios, s3_mode, s3_beep, old_ordering }
+                       Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering }
                        See Documentation/power/video.txt for s3_bios and s3_mode.
                        s3_beep is for debugging; it makes the PC's speaker beep
                        as soon as the kernel's real-mode entry point is called.
+                       s4_nohwsig prevents ACPI hardware signature from being
+                       used during resume from hibernation.
                        old_ordering causes the ACPI 1.0 ordering of the _PTS
                        control method, wrt putting devices into low power
                        states, to be enforced (the ACPI 2.0 ordering of _PTS is
                        no delay (0).
                        Format: integer
  
+       bootmem_debug   [KNL] Enable bootmem allocator debug messages.
        bttv.card=      [HW,V4L] bttv (bt848 + bt878 based grabber cards)
        bttv.radio=     Most important insmod options are available as
                        kernel args too.
        hisax=          [HW,ISDN]
                        See Documentation/isdn/README.HiSax.
  
-       hugepages=      [HW,X86-32,IA-64] Maximal number of HugeTLB pages.
-       hugepagesz=     [HW,IA-64,PPC] The size of the HugeTLB pages.
+       hugepages=      [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
+       hugepagesz=     [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
+                       On x86-64 and powerpc, this option can be specified
+                       multiple times interleaved with hugepages= to reserve
+                       huge pages of different sizes. Valid page sizes on
+                       x86-64 are 2M (when the CPU supports "pse") and 1G
+                       (when the CPU supports the "pdpe1gb" cpuinfo flag).
+                       Note that 1GB pages can only be allocated at boot time
+                       using hugepages= and not freed afterwards.
+       default_hugepagesz=
+                       [same as hugepagesz=] The size of the default
+                       HugeTLB page size. This is the size represented by
+                       the legacy /proc/ hugepages APIs, used for SHM, and
+                       default size when mounting hugetlbfs filesystems.
+                       Defaults to the default architecture's huge page size
+                       if not specified.
  
        i8042.direct    [HW] Put keyboard port into non-translated mode
        i8042.dumbkbd   [HW] Pretend that controller can only read data from
  
                        * [no]ncq: Turn on or off NCQ.
  
+                       * nohrst, nosrst, norst: suppress hard, soft
+                           and both resets.
                        If there are multiple matching configurations changing
                        the same attribute, the last one is used.
  
  
        mga=            [HW,DRM]
  
+       mminit_loglevel=
+                       [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+                       parameter allows control of the logging verbosity for
+                       the additional memory initialisation checks. A value
+                       of 0 disables mminit logging and a level of 4 will
+                       log everything. Information is printed at KERN_DEBUG
+                       so loglevel=8 may also need to be specified.
        mousedev.tap_time=
                        [MOUSE] Maximum time between finger touching and
                        leaving touchpad surface for touch to be considered
        shapers=        [NET]
                        Maximal number of shapers.
  
 +      show_msr=       [x86] show boot-time MSR settings
 +                      Format: { <integer> }
 +                      Show boot-time (BIOS-initialized) MSR settings.
 +                      The parameter means the number of CPUs to show,
 +                      for example 1 means boot CPU only.
 +
        sim710=         [SCSI,HW]
                        See header of drivers/scsi/sim710.c.
  
  
        tdfx=           [HW,DRM]
  
+       test_suspend=   [SUSPEND]
+                       Specify "mem" (for Suspend-to-RAM) or "standby" (for
+                       standby suspend) as the system sleep state to briefly
+                       enter during system startup.  The system is woken from
+                       this state using a wakeup-capable RTC alarm.
        thash_entries=  [KNL,NET]
                        Set number of hash buckets for TCP connection
  
                        <deci-seconds>: poll at this frequency
                        0: no polling (default)
  
-       tipar.timeout=  [HW,PPT]
-                       Set communications timeout in tenths of a second
-                       (default 15).
-       tipar.delay=    [HW,PPT]
-                       Set inter-bit delay in microseconds (default 10).
        tmscsim=        [HW,SCSI]
                        See comment before function dc390_setup() in
                        drivers/scsi/tmscsim.c.
@@@ -18,6 -18,7 +18,7 @@@
  #include <asm/mtrr.h>
  #include <asm/mce.h>
  #include <asm/pat.h>
+ #include <asm/asm.h>
  #include <asm/numa.h>
  #ifdef CONFIG_X86_LOCAL_APIC
  #include <asm/mpspec.h>
@@@ -215,6 -216,39 +216,39 @@@ static void __init early_cpu_support_pr
        }
  }
  
+ /*
+  * The NOPL instruction is supposed to exist on all CPUs with
+  * family >= 6, unfortunately, that's not true in practice because
+  * of early VIA chips and (more importantly) broken virtualizers that
+  * are not easy to detect.  Hence, probe for it based on first
+  * principles.
+  *
+  * Note: no 64-bit chip is known to lack these, but put the code here
+  * for consistency with 32 bits, and to make it utterly trivial to
+  * diagnose the problem should it ever surface.
+  */
+ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+ {
+       const u32 nopl_signature = 0x888c53b1; /* Random number */
+       u32 has_nopl = nopl_signature;
+       clear_cpu_cap(c, X86_FEATURE_NOPL);
+       if (c->x86 >= 6) {
+               asm volatile("\n"
+                            "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+                            "2:\n"
+                            "        .section .fixup,\"ax\"\n"
+                            "3:      xor %0,%0\n"
+                            "        jmp 2b\n"
+                            "        .previous\n"
+                            _ASM_EXTABLE(1b,3b)
+                            : "+a" (has_nopl));
+               if (has_nopl == nopl_signature)
+                       set_cpu_cap(c, X86_FEATURE_NOPL);
+       }
+ }
  static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
  
  void __init early_cpu_init(void)
@@@ -313,6 -347,8 +347,8 @@@ static void __cpuinit early_identify_cp
                c->x86_phys_bits = eax & 0xff;
        }
  
+       detect_nopl(c);
        if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
            cpu_devs[c->x86_vendor]->c_early_init)
                cpu_devs[c->x86_vendor]->c_early_init(c);
@@@ -394,49 -430,6 +430,49 @@@ static __init int setup_noclflush(char 
  }
  __setup("noclflush", setup_noclflush);
  
 +struct msr_range {
 +      unsigned min;
 +      unsigned max;
 +};
 +
 +static struct msr_range msr_range_array[] __cpuinitdata = {
 +      { 0x00000000, 0x00000418},
 +      { 0xc0000000, 0xc000040b},
 +      { 0xc0010000, 0xc0010142},
 +      { 0xc0011000, 0xc001103b},
 +};
 +
 +static void __cpuinit print_cpu_msr(void)
 +{
 +      unsigned index;
 +      u64 val;
 +      int i;
 +      unsigned index_min, index_max;
 +
 +      for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
 +              index_min = msr_range_array[i].min;
 +              index_max = msr_range_array[i].max;
 +              for (index = index_min; index < index_max; index++) {
 +                      if (rdmsrl_amd_safe(index, &val))
 +                              continue;
 +                      printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
 +              }
 +      }
 +}
 +
 +static int show_msr __cpuinitdata;
 +static __init int setup_show_msr(char *arg)
 +{
 +      int num;
 +
 +      get_option(&arg, &num);
 +
 +      if (num > 0)
 +              show_msr = num;
 +      return 1;
 +}
 +__setup("show_msr=", setup_show_msr);
 +
  void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
  {
        if (c->x86_model_id[0])
                printk(KERN_CONT " stepping %02x\n", c->x86_mask);
        else
                printk(KERN_CONT "\n");
 +
 +#ifdef CONFIG_SMP
 +      if (c->cpu_index < show_msr)
 +              print_cpu_msr();
 +#else
 +      if (show_msr)
 +              print_cpu_msr();
 +#endif
  }
  
  static __init int setup_disablecpuid(char *arg)
@@@ -544,17 -529,20 +580,20 @@@ void pda_init(int cpu
                /* others are initialized in smpboot.c */
                pda->pcurrent = &init_task;
                pda->irqstackptr = boot_cpu_stack;
+               pda->irqstackptr += IRQSTACKSIZE - 64;
        } else {
-               pda->irqstackptr = (char *)
-                       __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-               if (!pda->irqstackptr)
-                       panic("cannot allocate irqstack for cpu %d", cpu);
+               if (!pda->irqstackptr) {
+                       pda->irqstackptr = (char *)
+                               __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+                       if (!pda->irqstackptr)
+                               panic("cannot allocate irqstack for cpu %d",
+                                     cpu);
+                       pda->irqstackptr += IRQSTACKSIZE - 64;
+               }
  
                if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
                        pda->nodenumber = cpu_to_node(cpu);
        }
-       pda->irqstackptr += IRQSTACKSIZE-64;
  }
  
  char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@@ -652,19 -640,22 +691,22 @@@ void __cpuinit cpu_init(void
        /*
         * set up and load the per-CPU TSS
         */
-       for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+       if (!orig_ist->ist[0]) {
                static const unsigned int order[N_EXCEPTION_STACKS] = {
-                       [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
-                       [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+                 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+                 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
                };
-               if (cpu) {
-                       estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
-                       if (!estacks)
-                               panic("Cannot allocate exception stack %ld %d\n",
-                                     v, cpu);
+               for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+                       if (cpu) {
+                               estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+                               if (!estacks)
+                                       panic("Cannot allocate exception "
+                                             "stack %ld %d\n", v, cpu);
+                       }
+                       estacks += PAGE_SIZE << order[v];
+                       orig_ist->ist[v] = t->x86_tss.ist[v] =
+                                       (unsigned long)estacks;
                }
-               estacks += PAGE_SIZE << order[v];
-               orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
        }
  
        t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
@@@ -330,7 -330,6 +330,7 @@@ struct pv_cpu_ops pv_cpu_ops = 
  #endif
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
 +      .read_msr_amd = native_read_msr_amd_safe,
        .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
@@@ -472,7 -471,7 +472,7 @@@ struct pv_lock_ops pv_lock_ops = 
        .spin_unlock = __ticket_spin_unlock,
  #endif
  };
- EXPORT_SYMBOL_GPL(pv_lock_ops);
+ EXPORT_SYMBOL(pv_lock_ops);
  
  EXPORT_SYMBOL_GPL(pv_time_ops);
  EXPORT_SYMBOL    (pv_cpu_ops);
@@@ -37,7 -37,6 +37,7 @@@
  #include <linux/tick.h>
  #include <linux/percpu.h>
  #include <linux/prctl.h>
 +#include <linux/dmi.h>
  
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
@@@ -96,7 -95,6 +96,6 @@@ static inline void play_dead(void
  {
        /* This must be done before dead CPU ack */
        cpu_exit_clear();
-       wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;
         * With physical CPU hotplug, we should halt the cpu
         */
        local_irq_disable();
-       while (1)
-               halt();
+       /* mask all interrupts, flush any and all caches, and halt */
+       wbinvd_halt();
  }
  #else
  static inline void play_dead(void)
@@@ -129,7 -127,7 +128,7 @@@ void cpu_idle(void
  
        /* endless idle loop with no priority at all */
        while (1) {
-               tick_nohz_stop_sched_tick();
+               tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {
  
                        check_pgt_cache();
@@@ -161,7 -159,6 +160,7 @@@ void __show_registers(struct pt_regs *r
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned long sp;
        unsigned short ss, gs;
 +      const char *board;
  
        if (user_mode_vm(regs)) {
                sp = regs->sp;
        }
  
        printk("\n");
 -      printk("Pid: %d, comm: %s %s (%s %.*s)\n",
 +
 +      board = dmi_get_system_info(DMI_PRODUCT_NAME);
 +      if (!board)
 +              board = "";
 +      printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
                        task_pid_nr(current), current->comm,
                        print_tainted(), init_utsname()->release,
                        (int)strcspn(init_utsname()->version, " "),
 -                      init_utsname()->version);
 +                      init_utsname()->version, board);
  
        printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
                        (u16)regs->cs, regs->ip, regs->flags,
@@@ -80,24 -80,6 +80,6 @@@ static void __init setup_per_cpu_maps(v
  #endif
  }
  
- #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
- cpumask_t *cpumask_of_cpu_map __read_mostly;
- EXPORT_SYMBOL(cpumask_of_cpu_map);
- /* requires nr_cpu_ids to be initialized */
- static void __init setup_cpumask_of_cpu(void)
- {
-       int i;
-       /* alloc_bootmem zeroes memory */
-       cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
-       for (i = 0; i < nr_cpu_ids; i++)
-               cpu_set(i, cpumask_of_cpu_map[i]);
- }
- #else
- static inline void setup_cpumask_of_cpu(void) { }
- #endif
  #ifdef CONFIG_X86_32
  /*
   * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@@ -180,16 -162,9 +162,16 @@@ void __init setup_per_cpu_areas(void
                        printk(KERN_INFO
                               "cpu %d has no node %d or node-local memory\n",
                                cpu, node);
 +                      if (ptr)
 +                              printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
 +                                       cpu, __pa(ptr));
                }
 -              else
 +              else {
                        ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
 +                      if (ptr)
 +                              printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
 +                                       cpu, node, __pa(ptr));
 +              }
  #endif
                per_cpu_offset(cpu) = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
  
        /* Setup node to cpumask map */
        setup_node_to_cpumask_map();
-       /* Setup cpumask_of_cpu map */
-       setup_cpumask_of_cpu();
  }
  
  #endif
@@@ -339,8 -339,9 +339,8 @@@ static voi
  show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp, char *log_lvl)
  {
 -      printk("\nCall Trace:\n");
 +      printk("Call Trace:\n");
        dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
 -      printk("\n");
  }
  
  void show_trace(struct task_struct *task, struct pt_regs *regs,
@@@ -385,7 -386,6 +385,7 @@@ show_stack_log_lvl(struct task_struct *
                printk(" %016lx", *stack++);
                touch_nmi_watchdog();
        }
 +      printk("\n");
        show_trace_log_lvl(task, regs, sp, bp, log_lvl);
  }
  
@@@ -443,6 -443,7 +443,6 @@@ void show_registers(struct pt_regs *reg
                printk("Stack: ");
                show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
                                regs->bp, "");
 -              printk("\n");
  
                printk(KERN_EMERG "Code: ");
  
@@@ -1130,7 -1131,14 +1130,14 @@@ asmlinkage void math_state_restore(void
        }
  
        clts();                         /* Allow maths ops (or we recurse) */
-       restore_fpu_checking(&me->thread.xstate->fxsave);
+       /*
+        * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+        */
+       if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+               stts();
+               force_sig(SIGSEGV, me);
+               return;
+       }
        task_thread_info(me)->status |= TS_USEDFPU;
        me->fpu_counter++;
  }
diff --combined include/asm-x86/msr.h
@@@ -52,33 -52,17 +52,33 @@@ static inline unsigned long long native
  {
        DECLARE_ARGS(val, low, high);
  
-       asm volatile("2: rdmsr ; xor %0,%0\n"
+       asm volatile("2: rdmsr ; xor %[err],%[err]\n"
                     "1:\n\t"
                     ".section .fixup,\"ax\"\n\t"
-                    "3:  mov %3,%0 ; jmp 1b\n\t"
+                    "3:  mov %[fault],%[err] ; jmp 1b\n\t"
                     ".previous\n\t"
                     _ASM_EXTABLE(2b, 3b)
-                    : "=r" (*err), EAX_EDX_RET(val, low, high)
-                    : "c" (msr), "i" (-EFAULT));
+                    : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+                    : "c" (msr), [fault] "i" (-EFAULT));
        return EAX_EDX_VAL(val, low, high);
  }
  
 +static inline unsigned long long native_read_msr_amd_safe(unsigned int msr,
 +                                                    int *err)
 +{
 +      DECLARE_ARGS(val, low, high);
 +
 +      asm volatile("2: rdmsr ; xor %0,%0\n"
 +                   "1:\n\t"
 +                   ".section .fixup,\"ax\"\n\t"
 +                   "3:  mov %3,%0 ; jmp 1b\n\t"
 +                   ".previous\n\t"
 +                   _ASM_EXTABLE(2b, 3b)
 +                   : "=r" (*err), EAX_EDX_RET(val, low, high)
 +                   : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT));
 +      return EAX_EDX_VAL(val, low, high);
 +}
 +
  static inline void native_write_msr(unsigned int msr,
                                    unsigned low, unsigned high)
  {
@@@ -89,15 -73,15 +89,15 @@@ static inline int native_write_msr_safe
                                        unsigned low, unsigned high)
  {
        int err;
-       asm volatile("2: wrmsr ; xor %0,%0\n"
+       asm volatile("2: wrmsr ; xor %[err],%[err]\n"
                     "1:\n\t"
                     ".section .fixup,\"ax\"\n\t"
-                    "3:  mov %4,%0 ; jmp 1b\n\t"
+                    "3:  mov %[fault],%[err] ; jmp 1b\n\t"
                     ".previous\n\t"
                     _ASM_EXTABLE(2b, 3b)
-                    : "=a" (err)
+                    : [err] "=a" (err)
                     : "c" (msr), "0" (low), "d" (high),
-                      "i" (-EFAULT)
+                      [fault] "i" (-EFAULT)
                     : "memory");
        return err;
  }
@@@ -174,13 -158,6 +174,13 @@@ static inline int rdmsrl_safe(unsigned 
        *p = native_read_msr_safe(msr, &err);
        return err;
  }
 +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 +{
 +      int err;
 +
 +      *p = native_read_msr_amd_safe(msr, &err);
 +      return err;
 +}
  
  #define rdtscl(low)                                           \
        ((low) = (u32)native_read_tsc())
@@@ -215,19 -192,20 +215,20 @@@ do 
  #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0)
  
  #ifdef CONFIG_SMP
- void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
- void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
+ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
  int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
  int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
  #else  /*  CONFIG_SMP  */
- static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+ static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
  {
        rdmsr(msr_no, *l, *h);
+       return 0;
  }
- static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
  {
        wrmsr(msr_no, l, h);
+       return 0;
  }
  static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
                                    u32 *l, u32 *h)