2 * linux/arch/x86-64/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code.
13 * This file handles the architecture-dependent parts of initialization
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/initrd.h>
32 #include <linux/highmem.h>
33 #include <linux/bootmem.h>
34 #include <linux/module.h>
35 #include <asm/processor.h>
36 #include <linux/console.h>
37 #include <linux/seq_file.h>
38 #include <linux/crash_dump.h>
39 #include <linux/root_dev.h>
40 #include <linux/pci.h>
41 #include <linux/acpi.h>
42 #include <linux/kallsyms.h>
43 #include <linux/edd.h>
44 #include <linux/mmzone.h>
45 #include <linux/kexec.h>
46 #include <linux/cpufreq.h>
47 #include <linux/dmi.h>
48 #include <linux/dma-mapping.h>
51 #include <asm/uaccess.h>
52 #include <asm/system.h>
57 #include <video/edid.h>
60 #include <asm/mpspec.h>
61 #include <asm/mmu_context.h>
62 #include <asm/bootsetup.h>
63 #include <asm/proto.h>
64 #include <asm/setup.h>
65 #include <asm/mach_apic.h>
67 #include <asm/swiotlb.h>
68 #include <asm/sections.h>
69 #include <asm/gart-mapping.h>
/*
 * Boot-time globals owned by this file.
 *
 * boot_cpu_data is filled in by early_identify_cpu() from setup_arch().
 * saved_video_mode, drive_info, screen_info and edid_info are copied from
 * the boot parameters at the top of setup_arch(); command_line is the
 * buffer that parse_cmdline_early() hands back to its caller.
 *
 * NOTE(review): this listing shows only some of the original lines, so a
 * few declarations here (for example the end of sys_desc_table_struct and
 * the variable the "Boot loader ID" comment refers to) are not visible.
 */
75 struct cpuinfo_x86 boot_cpu_data __read_mostly;
77 unsigned long mmu_cr4_features;
80 EXPORT_SYMBOL(acpi_disabled);
82 extern int __initdata acpi_ht;
83 extern acpi_interrupt_flags acpi_sci_flags;
84 int __initdata acpi_force = 0;
87 int acpi_numa __initdata;
89 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
92 unsigned long saved_video_mode;
97 struct drive_info_struct { char dummy[32]; } drive_info;
98 struct screen_info screen_info;
99 struct sys_desc_table_struct {
100 unsigned short length;
101 unsigned char table[0];
104 struct edid_info edid_info;
107 extern int root_mountflags;
109 char command_line[COMMAND_LINE_SIZE];
/*
 * Fixed I/O port ranges, each marked IORESOURCE_BUSY | IORESOURCE_IO.
 * setup_arch() requests every entry from ioport_resource, iterating
 * STANDARD_IO_RESOURCES times.  Start and end are inclusive port numbers.
 */
111 struct resource standard_io_resources[] = {
112 { .name = "dma1", .start = 0x00, .end = 0x1f,
113 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
114 { .name = "pic1", .start = 0x20, .end = 0x21,
115 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
116 { .name = "timer0", .start = 0x40, .end = 0x43,
117 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
118 { .name = "timer1", .start = 0x50, .end = 0x53,
119 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
120 { .name = "keyboard", .start = 0x60, .end = 0x6f,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
122 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
123 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
124 { .name = "pic2", .start = 0xa0, .end = 0xa1,
125 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
126 { .name = "dma2", .start = 0xc0, .end = 0xdf,
127 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
128 { .name = "fpu", .start = 0xf0, .end = 0xff,
129 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
/* Number of entries in standard_io_resources[]. */
132 #define STANDARD_IO_RESOURCES \
133 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/*
 * Resources describing the kernel image.  Only .name and .flags are
 * visible here; setup_arch() fills in .start/.end from the physical
 * addresses of _text, _etext and _edata.
 */
135 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
137 struct resource data_resource = {
138 .name = "Kernel data",
141 .flags = IORESOURCE_RAM,
143 struct resource code_resource = {
144 .name = "Kernel code",
147 .flags = IORESOURCE_RAM,
/*
 * ROM resources claimed by probe_roms().
 *
 * NOTE(review): the .start/.end initializers of system_rom_resource and
 * extension_rom_resource are not visible in this listing.
 */
150 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
152 static struct resource system_rom_resource = {
153 .name = "System ROM",
156 .flags = IORESOURCE_ROM,
159 static struct resource extension_rom_resource = {
160 .name = "Extension ROM",
163 .flags = IORESOURCE_ROM,
/*
 * Slots for adapter ROMs found by probe_roms().  Only slot 0 has a
 * non-zero start (0xc8000), which probe_roms() uses as the upper bound of
 * its video ROM scan; the ranges are filled in as ROMs are discovered.
 */
166 static struct resource adapter_rom_resources[] = {
167 { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
168 .flags = IORESOURCE_ROM },
169 { .name = "Adapter ROM", .start = 0, .end = 0,
170 .flags = IORESOURCE_ROM },
171 { .name = "Adapter ROM", .start = 0, .end = 0,
172 .flags = IORESOURCE_ROM },
173 { .name = "Adapter ROM", .start = 0, .end = 0,
174 .flags = IORESOURCE_ROM },
175 { .name = "Adapter ROM", .start = 0, .end = 0,
176 .flags = IORESOURCE_ROM },
177 { .name = "Adapter ROM", .start = 0, .end = 0,
178 .flags = IORESOURCE_ROM }
/* Number of entries in adapter_rom_resources[]. */
181 #define ADAPTER_ROM_RESOURCES \
182 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
/*
 * Video ROM and video RAM resources.  probe_roms() updates
 * video_rom_resource.start/.end and requests it; setup_arch() requests
 * video_ram_resource.
 *
 * NOTE(review): the .name of video_rom_resource and the address ranges of
 * both resources are not visible in this listing.
 */
184 static struct resource video_rom_resource = {
188 .flags = IORESOURCE_ROM,
191 static struct resource video_ram_resource = {
192 .name = "Video RAM area",
195 .flags = IORESOURCE_RAM,
/* True when the 16-bit value read at address x equals 0xaa55. */
198 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum() - checksum `length` bytes starting at `rom`.
 *
 * Walks every byte in [rom, rom + length) using an 8-bit accumulator.
 * Callers in probe_roms() treat a non-zero return as "checksum okay".
 *
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing; presumably the bytes are summed and the result is whether
 * the sum is zero -- confirm against the full source.
 */
200 static int __init romchecksum(unsigned char *rom, unsigned long length)
202 unsigned char *p, sum = 0;
204 for (p = rom; p < rom + length; p++)
/*
 * probe_roms() - look for option ROMs and claim them in iomem_resource.
 *
 * Visible steps:
 *  - video ROM: scan from video_rom_resource.start up to the start of the
 *    first adapter ROM slot in 2048-byte steps, testing for the 0xaa55
 *    signature; byte 2 of a hit gives the length in 512-byte units, and
 *    the resource end is taken from it only when romchecksum() accepts
 *    that length;
 *  - system ROM: requested, and its start becomes the new upper bound;
 *  - extension ROM: requested when the signature and the checksum over the
 *    whole resource range are good, lowering the upper bound again;
 *  - adapter ROMs: up to ADAPTER_ROM_RESOURCES entries on 2 KiB boundaries
 *    below `upper`.
 *
 * NOTE(review): the continue/break statements, the declarations of `rom`
 * and `i`, and some assignments between the visible lines are missing
 * from this listing.
 */
209 static void __init probe_roms(void)
211 unsigned long start, length, upper;
216 upper = adapter_rom_resources[0].start;
217 for (start = video_rom_resource.start; start < upper; start += 2048) {
218 rom = isa_bus_to_virt(start);
219 if (!romsignature(rom))
222 video_rom_resource.start = start;
224 /* 0 < length <= 0x7f * 512, historically */
225 length = rom[2] * 512;
227 /* if checksum okay, trust length byte */
228 if (length && romchecksum(rom, length))
229 video_rom_resource.end = start + length - 1;
231 request_resource(&iomem_resource, &video_rom_resource);
/* continue after the video ROM, rounded up to the next 2 KiB boundary */
235 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
240 request_resource(&iomem_resource, &system_rom_resource);
241 upper = system_rom_resource.start;
243 /* check for extension rom (ignore length byte!) */
244 rom = isa_bus_to_virt(extension_rom_resource.start);
245 if (romsignature(rom)) {
246 length = extension_rom_resource.end - extension_rom_resource.start + 1;
247 if (romchecksum(rom, length)) {
248 request_resource(&iomem_resource, &extension_rom_resource);
249 upper = extension_rom_resource.start;
253 /* check for adapter roms on 2k boundaries */
254 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
255 rom = isa_bus_to_virt(start);
256 if (!romsignature(rom))
259 /* 0 < length <= 0x7f * 512, historically */
260 length = rom[2] * 512;
262 /* but accept any length that fits if checksum okay */
263 if (!length || start + length > upper || !romchecksum(rom, length))
266 adapter_rom_resources[i].start = start;
267 adapter_rom_resources[i].end = start + length - 1;
268 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/*
 * Rewind to the 2 KiB boundary holding the ROM's last byte; the loop's
 * own start += 2048 then moves to the first boundary past this ROM.
 */
270 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * parse_cmdline_early() - scan the boot command line for options that
 * must take effect before the normal parameter parsing runs.
 *
 * @cmdline_p: receives a pointer to command_line, the buffer `to` starts
 *             at (the scan source `from` starts at COMMAND_LINE).
 *
 * Options are recognised with fixed-length memcmp() calls at the current
 * scan position, so only the leading characters of each word are compared.
 * Options visible here: maxcpus=, acpi=off/force/ht/noirq/strict,
 * pci=noacpi, acpi_sci=edge/level/high/low, acpi_skip_timer_override,
 * disable_timer_pin_1, enable_timer_pin_1, nolapic/disableapic, noapic,
 * apic, mem=, memmap=, numa=, iommu=, oops=panic, noexec=, crashkernel=
 * and elfcorehdr=.
 *
 * NOTE(review): the surrounding copy loop, most option bodies and the
 * #endif lines are missing from this listing, so the exact if/else
 * chaining between options cannot be confirmed from here.
 */
274 static __init void parse_cmdline_early (char ** cmdline_p)
276 char c = ' ', *to = command_line, *from = COMMAND_LINE;
286 * If the BIOS enumerates physical processors before logical,
287 * maxcpus=N at enumeration-time can be used to disable HT.
289 else if (!memcmp(from, "maxcpus=", 8)) {
290 extern unsigned int maxcpus;
292 maxcpus = simple_strtoul(from + 8, NULL, 0);
296 /* "acpi=off" disables both ACPI table parsing and interpreter init */
297 if (!memcmp(from, "acpi=off", 8))
300 if (!memcmp(from, "acpi=force", 10)) {
301 /* add later when we do DMI horrors: */
306 /* acpi=ht just means: do ACPI MADT parsing
307 at bootup, but don't enable the full ACPI interpreter */
308 if (!memcmp(from, "acpi=ht", 7)) {
313 else if (!memcmp(from, "pci=noacpi", 10))
315 else if (!memcmp(from, "acpi=noirq", 10))
318 else if (!memcmp(from, "acpi_sci=edge", 13))
319 acpi_sci_flags.trigger = 1;
320 else if (!memcmp(from, "acpi_sci=level", 14))
321 acpi_sci_flags.trigger = 3;
322 else if (!memcmp(from, "acpi_sci=high", 13))
323 acpi_sci_flags.polarity = 1;
324 else if (!memcmp(from, "acpi_sci=low", 12))
325 acpi_sci_flags.polarity = 3;
327 /* acpi=strict disables out-of-spec workarounds */
328 else if (!memcmp(from, "acpi=strict", 11)) {
331 #ifdef CONFIG_X86_IO_APIC
332 else if (!memcmp(from, "acpi_skip_timer_override", 24))
333 acpi_skip_timer_override = 1;
337 if (!memcmp(from, "disable_timer_pin_1", 19))
338 disable_timer_pin_1 = 1;
339 if (!memcmp(from, "enable_timer_pin_1", 18))
340 disable_timer_pin_1 = -1;
342 if (!memcmp(from, "nolapic", 7) ||
343 !memcmp(from, "disableapic", 11))
346 if (!memcmp(from, "noapic", 6))
347 skip_ioapic_setup = 1;
349 /* Make sure to not confuse with apic= */
350 if (!memcmp(from, "apic", 4) &&
351 (from[4] == ' ' || from[4] == 0)) {
352 skip_ioapic_setup = 0;
356 if (!memcmp(from, "mem=", 4))
357 parse_memopt(from+4, &from);
359 if (!memcmp(from, "memmap=", 7)) {
360 /* exactmap option is for user defined memory */
361 if (!memcmp(from+7, "exactmap", 8)) {
362 #ifdef CONFIG_CRASH_DUMP
363 /* If we are doing a crash dump, we
364 * still need to know the real mem
365 * size before original memory map is
368 saved_max_pfn = e820_end_of_ram();
376 parse_memmapopt(from+7, &from);
382 if (!memcmp(from, "numa=", 5))
386 if (!memcmp(from,"iommu=",6)) {
390 if (!memcmp(from,"oops=panic", 10))
393 if (!memcmp(from, "noexec=", 7))
394 nonx_setup(from + 7);
397 /* crashkernel=size@addr specifies the location to reserve for
398 * a crash kernel. By reserving this memory we guarantee
399 * that linux never set's it up as a DMA target.
400 * Useful for holding code to do something appropriate
401 * after a kernel panic.
403 else if (!memcmp(from, "crashkernel=", 12)) {
404 unsigned long size, base;
405 size = memparse(from+12, &from);
407 base = memparse(from+1, &from);
408 /* FIXME: Do I want a sanity check
409 * to validate the memory range?
411 crashk_res.start = base;
412 crashk_res.end = base + size - 1;
417 #ifdef CONFIG_PROC_VMCORE
418 /* elfcorehdr= specifies the location of elf core header
419 * stored by the crashed kernel. This option will be passed
420 * by kexec loader to the capture kernel.
422 else if(!memcmp(from, "elfcorehdr=", 11))
423 elfcorehdr_addr = memparse(from+11, &from);
429 if (COMMAND_LINE_SIZE <= ++len)
434 printk(KERN_INFO "user-defined physical RAM map:\n");
435 e820_print_map("user");
438 *cmdline_p = command_line;
/*
 * contig_initmem_init() - set up the boot-time allocator for a single
 * contiguous memory node.
 *
 * @start_pfn: not referenced by any visible line.
 * @end_pfn:   page frame number one past the last usable page.
 *
 * Sizes the bootmem bitmap with bootmem_bootmap_pages(), finds room for it
 * in the e820 map below end_pfn, initialises bootmem with it, frees the
 * e820 RAM ranges into NODE_DATA(0) and finally reserves the bitmap
 * itself.  Panics when no area for the bitmap can be found.
 *
 * NOTE(review): the return-type line and the condition guarding panic()
 * are not visible in this listing.
 */
443 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
445 unsigned long bootmap_size, bootmap;
447 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
448 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
450 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
451 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
452 e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
453 reserve_bootmem(bootmap, bootmap_size);
/*
 * k8nops is a data-section label emitted by the asm() below, followed by
 * the K8 nop sequences back to back.  k8_nops[] holds pointers into that
 * blob at cumulative offsets, and apply_alternatives() copies k bytes from
 * k8_nops[k] to pad patched instructions.
 *
 * NOTE(review): the tail of the asm() statement and the first entries of
 * k8_nops[] are not visible in this listing.
 */
457 /* Use inline assembly to define this because the nops are defined
458 as inline assembly strings in the include files and we cannot
459 get them easily into strings. */
460 asm("\t.data\nk8nops: "
461 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
464 extern unsigned char k8nops[];
465 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
471 k8nops + 1 + 2 + 3 + 4,
472 k8nops + 1 + 2 + 3 + 4 + 5,
473 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
474 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
477 extern char __vsyscall_0;
479 /* Replace instructions with better alternatives for this CPU type.
481 This runs before SMP is initialized to avoid SMP problems with
482 self modifying code. This implies that asymmetric systems where
483 APs have less capabilities than the boot processor are not handled.
484 In this case boot with "noreplacement". */
/*
 * @start, @end: bounds of an array of alternative-instruction records,
 *               walked one record at a time.
 *
 * For each record, boot_cpu_has(a->cpuid) is tested, the replacement
 * bytes are copied over the original instruction, and the remaining
 * instrlen - replacementlen bytes are padded with K8 nops.  A replacement
 * longer than the original instruction triggers BUG_ON().
 *
 * NOTE(review): local declarations, the assignment of `instr`, the action
 * taken when the feature test fails and the computation of `k` are not
 * visible in this listing.
 */
485 void apply_alternatives(void *start, void *end)
489 for (a = start; (void *)a < end; a++) {
492 if (!boot_cpu_has(a->cpuid))
495 BUG_ON(a->replacementlen > a->instrlen);
497 /* vsyscall code is not mapped yet. resolve it manually. */
498 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
499 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
500 __inline_memcpy(instr, a->replacement, a->replacementlen);
501 diff = a->instrlen - a->replacementlen;
503 /* Pad the rest with nops */
504 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
508 __inline_memcpy(instr + i, k8_nops[k], k);
/*
 * no_replacement: boot-time flag, initially 0.
 * NOTE(review): presumably set by the "noreplacement" option and checked
 * before patching; neither use is visible in this listing.
 */
513 static int no_replacement __initdata = 0;
/*
 * alternative_instructions() - run apply_alternatives() over the
 * [__alt_instructions, __alt_instructions_end) table.
 */
515 void __init alternative_instructions(void)
517 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
520 apply_alternatives(__alt_instructions, __alt_instructions_end);
/*
 * Handler registered for the "noreplacement" boot option via __setup().
 * NOTE(review): the function body is not visible in this listing.
 */
523 static int __init noreplacement_setup(char *s)
529 __setup("noreplacement", noreplacement_setup);
/*
 * copy_edd(): when EDD support is configured (built in or as a module),
 * copy the MBR signatures and EDD info blocks, plus their counts, into
 * the global `edd` structure.
 *
 * NOTE(review): the second copy_edd() definition below is presumably the
 * empty fallback for kernels without EDD; the #else/#endif lines and both
 * function bodies' braces are missing from this listing.
 */
531 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
533 #ifdef CONFIG_EDD_MODULE
537 * copy_edd() - Copy the BIOS EDD information
538 * from boot_params into a safe place.
541 static inline void copy_edd(void)
543 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
544 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
545 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
546 edd.edd_info_nr = EDD_NR;
549 static inline void copy_edd(void)
/*
 * reserve_ebda_region() - keep the boot allocator away from the EBDA.
 *
 * Reads the 16-bit value stored at physical address EBDA_ADDR_POINTER
 * (0x40E) and reserves PAGE_SIZE bytes at `addr`.
 *
 * NOTE(review): the declaration of `addr` and whatever happens to it
 * between the read and the reservation (the existing comment describes a
 * real-mode segmented pointer) are not visible in this listing.
 */
554 #define EBDA_ADDR_POINTER 0x40E
555 static void __init reserve_ebda_region(void)
559 * there is a real-mode segmented pointer pointing to the
560 * 4K EBDA area at 0x40E
562 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
565 reserve_bootmem_generic(addr, PAGE_SIZE);
/*
 * setup_arch() - architecture-specific boot initialisation.
 *
 * @cmdline_p: passed straight to parse_cmdline_early(), which stores the
 *             saved command line pointer through it.
 *
 * Order of the visible steps:
 *  1. copy boot-loader parameters (root device, drive/screen/EDID info,
 *     video mode, loader type, ramdisk flags) into kernel variables;
 *  2. set up the memory region map, the init_mm code/data/brk bounds and
 *     the physical ranges of code_resource and data_resource;
 *  3. parse early command line options and identify the boot CPU;
 *  4. compute end_pfn from the e820 map, build the direct mapping and
 *     initialise the ACPI table parser;
 *  5. initialise bootmem (NUMA or contiguous variant) and reserve the
 *     page tables, the kernel image, page 0, the EBDA, the page at
 *     PAGE_SIZE, the SMP trampoline, ACPI sleep memory, the initrd and
 *     the crash kernel region;
 *  6. set up APIC mappings, request e820/video/I/O-port resources and
 *     select the console driver.
 *
 * NOTE(review): many lines (braces, #else/#endif, several calls) are
 * missing from this listing, so which steps are conditional on a given
 * CONFIG_ option cannot always be told from here.
 */
568 void __init setup_arch(char **cmdline_p)
570 unsigned long kernel_end;
572 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
573 drive_info = DRIVE_INFO;
574 screen_info = SCREEN_INFO;
575 edid_info = EDID_INFO;
576 saved_video_mode = SAVED_VIDEO_MODE;
577 bootloader_type = LOADER_TYPE;
579 #ifdef CONFIG_BLK_DEV_RAM
580 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
581 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
582 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
584 setup_memory_region();
587 if (!MOUNT_ROOT_RDONLY)
588 root_mountflags &= ~MS_RDONLY;
589 init_mm.start_code = (unsigned long) &_text;
590 init_mm.end_code = (unsigned long) &_etext;
591 init_mm.end_data = (unsigned long) &_edata;
592 init_mm.brk = (unsigned long) &_end;
/* resource ranges are inclusive, hence the -1 on each end address */
594 code_resource.start = virt_to_phys(&_text);
595 code_resource.end = virt_to_phys(&_etext)-1;
596 data_resource.start = virt_to_phys(&_etext);
597 data_resource.end = virt_to_phys(&_edata)-1;
599 parse_cmdline_early(cmdline_p);
601 early_identify_cpu(&boot_cpu_data);
604 * partially used pages are not usable - thus
605 * we are rounding upwards:
607 end_pfn = e820_end_of_ram();
611 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
617 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
618 * Call this early for SRAT node setup.
620 acpi_boot_table_init();
623 #ifdef CONFIG_ACPI_NUMA
625 * Parse SRAT to discover nodes.
631 numa_initmem_init(0, end_pfn);
633 contig_initmem_init(0, end_pfn);
636 /* Reserve direct mapping */
637 reserve_bootmem_generic(table_start << PAGE_SHIFT,
638 (table_end - table_start) << PAGE_SHIFT);
/* reserve the kernel image: from HIGH_MEMORY up to the page after _end */
641 kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
642 reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
645 * reserve physical page 0 - it's a special BIOS page on many boxes,
646 * enabling clean reboots, SMP operation, laptop functions.
648 reserve_bootmem_generic(0, PAGE_SIZE);
650 /* reserve ebda region */
651 reserve_ebda_region();
655 * But first pinch a few for the stack/trampoline stuff
656 * FIXME: Don't need the extra page at 4K, but need to fix
657 * trampoline before removing it. (see the GDT stuff)
659 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
661 /* Reserve SMP trampoline */
662 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
665 #ifdef CONFIG_ACPI_SLEEP
667 * Reserve low memory region for sleep support.
669 acpi_reserve_bootmem();
671 #ifdef CONFIG_X86_LOCAL_APIC
673 * Find and reserve possible boot-time SMP configuration:
677 #ifdef CONFIG_BLK_DEV_INITRD
/* the initrd is only accepted when it lies entirely below end_pfn */
678 if (LOADER_TYPE && INITRD_START) {
679 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
680 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
682 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
683 initrd_end = initrd_start+INITRD_SIZE;
686 printk(KERN_ERR "initrd extends beyond end of memory "
687 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
688 (unsigned long)(INITRD_START + INITRD_SIZE),
689 (unsigned long)(end_pfn << PAGE_SHIFT));
/* crashk_res is filled in by the crashkernel= option, if given */
695 if (crashk_res.start != crashk_res.end) {
696 reserve_bootmem(crashk_res.start,
697 crashk_res.end - crashk_res.start + 1);
707 * Read APIC and some other early information from ACPI tables.
714 #ifdef CONFIG_X86_LOCAL_APIC
716 * get boot-time SMP configuration:
718 if (smp_found_config)
720 init_apic_mappings();
724 * Request address space for all standard RAM and ROM resources
725 * and also for regions reported as reserved by the e820.
728 e820_reserve_resources();
730 request_resource(&iomem_resource, &video_ram_resource);
734 /* request I/O space for devices used on all i[345]86 PCs */
735 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
736 request_resource(&ioport_resource, &standard_io_resources[i]);
741 #ifdef CONFIG_GART_IOMMU
746 #if defined(CONFIG_VGA_CONSOLE)
747 conswitchp = &vga_con;
748 #elif defined(CONFIG_DUMMY_CONSOLE)
749 conswitchp = &dummy_con;
/*
 * get_model_name() - read the CPU brand string into c->x86_model_id.
 *
 * When the CPU reports extended CPUID level 0x80000004 or higher, the
 * three leaves 0x80000002..0x80000004 deliver 12 32-bit words (48 bytes)
 * that are stored directly into x86_model_id, which is then terminated at
 * index 48.
 *
 * NOTE(review): the return statements and the declaration of `v` are not
 * visible in this listing; init_amd() stores the result in `r`.
 */
754 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
758 if (c->extended_cpuid_level < 0x80000004)
761 v = (unsigned int *) c->x86_model_id;
762 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
763 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
764 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
765 c->x86_model_id[48] = 0;
/*
 * display_cacheinfo() - report cache sizes from the extended CPUID leaves
 * and record cache, TLB, power and address-size data in *c.
 *
 *  - leaf 0x80000005: prints L1 I/D cache size and line size and sets
 *    x86_cache_size to the sum of both L1 sizes;
 *  - leaf 0x80000006: overwrites x86_cache_size with the L2 size
 *    (ecx >> 16), adds to x86_tlbsize and prints the L2 line;
 *  - leaf 0x80000007: edx is stored in x86_power;
 *  - leaf 0x80000008: eax bits 15:8 / 7:0 give virtual / physical address
 *    bits.
 *
 * NOTE(review): the guard for the 0x80000007 read and the statement after
 * the "L1 TLB is inclusive" comment are not visible in this listing.
 */
770 static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
772 unsigned int n, dummy, eax, ebx, ecx, edx;
774 n = c->extended_cpuid_level;
776 if (n >= 0x80000005) {
777 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
778 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
779 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
780 c->x86_cache_size=(ecx>>24)+(edx>>24);
781 /* On K8 L1 TLB is inclusive, so don't count it */
785 if (n >= 0x80000006) {
786 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
/* NOTE(review): ecx was already filled by the cpuid() call just above */
787 ecx = cpuid_ecx(0x80000006);
788 c->x86_cache_size = ecx >> 16;
789 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
791 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
792 c->x86_cache_size, ecx & 0xFF);
796 cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
797 if (n >= 0x80000008) {
798 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
799 c->x86_virt_bits = (eax >> 8) & 0xff;
800 c->x86_phys_bits = eax & 0xff;
/*
 * nearby_node() - find an online NUMA node for a CPU whose own APIC id
 * has no usable node.
 *
 * Searches apicid_to_node[] first downwards from apicid - 1 to 0, then
 * upwards from apicid + 1 to MAX_LOCAL_APIC - 1, for an entry that is not
 * NUMA_NO_NODE and whose node is online.  If neither search succeeds the
 * first online node is returned.
 *
 * NOTE(review): the statements executed on a match (presumably
 * "return node") are not visible in this listing.
 */
805 static int nearby_node(int apicid)
808 for (i = apicid - 1; i >= 0; i--) {
809 int node = apicid_to_node[i];
810 if (node != NUMA_NO_NODE && node_online(node))
813 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
814 int node = apicid_to_node[i];
815 if (node != NUMA_NO_NODE && node_online(node))
818 return first_node(node_online_map); /* Shouldn't happen */
823 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
824 * Assumes number of cores is a power of two.
/*
 * amd_detect_cmp() - derive core id, socket id and NUMA node for the
 * current CPU from its APIC id.
 *
 * `bits` is advanced until (1 << bits) is at least c->x86_max_cores.  The
 * low `bits` bits of phys_proc_id[cpu] become cpu_core_id[cpu] and the
 * remaining upper bits become the socket id stored back into
 * phys_proc_id[cpu].  The node defaults to the socket id, is overridden by
 * apicid_to_node[] when that has an entry, and falls back to the
 * heuristics described in the comment below when the chosen node is not
 * online.
 *
 * NOTE(review): the declarations of `bits` and `node`, the body of the
 * while loop and any #ifdef lines are not visible in this listing.
 */
826 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
829 int cpu = smp_processor_id();
833 unsigned apicid = phys_proc_id[cpu];
837 while ((1 << bits) < c->x86_max_cores)
840 /* Low order bits define the core id (index of core in socket) */
841 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
842 /* Convert the APIC ID into the socket ID */
843 phys_proc_id[cpu] >>= bits;
846 node = phys_proc_id[cpu];
847 if (apicid_to_node[apicid] != NUMA_NO_NODE)
848 node = apicid_to_node[apicid];
849 if (!node_online(node)) {
850 /* Two possibilities here:
851 - The CPU is missing memory and no node was created.
852 In that case try picking one from a nearby CPU
853 - The APIC IDs differ from the HyperTransport node IDs
854 which the K8 northbridge parsing fills in.
855 Assume they are all increased by a constant offset,
856 but in the same order as the HT nodeids.
857 If that doesn't result in a usable node fall back to the
858 path for the previous case. */
859 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
860 if (ht_nodeid >= 0 &&
861 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
862 node = apicid_to_node[ht_nodeid];
863 /* Pick a nearby node */
864 if (!node_online(node))
865 node = nearby_node(apicid);
867 numa_set_node(cpu, node);
869 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
870 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
/*
 * init_amd() - AMD-specific CPU setup.
 *
 * Visible steps: read and write back MSR_K8_HWCR, clear capability bit 31
 * of word 0, fetch the model name (falling back to "Hammer"), report
 * cache information, flag a constant TSC when bit 8 of x86_power is set,
 * and compute x86_max_cores from CPUID 0x80000008 ecx[7:0] + 1, forcing it
 * to 1 when the result is not a power of two.
 *
 * NOTE(review): the modification applied to the HWCR value, the
 * conditions around the fallback name, the call that consumes
 * x86_max_cores and the return statement are not visible in this listing.
 */
875 static int __init init_amd(struct cpuinfo_x86 *c)
883 * Disable TLB flush filter by setting HWCR.FFDIS on K8
884 * bit 6 of msr C001_0015
886 * Errata 63 for SH-B3 steppings
887 * Errata 122 for all steppings (F+ have it disabled by default)
890 rdmsrl(MSR_K8_HWCR, value);
892 wrmsrl(MSR_K8_HWCR, value);
896 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
897 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
898 clear_bit(0*32+31, &c->x86_capability);
900 r = get_model_name(c);
904 /* Should distinguish Models here, but this is only
905 a fallback anyways. */
906 strcpy(c->x86_model_id, "Hammer");
910 display_cacheinfo(c);
912 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
913 if (c->x86_power & (1<<8))
914 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
916 if (c->extended_cpuid_level >= 0x80000008) {
917 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
/* x & (x - 1) is non-zero exactly when x is not a power of two */
918 if (c->x86_max_cores & (c->x86_max_cores - 1))
919 c->x86_max_cores = 1;
/*
 * detect_ht() - work out hyper-threading sibling and core ids.
 *
 * Records the APIC id, then, unless the CPU lacks X86_FEATURE_HT or has
 * X86_FEATURE_CMP_LEGACY, takes the logical-processor count from CPUID(1)
 * ebx[23:16].  For more than one sibling it derives the physical package
 * id, divides the sibling count by the number of cores and extracts the
 * core id from the APIC id.
 *
 * NOTE(review): return statements, trailing printk arguments and any
 * #ifdef lines are not visible in this listing.
 */
927 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
930 u32 eax, ebx, ecx, edx;
931 int index_msb, core_bits;
932 int cpu = smp_processor_id();
934 cpuid(1, &eax, &ebx, &ecx, &edx);
936 c->apicid = phys_pkg_id(0);
938 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
941 smp_num_siblings = (ebx & 0xff0000) >> 16;
943 if (smp_num_siblings == 1) {
944 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
945 } else if (smp_num_siblings > 1 ) {
947 if (smp_num_siblings > NR_CPUS) {
/* NOTE(review): unlike the other messages here, this one has no trailing newline */
948 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
949 smp_num_siblings = 1;
953 index_msb = get_count_order(smp_num_siblings);
954 phys_proc_id[cpu] = phys_pkg_id(index_msb);
956 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
/* from here on smp_num_siblings counts threads per core, not per package */
959 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
961 index_msb = get_count_order(smp_num_siblings) ;
963 core_bits = get_count_order(c->x86_max_cores);
965 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
966 ((1 << core_bits) - 1);
968 if (c->x86_max_cores > 1)
969 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
/*
 * intel_num_cpu_cores(): returns (eax >> 26) + 1 when the CPU supports
 * CPUID level 4 or higher.
 *
 * NOTE(review): the CPUID call that fills eax and the value returned for
 * cpuid_level < 4 are not visible in this listing.
 */
976 * find out the number of processor cores on the die
978 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
982 if (c->cpuid_level < 4)
991 return ((eax >> 26) + 1);
/*
 * srat_detect_node() - bind the current CPU to the NUMA node recorded in
 * apicid_to_node[] for its hardware APIC id, and log the assignment.
 *
 * NOTE(review): the declaration of `node` and the fallback chosen when
 * the table holds NUMA_NO_NODE are not visible in this listing.
 */
996 static void srat_detect_node(void)
1000 int cpu = smp_processor_id();
1002 /* Don't do the funky fallback heuristics the AMD version employs
1004 node = apicid_to_node[hard_smp_processor_id()];
1005 if (node == NUMA_NO_NODE)
1007 numa_set_node(cpu, node);
1010 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
/*
 * init_intel() - Intel-specific CPU setup.
 *
 * Visible steps: initialise cache information, read virtual/physical
 * address widths from CPUID 0x80000008 (with a 36-bit physical override
 * for one family 0xF / model 3 part), set the cache alignment to twice the
 * clflush size, set feature bits for TSC behaviour and store the core
 * count from intel_num_cpu_cores().
 *
 * NOTE(review): the last term of the 0F34 condition, the condition
 * guarding the cache-alignment assignment and whether the SYNC_RDTSC bit
 * is set unconditionally cannot be determined from this listing.
 */
1014 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1019 init_intel_cacheinfo(c);
1020 n = c->extended_cpuid_level;
1021 if (n >= 0x80000008) {
1022 unsigned eax = cpuid_eax(0x80000008);
1023 c->x86_virt_bits = (eax >> 8) & 0xff;
1024 c->x86_phys_bits = eax & 0xff;
1025 /* CPUID workaround for Intel 0F34 CPU */
1026 if (c->x86_vendor == X86_VENDOR_INTEL &&
1027 c->x86 == 0xF && c->x86_model == 0x3 &&
1029 c->x86_phys_bits = 36;
1033 c->x86_cache_alignment = c->x86_clflush_size * 2;
1034 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1035 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1036 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1037 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1038 c->x86_max_cores = intel_num_cpu_cores(c);
/*
 * get_cpu_vendor() - map the CPUID vendor string in c->x86_vendor_id to
 * an X86_VENDOR_* constant: "AuthenticAMD" -> AMD, "GenuineIntel" ->
 * Intel, anything else -> UNKNOWN.
 */
1043 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
1045 char *v = c->x86_vendor_id;
1047 if (!strcmp(v, "AuthenticAMD"))
1048 c->x86_vendor = X86_VENDOR_AMD;
1049 else if (!strcmp(v, "GenuineIntel"))
1050 c->x86_vendor = X86_VENDOR_INTEL;
1052 c->x86_vendor = X86_VENDOR_UNKNOWN;
/*
 * Table entry holding up to 16 model name strings.
 * NOTE(review): the other members and the closing brace are not visible
 * in this listing, and no use of this struct appears in the visible code.
 */
1055 struct cpu_model_info {
1058 char *model_names[16];
/*
 * early_identify_cpu() resets *c to defaults (unknown vendor, cache size
 * -1, 64-byte clflush size, one core, cleared capability words), reads the
 * vendor string and maximum standard level from CPUID leaf 0, and decodes
 * family, model, stepping and feature words from leaf 1.  It ends by
 * recording the initial APIC id (CPUID(1) ebx[31:24]) for the current CPU
 * in phys_proc_id[].
 *
 * NOTE(review): the conditions around the extended family/model additions
 * and the handling of a CPU with only leaf 0 are not visible in this
 * listing.
 */
1061 /* Do some early cpuid on the boot CPU to get some parameter that are
1062 needed before check_bugs. Everything advanced is in identify_cpu
1064 void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1068 c->loops_per_jiffy = loops_per_jiffy;
1069 c->x86_cache_size = -1;
1070 c->x86_vendor = X86_VENDOR_UNKNOWN;
1071 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1072 c->x86_vendor_id[0] = '\0'; /* Unset */
1073 c->x86_model_id[0] = '\0'; /* Unset */
1074 c->x86_clflush_size = 64;
1075 c->x86_cache_alignment = c->x86_clflush_size;
1076 c->x86_max_cores = 1;
1077 c->extended_cpuid_level = 0;
1078 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1080 /* Get vendor name */
/* the three register outputs land at string offsets 0, 8 and 4 */
1081 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1082 (unsigned int *)&c->x86_vendor_id[0],
1083 (unsigned int *)&c->x86_vendor_id[8],
1084 (unsigned int *)&c->x86_vendor_id[4]);
1088 /* Initialize the standard set of capabilities */
1089 /* Note that the vendor-specific code below might override */
1091 /* Intel-defined flags: level 0x00000001 */
1092 if (c->cpuid_level >= 0x00000001) {
1094 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1095 &c->x86_capability[0]);
1096 c->x86 = (tfms >> 8) & 0xf;
1097 c->x86_model = (tfms >> 4) & 0xf;
1098 c->x86_mask = tfms & 0xf;
1100 c->x86 += (tfms >> 20) & 0xff;
1102 c->x86_model += ((tfms >> 16) & 0xF) << 4;
/* capability bit 19 gates use of the clflush line size in misc[15:8] */
1103 if (c->x86_capability[0] & (1<<19))
1104 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1106 /* Have CPUID level 0 only - unheard of */
1111 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
/*
 * identify_cpu(): full CPU identification.
 *
 * Runs early_identify_cpu(), then reads the extended (0x8000xxxx) and
 * Transmeta (0x8086xxxx) CPUID ranges into capability words 1, 6 and 2,
 * fetches the default model name, dispatches on c->x86_vendor for
 * vendor-specific setup and selects the idle routine.  For any CPU other
 * than the one described by boot_cpu_data, boot_cpu_data's capability
 * words are ANDed with this CPU's so that they end up holding the common
 * feature set.
 *
 * NOTE(review): the bodies of the vendor switch cases and the calls under
 * CONFIG_X86_MCE are not visible in this listing.
 */
1116 * This does the hard work of actually picking apart the CPU stuff...
1118 void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1123 early_identify_cpu(c);
1125 /* AMD-defined flags: level 0x80000001 */
1126 xlvl = cpuid_eax(0x80000000);
1127 c->extended_cpuid_level = xlvl;
/* the upper half echoes the range base only when the range is implemented */
1128 if ((xlvl & 0xffff0000) == 0x80000000) {
1129 if (xlvl >= 0x80000001) {
1130 c->x86_capability[1] = cpuid_edx(0x80000001);
1131 c->x86_capability[6] = cpuid_ecx(0x80000001);
1133 if (xlvl >= 0x80000004)
1134 get_model_name(c); /* Default name */
1137 /* Transmeta-defined flags: level 0x80860001 */
1138 xlvl = cpuid_eax(0x80860000);
1139 if ((xlvl & 0xffff0000) == 0x80860000) {
1140 /* Don't set x86_cpuid_level here for now to not confuse. */
1141 if (xlvl >= 0x80860001)
1142 c->x86_capability[2] = cpuid_edx(0x80860001);
1146 * Vendor-specific initialization. In this section we
1147 * canonicalize the feature flags, meaning if there are
1148 * features a certain CPU supports which CPUID doesn't
1149 * tell us, CPUID claiming incorrect flags, or other bugs,
1150 * we handle them here.
1152 * At the end of this section, c->x86_capability better
1153 * indicate the features this CPU genuinely supports!
1155 switch (c->x86_vendor) {
1156 case X86_VENDOR_AMD:
1160 case X86_VENDOR_INTEL:
1164 case X86_VENDOR_UNKNOWN:
1166 display_cacheinfo(c);
1170 select_idle_routine(c);
1174 * On SMP, boot_cpu_data holds the common feature set between
1175 * all CPUs; so make sure that we indicate which features are
1176 * common between the CPUs. The first time this routine gets
1177 * executed, c == &boot_cpu_data.
1179 if (c != &boot_cpu_data) {
1180 /* AND the already accumulated flags with these */
1181 for (i = 0 ; i < NCAPINTS ; i++)
1182 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1185 #ifdef CONFIG_X86_MCE
1188 if (c == &boot_cpu_data)
1193 numa_add_cpu(smp_processor_id());
/*
 * print_cpu_info() - print the model name, when one is set, followed by
 * the stepping as two hex digits.
 *
 * NOTE(review): the else branch of the stepping test is not visible in
 * this listing.
 */
1198 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1200 if (c->x86_model_id[0])
1201 printk("%s", c->x86_model_id);
1203 if (c->x86_mask || c->cpuid_level >= 0)
1204 printk(" stepping %02x\n", c->x86_mask);
/*
 * show_cpuinfo(): seq_file show callback that prints one CPU's record.
 *
 * @m: output seq_file.
 * @v: pointer to the CPU's struct cpuinfo_x86 inside cpu_data[]; the CPU
 *     number is recovered as (c - cpu_data).
 *
 * Prints identification, stepping, frequency, cache size, topology (only
 * when siblings * cores > 1), the feature flag names whose bits are set,
 * bogomips, TLB/clflush/alignment sizes, address widths and the power
 * management flags.  x86_cap_flags[] is indexed by capability bit number
 * (32 names per capability word); a NULL entry suppresses output for that
 * bit.  Power bits without a name are printed as " [n]".
 *
 * NOTE(review): several printf arguments, the handling of offline CPUs
 * and the return statements are not visible in this listing.
 */
1210 * Get CPU information for use by the procfs.
1213 static int show_cpuinfo(struct seq_file *m, void *v)
1215 struct cpuinfo_x86 *c = v;
1218 * These flag bits must match the definitions in <asm/cpufeature.h>.
1219 * NULL means this bit is undefined or reserved; either way it doesn't
1220 * have meaning as far as Linux is concerned. Note that it's important
1221 * to realize there is a difference between this table and CPUID -- if
1222 * applications want to get the raw CPUID data, they should access
1223 * /dev/cpu/<cpu_nr>/cpuid instead.
1225 static char *x86_cap_flags[] = {
1227 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1228 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1229 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1230 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1233 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1234 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1235 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1236 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
1238 /* Transmeta-defined */
1239 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1240 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1241 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1242 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1244 /* Other (Linux-defined) */
1245 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1246 "constant_tsc", NULL, NULL,
1247 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1248 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1249 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1251 /* Intel-defined (#2) */
1252 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1253 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1254 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1255 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1257 /* VIA/Cyrix/Centaur-defined */
1258 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1259 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1260 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1261 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1263 /* AMD-defined (#2) */
1264 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
1265 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1266 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1267 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1269 static char *x86_power_flags[] = {
1270 "ts", /* temperature sensor */
1271 "fid", /* frequency id control */
1272 "vid", /* voltage id control */
1273 "ttp", /* thermal trip */
1277 /* nothing */ /* constant_tsc - moved to flags */
1282 if (!cpu_online(c-cpu_data))
1286 seq_printf(m,"processor\t: %u\n"
1288 "cpu family\t: %d\n"
1290 "model name\t: %s\n",
1291 (unsigned)(c-cpu_data),
1292 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
1295 c->x86_model_id[0] ? c->x86_model_id : "unknown");
1297 if (c->x86_mask || c->cpuid_level >= 0)
1298 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
1300 seq_printf(m, "stepping\t: unknown\n");
1302 if (cpu_has(c,X86_FEATURE_TSC)) {
1303 unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
/* freq is split into a whole part (freq / 1000) and a 3-digit remainder */
1306 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
1307 freq / 1000, (freq % 1000));
1311 if (c->x86_cache_size >= 0)
1312 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1315 if (smp_num_siblings * c->x86_max_cores > 1) {
1316 int cpu = c - cpu_data;
1317 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1318 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1319 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1320 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1326 "fpu_exception\t: yes\n"
1327 "cpuid level\t: %d\n"
1334 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1335 if ( test_bit(i, &c->x86_capability) &&
1336 x86_cap_flags[i] != NULL )
1337 seq_printf(m, " %s", x86_cap_flags[i]);
1340 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
1341 c->loops_per_jiffy/(500000/HZ),
1342 (c->loops_per_jiffy/(5000/HZ)) % 100);
1344 if (c->x86_tlbsize > 0)
1345 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
1346 seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
1347 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
1349 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
1350 c->x86_phys_bits, c->x86_virt_bits);
1352 seq_printf(m, "power management:");
1355 for (i = 0; i < 32; i++)
1356 if (c->x86_power & (1 << i)) {
1357 if (i < ARRAY_SIZE(x86_power_flags) &&
/* an empty-string name prints nothing, not even the separating space */
1359 seq_printf(m, "%s%s",
1360 x86_power_flags[i][0]?" ":"",
1361 x86_power_flags[i]);
1363 seq_printf(m, " [%d]", i);
1367 seq_printf(m, "\n\n");
/*
 * seq_file iterator over cpu_data[].
 *
 * c_start() maps position *pos to &cpu_data[*pos], or NULL once *pos
 * reaches NR_CPUS, which ends the iteration.  c_next() returns the record
 * for the following position by delegating to c_start().
 *
 * NOTE(review): the position increment in c_next(), the body of c_stop()
 * and the .start/.next/.stop members of cpuinfo_op are not visible in
 * this listing.
 */
1372 static void *c_start(struct seq_file *m, loff_t *pos)
1374 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1377 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1380 return c_start(m, pos);
1383 static void c_stop(struct seq_file *m, void *v)
1387 struct seq_operations cpuinfo_op = {
1391 .show = show_cpuinfo,
/*
 * Initcall registered at core_initcall level.
 * NOTE(review): the function body is not visible in this listing; going
 * by the name it presumably triggers the DMI table scan -- confirm.
 */
1394 static int __init run_dmi_scan(void)
1399 core_initcall(run_dmi_scan);