From: Ingo Molnar Date: Tue, 23 Dec 2008 15:27:23 +0000 (+0100) Subject: Merge branches 'x86/apic', 'x86/cleanups', 'x86/cpufeature', 'x86/crashdump', 'x86... X-Git-Tag: v2.6.29-rc1~587^2~4 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fa623d1b0222adbe8f822e53c08003b9679a410c;p=platform%2Fkernel%2Flinux-exynos.git Merge branches 'x86/apic', 'x86/cleanups', 'x86/cpufeature', 'x86/crashdump', 'x86/debug', 'x86/defconfig', 'x86/detect-hyper', 'x86/doc', 'x86/dumpstack', 'x86/early-printk', 'x86/fpu', 'x86/idle', 'x86/io', 'x86/memory-corruption-check', 'x86/microcode', 'x86/mm', 'x86/mtrr', 'x86/nmi-watchdog', 'x86/pat2', 'x86/pci-ioapic-boot-irq-quirks', 'x86/ptrace', 'x86/quirks', 'x86/reboot', 'x86/setup-memory', 'x86/signal', 'x86/sparse-fixes', 'x86/time', 'x86/uv' and 'x86/xen' into x86/core --- fa623d1b0222adbe8f822e53c08003b9679a410c diff --cc Documentation/kernel-parameters.txt index e0f346d,e0f346d,e0f346d,e0f346d,c86c074,e0f346d,e0f346d,dc6b06f,e0f346d,53ba7c7,e0f346d,e0f346d,e0f346d,e0f346d,e0f346d,e0f346d,e0f346d,343e0f0,6246220,e0f346d,63bac58,c86c074,e0f346d,c86c074,e0f346d,343e0f0,e0f346d,e0f346d,e0f346d,e0f346d..d5418d5 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -198,50 -198,50 -198,50 -198,50 -198,67 -198,50 -198,50 -198,67 -198,50 -198,40 -198,50 -198,50 -198,50 -198,50 -198,50 -198,50 -198,50 -198,67 -198,67 -198,50 -198,67 -198,67 -198,50 -198,67 -198,50 -198,67 -198,50 -198,50 -198,50 -198,50 +198,50 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ and is between 256 and 4096 characters that require a timer override, but don't have HPET - - - -- -- - - acpi.debug_layer= [HW,ACPI] - -- - - Format: - -- - - Each bit of the indicates an ACPI debug layer, - -- - - 1: enable, 0: disable. It is useful for boot time - -- - - debugging. After system has booted up, it can be set - -- - - via /sys/module/acpi/parameters/debug_layer. - -- - - CONFIG_ACPI_DEBUG must be enabled for this to produce any output. - -- - - Available bits (add the numbers together) to enable debug output - -- - - for specific parts of the ACPI subsystem: - -- - - 0x01 utilities 0x02 hardware 0x04 events 0x08 tables - -- - - 0x10 namespace 0x20 parser 0x40 dispatcher - -- - - 0x80 executer 0x100 resources 0x200 acpica debugger - -- - - 0x400 os services 0x800 acpica disassembler. - -- - - The number can be in decimal or prefixed with 0x in hex. - -- - - Warning: Many of these options can produce a lot of - -- - - output and make your system unusable. Be very careful. - -- - - - -- - - acpi.debug_level= [HW,ACPI] + + + ++ ++ + + acpi_backlight= [HW,ACPI] + + + ++ ++ + + acpi_backlight=vendor + + + ++ ++ + + acpi_backlight=video + + + ++ ++ + + If set to vendor, prefer vendor specific driver + + + ++ ++ + + (e.g. thinkpad_acpi, sony_acpi, etc.) instead + + + ++ ++ + + of the ACPI video.ko driver. + + + ++ ++ + + + + + ++ ++ + + acpi_display_output= [HW,ACPI] + + + ++ ++ + + acpi_display_output=vendor + + + ++ ++ + + acpi_display_output=video + + + ++ ++ + + See above. + + + ++ ++ + + + + + ++ ++ + + acpi.debug_layer= [HW,ACPI,ACPI_DEBUG] + + + ++ ++ + + acpi.debug_level= [HW,ACPI,ACPI_DEBUG] Format: - - - - Each bit of the indicates an ACPI debug layer, - Each bit of the indicates an ACPI debug level, - - - - - 1: enable, 0: disable. It is useful for boot time - - - - - debugging. After system has booted up, it can be set - - - - via /sys/module/acpi/parameters/debug_layer. - via /sys/module/acpi/parameters/debug_level. 
- - - - - CONFIG_ACPI_DEBUG must be enabled for this to produce any output. - - - - Available bits (add the numbers together) to enable debug output - - - - for specific parts of the ACPI subsystem: - - - - 0x01 utilities 0x02 hardware 0x04 events 0x08 tables - - - - 0x10 namespace 0x20 parser 0x40 dispatcher - - - - 0x80 executer 0x100 resources 0x200 acpica debugger - - - - 0x400 os services 0x800 acpica disassembler. - Available bits (add the numbers together) to enable different - debug output levels of the ACPI subsystem: - 0x01 error 0x02 warn 0x04 init 0x08 debug object - 0x10 info 0x20 init names 0x40 parse 0x80 load - 0x100 dispatch 0x200 execute 0x400 names 0x800 operation region - 0x1000 bfield 0x2000 tables 0x4000 values 0x8000 objects - 0x10000 resources 0x20000 user requests 0x40000 package. - - - - - The number can be in decimal or prefixed with 0x in hex. - - - - - Warning: Many of these options can produce a lot of - - - - - output and make your system unusable. Be very careful. - - - - - - - - acpi.debug_level= [HW,ACPI] - - - - Format: - - -- -- - - Each bit of the indicates an ACPI debug level, - - -- -- - - which corresponds to the level in an ACPI_DEBUG_PRINT - - -- -- - - statement. After system has booted up, this mask - - -- -- - - can be set via /sys/module/acpi/parameters/debug_level. - - -- -- - - - - -- -- - - CONFIG_ACPI_DEBUG must be enabled for this to produce - - -- -- - - any output. The number can be in decimal or prefixed - - -- -- - - with 0x in hex. Some of these options produce so much - - -- -- - - output that the system is unusable. - - -- -- - - - - -- -- - - The following global components are defined by the - - -- -- - - ACPI CA: - - -- -- - - 0x01 error - - -- -- - - 0x02 warn - - -- -- - - 0x04 init - - -- -- - - 0x08 debug object - - -- -- - - 0x10 info - - -- -- - - 0x20 init names - - -- -- - - 0x40 parse - - -- -- - - 0x80 load - - -- -- - - 0x100 dispatch - - -- -- - - 0x200 execute - - -- -- - - 0x400 names - - -- -- - - 0x800 operation region - - -- -- - - 0x1000 bfield - - -- -- - - 0x2000 tables - - -- -- - - 0x4000 values - - -- -- - - 0x8000 objects - - -- -- - - 0x10000 resources - - -- -- - - 0x20000 user requests - - -- -- - - 0x40000 package - - -- -- - - The number can be in decimal or prefixed with 0x in hex. - - -- -- - - Warning: Many of these options can produce a lot of - - -- -- - - output and make your system unusable. Be very careful. + + + ++ ++ + + CONFIG_ACPI_DEBUG must be enabled to produce any ACPI + + + ++ ++ + + debug output. Bits in debug_layer correspond to a + + + ++ ++ + + _COMPONENT in an ACPI source file, e.g., + + + ++ ++ + + #define _COMPONENT ACPI_PCI_COMPONENT + + + ++ ++ + + Bits in debug_level correspond to a level in + + + ++ ++ + + ACPI_DEBUG_PRINT statements, e.g., + + + ++ ++ + + ACPI_DEBUG_PRINT((ACPI_DB_INFO, ... + + + ++ ++ + + See Documentation/acpi/debug.txt for more information + + + ++ ++ + + about debug layers and levels. + + + ++ ++ + + + + + ++ ++ + + Enable AML "Debug" output, i.e., stores to the Debug + + + ++ ++ + + object while interpreting AML: + + + ++ ++ + + acpi.debug_layer=0xffffffff acpi.debug_level=0x2 + + + ++ ++ + + Enable PCI/PCI interrupt routing info messages: + + + ++ ++ + + acpi.debug_layer=0x400000 acpi.debug_level=0x4 + + + ++ ++ + + Enable all messages related to ACPI hardware: + + + ++ ++ + + acpi.debug_layer=0x2 acpi.debug_level=0xffffffff + + + ++ ++ + + + + + ++ ++ + + Some values produce so much output that the system is + + + ++ ++ + + unusable. 
The "log_buf_len" parameter may be useful + + + ++ ++ + + if you need to capture more output. + + acpi.power_nocheck= [HW,ACPI] + Format: 1/0 enable/disable the check of power state. + On some bogus BIOSes the _PSC object/_STA object of + a power resource can't return the correct device power + state. In such cases it is unnecessary to check its + power state again during a power transition. + 1 : disable the power state check
acpi_pm_good [X86-32,X86-64] Override the pmtimer bug detection: force the kernel
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1710,13 -1710,13 -1710,13 -1710,13 -1722,13 -1710,13 -1710,13 -1722,13 -1710,13 -1695,13 -1710,13 -1710,13 -1710,13 -1710,13 -1710,13 -1710,13 -1710,13 -1722,13 -1735,13 -1710,13 -1733,13 -1722,13 -1710,13 -1722,13 -1710,13 -1722,13 -1710,13 -1710,13 -1710,13 -1710,13 +1734,13 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ See arch/parisc/kernel/pdc_chassis.c pf. [PARIDE] - - - -- -- - - See Documentation/paride.txt. + + + ++ ++ + + See Documentation/blockdev/paride.txt. pg. [PARIDE] - - - -- -- - - See Documentation/paride.txt. + + + ++ ++ + + See Documentation/blockdev/paride.txt. pirq= [SMP,APIC] Manual mp-table setup - -- - - See Documentation/i386/IO-APIC.txt. + ++ + + See Documentation/x86/i386/IO-APIC.txt. plip= [PPT,NET] Parallel port network link Format: { parport | timid | 0 }
diff --cc arch/x86/Kconfig
index ac22bb7,ac22bb7,d4d4cb7,ac22bb7,93224b5,ab98cca,ac22bb7,6f20718,e795b5b,5b9b123,ac22bb7,ac22bb7,ac22bb7,a7d50f5,ac22bb7,ac22bb7,cb6a58b,350bee1,d11d7b5,ac22bb7,f4ed47d,4cf0ab1,705e72e,ebcad15,ac22bb7,350bee1,ac22bb7,ac22bb7,ac22bb7,ac22bb7..1cbec02
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -956,11 -956,11 -948,11 -956,11 -953,11 -960,11 -956,11 -953,11 -956,11 -946,11 -956,11 -956,11 -956,11 -958,11 -956,11 -956,11 -956,20 -949,11 -949,11 -956,11 -973,11 -953,11 -952,11 -953,11 -956,11 -949,11 -956,11 -956,11 -956,11 -956,11 +974,20 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ config X86_PA config ARCH_PHYS_ADDR_T_64BIT def_bool X86_64 || X86_PAE ++++++++++++++++ +++++++++++++config DIRECT_GBPAGES ++++++++++++++++ +++++++++++++ bool "Enable 1GB pages for kernel pagetables" if EMBEDDED ++++++++++++++++ +++++++++++++ default y ++++++++++++++++ +++++++++++++ depends on X86_64 ++++++++++++++++ +++++++++++++ help ++++++++++++++++ +++++++++++++ Allow the kernel linear mapping to use 1GB pages on CPUs that ++++++++++++++++ +++++++++++++ support it. This can improve the kernel's performance a tiny bit by ++++++++++++++++ +++++++++++++ reducing TLB pressure. If in doubt, say "Y".
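The DIRECT_GBPAGES option above only takes effect on CPUs that advertise 1GB pages. As an illustrative aside (not part of this patch), the capability in question is CPUID leaf 0x80000001, EDX bit 26, shown as the "pdpe1gb" flag in /proc/cpuinfo; a minimal user-space probe for it, using GCC's cpuid.h helper:

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID 0x80000001: extended processor feature flags */
		if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
			return 1;

		/* EDX bit 26 (PDPE1GB): the CPU can map 1GB pages */
		printf("1GB pages %ssupported\n",
		       (edx & (1u << 26)) ? "" : "not ");
		return 0;
	}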
++++++++++++++++ +++++++++++++ # Common NUMA Features config NUMA -------- --------------------- bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" ++++++++ +++++++++++++++++++++ bool "Numa Memory Allocation and Scheduler Support" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help
diff --cc arch/x86/include/asm/msr.h
index c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,46be2fa,397efa3,0000000,c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,c2a812e,46be2fa,46be2fa,c2a812e,46be2fa,c2a812e,c2a812e,c2a812e,c2a812e,46be2fa,c2a812e,42f639b,c2a812e,c2a812e..4640ddd mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,245 -1,245 -1,245 -1,245 -1,245 -1,245 -1,245 -1,247 -1,245 -1,0 -1,245 -1,245 -1,245 -1,245 -1,245 -1,245 -1,245 -1,247 -1,247 -1,245 -1,247 -1,245 -1,245 -1,245 -1,245 -1,247 -1,245 -1,245 -1,245 -1,245 +1,245 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ #ifndef _ASM_X86_MSR_H + #define _ASM_X86_MSR_H + + #include <asm/msr-index.h> + + #ifndef __ASSEMBLY__ + # include <linux/types.h> + #endif + + #ifdef __KERNEL__ + #ifndef __ASSEMBLY__ + + #include <asm/asm.h> + #include <asm/errno.h> + + static inline unsigned long long native_read_tscp(unsigned int *aux) + { + unsigned long low, high; + asm volatile(".byte 0x0f,0x01,0xf9" + : "=a" (low), "=d" (high), "=c" (*aux)); + return low | ((u64)high << 32); + } + + /* -------- -------------------- * i386 calling convention returns 64-bit value in edx:eax, while -------- -------------------- * x86_64 returns at rax. Also, the "A" constraint does not really -------- -------------------- * mean rdx:rax in x86_64, so we need specialized behaviour for each -------- -------------------- * architecture ++++++++ +++++++++++++++++++++ * both i386 and x86_64 return a 64-bit value in edx:eax, but gcc's "A" ++++++++ +++++++++++++++++++++ * constraint has different meanings. For i386, "A" means exactly ++++++++ +++++++++++++++++++++ * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, ++++++++ +++++++++++++++++++++ * it means rax *or* rdx.
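 * (Illustration, not in the original header: with
 *	u64 v;
 *	asm volatile("rdmsr" : "=A" (v) : "c" (msr));
 * i386 gcc really does return the result in edx:eax, but x86_64 gcc
 * may allocate "=A" to just %rax or just %rdx. The DECLARE_ARGS() /
 * EAX_EDX_VAL() / EAX_EDX_RET() macros below therefore split the
 * value into explicit 32-bit low/high halves on 64-bit builds.)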
+ */ + #ifdef CONFIG_X86_64 + #define DECLARE_ARGS(val, low, high) unsigned low, high + #define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) + #define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) + #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) + #else + #define DECLARE_ARGS(val, low, high) unsigned long long val + #define EAX_EDX_VAL(val, low, high) (val) + #define EAX_EDX_ARGS(val, low, high) "A" (val) + #define EAX_EDX_RET(val, low, high) "=A" (val) + #endif + + static inline unsigned long long native_read_msr(unsigned int msr) + { + DECLARE_ARGS(val, low, high); + + asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr)); + return EAX_EDX_VAL(val, low, high); + } + + static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) + { + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %[err],%[err]\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : [err] "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), [fault] "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); + } + + static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, + int *err) + { + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); + } + + static inline void native_write_msr(unsigned int msr, + unsigned low, unsigned high) + { + asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); + } + + static inline int native_write_msr_safe(unsigned int msr, + unsigned low, unsigned high) + { + int err; + asm volatile("2: wrmsr ; xor %[err],%[err]\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : [err] "=a" (err) + : "c" (msr), "0" (low), "d" (high), + [fault] "i" (-EFAULT) + : "memory"); + return err; + } + + extern unsigned long long native_read_tsc(void); + + static __always_inline unsigned long long __native_read_tsc(void) + { + DECLARE_ARGS(val, low, high); + - -- - - rdtsc_barrier(); + asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - -- - - rdtsc_barrier(); + + return EAX_EDX_VAL(val, low, high); + } + + static inline unsigned long long native_read_pmc(int counter) + { + DECLARE_ARGS(val, low, high); + + asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); + return EAX_EDX_VAL(val, low, high); + } + + #ifdef CONFIG_PARAVIRT + #include + #else + #include + /* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + + #define rdmsr(msr, val1, val2) \ + do { \ + u64 __val = native_read_msr((msr)); \ + (val1) = (u32)__val; \ + (val2) = (u32)(__val >> 32); \ + } while (0) + + static inline void wrmsr(unsigned msr, unsigned low, unsigned high) + { + native_write_msr(msr, low, high); + } + + #define rdmsrl(msr, val) \ + ((val) = native_read_msr((msr))) + + #define wrmsrl(msr, val) \ + native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32)) + + /* wrmsr with exception handling */ + static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) + { + return native_write_msr_safe(msr, low, high); + } + + /* 
rdmsr with exception handling */ + #define rdmsr_safe(msr, p1, p2) \ + ({ \ + int __err; \ + u64 __val = native_read_msr_safe((msr), &__err); \ + (*p1) = (u32)__val; \ + (*p2) = (u32)(__val >> 32); \ + __err; \ + }) + + static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) + { + int err; + + *p = native_read_msr_safe(msr, &err); + return err; + } + static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) + { + int err; + + *p = native_read_msr_amd_safe(msr, &err); + return err; + } + + #define rdtscl(low) \ --------- ----------------- -- ((low) = (u32)native_read_tsc()) +++++++++++++++++++++++++++ ++ ((low) = (u32)__native_read_tsc()) + + #define rdtscll(val) \ --------- ----------------- -- ((val) = native_read_tsc()) +++++++++++++++++++++++++++ ++ ((val) = __native_read_tsc()) + + #define rdpmc(counter, low, high) \ + do { \ + u64 _l = native_read_pmc((counter)); \ + (low) = (u32)_l; \ + (high) = (u32)(_l >> 32); \ + } while (0) + + #define rdtscp(low, high, aux) \ + do { \ + unsigned long long _val = native_read_tscp(&(aux)); \ + (low) = (u32)_val; \ + (high) = (u32)(_val >> 32); \ + } while (0) + + #define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) + + #endif /* !CONFIG_PARAVIRT */ + + + #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ + (u32)((val) >> 32)) + + #define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) + + #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) + + #ifdef CONFIG_SMP + int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); + int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); + int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); + int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); + #else /* CONFIG_SMP */ + static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) + { + rdmsr(msr_no, *l, *h); + return 0; + } + static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) + { + wrmsr(msr_no, l, h); + return 0; + } + static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, + u32 *l, u32 *h) + { + return rdmsr_safe(msr_no, l, h); + } + static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) + { + return wrmsr_safe(msr_no, l, h); + } + #endif /* CONFIG_SMP */ + #endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ + + + #endif /* _ASM_X86_MSR_H */ diff --cc arch/x86/include/asm/pgtable.h index c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,0000000,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,b7c2ecd,c012f3b,c012f3b,875192bf,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b,c012f3b..83e69f4 mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,0 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,564 -1,562 -1,562 -1,576 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 -1,562 +1,578 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + #ifndef _ASM_X86_PGTABLE_H + #define _ASM_X86_PGTABLE_H + + #define FIRST_USER_ADDRESS 0 + + #define _PAGE_BIT_PRESENT 0 /* is present */ + #define _PAGE_BIT_RW 1 /* writeable */ + #define _PAGE_BIT_USER 2 /* userspace addressable */ + #define _PAGE_BIT_PWT 3 /* page write through */ + 
#define _PAGE_BIT_PCD 4 /* page cache disabled */ + #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ + #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ --------- ------ -------------#define _PAGE_BIT_FILE 6 + #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ + #define _PAGE_BIT_PAT 7 /* on 4KB pages */ + #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ + #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ + #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ + #define _PAGE_BIT_UNUSED3 11 + #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ + #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 + #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 + #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + ++++++++++++++++ +++++++++++++/* If _PAGE_BIT_PRESENT is clear, we use these: */ ++++++++++++++++ +++++++++++++/* - if the user mapped it with PROT_NONE; pte_present gives true */ ++++++++++++++++ +++++++++++++#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL ++++++++++++++++ +++++++++++++/* - set: nonlinear file mapping, saved PTE; unset:swap */ ++++++++++++++++ +++++++++++++#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY ++++++++++++++++ +++++++++++++ + #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) + #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) + #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) + #define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) + #define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) + #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) + #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) + #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) + #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) + #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) + #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) + #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) + #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) + #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) + #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) + #define __HAVE_ARCH_PTE_SPECIAL + + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) + #else + #define _PAGE_NX (_AT(pteval_t, 0)) + #endif + --------- ------ -------------/* If _PAGE_PRESENT is clear, we use these: */ --------- ------ -------------#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, --------- ------ ------------- * saved PTE; unset:swap */ --------- ------ -------------#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; --------- ------ ------------- pte_present gives true */ ++++++++++++++++ +++++++++++++#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) ++++++++++++++++ +++++++++++++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) + + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) + #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ + _PAGE_DIRTY) + + /* Set of bits not changed in pte_modify */ + #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) + + #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) + #define _PAGE_CACHE_WB (0) + #define _PAGE_CACHE_WC (_PAGE_PWT) + #define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) + #define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) + + #define PAGE_NONE 
__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) + #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + + #define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ + _PAGE_USER | _PAGE_ACCESSED) + #define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + #define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + #define PAGE_COPY PAGE_COPY_NOEXEC + #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + + #define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) + #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) + + #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) + #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) + #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) + #define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) + #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) + #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) + #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) + #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) + #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + + #define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) + #define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) + #define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) + #define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) + + #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) + #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) + #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) + #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) + #define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) + #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) + #define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) + #define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) + #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) + #define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) + #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) + #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) + #define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) + + #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) + #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) + #define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) + #define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) + + /* xwr */ + #define __P000 PAGE_NONE + #define __P001 PAGE_READONLY + #define __P010 PAGE_COPY + #define __P011 PAGE_COPY + #define __P100 PAGE_READONLY_EXEC + #define __P101 PAGE_READONLY_EXEC + #define __P110 PAGE_COPY_EXEC + #define __P111 PAGE_COPY_EXEC + + #define __S000 PAGE_NONE + #define __S001 PAGE_READONLY + #define __S010 PAGE_SHARED + #define __S011 PAGE_SHARED + #define __S100 PAGE_READONLY_EXEC + #define __S101 PAGE_READONLY_EXEC + #define __S110 PAGE_SHARED_EXEC + #define __S111 PAGE_SHARED_EXEC + + /* + * early identity mapping pte attrib macros. 
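 * (Aside, not in the original header: these attributes are spelled as
 * raw numbers, e.g. PDE_IDENT_ATTR 0x067 = PRESENT+RW+USER+DIRTY+
 * ACCESSED, because they are consumed by early boot assembly such as
 * head_32.S, before the C-level pte helpers can be used.)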
+ */ + #ifdef CONFIG_X86_64 + #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC + #else + /* + * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection + * bits are combined, this will alow user to access the high address mapped + * VDSO in the presence of CONFIG_COMPAT_VDSO + */ + #define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ + #define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ + #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ + #endif + +++++++++++++++++++ ++++++++++/* +++++++++++++++++++ ++++++++++ * Macro to mark a page protection value as UC- +++++++++++++++++++ ++++++++++ */ +++++++++++++++++++ ++++++++++#define pgprot_noncached(prot) \ +++++++++++++++++++ ++++++++++ ((boot_cpu_data.x86 > 3) \ +++++++++++++++++++ ++++++++++ ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ +++++++++++++++++++ ++++++++++ : (prot)) +++++++++++++++++++ ++++++++++ + #ifndef __ASSEMBLY__ + +++++++++++++++++++ ++++++++++#define pgprot_writecombine pgprot_writecombine +++++++++++++++++++ ++++++++++extern pgprot_t pgprot_writecombine(pgprot_t prot); +++++++++++++++++++ ++++++++++ + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ + extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; + #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ + static inline int pte_dirty(pte_t pte) + { + return pte_flags(pte) & _PAGE_DIRTY; + } + + static inline int pte_young(pte_t pte) + { + return pte_flags(pte) & _PAGE_ACCESSED; + } + + static inline int pte_write(pte_t pte) + { + return pte_flags(pte) & _PAGE_RW; + } + + static inline int pte_file(pte_t pte) + { + return pte_flags(pte) & _PAGE_FILE; + } + + static inline int pte_huge(pte_t pte) + { + return pte_flags(pte) & _PAGE_PSE; + } + + static inline int pte_global(pte_t pte) + { + return pte_flags(pte) & _PAGE_GLOBAL; + } + + static inline int pte_exec(pte_t pte) + { + return !(pte_flags(pte) & _PAGE_NX); + } + + static inline int pte_special(pte_t pte) + { + return pte_flags(pte) & _PAGE_SPECIAL; + } + + static inline unsigned long pte_pfn(pte_t pte) + { + return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; + } + + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) + + static inline int pmd_large(pmd_t pte) + { + return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); + } + + static inline pte_t pte_mkclean(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_DIRTY); + } + + static inline pte_t pte_mkold(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + } + + static inline pte_t pte_wrprotect(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_RW); + } + + static inline pte_t pte_mkexec(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_NX); + } + + static inline pte_t pte_mkdirty(pte_t pte) + { + return __pte(pte_val(pte) | _PAGE_DIRTY); + } + + static inline pte_t pte_mkyoung(pte_t pte) + { + return __pte(pte_val(pte) | _PAGE_ACCESSED); + } + + static inline pte_t pte_mkwrite(pte_t pte) + { + return __pte(pte_val(pte) | _PAGE_RW); + } + + static inline pte_t pte_mkhuge(pte_t pte) + { + return __pte(pte_val(pte) | _PAGE_PSE); + } + + static inline pte_t pte_clrhuge(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_PSE); + } + + static inline pte_t pte_mkglobal(pte_t pte) + { + return __pte(pte_val(pte) | 
_PAGE_GLOBAL); + } + + static inline pte_t pte_clrglobal(pte_t pte) + { + return __pte(pte_val(pte) & ~_PAGE_GLOBAL); + } + + static inline pte_t pte_mkspecial(pte_t pte) + { + return __pte(pte_val(pte) | _PAGE_SPECIAL); + } + + extern pteval_t __supported_pte_mask; + + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) + { + return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); + } + + static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) + { + return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | + pgprot_val(pgprot)) & __supported_pte_mask); + } + + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + { + pteval_t val = pte_val(pte); + + /* + * Chop off the NX bit (if present), and add the NX portion of + * the newprot (if present): + */ + val &= _PAGE_CHG_MASK; + val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + + return __pte(val); + } + + /* mprotect needs to preserve PAT bits when updating vm_page_prot */ + #define pgprot_modify pgprot_modify + static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) + { + pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; + pgprotval_t addbits = pgprot_val(newprot); + return __pgprot(preservebits | addbits); + } + + #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) + + #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) + + #ifndef __ASSEMBLY__ +++++++++++++++++++ ++++++++++/* Indicate that x86 has its own track and untrack pfn vma functions */ +++++++++++++++++++ ++++++++++#define __HAVE_PFNMAP_TRACKING +++++++++++++++++++ ++++++++++ + #define __HAVE_PHYS_MEM_ACCESS_PROT + struct file; + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); + int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot); + #endif + + /* Install a pte for a particular vaddr in kernel space. 
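 * (Usage sketch, not part of the original comment: callers build the
 * pte first and pass it in; the fixmap code, for example, does roughly
 *	set_pte_vaddr(address, pfn_pte(phys >> PAGE_SHIFT, flags));
 * to wire one kernel virtual page to a physical frame.)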
*/ + void set_pte_vaddr(unsigned long vaddr, pte_t pte); + + #ifdef CONFIG_X86_32 + extern void native_pagetable_setup_start(pgd_t *base); + extern void native_pagetable_setup_done(pgd_t *base); + #else + static inline void native_pagetable_setup_start(pgd_t *base) {} + static inline void native_pagetable_setup_done(pgd_t *base) {} + #endif + + struct seq_file; + extern void arch_report_meminfo(struct seq_file *m); + + #ifdef CONFIG_PARAVIRT + #include + #else /* !CONFIG_PARAVIRT */ + #define set_pte(ptep, pte) native_set_pte(ptep, pte) + #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) + + #define set_pte_present(mm, addr, ptep, pte) \ + native_set_pte_present(mm, addr, ptep, pte) + #define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + + #define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + + #ifndef __PAGETABLE_PUD_FOLDED + #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) + #define pgd_clear(pgd) native_pgd_clear(pgd) + #endif + + #ifndef set_pud + # define set_pud(pudp, pud) native_set_pud(pudp, pud) + #endif + + #ifndef __PAGETABLE_PMD_FOLDED + #define pud_clear(pud) native_pud_clear(pud) + #endif + + #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) + #define pmd_clear(pmd) native_pmd_clear(pmd) + + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) + + static inline void __init paravirt_pagetable_setup_start(pgd_t *base) + { + native_pagetable_setup_start(base); + } + + static inline void __init paravirt_pagetable_setup_done(pgd_t *base) + { + native_pagetable_setup_done(base); + } + #endif /* CONFIG_PARAVIRT */ + + #endif /* __ASSEMBLY__ */ + + #ifdef CONFIG_X86_32 + # include "pgtable_32.h" + #else + # include "pgtable_64.h" + #endif + + /* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * this macro returns the index of the entry in the pgd page which would + * control the given virtual address + */ + #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) + + /* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's; + */ + #define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) + /* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ + #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + + + #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) + #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) + + #ifndef __ASSEMBLY__ + + enum { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_NUM + }; + + #ifdef CONFIG_PROC_FS + extern void update_page_count(int level, unsigned long pages); + #else + static inline void update_page_count(int level, unsigned long pages) { } + #endif + + /* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. 
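 * (Typical use, as an aside:
 *	unsigned int level;
 *	pte_t *kpte = lookup_address(address, &level);
 * where level receives one of the PG_LEVEL_* values defined above, so
 * the caller can tell a 4K mapping from a 2M or 1G one.)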
+ */ + extern pte_t *lookup_address(unsigned long address, unsigned int *level); + + /* local pte updates need not use xchg for locking */ + static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) + { + pte_t res = *ptep; + + /* Pure native function needs no input for mm, addr */ + native_pte_clear(NULL, 0, ptep); + return res; + } + + static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , pte_t pte) + { + native_set_pte(ptep, pte); + } + + #ifndef CONFIG_PARAVIRT + /* + * Rules for using pte_update - it must be called after any PTE update which + * has not been done using the set_pte / clear_pte interfaces. It is used by + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE + * updates should either be sets, clears, or set_pte_atomic for P->P + * transitions, which means this hook should only be called for user PTEs. + * This hook implies a P->P protection or access change has taken place, which + * requires a subsequent TLB flush. The notification can optionally be delayed + * until the TLB flush event by using the pte_update_defer form of the + * interface, but care must be taken to assure that the flush happens while + * still holding the same page table lock so that the shadow and primary pages + * do not become out of sync on SMP. + */ + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) + #endif + + /* + * We only update the dirty/accessed state if we set + * the dirty bit by hand in the kernel, since the hardware + * will do the accessed bit for us, and we don't want to + * race with other CPU's that might be updating the dirty + * bit at the same time. + */ + struct vm_area_struct; + + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS + extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); + + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG + extern int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + + #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH + extern int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); + + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR + static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) + { + pte_t pte = native_ptep_get_and_clear(ptep); + pte_update(mm, addr, ptep); + return pte; + } + + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL + static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + int full) + { + pte_t pte; + if (full) { + /* + * Full address destruction in progress; paravirt does not + * care about updates and native needs no locking + */ + pte = native_local_ptep_get_and_clear(ptep); + } else { + pte = ptep_get_and_clear(mm, addr, ptep); + } + return pte; + } + + #define __HAVE_ARCH_PTEP_SET_WRPROTECT + static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) + { + clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); + pte_update(mm, addr, ptep); + } + + /* + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); + * + * dst - pointer to pgd range anwhere on a pgd page + * src - "" + * count - the number of pgds to copy. + * + * dst and src can be on the same page, but the range must not overlap, + * and must not cross a page boundary. 
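 * (An aside, not part of the original comment: pgd setup code uses
 * this to mirror the kernel mappings into a fresh pgd, roughly
 *	clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 *			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 *			KERNEL_PGD_PTRS);
 * with the KERNEL_PGD_* constants defined earlier in this header.)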
+ */ + static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) + { + memcpy(dst, src, count * sizeof(pgd_t)); + } + + + #include + #endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_PGTABLE_H */ diff --cc arch/x86/include/asm/pgtable_64.h index 545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,0000000,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,c54ba69,545a0e0,545a0e0,4798a40,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0,545a0e0..ba09289 mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,0 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,297 -1,285 -1,285 -1,279 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 -1,285 +1,291 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + #ifndef _ASM_X86_PGTABLE_64_H + #define _ASM_X86_PGTABLE_64_H + + #include + #ifndef __ASSEMBLY__ + + /* + * This file contains the functions and defines necessary to modify and use + * the x86-64 page table tree. + */ + #include + #include + #include + #include + + extern pud_t level3_kernel_pgt[512]; + extern pud_t level3_ident_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pmd_t level2_fixmap_pgt[512]; + extern pmd_t level2_ident_pgt[512]; + extern pgd_t init_level4_pgt[]; + + #define swapper_pg_dir init_level4_pgt + + extern void paging_init(void); + + #endif /* !__ASSEMBLY__ */ + + #define SHARED_KERNEL_PMD 0 + + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ + #define PGDIR_SHIFT 39 + #define PTRS_PER_PGD 512 + + /* + * 3rd level page + */ + #define PUD_SHIFT 30 + #define PTRS_PER_PUD 512 + + /* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ + #define PMD_SHIFT 21 + #define PTRS_PER_PMD 512 + + /* + * entries per page directory level + */ + #define PTRS_PER_PTE 512 + + #ifndef __ASSEMBLY__ + + #define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pte_val(e)) + #define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pmd_val(e)) + #define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pud_val(e)) + #define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + + #define pgd_none(x) (!pgd_val(x)) + #define pud_none(x) (!pud_val(x)) + + struct mm_struct; + + void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); + + + static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) + { + *ptep = native_make_pte(0); + } + + static inline void native_set_pte(pte_t *ptep, pte_t pte) + { + *ptep = pte; + } + + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) + { + native_set_pte(ptep, pte); + } + + static inline pte_t native_ptep_get_and_clear(pte_t *xp) + { + #ifdef CONFIG_SMP + return native_make_pte(xchg(&xp->pte, 0)); + #else + /* native_local_ptep_get_and_clear, + but duplicated because of cyclic dependency */ + pte_t ret = *xp; + native_pte_clear(NULL, 0, xp); + return ret; + #endif + } + + static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) + { + *pmdp = pmd; + } + + static inline void 
native_pmd_clear(pmd_t *pmd) + { + native_set_pmd(pmd, native_make_pmd(0)); + } + + static inline void native_set_pud(pud_t *pudp, pud_t pud) + { + *pudp = pud; + } + + static inline void native_pud_clear(pud_t *pud) + { + native_set_pud(pud, native_make_pud(0)); + } + + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) + { + *pgdp = pgd; + } + + static inline void native_pgd_clear(pgd_t *pgd) + { + native_set_pgd(pgd, native_make_pgd(0)); + } + + #define pte_same(a, b) ((a).pte == (b).pte) + + #endif /* !__ASSEMBLY__ */ + + #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) + #define PMD_MASK (~(PMD_SIZE - 1)) + #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) + #define PUD_MASK (~(PUD_SIZE - 1)) + #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE - 1)) + + --------- ------ -------------#define MAXMEM _AC(0x00003fffffffffff, UL) ++++++++++++++++ +++++++++++++#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #define VMALLOC_START _AC(0xffffc20000000000, UL) + #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) + #define VMEMMAP_START _AC(0xffffe20000000000, UL) + #define MODULES_VADDR _AC(0xffffffffa0000000, UL) + #define MODULES_END _AC(0xffffffffff000000, UL) + #define MODULES_LEN (MODULES_END - MODULES_VADDR) + + #ifndef __ASSEMBLY__ + + static inline int pgd_bad(pgd_t pgd) + { + return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; + } + + static inline int pud_bad(pud_t pud) + { + return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; + } + + static inline int pmd_bad(pmd_t pmd) + { + return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; + } + + #define pte_none(x) (!pte_val((x))) + #define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) + + #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ + + /* --------- --------- ---------- * Macro to mark a page protection value as "uncacheable". --------- --------- ---------- */ --------- --------- ----------#define pgprot_noncached(prot) \ --------- --------- ---------- (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) --------- --------- ---------- --------- --------- ----------/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + + /* + * Level 4 access. + */ + #define pgd_page_vaddr(pgd) \ + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) + #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) + #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) + static inline int pgd_large(pgd_t pgd) { return 0; } + #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) + + /* PUD - Level3 access */ + /* to find an entry in a page-table-directory. 
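 * (Aside: with PUD_SHIFT = 30 and PTRS_PER_PUD = 512, pud_index()
 * below extracts address bits 38..30, and each pud entry covers 1GB
 * of virtual address space.)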
*/ + #define pud_page_vaddr(pud) \ + ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) + #define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) + #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + #define pud_offset(pgd, address) \ + ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) + #define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT) + + static inline int pud_large(pud_t pte) + { + return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); + } + + /* PMD - Level 2 access */ + #define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) + #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) + + #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + #define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ + pmd_index(address)) + #define pmd_none(x) (!pmd_val((x))) + #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) + #define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) + #define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) + + #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) + #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ + _PAGE_FILE }) + #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT + + /* PTE - Level 1 access. */ + + /* page, protection -> pte */ + #define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) + + #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ + pte_index((address))) + + /* x86-64 always has all page tables mapped. */ + #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) + #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) + #define pte_unmap(pte) /* NOP */ + #define pte_unmap_nested(pte) /* NOP */ + + #define update_mmu_cache(vma, address, pte) do { } while (0) + + extern int direct_gbpages; + + /* Encode and de-code a swap entry */ --------- ------ -------------#define __swp_type(x) (((x).val >> 1) & 0x3f) --------- ------ -------------#define __swp_offset(x) ((x).val >> 8) --------- ------ -------------#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ --------- ------ ------------- ((offset) << 8) }) ++++++++++++++++ +++++++++++++#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE ++++++++++++++++ +++++++++++++#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) ++++++++++++++++ +++++++++++++#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) ++++++++++++++++ +++++++++++++#else ++++++++++++++++ +++++++++++++#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) ++++++++++++++++ +++++++++++++#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) ++++++++++++++++ +++++++++++++#endif ++++++++++++++++ +++++++++++++ ++++++++++++++++ +++++++++++++#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) ++++++++++++++++ +++++++++++++ ++++++++++++++++ +++++++++++++#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ ++++++++++++++++ +++++++++++++ & ((1U << SWP_TYPE_BITS) - 1)) ++++++++++++++++ +++++++++++++#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) ++++++++++++++++ +++++++++++++#define __swp_entry(type, offset) ((swp_entry_t) { \ ++++++++++++++++ +++++++++++++ ((type) << (_PAGE_BIT_PRESENT + 1)) \ ++++++++++++++++ +++++++++++++ | ((offset) << SWP_OFFSET_SHIFT) }) + #define __pte_to_swp_entry(pte) 
((swp_entry_t) { pte_val((pte)) }) + #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) + + extern int kern_addr_valid(unsigned long addr); + extern void cleanup_highmap(void); + + #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + + #define HAVE_ARCH_UNMAPPED_AREA + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + + #define pgtable_cache_init() do { } while (0) + #define check_pgt_cache() do { } while (0) + + #define PAGE_AGP PAGE_KERNEL_NOCACHE + #define HAVE_PAGE_AGP 1 + + /* fs/proc/kcore.c */ + #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) + #define kc_offset_to_vaddr(o) \ + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \ + ? ((o) | ~__VIRTUAL_MASK) \ + : (o)) + + #define __HAVE_ARCH_PTE_SAME + #endif /* !__ASSEMBLY__ */ + + #endif /* _ASM_X86_PGTABLE_64_H */ diff --cc arch/x86/include/asm/setup.h index f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,0000000,f12d372,f12d372,f12d372,f12d372,1ed8b2e,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,294daeb,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372,f12d372..4fcd53f mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,0 -1,105 -1,105 -1,105 -1,105 -1,109 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,108 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 -1,105 +1,112 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + #ifndef _ASM_X86_SETUP_H + #define _ASM_X86_SETUP_H + + #define COMMAND_LINE_SIZE 2048 + + #ifndef __ASSEMBLY__ + + /* Interrupt control for vSMPowered x86_64 systems */ + void vsmp_init(void); + ++++++++++++++ +++++++++++++++ ++++++++++++++ +++++++++++++++void setup_bios_corruption_check(void); ++++++++++++++ +++++++++++++++ ++++++++++++++ +++++++++++++++ + #ifdef CONFIG_X86_VISWS + extern void visws_early_detect(void); + extern int is_visws_box(void); + #else + static inline void visws_early_detect(void) { } + static inline int is_visws_box(void) { return 0; } + #endif + ++++++++++++++++++++++ +++++++extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); ++++++++++++++++++++++ +++++++extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); + /* + * Any setup quirks to be performed? 
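 * (Aside, not in the original comment: subarchitecture code installs
 * its hooks by pointing the global x86_quirks at a static table, e.g.
 *	x86_quirks = &numaq_x86_quirks;
 * hooks left NULL are simply skipped by the callers.)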
+ */ + struct mpc_config_processor; + struct mpc_config_bus; + struct mp_config_oemtable; + struct x86_quirks { + int (*arch_pre_time_init)(void); + int (*arch_time_init)(void); + int (*arch_pre_intr_init)(void); + int (*arch_intr_init)(void); + int (*arch_trap_init)(void); + char * (*arch_memory_setup)(void); + int (*mach_get_smp_config)(unsigned int early); + int (*mach_find_smp_config)(unsigned int reserve); + + int *mpc_record; + int (*mpc_apic_id)(struct mpc_config_processor *m); + void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name); + void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); + void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, + unsigned short oemsize); + int (*setup_ioapic_ids)(void); ++++++++++++++++++++++ +++++++ int (*update_genapic)(void); + }; + + extern struct x86_quirks *x86_quirks; + extern unsigned long saved_video_mode; + + #ifndef CONFIG_PARAVIRT + #define paravirt_post_allocator_init() do {} while (0) + #endif + #endif /* __ASSEMBLY__ */ + + #ifdef __KERNEL__ + + #ifdef __i386__ + + #include + /* + * Reserved space for vmalloc and iomap - defined in asm/page.h + */ + #define MAXMEM_PFN PFN_DOWN(MAXMEM) + #define MAX_NONPAE_PFN (1 << 20) + + #endif /* __i386__ */ + + #define PARAM_SIZE 4096 /* sizeof(struct boot_params) */ + + #define OLD_CL_MAGIC 0xA33F + #define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */ + #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ + + #ifndef __ASSEMBLY__ + #include + + #ifndef _SETUP + + /* + * This is set up by the setup-routine at boot-time + */ + extern struct boot_params boot_params; + + /* + * Do NOT EVER look at the BIOS memory size location. + * It does not work on many machines. + */ + #define LOWMEMSIZE() (0x9f000) + + #ifdef __i386__ + + void __init i386_start_kernel(void); + extern void probe_roms(void); + + extern unsigned long init_pg_tables_start; + extern unsigned long init_pg_tables_end; + + #else + void __init x86_64_init_pda(void); + void __init x86_64_start_kernel(char *real_mode); + void __init x86_64_start_reservations(char *real_mode_data); + + #endif /* __i386__ */ + #endif /* _SETUP */ + #endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ + + #endif /* _ASM_X86_SETUP_H */ diff --cc arch/x86/include/asm/syscalls.h index 87803da,87803da,c0b0bda,87803da,87803da,87803da,87803da,87803da,87803da,0000000,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,87803da,3a5252c,87803da,87803da,87803da,87803da..9c6797c mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,0 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 -1,93 +1,93 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + /* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh + * + * This file is released under the GPLv2. + * See the file COPYING for more details. 
+ */ + + #ifndef _ASM_X86_SYSCALLS_H + #define _ASM_X86_SYSCALLS_H + + #include + #include + #include + #include + + /* Common in X86_32 and X86_64 */ + /* kernel/ioport.c */ + asmlinkage long sys_ioperm(unsigned long, unsigned long, int); + ++ +++++++++++++++++++++++++++/* kernel/ldt.c */ ++ +++++++++++++++++++++++++++asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); ++ +++++++++++++++++++++++++++ ++ +++++++++++++++++++++++++++/* kernel/tls.c */ ++ +++++++++++++++++++++++++++asmlinkage int sys_set_thread_area(struct user_desc __user *); ++ +++++++++++++++++++++++++++asmlinkage int sys_get_thread_area(struct user_desc __user *); ++ +++++++++++++++++++++++++++ + /* X86_32 only */ + #ifdef CONFIG_X86_32 + /* kernel/process_32.c */ + asmlinkage int sys_fork(struct pt_regs); + asmlinkage int sys_clone(struct pt_regs); + asmlinkage int sys_vfork(struct pt_regs); + asmlinkage int sys_execve(struct pt_regs); + + /* kernel/signal_32.c */ + asmlinkage int sys_sigsuspend(int, int, old_sigset_t); + asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); + asmlinkage int sys_sigaltstack(unsigned long); + asmlinkage unsigned long sys_sigreturn(unsigned long); --------- --------------- ----asmlinkage int sys_rt_sigreturn(unsigned long); +++++++++++++++++++++++++ ++++asmlinkage int sys_rt_sigreturn(struct pt_regs); + + /* kernel/ioport.c */ + asmlinkage long sys_iopl(unsigned long); + -- ------ --------------------/* kernel/ldt.c */ -- ------ --------------------asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); -- ------ -------------------- + /* kernel/sys_i386_32.c */ + asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + struct mmap_arg_struct; + asmlinkage int old_mmap(struct mmap_arg_struct __user *); + struct sel_arg_struct; + asmlinkage int old_select(struct sel_arg_struct __user *); + asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); + struct old_utsname; + asmlinkage int sys_uname(struct old_utsname __user *); + struct oldold_utsname; + asmlinkage int sys_olduname(struct oldold_utsname __user *); + -- ------ --------------------/* kernel/tls.c */ -- ------ --------------------asmlinkage int sys_set_thread_area(struct user_desc __user *); -- ------ --------------------asmlinkage int sys_get_thread_area(struct user_desc __user *); -- ------ -------------------- + /* kernel/vm86_32.c */ + asmlinkage int sys_vm86old(struct pt_regs); + asmlinkage int sys_vm86(struct pt_regs); + + #else /* CONFIG_X86_32 */ + + /* X86_64 only */ + /* kernel/process_64.c */ + asmlinkage long sys_fork(struct pt_regs *); + asmlinkage long sys_clone(unsigned long, unsigned long, + void __user *, void __user *, + struct pt_regs *); + asmlinkage long sys_vfork(struct pt_regs *); + asmlinkage long sys_execve(char __user *, char __user * __user *, + char __user * __user *, + struct pt_regs *); + + /* kernel/ioport.c */ + asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + + /* kernel/signal_64.c */ + asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); + asmlinkage long sys_rt_sigreturn(struct pt_regs *); + + /* kernel/sys_x86_64.c */ + asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + struct new_utsname; + asmlinkage long sys_uname(struct new_utsname __user *); + + #endif /* CONFIG_X86_32 */ + #endif /* _ASM_X86_SYSCALLS_H */ diff --cc 
arch/x86/include/asm/system.h index 2ed3f0f,2ed3f0f,59555f4,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,0000000,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,07c3e40,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f,2ed3f0f..8e626ea mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,0 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 -1,427 -1,425 -1,425 -1,425 -1,425 -1,425 -1,425 +1,427 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + #ifndef _ASM_X86_SYSTEM_H + #define _ASM_X86_SYSTEM_H + + #include + #include + #include + #include + #include + + #include + #include + + /* entries in ARCH_DLINFO: */ + #ifdef CONFIG_IA32_EMULATION + # define AT_VECTOR_SIZE_ARCH 2 + #else + # define AT_VECTOR_SIZE_ARCH 1 + #endif + -- ------ --------------------#ifdef CONFIG_X86_32 -- ------ -------------------- + struct task_struct; /* one of the stranger aspects of C forward declarations */ + struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); + ++ +++++++++++++++++++++++++++#ifdef CONFIG_X86_32 ++ +++++++++++++++++++++++++++ + /* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. + */ + #define switch_to(prev, next, last) \ + do { \ + /* \ + * Context-switching clobbers all registers, so we clobber \ + * them explicitly, via unused output variables. 
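+ * \
+ * (switch_to() is invoked from context_switch() in kernel/sched.c; \
+ * "last" reports which task was running just before the resumed \
+ * thread, hence the extra output operand captured below.)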
\ + * (EAX and EBP is not listed because EBP is saved/restored \ + * explicitly for wchan access and EAX is the return value of \ + * __switch_to()) \ + */ \ + unsigned long ebx, ecx, edx, esi, edi; \ + \ + asm volatile("pushfl\n\t" /* save flags */ \ + "pushl %%ebp\n\t" /* save EBP */ \ + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ + "popfl\n" /* restore flags */ \ + \ + /* output parameters */ \ + : [prev_sp] "=m" (prev->thread.sp), \ + [prev_ip] "=m" (prev->thread.ip), \ + "=a" (last), \ + \ + /* clobbered output registers: */ \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ + \ + /* regparm parameters for __switch_to(): */ \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ + : /* reloaded segment registers */ \ + "memory"); \ + } while (0) + + /* + * disable hlt during certain critical i/o operations + */ + #define HAVE_DISABLE_HLT + #else + #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" + #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" + + /* frame pointer must be last for get_wchan */ + #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" + #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + + #define __EXTRA_CLOBBER \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + + /* Save restore flags to clear handle leaking NT */ + #define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [tif_fork] "i" (TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + : "memory", "cc" __EXTRA_CLOBBER) + #endif + + #ifdef __KERNEL__ + #define _set_base(addr, base) do { unsigned long __pr; \ + __asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while (0) + + #define _set_limit(addr, limit) do { unsigned long __lr; \ + __asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while (0) + + #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) + #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) + + extern void native_load_gs_index(unsigned); + + /* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. 
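+ *
+ * Typical use is reloading a data segment on context switch, e.g.
+ * loadsegment(ds, next->ds); if the new selector is invalid, the
+ * exception-table fixup below retries with the zero segment.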
+ */ + #define loadsegment(seg, value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %k0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "movl %k1, %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b,3b) \ + : :"r" (value), "r" (0) : "memory") + + + /* + * Save a segment register away + */ + #define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + + static inline unsigned long get_limit(unsigned long segment) + { + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); + return __limit + 1; + } + + static inline void native_clts(void) + { + asm volatile("clts"); + } + + /* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ + static unsigned long __force_order; + + static inline unsigned long native_read_cr0(void) + { + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; + } + + static inline void native_write_cr0(unsigned long val) + { + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); + } + + static inline unsigned long native_read_cr2(void) + { + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; + } + + static inline void native_write_cr2(unsigned long val) + { + asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); + } + + static inline unsigned long native_read_cr3(void) + { + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; + } + + static inline void native_write_cr3(unsigned long val) + { + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); + } + + static inline unsigned long native_read_cr4(void) + { + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; + } + + static inline unsigned long native_read_cr4_safe(void) + { + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. 
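+ * On a CPU without CR4 the mov below faults; the _ASM_EXTABLE entry
+ * steers the fault past the instruction, so the caller simply sees
+ * the preloaded value 0.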
*/ + #ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val), "=m" (__force_order) : "0" (0)); + #else + val = native_read_cr4(); + #endif + return val; + } + + static inline void native_write_cr4(unsigned long val) + { + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); + } + + #ifdef CONFIG_X86_64 + static inline unsigned long native_read_cr8(void) + { + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; + } + + static inline void native_write_cr8(unsigned long val) + { + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); + } + #endif + + static inline void native_wbinvd(void) + { + asm volatile("wbinvd": : :"memory"); + } + + #ifdef CONFIG_PARAVIRT + #include + #else + #define read_cr0() (native_read_cr0()) + #define write_cr0(x) (native_write_cr0(x)) + #define read_cr2() (native_read_cr2()) + #define write_cr2(x) (native_write_cr2(x)) + #define read_cr3() (native_read_cr3()) + #define write_cr3(x) (native_write_cr3(x)) + #define read_cr4() (native_read_cr4()) + #define read_cr4_safe() (native_read_cr4_safe()) + #define write_cr4(x) (native_write_cr4(x)) + #define wbinvd() (native_wbinvd()) + #ifdef CONFIG_X86_64 + #define read_cr8() (native_read_cr8()) + #define write_cr8(x) (native_write_cr8(x)) + #define load_gs_index native_load_gs_index + #endif + + /* Clear the 'TS' bit */ + #define clts() (native_clts()) + + #endif/* CONFIG_PARAVIRT */ + + #define stts() write_cr0(read_cr0() | X86_CR0_TS) + + #endif /* __KERNEL__ */ + + static inline void clflush(volatile void *__p) + { + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); + } + + #define nop() asm volatile ("nop") + + void disable_hlt(void); + void enable_hlt(void); + + void cpu_idle_wait(void); + + extern unsigned long arch_align_stack(unsigned long sp); + extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + + void default_idle(void); + +++++++++++++++++++++++ ++++++void stop_this_cpu(void *dummy); +++++++++++++++++++++++ ++++++ + /* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ + #ifdef CONFIG_X86_32 + /* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) + #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) + #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) + #else + #define mb() asm volatile("mfence":::"memory") + #define rmb() asm volatile("lfence":::"memory") + #define wmb() asm volatile("sfence" ::: "memory") + #endif + + /** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. 
+ * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). However, + * the following code, with the same initial values for "a" and "b": + * + * + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + + #define read_barrier_depends() do { } while (0) + + #ifdef CONFIG_SMP + #define smp_mb() mb() + #ifdef CONFIG_X86_PPRO_FENCE + # define smp_rmb() rmb() + #else + # define smp_rmb() barrier() + #endif + #ifdef CONFIG_X86_OOSTORE + # define smp_wmb() wmb() + #else + # define smp_wmb() barrier() + #endif + #define smp_read_barrier_depends() read_barrier_depends() + #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) + #else + #define smp_mb() barrier() + #define smp_rmb() barrier() + #define smp_wmb() barrier() + #define smp_read_barrier_depends() do { } while (0) + #define set_mb(var, value) do { var = value; barrier(); } while (0) + #endif + + /* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ + static inline void rdtsc_barrier(void) + { + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); + } + + #endif /* _ASM_X86_SYSTEM_H */ diff --cc arch/x86/kernel/Makefile index b62a766,b62a766,b62a766,b62a766,e489ff9,943fe60,e489ff9,e489ff9,b62a766,db3216a,b62a766,b62a766,b62a766,e489ff9,b2077d3,b62a766,b62a766,d7e5a58,e489ff9,b62a766,d7e5a58,e489ff9,b62a766,e489ff9,b62a766,ef28c21,e489ff9,b62a766,b62a766,b62a766..1f208aa --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -11,7 -11,7 -11,7 -11,7 -11,7 -11,8 -11,7 -11,7 -11,7 -11,6 -11,7 -11,7 -11,7 -11,7 -11,7 -11,7 -11,7 -11,6 -11,7 -11,7 -11,6 -11,7 -11,7 -11,7 -11,7 -11,6 -11,7 -11,7 -11,7 -11,7 +11,8 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ifdef CONFIG_FUNCTION_TRACE CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg + + + + CFLAGS_REMOVE_ftrace.o = -pg +++++ ++++++++++++++++++++++++CFLAGS_REMOVE_early_printk.o = -pg endif # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -23,9 -23,9 -23,9 -23,9 -23,9 -24,9 -23,9 -23,9 -23,9 -22,9 -23,9 -23,9 -23,9 -23,9 -23,9 -23,9 -23,9 -22,9 -23,9 -23,9 -22,9 -23,9 -23,9 -23,9 -23,9 -22,9 -23,9 -23,9 -23,9 -23,9 +24,9 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ CFLAGS_vsyscall_64.o := $(PROFILING) -g CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) ------------------------- ----obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o +++++++++++++++++++++++++ ++++obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o --------- --------------------obj-y += time_$(BITS).o ioport.o ldt.o +++++++++ ++++++++++++++++++++obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o 
i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o diff --cc arch/x86/kernel/ds.c index a2d1176,a2d1176,a2d1176,a2d1176,2b69994,d1a1214,d1a1214,2b69994,a2d1176,2b69994,a2d1176,a2d1176,a2d1176,d1a1214,d1a1214,a2d1176,a2d1176,2b69994,2b69994,a2d1176,2b69994,c570252,a2d1176,2b69994,a2d1176,2b69994,d1a1214,a2d1176,a2d1176,a2d1176..d6938d9 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -231,12 -231,12 -231,12 -231,12 -234,12 -234,16 -234,16 -234,12 -231,12 -234,12 -231,12 -231,12 -231,12 -234,16 -234,16 -231,12 -231,12 -234,12 -234,12 -231,12 -234,12 -234,12 -231,12 -234,12 -231,12 -234,12 -234,16 -231,12 -231,12 -231,12 +231,12 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ static inline struct ds_context *ds_all struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; ++++ + ++ ++ ++ + ++ unsigned long irq; if (!context) { -- -- - spin_unlock(&ds_lock); -- -- - context = kzalloc(sizeof(*context), GFP_KERNEL); ---- - -- -- -- - -- -- -- - if (!context) { -- -- - spin_lock(&ds_lock); ++ ++ + if (!context) return NULL; -- -- - } context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -244,27 -244,27 -244,27 -244,27 -247,18 -251,30 -251,30 -247,18 -244,27 -247,18 -244,27 -244,27 -244,27 -251,30 -251,30 -244,27 -244,27 -247,18 -247,18 -244,27 -247,18 -247,18 -244,27 -247,18 -244,27 -247,18 -251,30 -244,27 -244,27 -244,27 +244,27 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ return NULL; } - - - -- -- - - *p_context = context; -- -- - spin_lock(&ds_lock); -- -- - /* -- -- - * Check for race - another CPU could have allocated -- -- - * it meanwhile: -- -- - */ ++++ + ++ ++ ++ + ++ spin_lock_irqsave(&ds_lock, irq); ++ ++ + - - - -- -- - - context->this = p_context; - - - -- -- - - context->task = task; + + + ++ ++ + + if (*p_context) { + + + ++ ++ + + kfree(context->ds); + + + ++ ++ + + kfree(context); -- -- - return *p_context; -- -- - } + - - - -- - - - if (task) - - - -- - - - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); -- -- - *p_context = context; ++++ + ++ ++ ++ + ++ context = *p_context; ++++ + ++ ++ ++ + ++ } else { ++++ + ++ ++ ++ + ++ *p_context = context; - - - -- - - - if (!task || (task == current)) - - - -- - - - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); -- -- - context->this = p_context; -- -- - context->task = task; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); ++++ + ++ ++ ++ + ++ context->this = p_context; ++++ + ++ ++ ++ + ++ context->task = task; - - - -- - - - get_tracer(task); -- -- - if (task) -- -- - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); ++++ + ++ ++ ++ + ++ if (task) ++++ + ++ ++ ++ + ++ set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + + ++ + + + -- -- - if (!task || (task == current)) -- -- - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); -- -- - -- -- - - get_tracer(task); ++++ + ++ ++ ++ + ++ if (!task || (task == current)) ++++ + ++ ++ ++ + ++ wrmsrl(MSR_IA32_DS_AREA, ++++ + ++ ++ ++ + ++ (unsigned long)context->ds); ++++ + ++ ++ ++ + ++ } ++++ + ++ ++ ++ + ++ spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -391,27 -391,27 -391,27 -391,27 -382,25 -398,26 -398,26 -382,25 -391,27 -382,25 -391,27 -391,27 -391,27 -398,26 -398,26 -391,27 -391,27 -382,25 -382,25 -391,27 -382,25 -382,25 -391,27 -382,25 -391,27 
-382,25 -398,26 -391,27 -391,27 -391,27 +391,27 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ static int ds_request(struct task_struc return -EOPNOTSUPP; ---- - -- -- -- - -- spin_lock(&ds_lock); - - - -- -- - - - - - -- -- - - if (!check_tracer(task)) - - - -- -- - - return -EPERM; ---- - -- -- -- - -- ---- - -- -- -- - -- error = -ENOMEM; context = ds_alloc_context(task); if (!context) -- -- - goto out_unlock; ++++ + ++ ++ ++ + ++ return -ENOMEM; ++++ + ++ ++ ++ + ++ ++++ + ++ ++ ++ + ++ spin_lock_irqsave(&ds_lock, irq); + + + ++ ++ + + + + + ++ ++ + + error = -EPERM; + + + ++ ++ + + if (!check_tracer(task)) goto out_unlock; ++++ + ++ ++ ++ + ++ get_tracer(task); ++++ + ++ ++ ++ + ++ error = -EALREADY; if (context->owner[qual] == current) ---- - -- -- -- - -- goto out_unlock; ++++ + ++ ++ ++ + ++ goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) ---- - -- -- -- - -- goto out_unlock; ++++ + ++ ++ ++ + ++ goto out_put_tracer; context->owner[qual] = current; ---- - -- -- -- - -- spin_unlock(&ds_lock); ++++ + ++ ++ ++ + ++ spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; diff --cc arch/x86/kernel/entry_64.S index b86f332,b86f332,5492778,b86f332,b86f332,4a16bf3,b86f332,b86f332,b86f332,09e7145,b86f332,b86f332,b86f332,b86f332,b86f332,b86f332,b86f332,09e7145,b86f332,b86f332,09e7145,b86f332,b86f332,b86f332,b86f332,09e7145,b86f332,b86f332,b86f332,b86f332..42571ba --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -255,8 -255,8 -255,8 -255,8 -255,8 -255,9 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 -255,8 +255,9 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ENTRY(ret_from_fork call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) +++++ ++++++++++++++++++++++++ CFI_REMEMBER_STATE jnz rff_trace -- ---------------------------rff_action: ++ +++++++++++++++++++++++++++rff_action: RESTORE_REST testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? je int_ret_from_sys_call diff --cc arch/x86/kernel/reboot.c index cc5a254,cc5a254,cc5a254,cc5a254,c3cd512,cc5a254,cc5a254,f4c93f1,cc5a254,f4c93f1,cc5a254,cc5a254,cc5a254,cc5a254,724adfc,cc5a254,cc5a254,f4c93f1,f4c93f1,cc5a254,f4c93f1,724adfc,cc5a254,bb387ab,cc5a254,f4c93f1,cc5a254,cc5a254,cc5a254,cc5a254..61f718d --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -404,12 -404,12 -404,12 -404,12 -398,12 -404,12 -404,12 -399,12 -404,12 -399,12 -404,12 -404,12 -404,12 -404,12 -395,12 -404,12 -404,12 -399,12 -399,12 -404,12 -399,12 -395,12 -404,12 -400,27 -404,12 -399,12 -404,12 -404,12 -404,12 -404,12 +412,27 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ static void native_machine_emergency_re reboot_type = BOOT_KBD; break; ----------------------- ------ case BOOT_EFI: if (efi_enabled) ----------------------- ------ efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, +++++++++++++++++++++++ ++++++ efi.reset_system(reboot_mode ? 
+++++++++++++++++++++++ ++++++ EFI_RESET_WARM : +++++++++++++++++++++++ ++++++ EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); +++++++++++++++++++++++ ++++++ reboot_type = BOOT_KBD; +++++++++++++++++++++++ ++++++ break; + + ++ + + +++++++++++++++++++++++ ++++++ case BOOT_CF9: +++++++++++++++++++++++ ++++++ port_cf9_safe = true; +++++++++++++++++++++++ ++++++ /* fall through */ +++++++ + +++++++ + ++ + ++++ +++++++++++++++++++++++ ++++++ case BOOT_CF9_COND: +++++++++++++++++++++++ ++++++ if (port_cf9_safe) { +++++++++++++++++++++++ ++++++ u8 cf9 = inb(0xcf9) & ~6; +++++++++++++++++++++++ ++++++ outb(cf9|2, 0xcf9); /* Request hard reset */ +++++++++++++++++++++++ ++++++ udelay(50); +++++++++++++++++++++++ ++++++ outb(cf9|6, 0xcf9); /* Actually do the reset */ +++++++++++++++++++++++ ++++++ udelay(50); +++++++++++++++++++++++ ++++++ } reboot_type = BOOT_KBD; break; } diff --cc arch/x86/kernel/setup.c index bdec76e,bdec76e,81f5d22,9d5674f,0fa6790,9d5674f,9d5674f,f44dadf,9d5674f,0fa6790,9d5674f,bdec76e,9d5674f,9d5674f,13a5f59,9d5674f,9d5674f,0fa6790,0fa6790,bdec76e,0fa6790,0fa6790,f5096a6,0fa6790,9d5674f,0fa6790,9d5674f,9d5674f,9d5674f,bdec76e..a3122382 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -583,161 -583,161 -584,161 -583,161 -583,161 -583,161 -583,161 -584,161 -583,161 -583,161 -583,161 -583,161 -583,161 -583,161 -583,11 -583,161 -583,161 -583,161 -583,161 -583,161 -583,161 -583,161 -583,174 -583,161 -583,161 -583,161 -583,161 -583,161 -583,161 -583,161 +585,24 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ static int __init setup_elfcorehdr(cha early_param("elfcorehdr", setup_elfcorehdr); #endif ---------------------- -------static struct x86_quirks default_x86_quirks __initdata; -------------- ------- ------- -------------- ------- -------struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; -------------- ------- ------- -------------- ------- -------/* -------------- ------- ------- * Some BIOSes seem to corrupt the low 64k of memory during events -------------- ------- ------- * like suspend/resume and unplugging an HDMI cable. Reserve all -------------- ------- ------- * remaining free memory in that area and fill it with a distinct -------------- ------- ------- * pattern. -------------- ------- ------- */ -------------- ------- -------#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION -------------- ------- -------#define MAX_SCAN_AREAS 8 -------------- ------- ------- -------------- ------- -------static int __read_mostly memory_corruption_check = -1; -------------- ------- ------- -------------- ------- -------static unsigned __read_mostly corruption_check_size = 64*1024; -------------- ------- -------static unsigned __read_mostly corruption_check_period = 60; /* seconds */ -------------- ------- ------- -------------- ------- -------static struct e820entry scan_areas[MAX_SCAN_AREAS]; -------------- ------- -------static int num_scan_areas; -------------- ------- ------- -------------- ------- ------- -------------- ------- -------static int set_corruption_check(char *arg) ----- --- -- ---- -- ------ { ----- --- -- ---- -- ------ char *end; ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ memory_corruption_check = simple_strtol(arg, &end, 10); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ return (*end == 0) ? 
0 : -EINVAL; ----- --- -- ---- -- ------ } ----- --- -- ---- -- ------ early_param("memory_corruption_check", set_corruption_check); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ static int set_corruption_check_period(char *arg) ----- --- -- ---- -- ------ { ----- --- -- ---- -- ------ char *end; ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ corruption_check_period = simple_strtoul(arg, &end, 10); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ return (*end == 0) ? 0 : -EINVAL; ----- --- -- ---- -- ------ } ----- --- -- ---- -- ------ early_param("memory_corruption_check_period", set_corruption_check_period); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ static int set_corruption_check_size(char *arg) ----- --- -- ---- -- ------ { ----- --- -- ---- -- ------ char *end; ----- --- -- ---- -- ------ unsigned size; ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ size = memparse(arg, &end); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ if (*end == '\0') ----- --- -- ---- -- ------ corruption_check_size = size; ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ return (size == corruption_check_size) ? 0 : -EINVAL; ----- --- -- ---- -- ------ } ----- --- -- ---- -- ------ early_param("memory_corruption_check_size", set_corruption_check_size); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ static void __init setup_bios_corruption_check(void) ++++++++++++++++++++++ +++++++static int __init default_update_genapic(void) + { -- - - - - char *end; -- - - - - -- - - - - memory_corruption_check = simple_strtol(arg, &end, 10); -- - - - - -- - - - - return (*end == 0) ? 0 : -EINVAL; -- - - - -} -- - - - -early_param("memory_corruption_check", set_corruption_check); -- - - - - -- - - - -static int set_corruption_check_period(char *arg) -- - - - -{ -- - - - - char *end; -- - - - - -- - - - - corruption_check_period = simple_strtoul(arg, &end, 10); -- - - - - -- - - - - return (*end == 0) ? 0 : -EINVAL; -- - - - -} -- - - - -early_param("memory_corruption_check_period", set_corruption_check_period); -- - - - - -- - - - -static int set_corruption_check_size(char *arg) -- - - - -{ -- - - - - char *end; -- - - - - unsigned size; -- - - - - -- - - - - size = memparse(arg, &end); -- - - - - -- - - - - if (*end == '\0') -- - - - - corruption_check_size = size; -- - - - - -- - - - - return (size == corruption_check_size) ? 
0 : -EINVAL; -- - - - -} -- - - - -early_param("memory_corruption_check_size", set_corruption_check_size); -- - - - - -- - - - - -- - - - -static void __init setup_bios_corruption_check(void) -- - - - -{ -------------- ------- ------- u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ -------------- ------- ------- -------------- ------- ------- if (memory_corruption_check == -1) { -------------- ------- ------- memory_corruption_check = -------------- ------- -------#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK -------------- ------- ------- 1 -------------- ------- -------#else -------------- ------- ------- 0 ++++++++++++++++++++++ +++++++#ifdef CONFIG_X86_SMP ++++++++++++++++++++++ +++++++# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) ++++++++++++++++++++++ +++++++ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; ++++++++++++++++++++++ +++++++# endif + #endif -------------- ------- ------- ; -------------- ------- ------- } ------- ------ ------- ------- ------- ------ ------- ------- if (corruption_check_size == 0) ------- ------ ------- ------- memory_corruption_check = 0; ------- ------ ------- ------- ------- ------ ------- ------- if (!memory_corruption_check) ------- ------ ------- ------- return; ------- ------ ------- ------- ------- ------ ------- ------- corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { ----- --- -- ---- -- ------ u64 size; ----- --- -- ---- -- ------ addr = find_e820_area_size(addr, &size, PAGE_SIZE); ----- --- -- ---- -- ------ ----- --- -- ---- -- ------ if (addr == 0) ----- --- -- ---- -- ------ break; + - if (corruption_check_size == 0) - memory_corruption_check = 0; - - if (!memory_corruption_check) - return; - - corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); - -- - - - - while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { -- - - - - u64 size; -- - - - - addr = find_e820_area_size(addr, &size, PAGE_SIZE); -- - - - - -- - - - - if (addr == 0) -- - - - - break; -- - - - - -------------- ------- ------- if ((addr + size) > corruption_check_size) -------------- ------- ------- size = corruption_check_size - addr; -------------- ------- ------- -------------- ------- ------- if (size == 0) -------------- ------- ------- break; -------------- ------- ------- -------------- ------- ------- e820_update_range(addr, size, E820_RAM, E820_RESERVED); -------------- ------- ------- scan_areas[num_scan_areas].addr = addr; -------------- ------- ------- scan_areas[num_scan_areas].size = size; -------------- ------- ------- num_scan_areas++; -------------- ------- ------- -------------- ------- ------- /* Assume we've already mapped this early memory */ -------------- ------- ------- memset(__va(addr), 0, size); -------------- ------- ------- -------------- ------- ------- addr += size; -------------- ------- ------- } -------------- ------- ------- -------------- ------- ------- printk(KERN_INFO "Scanning %d areas for low memory corruption\n", -------------- ------- ------- num_scan_areas); -------------- ------- ------- update_e820(); -------------- ------- -------} -------------- ------- ------- -------------- ------- -------static struct timer_list periodic_check_timer; -------------- ------- ------- -------------- ------- -------void check_for_bios_corruption(void) -------------- ------- -------{ -------------- ------- ------- int i; 
-------------- ------- ------- int corruption = 0; -------------- ------- ------- -------------- ------- ------- if (!memory_corruption_check) -------------- ------- ------- return; -------------- ------- ------- -------------- ------- ------- for(i = 0; i < num_scan_areas; i++) { -------------- ------- ------- unsigned long *addr = __va(scan_areas[i].addr); -------------- ------- ------- unsigned long size = scan_areas[i].size; -------------- ------- ------- -------------- ------- ------- for(; size; addr++, size -= sizeof(unsigned long)) { -------------- ------- ------- if (!*addr) -------------- ------- ------- continue; -------------- ------- ------- printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", -------------- ------- ------- addr, __pa(addr), *addr); -------------- ------- ------- corruption = 1; -------------- ------- ------- *addr = 0; -------------- ------- ------- } -------------- ------- ------- } -------------- ------- ------- -------------- ------- ------- WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); -------------- ------- -------} -------------- ------- ------- -------------- ------- -------static void periodic_check_for_corruption(unsigned long data) -------------- ------- -------{ -------------- ------- ------- check_for_bios_corruption(); -------------- ------- ------- mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); ++++++++++++++++++++++ +++++++ return 0; + } + -------------- ------- -------void start_periodic_check_for_corruption(void) -------------- ------- -------{ -------------- ------- ------- if (!memory_corruption_check || corruption_check_period == 0) -------------- ------- ------- return; -------------- ------- ------- -------------- ------- ------- printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", -------------- ------- ------- corruption_check_period); ++++++++++++++++++++++ +++++++static struct x86_quirks default_x86_quirks __initdata = { ++++++++++++++++++++++ +++++++ .update_genapic = default_update_genapic, ++++++++++++++++++++++ +++++++}; -------------- ------- ------- init_timer(&periodic_check_timer); -------------- ------- ------- periodic_check_timer.function = &periodic_check_for_corruption; -------------- ------- ------- periodic_check_for_corruption(0); -------------- ------- -------} -------------- ------- -------#endif ++++++++++++++ +++++++ +++++++struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; - /* - * Some BIOSes seem to corrupt the low 64k of memory during events - * like suspend/resume and unplugging an HDMI cable. Reserve all - * remaining free memory in that area and fill it with a distinct - * pattern. - */ - #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION - #define MAX_SCAN_AREAS 8 - - static int __read_mostly memory_corruption_check = -1; - - static unsigned __read_mostly corruption_check_size = 64*1024; - static unsigned __read_mostly corruption_check_period = 60; /* seconds */ - - static struct e820entry scan_areas[MAX_SCAN_AREAS]; - static int num_scan_areas; - - - static int set_corruption_check(char *arg) - { - char *end; - - memory_corruption_check = simple_strtol(arg, &end, 10); - - return (*end == 0) ? 0 : -EINVAL; - } - early_param("memory_corruption_check", set_corruption_check); - - static int set_corruption_check_period(char *arg) - { - char *end; - - corruption_check_period = simple_strtoul(arg, &end, 10); - - return (*end == 0) ? 
0 : -EINVAL; - } - early_param("memory_corruption_check_period", set_corruption_check_period); - - static int set_corruption_check_size(char *arg) - { - char *end; - unsigned size; - - size = memparse(arg, &end); - - if (*end == '\0') - corruption_check_size = size; - - return (size == corruption_check_size) ? 0 : -EINVAL; - } - early_param("memory_corruption_check_size", set_corruption_check_size); - - - static void __init setup_bios_corruption_check(void) - { - u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ - - if (memory_corruption_check == -1) { - memory_corruption_check = - #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK - 1 - #else - 0 - #endif - ; - } - - if (corruption_check_size == 0) - memory_corruption_check = 0; - - if (!memory_corruption_check) - return; - - corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); - - while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { - u64 size; - addr = find_e820_area_size(addr, &size, PAGE_SIZE); - - if (addr == 0) - break; - - if ((addr + size) > corruption_check_size) - size = corruption_check_size - addr; - - if (size == 0) - break; - - e820_update_range(addr, size, E820_RAM, E820_RESERVED); - scan_areas[num_scan_areas].addr = addr; - scan_areas[num_scan_areas].size = size; - num_scan_areas++; - - /* Assume we've already mapped this early memory */ - memset(__va(addr), 0, size); - - addr += size; - } - - printk(KERN_INFO "Scanning %d areas for low memory corruption\n", - num_scan_areas); - update_e820(); - } - - static struct timer_list periodic_check_timer; - - void check_for_bios_corruption(void) - { - int i; - int corruption = 0; - - if (!memory_corruption_check) - return; - - for(i = 0; i < num_scan_areas; i++) { - unsigned long *addr = __va(scan_areas[i].addr); - unsigned long size = scan_areas[i].size; - - for(; size; addr++, size -= sizeof(unsigned long)) { - if (!*addr) - continue; - printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", - addr, __pa(addr), *addr); - corruption = 1; - *addr = 0; - } - } - - WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); - } - - static void periodic_check_for_corruption(unsigned long data) - { - check_for_bios_corruption(); - mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); - } - - void start_periodic_check_for_corruption(void) - { - if (!memory_corruption_check || corruption_check_period == 0) - return; - - printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", - corruption_check_period); - - init_timer(&periodic_check_timer); - periodic_check_timer.function = &periodic_check_for_corruption; - periodic_check_for_corruption(0); - } - #endif - ++++++++++++++ +++++++++++++++#ifdef CONFIG_X86_RESERVE_LOW_64K static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE