From: Ingo Molnar Date: Mon, 21 Jul 2008 14:37:17 +0000 (+0200) Subject: Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core... X-Git-Tag: v3.0~14509^2~8 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=acee709cab689ec7703770e8b8cb5cc3a4abcb31;p=platform%2Fkernel%2Flinux-amlogic.git Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core', 'x86/cpu', 'x86/fixmap', 'x86/gart', 'x86/kprobes', 'x86/memtest', 'x86/modules', 'x86/nmi', 'x86/pat', 'x86/reboot', 'x86/setup', 'x86/step', 'x86/unify-pci', 'x86/uv', 'x86/xen' and 'xen-64bit' into x86/for-linus --- acee709cab689ec7703770e8b8cb5cc3a4abcb31 diff --cc arch/x86/ia32/ia32entry.S index 20371d0,20371d0,20371d0,20371d0,20371d0,20371d0,20371d0,20371d0,20371d0,20371d0,b5e329d,20371d0,20371d0,20371d0,20371d0,8796d19,20371d0,20371d0,20371d0,0ae1e77..23d146c --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@@@@@@@@@@@@@@@@@@@@ -136,14 -136,14 -136,14 -136,14 -136,14 -136,14 -136,14 -136,14 -136,14 -136,14 -123,13 -136,14 -136,14 -136,14 -136,14 -140,13 -136,14 -136,14 -136,14 -136,14 +140,13 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_sysenter_target .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) ---------- ---- ---- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ ---------- ---- ---- TI_flags(%r10) +++++++++++++++ ++++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys --------------- ----sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys +++++++++++++++ ++++sysenter_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) @@@@@@@@@@@@@@@@@@@@@ -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -230,8 -241,9 -241,9 -241,9 -241,9 -244,8 -241,9 -241,9 -241,9 -241,9 +244,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_cstar_target .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) ---------- ---- ---- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ ---------- ---- ---- TI_flags(%r10) +++++++++++++++ ++++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: @@@@@@@@@@@@@@@@@@@@@ -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -310,7 -321,7 -321,7 -321,7 -321,7 -323,7 -321,7 -321,7 -321,7 -321,8 +323,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_syscall /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP - swapgs +++++++++++++++++++ PARAVIRT_ADJUST_EXCEPTION_FRAME + SWAPGS /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: @@@@@@@@@@@@@@@@@@@@@ -335,9 -335,9 -335,9 -335,9 -335,9 -335,9 -335,9 -335,9 -335,9 -335,9 -324,8 -335,9 -335,9 -335,9 -335,9 -337,8 -335,9 -335,9 -335,9 -336,9 +338,8 @@@@@@@@@@@@@@@@@@@@@ this could be a problem. 
*/ SAVE_ARGS 0,0,1 GET_THREAD_INFO(%r10) - orl $TS_COMPAT,threadinfo_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + orl $TS_COMPAT,TI_status(%r10) ---------- ---- ---- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ ---------- ---- ---- TI_flags(%r10) +++++++++++++++ ++++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax diff --cc arch/x86/kernel/amd_iommu.c index f2766d8,8c3deb0,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,cf2f74b,f2766d8,f2766d8,0000000,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8..c25210e mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@@@@@@@@@@@@@@@@@@@@ -1,962 -1,1167 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,0 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 -1,962 +1,1167 @@@@@@@@@@@@@@@@@@@@@ + /* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + #include + #include + #include + #include + #include + #include ------- -- ---------#include +++++++ ++++++++++++#include + #include + #include + + #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) + + #define to_pages(addr, size) \ + (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) + + ++++++++++++++++++#define EXIT_LOOP_COUNT 10000000 + ++++++++++++++++++ + static DEFINE_RWLOCK(amd_iommu_devtable_lock); + - -------- ---------struct command { + ++++++++++++++++++/* + ++++++++++++++++++ * general struct to manage commands send to an IOMMU + ++++++++++++++++++ */ + ++++++++++++++++++struct iommu_cmd { + u32 data[4]; + }; + + static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e); + + ++++++++++++++++++/* returns !0 if the IOMMU is caching non-present entries in its TLB */ + static int iommu_has_npcache(struct amd_iommu *iommu) + { + return iommu->cap & IOMMU_CAP_NPCACHE; + } + - -------- ---------static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * IOMMU command queuing functions + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * Writes the command to the IOMMUs command buffer and informs the + ++++++++++++++++++ * hardware about the new command. Must be called with iommu->lock held. 
+ ++++++++++++++++++ */ + ++++++++++++++++++static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) + { + u32 tail, head; + u8 *target; + + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + target = (iommu->cmd_buf + tail); + memcpy_toio(target, cmd, sizeof(*cmd)); + tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + if (tail == head) + return -ENOMEM; + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + return 0; + } + - -------- ---------static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) + ++++++++++++++++++/* + ++++++++++++++++++ * General queuing function for commands. Takes iommu->lock and calls + ++++++++++++++++++ * __iommu_queue_command(). + ++++++++++++++++++ */ + ++++++++++++++++++static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) + { + unsigned long flags; + int ret; + + spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command(iommu, cmd); + spin_unlock_irqrestore(&iommu->lock, flags); + + return ret; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function is called whenever we need to ensure that the IOMMU has + ++++++++++++++++++ * completed execution of all commands we sent. It sends a + ++++++++++++++++++ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs + ++++++++++++++++++ * us about that by writing a value to a physical address we pass with + ++++++++++++++++++ * the command. + ++++++++++++++++++ */ + static int iommu_completion_wait(struct amd_iommu *iommu) + { + int ret; - -------- --------- struct command cmd; + ++++++++++++++++++ struct iommu_cmd cmd; + volatile u64 ready = 0; + unsigned long ready_phys = virt_to_phys(&ready); + ++++++++++++++++++ unsigned long i = 0; + + memset(&cmd, 0, sizeof(cmd)); + cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - -------- --------- cmd.data[1] = HIGH_U32(ready_phys); + ++++++++++++++++++ cmd.data[1] = upper_32_bits(ready_phys); + cmd.data[2] = 1; /* value written to 'ready' */ + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + + iommu->need_sync = 0; + + ret = iommu_queue_command(iommu, &cmd); + + if (ret) + return ret; + - -------- --------- while (!ready) + ++++++++++++++++++ while (!ready && (i < EXIT_LOOP_COUNT)) { + ++++++++++++++++++ ++i; + cpu_relax(); + ++++++++++++++++++ } + ++++++++++++++++++ + ++++++++++++++++++ if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) + ++++++++++++++++++ printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Command send function for invalidating a device table entry + ++++++++++++++++++ */ + static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) + { - -------- --------- struct command cmd; + ++++++++++++++++++ struct iommu_cmd cmd; + + BUG_ON(iommu == NULL); + + memset(&cmd, 0, sizeof(cmd)); + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); + cmd.data[0] = devid; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Generic command send function for invalidaing TLB entries + ++++++++++++++++++ */ + static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, + u64 address, u16 domid, int pde, int s) + { - -------- --------- struct command cmd; + ++++++++++++++++++ struct iommu_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + address &= PAGE_MASK; + CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); + cmd.data[1] |= domid; + cmd.data[2] = 
LOW_U32(address); - -------- --------- cmd.data[3] = HIGH_U32(address); - -------- --------- if (s) + ++++++++++++++++++ cmd.data[3] = upper_32_bits(address); + ++++++++++++++++++ if (s) /* size bit - we flush more than one 4kb page */ + cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - -------- --------- if (pde) + ++++++++++++++++++ if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + + iommu->need_sync = 1; + + return iommu_queue_command(iommu, &cmd); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * TLB invalidation function which is called from the mapping functions. + ++++++++++++++++++ * It invalidates a single PTE if the range to flush is within a single + ++++++++++++++++++ * page. Otherwise it flushes the whole TLB of the IOMMU. + ++++++++++++++++++ */ + static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, + u64 address, size_t size) + { + int s = 0; + unsigned pages = to_pages(address, size); + + address &= PAGE_MASK; + + if (pages > 1) { + /* + * If we have to flush more than one page, flush all + * TLB entries for this domain + */ + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + s = 1; + } + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); + + return 0; + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The functions below are used the create the page table mappings for + ++++++++++++++++++ * unity mapped regions. + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * Generic mapping functions. It maps a physical address into a DMA + ++++++++++++++++++ * address space. It allocates the page table pages if necessary. + ++++++++++++++++++ * In the future it can be extended to a generic mapping function + ++++++++++++++++++ * supporting all features of AMD IOMMU page tables like level skipping + ++++++++++++++++++ * and full 64 bit address spaces. + ++++++++++++++++++ */ + static int iommu_map(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot) + { + u64 __pte, *pte, *page; + + bus_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(bus_addr); + + /* only support 512GB address spaces for now */ + if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L2_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + *pte = IOMMU_L1_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; + + if (IOMMU_PTE_PRESENT(*pte)) + return -EBUSY; + + __pte = phys_addr | IOMMU_PTE_P; + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + *pte = __pte; + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function checks if a specific unity mapping entry is needed for + ++++++++++++++++++ * this specific IOMMU. 
+ ++++++++++++++++++ */ + static int iommu_for_unity_map(struct amd_iommu *iommu, + struct unity_map_entry *entry) + { + u16 bdf, i; + + for (i = entry->devid_start; i <= entry->devid_end; ++i) { + bdf = amd_iommu_alias_table[i]; + if (amd_iommu_rlookup_table[bdf] == iommu) + return 1; + } + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Init the unity mappings for a specific IOMMU in the system + ++++++++++++++++++ * + ++++++++++++++++++ * Basically iterates over all unity mapping entries and applies them to + ++++++++++++++++++ * the default domain DMA of that IOMMU if necessary. + ++++++++++++++++++ */ + static int iommu_init_unity_mappings(struct amd_iommu *iommu) + { + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function actually applies the mapping to the page table of the + ++++++++++++++++++ * dma_ops domain. + ++++++++++++++++++ */ + static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, + struct unity_map_entry *e) + { + u64 addr; + int ret; + + for (addr = e->address_start; addr < e->address_end; + addr += PAGE_SIZE) { + ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); + if (ret) + return ret; + /* + * if unity mapping is in aperture range mark the page + * as allocated in the aperture + */ + if (addr < dma_dom->aperture_size) + __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); + } + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Inits the unity mappings required for a specific device + ++++++++++++++++++ */ + static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, + u16 devid) + { + struct unity_map_entry *e; + int ret; + + list_for_each_entry(e, &amd_iommu_unity_map, list) { + if (!(devid >= e->devid_start && devid <= e->devid_end)) + continue; + ret = dma_ops_unity_map(dma_dom, e); + if (ret) + return ret; + } + + return 0; + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The next functions belong to the address allocator for the dma_ops + ++++++++++++++++++ * interface functions. They work like the allocators in the other IOMMU + ++++++++++++++++++ * drivers. Its basically a bitmap which marks the allocated pages in + ++++++++++++++++++ * the aperture. Maybe it could be enhanced in the future to a more + ++++++++++++++++++ * efficient allocator. + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + static unsigned long dma_mask_to_pages(unsigned long mask) + { + return (mask >> PAGE_SHIFT) + + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The address allocator core function. + ++++++++++++++++++ * + ++++++++++++++++++ * called with domain->lock held + ++++++++++++++++++ */ + static unsigned long dma_ops_alloc_addresses(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages) + { + unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long address; + unsigned long size = dom->aperture_size >> PAGE_SHIFT; + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + limit = limit < size ? 
limit : size; + + if (dom->next_bit >= limit) + dom->next_bit = 0; + + address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, + 0 , boundary_size, 0); + if (address == -1) + address = iommu_area_alloc(dom->bitmap, limit, 0, pages, + 0, boundary_size, 0); + + if (likely(address != -1)) { + dom->next_bit = address + pages; + address <<= PAGE_SHIFT; + } else + address = bad_dma_address; + + WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); + + return address; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The address free function. + ++++++++++++++++++ * + ++++++++++++++++++ * called with domain->lock held + ++++++++++++++++++ */ + static void dma_ops_free_addresses(struct dma_ops_domain *dom, + unsigned long address, + unsigned int pages) + { + address >>= PAGE_SHIFT; + iommu_area_free(dom->bitmap, address, pages); + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The next functions belong to the domain allocation. A domain is + ++++++++++++++++++ * allocated for every IOMMU as the default domain. If device isolation + ++++++++++++++++++ * is enabled, every device get its own domain. The most important thing + ++++++++++++++++++ * about domains is the page table mapping the DMA address space they + ++++++++++++++++++ * contain. + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + static u16 domain_id_alloc(void) + { + unsigned long flags; + int id; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return id; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Used to reserve address ranges in the aperture (e.g. for exclusion + ++++++++++++++++++ * ranges. + ++++++++++++++++++ */ + static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) + { + unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + set_bit_string(dom->bitmap, start_page, pages); + } + + static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) + { + int i, j; + u64 *p1, *p2, *p3; + + p1 = dma_dom->domain.pt_root; + + if (!p1) + return; + + for (i = 0; i < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p1[i])) + continue; + + p2 = IOMMU_PTE_PAGE(p1[i]); + for (j = 0; j < 512; ++i) { + if (!IOMMU_PTE_PRESENT(p2[j])) + continue; + p3 = IOMMU_PTE_PAGE(p2[j]); + free_page((unsigned long)p3); + } + + free_page((unsigned long)p2); + } + + free_page((unsigned long)p1); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Free a domain, only used if something went wrong in the + ++++++++++++++++++ * allocation path and we need to free an already allocated page table + ++++++++++++++++++ */ + static void dma_ops_domain_free(struct dma_ops_domain *dom) + { + if (!dom) + return; + + dma_ops_free_pagetable(dom); + + kfree(dom->pte_pages); + + kfree(dom->bitmap); + + kfree(dom); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Allocates a new protection domain usable for the dma_ops functions. 
+ ++++++++++++++++++ * It also intializes the page table and the address allocator data + ++++++++++++++++++ * structures required for the dma_ops interface + ++++++++++++++++++ */ + static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, + unsigned order) + { + struct dma_ops_domain *dma_dom; + unsigned i, num_pte_pages; + u64 *l2_pde; + u64 address; + + /* + * Currently the DMA aperture must be between 32 MB and 1GB in size + */ + if ((order < 25) || (order > 30)) + return NULL; + + dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); + if (!dma_dom) + return NULL; + + spin_lock_init(&dma_dom->domain.lock); + + dma_dom->domain.id = domain_id_alloc(); + if (dma_dom->domain.id == 0) + goto free_dma_dom; + dma_dom->domain.mode = PAGE_MODE_3_LEVEL; + dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.priv = dma_dom; + if (!dma_dom->domain.pt_root) + goto free_dma_dom; + dma_dom->aperture_size = (1ULL << order); + dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), + GFP_KERNEL); + if (!dma_dom->bitmap) + goto free_dma_dom; + /* + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value + */ + dma_dom->bitmap[0] = 1; + dma_dom->next_bit = 0; + + ++++++++++++++++++ /* Intialize the exclusion range if necessary */ + if (iommu->exclusion_start && + iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; + int pages = to_pages(iommu->exclusion_start, + iommu->exclusion_length); + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + + ++++++++++++++++++ /* + ++++++++++++++++++ * At the last step, build the page tables so we don't need to + ++++++++++++++++++ * allocate page table pages in the dma_ops mapping/unmapping + ++++++++++++++++++ * path. + ++++++++++++++++++ */ + num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); + dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), + GFP_KERNEL); + if (!dma_dom->pte_pages) + goto free_dma_dom; + + l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); + if (l2_pde == NULL) + goto free_dma_dom; + + dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); + + for (i = 0; i < num_pte_pages; ++i) { + dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); + if (!dma_dom->pte_pages[i]) + goto free_dma_dom; + address = virt_to_phys(dma_dom->pte_pages[i]); + l2_pde[i] = IOMMU_L1_PDE(address); + } + + return dma_dom; + + free_dma_dom: + dma_ops_domain_free(dma_dom); + + return NULL; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Find out the protection domain structure for a given PCI device. This + ++++++++++++++++++ * will give us the pointer to the page table root for example. 
+ ++++++++++++++++++ */ + static struct protection_domain *domain_for_device(u16 devid) + { + struct protection_domain *dom; + unsigned long flags; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = amd_iommu_pd_table[devid]; + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * If a device is not yet associated with a domain, this function does + ++++++++++++++++++ * assigns it visible for the hardware + ++++++++++++++++++ */ + static void set_device_domain(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) + { + unsigned long flags; + + u64 pte_root = virt_to_phys(domain->pt_root); + + pte_root |= (domain->mode & 0x07) << 9; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + amd_iommu_dev_table[devid].data[0] = pte_root; + amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[2] = domain->id; + + amd_iommu_pd_table[devid] = domain; + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + iommu_queue_inv_dev_entry(iommu, devid); + + iommu->need_sync = 1; + } + + ++++++++++++++++++/***************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The next functions belong to the dma_ops mapping/unmapping code. + ++++++++++++++++++ * + ++++++++++++++++++ *****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * In the dma_ops path we only have the struct device. This function + ++++++++++++++++++ * finds the corresponding IOMMU, the protection domain and the + ++++++++++++++++++ * requestor id for a given device. + ++++++++++++++++++ * If the device is not yet associated with a domain this is also done + ++++++++++++++++++ * in this function. + ++++++++++++++++++ */ + static int get_device_resources(struct device *dev, + struct amd_iommu **iommu, + struct protection_domain **domain, + u16 *bdf) + { + struct dma_ops_domain *dma_dom; + struct pci_dev *pcidev; + u16 _bdf; + + BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + + pcidev = to_pci_dev(dev); - -------- --------- _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + ++++++++++++++++++ _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + ++++++++++++++++++ /* device not translated by any IOMMU in the system? */ + if (_bdf >= amd_iommu_last_bdf) { + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + return 0; + } + + *bdf = amd_iommu_alias_table[_bdf]; + + *iommu = amd_iommu_rlookup_table[*bdf]; + if (*iommu == NULL) + return 0; + dma_dom = (*iommu)->default_dom; + *domain = domain_for_device(*bdf); + if (*domain == NULL) { + *domain = &dma_dom->domain; + set_device_domain(*iommu, *domain, *bdf); + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " + "device ", (*domain)->id); + print_devid(_bdf, 1); + } + + return 1; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This is the generic map function. It maps one 4kb page at paddr to + ++++++++++++++++++ * the given address in the DMA address space for the domain. 
+ ++++++++++++++++++ */ + static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address, + phys_addr_t paddr, + int direction) + { + u64 *pte, __pte; + + WARN_ON(address > dom->aperture_size); + + paddr &= PAGE_MASK; + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; + + if (direction == DMA_TO_DEVICE) + __pte |= IOMMU_PTE_IR; + else if (direction == DMA_FROM_DEVICE) + __pte |= IOMMU_PTE_IW; + else if (direction == DMA_BIDIRECTIONAL) + __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; + + WARN_ON(*pte); + + *pte = __pte; + + return (dma_addr_t)address; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The generic unmapping function for on page in the DMA address space. + ++++++++++++++++++ */ + static void dma_ops_domain_unmap(struct amd_iommu *iommu, + struct dma_ops_domain *dom, + unsigned long address) + { + u64 *pte; + + if (address >= dom->aperture_size) + return; + + WARN_ON(address & 0xfffULL || address > dom->aperture_size); + + pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; + pte += IOMMU_PTE_L0_INDEX(address); + + WARN_ON(!*pte); + + *pte = 0ULL; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function contains common code for mapping of a physically + ++++++++++++++++++ * contiguous memory region into DMA address space. It is uses by all + ++++++++++++++++++ * mapping functions provided by this IOMMU driver. + ++++++++++++++++++ * Must be called with the domain lock held. + ++++++++++++++++++ */ + static dma_addr_t __map_single(struct device *dev, + struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + phys_addr_t paddr, + size_t size, + int dir) + { + dma_addr_t offset = paddr & ~PAGE_MASK; + dma_addr_t address, start; + unsigned int pages; + int i; + + pages = to_pages(paddr, size); + paddr &= PAGE_MASK; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (unlikely(address == bad_dma_address)) + goto out; + + start = address; + for (i = 0; i < pages; ++i) { + dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + paddr += PAGE_SIZE; + start += PAGE_SIZE; + } + address += offset; + + out: + return address; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Does the reverse of the __map_single function. Must be called with + ++++++++++++++++++ * the domain lock held too + ++++++++++++++++++ */ + static void __unmap_single(struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + dma_addr_t dma_addr, + size_t size, + int dir) + { + dma_addr_t i, start; + unsigned int pages; + + if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + return; + + pages = to_pages(dma_addr, size); + dma_addr &= PAGE_MASK; + start = dma_addr; + + for (i = 0; i < pages; ++i) { + dma_ops_domain_unmap(iommu, dma_dom, start); + start += PAGE_SIZE; + } + + dma_ops_free_addresses(dma_dom, dma_addr, pages); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported map_single function for dma_ops. 
+ ++++++++++++++++++ */ + static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir) + { + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + dma_addr_t addr; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (iommu == NULL || domain == NULL) + ++++++++++++++++++ /* device not handled by any AMD IOMMU */ + return (dma_addr_t)paddr; + + spin_lock_irqsave(&domain->lock, flags); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + if (addr == bad_dma_address) + goto out; + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + out: + spin_unlock_irqrestore(&domain->lock, flags); + + return addr; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported unmap_single function for dma_ops. + ++++++++++++++++++ */ + static void unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, int dir) + { + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + ++++++++++++++++++ /* device not handled by any AMD IOMMU */ + return; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, dir); + + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This is a special map_sg function which is used if we should map a + ++++++++++++++++++ * device which is not handled by an AMD IOMMU in the system. + ++++++++++++++++++ */ + static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) + { + struct scatterlist *s; + int i; + + for_each_sg(sglist, s, nelems, i) { + s->dma_address = (dma_addr_t)sg_phys(s); + s->dma_length = s->length; + } + + return nelems; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported map_sg function for dma_ops (handles scatter-gather + ++++++++++++++++++ * lists). + ++++++++++++++++++ */ + static int map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) + { + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + int i; + struct scatterlist *s; + phys_addr_t paddr; + int mapped_elems = 0; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + return map_sg_no_iommu(dev, sglist, nelems, dir); + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + paddr = sg_phys(s); + + s->dma_address = __map_single(dev, iommu, domain->priv, + paddr, s->length, dir); + + if (s->dma_address) { + s->dma_length = s->length; + mapped_elems++; + } else + goto unmap; + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + out: + spin_unlock_irqrestore(&domain->lock, flags); + + return mapped_elems; + unmap: + for_each_sg(sglist, s, mapped_elems, i) { + if (s->dma_address) + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + s->dma_address = s->dma_length = 0; + } + + mapped_elems = 0; + + goto out; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported map_sg function for dma_ops (handles scatter-gather + ++++++++++++++++++ * lists). 
+ ++++++++++++++++++ */ + static void unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) + { + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + struct scatterlist *s; + u16 devid; + int i; + + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return; + + spin_lock_irqsave(&domain->lock, flags); + + for_each_sg(sglist, s, nelems, i) { + __unmap_single(iommu, domain->priv, s->dma_address, + s->dma_length, dir); + iommu_flush_pages(iommu, domain->id, s->dma_address, + s->dma_length); + s->dma_address = s->dma_length = 0; + } + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported alloc_coherent function for dma_ops. + ++++++++++++++++++ */ + static void *alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) + { + unsigned long flags; + void *virt_addr; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + phys_addr_t paddr; + + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + if (!virt_addr) + return 0; + + memset(virt_addr, 0, size); + paddr = virt_to_phys(virt_addr); + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) { + *dma_addr = (dma_addr_t)paddr; + return virt_addr; + } + + spin_lock_irqsave(&domain->lock, flags); + + *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + size, DMA_BIDIRECTIONAL); + + if (*dma_addr == bad_dma_address) { + free_pages((unsigned long)virt_addr, get_order(size)); + virt_addr = NULL; + goto out; + } + + if (iommu_has_npcache(iommu)) + iommu_flush_pages(iommu, domain->id, *dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + out: + spin_unlock_irqrestore(&domain->lock, flags); + + return virt_addr; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * The exported free_coherent function for dma_ops. + ++++++++++++++++++ * FIXME: fix the generic x86 DMA layer so that it actually calls that + ++++++++++++++++++ * function. + ++++++++++++++++++ */ + static void free_coherent(struct device *dev, size_t size, + void *virt_addr, dma_addr_t dma_addr) + { + unsigned long flags; + struct amd_iommu *iommu; + struct protection_domain *domain; + u16 devid; + + get_device_resources(dev, &iommu, &domain, &devid); + + if (!iommu || !domain) + goto free_mem; + + spin_lock_irqsave(&domain->lock, flags); + + __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + iommu_flush_pages(iommu, domain->id, dma_addr, size); + + if (iommu->need_sync) + iommu_completion_wait(iommu); + + spin_unlock_irqrestore(&domain->lock, flags); + + free_mem: + free_pages((unsigned long)virt_addr, get_order(size)); + } + + /* + ++++++++++++++++++ * The function for pre-allocating protection domains. + ++++++++++++++++++ * + * If the driver core informs the DMA layer if a driver grabs a device + * we don't need to preallocate the protection domains anymore. + * For now we have to. 
+ */ + void prealloc_protection_domains(void) + { + struct pci_dev *dev = NULL; + struct dma_ops_domain *dma_dom; + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + u16 devid; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + devid = (dev->bus->number << 8) | dev->devfn; + if (devid >= amd_iommu_last_bdf) + continue; + devid = amd_iommu_alias_table[devid]; + if (domain_for_device(devid)) + continue; + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + continue; + dma_dom = dma_ops_domain_alloc(iommu, order); + if (!dma_dom) + continue; + init_unity_mappings_for_device(dma_dom, devid); + set_device_domain(iommu, &dma_dom->domain, devid); + printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", + dma_dom->domain.id); + print_devid(devid, 1); + } + } + + static struct dma_mapping_ops amd_iommu_dma_ops = { + .alloc_coherent = alloc_coherent, + .free_coherent = free_coherent, + .map_single = map_single, + .unmap_single = unmap_single, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + }; + + ++++++++++++++++++/* + ++++++++++++++++++ * The function which clues the AMD IOMMU driver into dma_ops. + ++++++++++++++++++ */ + int __init amd_iommu_init_dma_ops(void) + { + struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + int ret; + + ++++++++++++++++++ /* + ++++++++++++++++++ * first allocate a default protection domain for every IOMMU we + ++++++++++++++++++ * found in the system. Devices not assigned to any other + ++++++++++++++++++ * protection domain will be assigned to the default one. + ++++++++++++++++++ */ + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu->default_dom = dma_ops_domain_alloc(iommu, order); + if (iommu->default_dom == NULL) + return -ENOMEM; + ret = iommu_init_unity_mappings(iommu); + if (ret) + goto free_domains; + } + + ++++++++++++++++++ /* + ++++++++++++++++++ * If device isolation is enabled, pre-allocate the protection + ++++++++++++++++++ * domains for each device. + ++++++++++++++++++ */ + if (amd_iommu_isolate) + prealloc_protection_domains(); + + iommu_detected = 1; + force_iommu = 1; + bad_dma_address = 0; + #ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; + #endif + + ++++++++++++++++++ /* Make the driver finally visible to the drivers */ + dma_ops = &amd_iommu_dma_ops; + + return 0; + + free_domains: + + list_for_each_entry(iommu, &amd_iommu_list, list) { + if (iommu->default_dom) + dma_ops_domain_free(iommu->default_dom); + } + + return ret; + } diff --cc arch/x86/kernel/amd_iommu_init.c index 2a13e43,7661b02,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,6643828,2a13e43,2a13e43,0000000,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43..c9d8ff2 mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@@@@@@@@@@@@@@@@@@@@ -1,875 -1,1060 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,0 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 -1,875 +1,1060 @@@@@@@@@@@@@@@@@@@@@ + /* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * Leo Duran + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + #include + #include + #include + #include + #include + #include + #include + #include ------- -- ---------#include +++++++ ++++++++++++#include + + /* + * definitions for the ACPI scanning code + */ - -------- ---------#define UPDATE_LAST_BDF(x) do {\ - -------- --------- if ((x) > amd_iommu_last_bdf) \ - -------- --------- amd_iommu_last_bdf = (x); \ - -------- --------- } while (0); - -------- --------- - -------- ---------#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) + #define PCI_BUS(x) (((x) >> 8) & 0xff) + #define IVRS_HEADER_LENGTH 48 - -------- ---------#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) + + #define ACPI_IVHD_TYPE 0x10 + #define ACPI_IVMD_TYPE_ALL 0x20 + #define ACPI_IVMD_TYPE 0x21 + #define ACPI_IVMD_TYPE_RANGE 0x22 + + #define IVHD_DEV_ALL 0x01 + #define IVHD_DEV_SELECT 0x02 + #define IVHD_DEV_SELECT_RANGE_START 0x03 + #define IVHD_DEV_RANGE_END 0x04 + #define IVHD_DEV_ALIAS 0x42 + #define IVHD_DEV_ALIAS_RANGE 0x43 + #define IVHD_DEV_EXT_SELECT 0x46 + #define IVHD_DEV_EXT_SELECT_RANGE 0x47 + + #define IVHD_FLAG_HT_TUN_EN 0x00 + #define IVHD_FLAG_PASSPW_EN 0x01 + #define IVHD_FLAG_RESPASSPW_EN 0x02 + #define IVHD_FLAG_ISOC_EN 0x03 + + #define IVMD_FLAG_EXCL_RANGE 0x08 + #define IVMD_FLAG_UNITY_MAP 0x01 + + #define ACPI_DEVFLAG_INITPASS 0x01 + #define ACPI_DEVFLAG_EXTINT 0x02 + #define ACPI_DEVFLAG_NMI 0x04 + #define ACPI_DEVFLAG_SYSMGT1 0x10 + #define ACPI_DEVFLAG_SYSMGT2 0x20 + #define ACPI_DEVFLAG_LINT0 0x40 + #define ACPI_DEVFLAG_LINT1 0x80 + #define ACPI_DEVFLAG_ATSDIS 0x10000000 + + ++++++++++++++++++/* + ++++++++++++++++++ * ACPI table definitions + ++++++++++++++++++ * + ++++++++++++++++++ * These data structures are laid over the table to parse the important values + ++++++++++++++++++ * out of it. + ++++++++++++++++++ */ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * structure describing one IOMMU in the ACPI table. Typically followed by one + ++++++++++++++++++ * or more ivhd_entrys. + ++++++++++++++++++ */ + struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 reserved; + } __attribute__((packed)); + + ++++++++++++++++++/* + ++++++++++++++++++ * A device entry describing which devices a specific IOMMU translates and + ++++++++++++++++++ * which requestor ids they use. + ++++++++++++++++++ */ + struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; + } __attribute__((packed)); + + ++++++++++++++++++/* + ++++++++++++++++++ * An AMD IOMMU memory definition structure. It defines things like exclusion + ++++++++++++++++++ * ranges for devices and regions that should be unity mapped. 
+ ++++++++++++++++++ */ + struct ivmd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 aux; + u64 resv; + u64 range_start; + u64 range_length; + } __attribute__((packed)); + + static int __initdata amd_iommu_detected; + - -------- ---------u16 amd_iommu_last_bdf; - -------- ---------struct list_head amd_iommu_unity_map; - -------- ---------unsigned amd_iommu_aperture_order = 26; - -------- ---------int amd_iommu_isolate; + ++++++++++++++++++u16 amd_iommu_last_bdf; /* largest PCI device id we have + ++++++++++++++++++ to handle */ + ++++++++++++++++++LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings + ++++++++++++++++++ we find in ACPI */ + ++++++++++++++++++unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ + ++++++++++++++++++int amd_iommu_isolate; /* if 1, device isolation is enabled */ + ++++++++++++++++++ + ++++++++++++++++++LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the + ++++++++++++++++++ system */ + - -------- ---------struct list_head amd_iommu_list; + ++++++++++++++++++/* + ++++++++++++++++++ * Pointer to the device table which is shared by all AMD IOMMUs + ++++++++++++++++++ * it is indexed by the PCI device id or the HT unit id and contains + ++++++++++++++++++ * information about the domain the device belongs to as well as the + ++++++++++++++++++ * page table root pointer. + ++++++++++++++++++ */ + struct dev_table_entry *amd_iommu_dev_table; + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * The alias table is a driver specific data structure which contains the + ++++++++++++++++++ * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + ++++++++++++++++++ * More than one device can share the same requestor id. + ++++++++++++++++++ */ + u16 *amd_iommu_alias_table; + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * The rlookup table is used to find the IOMMU which is responsible + ++++++++++++++++++ * for a specific device. It is also indexed by the PCI device id. + ++++++++++++++++++ */ + struct amd_iommu **amd_iommu_rlookup_table; + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * The pd table (protection domain table) is used to find the protection domain + ++++++++++++++++++ * data structure a device belongs to. Indexed with the PCI device id too. + ++++++++++++++++++ */ + struct protection_domain **amd_iommu_pd_table; + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + ++++++++++++++++++ * to know which ones are already in use. 
+ ++++++++++++++++++ */ + unsigned long *amd_iommu_pd_alloc_bitmap; + - -------- ---------static u32 dev_table_size; - -------- ---------static u32 alias_table_size; - -------- ---------static u32 rlookup_table_size; + ++++++++++++++++++static u32 dev_table_size; /* size of the device table */ + ++++++++++++++++++static u32 alias_table_size; /* size of the alias table */ + ++++++++++++++++++static u32 rlookup_table_size; /* size if the rlookup table */ + + ++++++++++++++++++static inline void update_last_devid(u16 devid) + ++++++++++++++++++{ + ++++++++++++++++++ if (devid > amd_iommu_last_bdf) + ++++++++++++++++++ amd_iommu_last_bdf = devid; + ++++++++++++++++++} + ++++++++++++++++++ + ++++++++++++++++++static inline unsigned long tbl_size(int entry_size) + ++++++++++++++++++{ + ++++++++++++++++++ unsigned shift = PAGE_SHIFT + + ++++++++++++++++++ get_order(amd_iommu_last_bdf * entry_size); + ++++++++++++++++++ + ++++++++++++++++++ return 1UL << shift; + ++++++++++++++++++} + ++++++++++++++++++ + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * AMD IOMMU MMIO register space handling functions + ++++++++++++++++++ * + ++++++++++++++++++ * These functions are used to program the IOMMU device registers in + ++++++++++++++++++ * MMIO space required for that driver. + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * This function set the exclusion range in the IOMMU. DMA accesses to the + ++++++++++++++++++ * exclusion range are passed through untranslated + ++++++++++++++++++ */ + static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) + { + u64 start = iommu->exclusion_start & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 entry; + + if (!iommu->exclusion_start) + return; + + entry = start | MMIO_EXCL_ENABLE_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, + &entry, sizeof(entry)); + + entry = limit; + memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, + &entry, sizeof(entry)); + } + + ++++++++++++++++++/* Programs the physical address of the device table into the IOMMU hardware */ + static void __init iommu_set_device_table(struct amd_iommu *iommu) + { + u32 entry; + + BUG_ON(iommu->mmio_base == NULL); + + entry = virt_to_phys(amd_iommu_dev_table); + entry |= (dev_table_size >> 12) - 1; + memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, + &entry, sizeof(entry)); + } + + ++++++++++++++++++/* Generic functions to enable/disable certain features of the IOMMU. 
*/ + static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) + { + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl |= (1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + } + + static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) + { + u32 ctrl; + + ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= ~(1 << bit); + writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + } + + ++++++++++++++++++/* Function to enable the hardware */ + void __init iommu_enable(struct amd_iommu *iommu) + { + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); + print_devid(iommu->devid, 0); + printk(" cap 0x%hx\n", iommu->cap_ptr); + + iommu_feature_enable(iommu, CONTROL_IOMMU_EN); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in + ++++++++++++++++++ * the system has one. + ++++++++++++++++++ */ + static u8 * __init iommu_map_mmio_space(u64 address) + { + u8 *ret; + + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + return NULL; + + ret = ioremap_nocache(address, MMIO_REGION_LENGTH); + if (ret != NULL) + return ret; + + release_mem_region(address, MMIO_REGION_LENGTH); + + return NULL; + } + + static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) + { + if (iommu->mmio_base) + iounmap(iommu->mmio_base); + release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The functions below belong to the first pass of AMD IOMMU ACPI table + ++++++++++++++++++ * parsing. In this pass we try to find out the highest device id this + ++++++++++++++++++ * code has to handle. Upon this information the size of the shared data + ++++++++++++++++++ * structures is determined later. 
+ ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * This function reads the last device id the IOMMU has to handle from the PCI + ++++++++++++++++++ * capability header for this IOMMU + ++++++++++++++++++ */ + static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) + { + u32 cap; + + cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - -------- --------- UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + ++++++++++++++++++ update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * After reading the highest device id from the IOMMU PCI capability header + ++++++++++++++++++ * this function looks if there is a higher device id defined in the ACPI table + ++++++++++++++++++ */ + static int __init find_last_devid_from_ivhd(struct ivhd_header *h) + { + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry *dev; + + p += sizeof(*h); + end += h->length; + + find_last_devid_on_pci(PCI_BUS(h->devid), + PCI_SLOT(h->devid), + PCI_FUNC(h->devid), + h->cap_ptr); + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: - -------- --------- UPDATE_LAST_BDF(dev->devid); + ++++++++++++++++++ /* all the above subfield types refer to device ids */ + ++++++++++++++++++ update_last_devid(dev->devid); + break; + default: + break; + } + p += 0x04 << (*p >> 6); + } + + WARN_ON(p != end); + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Iterate over all IVHD entries in the ACPI table and find the highest device + ++++++++++++++++++ * id which we need to handle. This is the first of three functions which parse + ++++++++++++++++++ * the ACPI table. So we check the checksum here. + ++++++++++++++++++ */ + static int __init find_last_devid_acpi(struct acpi_table_header *table) + { + int i; + u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) + /* ACPI table corrupt */ + return -ENODEV; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + switch (h->type) { + case ACPI_IVHD_TYPE: + find_last_devid_from_ivhd(h); + break; + default: + break; + } + p += h->length; + } + WARN_ON(p != end); + + return 0; + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The following functions belong the the code path which parses the ACPI table + ++++++++++++++++++ * the second time. In this ACPI parsing iteration we allocate IOMMU specific + ++++++++++++++++++ * data structures, initialize the device/alias/rlookup table and also + ++++++++++++++++++ * basically initialize the hardware. + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * Allocates the command buffer. This buffer is per AMD IOMMU. 
We can + ++++++++++++++++++ * write commands to that buffer later and the IOMMU will execute them + ++++++++++++++++++ * asynchronously + ++++++++++++++++++ */ + static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) + { - -------- --------- u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + ++++++++++++++++++ u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); - -------- --------- u64 entry = 0; + ++++++++++++++++++ u64 entry; + + if (cmd_buf == NULL) + return NULL; + + iommu->cmd_buf_size = CMD_BUFFER_SIZE; + - -------- --------- memset(cmd_buf, 0, CMD_BUFFER_SIZE); - -------- --------- + entry = (u64)virt_to_phys(cmd_buf); + entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + + return cmd_buf; + } + + static void __init free_command_buffer(struct amd_iommu *iommu) + { - -------- --------- if (iommu->cmd_buf) - -------- --------- free_pages((unsigned long)iommu->cmd_buf, - -------- --------- get_order(CMD_BUFFER_SIZE)); + ++++++++++++++++++ free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + } + + ++++++++++++++++++/* sets a specific bit in the device table entry. */ + static void set_dev_entry_bit(u16 devid, u8 bit) + { + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + amd_iommu_dev_table[devid].data[i] |= (1 << _bit); + } + - -------- ---------static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) + ++++++++++++++++++/* Writes the specific IOMMU for a device into the rlookup table */ + ++++++++++++++++++static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) + ++++++++++++++++++{ + ++++++++++++++++++ amd_iommu_rlookup_table[devid] = iommu; + ++++++++++++++++++} + ++++++++++++++++++ + ++++++++++++++++++/* + ++++++++++++++++++ * This function takes the device specific flags read from the ACPI + ++++++++++++++++++ * table and sets up the device table entry with that information + ++++++++++++++++++ */ + ++++++++++++++++++static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + ++++++++++++++++++ u16 devid, u32 flags, u32 ext_flags) + { + if (flags & ACPI_DEVFLAG_INITPASS) + set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); - -------- ---------} + - -------- ---------static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) - -------- ---------{ - -------- --------- amd_iommu_rlookup_table[devid] = iommu; + ++++++++++++++++++ set_iommu_for_device(iommu, devid); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Reads the device exclusion range from ACPI and initialize IOMMU with + ++++++++++++++++++ * it + ++++++++++++++++++ */ + static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) + { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) + return; + + if (iommu) { + ++++++++++++++++++ /* + ++++++++++++++++++ * We only can configure exclusion ranges per IOMMU, not + 
++++++++++++++++++ * per device. But we can enable the exclusion range per + ++++++++++++++++++ * device. This is done here + ++++++++++++++++++ */ + set_dev_entry_bit(m->devid, DEV_ENTRY_EX); + iommu->exclusion_start = m->range_start; + iommu->exclusion_length = m->range_length; + } + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function reads some important data from the IOMMU PCI space and + ++++++++++++++++++ * initializes the driver data structure with it. It reads the hardware + ++++++++++++++++++ * capabilities and the first/last device entries + ++++++++++++++++++ */ + static void __init init_iommu_from_pci(struct amd_iommu *iommu) + { + int bus = PCI_BUS(iommu->devid); + int dev = PCI_SLOT(iommu->devid); + int fn = PCI_FUNC(iommu->devid); + int cap_ptr = iommu->cap_ptr; + u32 range; + + iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + + range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - -------- --------- iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); - -------- --------- iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + ++++++++++++++++++ iommu->first_device = calc_devid(MMIO_GET_BUS(range), + ++++++++++++++++++ MMIO_GET_FD(range)); + ++++++++++++++++++ iommu->last_device = calc_devid(MMIO_GET_BUS(range), + ++++++++++++++++++ MMIO_GET_LD(range)); + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Takes a pointer to an AMD IOMMU entry in the ACPI table and + ++++++++++++++++++ * initializes the hardware and our data structures with it. + ++++++++++++++++++ */ + static void __init init_iommu_from_acpi(struct amd_iommu *iommu, + struct ivhd_header *h) + { + u8 *p = (u8 *)h; + u8 *end = p, flags = 0; + u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; + u32 ext_flags = 0; - -------- --------- bool alias = 0; + ++++++++++++++++++ bool alias = false; + struct ivhd_entry *e; + + /* + * First set the recommended feature enable bits from ACPI + * into the IOMMU control registers + */ + h->flags & IVHD_FLAG_HT_TUN_EN ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + h->flags & IVHD_FLAG_PASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + h->flags & IVHD_FLAG_RESPASSPW_EN ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + h->flags & IVHD_FLAG_ISOC_EN ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + + /* + * Done. 
Now parse the device entries + */ + p += sizeof(struct ivhd_header); + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = iommu->first_device; + dev_i <= iommu->last_device; ++dev_i) - -------- --------- set_dev_entry_from_acpi(dev_i, e->flags, 0); + ++++++++++++++++++ set_dev_entry_from_acpi(iommu, dev_i, + ++++++++++++++++++ e->flags, 0); + break; + case IVHD_DEV_SELECT: + devid = e->devid; - -------- --------- set_dev_entry_from_acpi(devid, e->flags, 0); + ++++++++++++++++++ set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + break; + case IVHD_DEV_SELECT_RANGE_START: + devid_start = e->devid; + flags = e->flags; + ext_flags = 0; - -------- --------- alias = 0; + ++++++++++++++++++ alias = false; + break; + case IVHD_DEV_ALIAS: + devid = e->devid; + devid_to = e->ext >> 8; - -------- --------- set_dev_entry_from_acpi(devid, e->flags, 0); + ++++++++++++++++++ set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + amd_iommu_alias_table[devid] = devid_to; + break; + case IVHD_DEV_ALIAS_RANGE: + devid_start = e->devid; + flags = e->flags; + devid_to = e->ext >> 8; + ext_flags = 0; - -------- --------- alias = 1; + ++++++++++++++++++ alias = true; + break; + case IVHD_DEV_EXT_SELECT: + devid = e->devid; - -------- --------- set_dev_entry_from_acpi(devid, e->flags, e->ext); + ++++++++++++++++++ set_dev_entry_from_acpi(iommu, devid, e->flags, + ++++++++++++++++++ e->ext); + break; + case IVHD_DEV_EXT_SELECT_RANGE: + devid_start = e->devid; + flags = e->flags; + ext_flags = e->ext; - -------- --------- alias = 0; + ++++++++++++++++++ alias = false; + break; + case IVHD_DEV_RANGE_END: + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) { + if (alias) + amd_iommu_alias_table[dev_i] = devid_to; - -------- --------- set_dev_entry_from_acpi( + ++++++++++++++++++ set_dev_entry_from_acpi(iommu, + amd_iommu_alias_table[dev_i], + flags, ext_flags); + } + break; + default: + break; + } + + p += 0x04 << (e->type >> 6); + } + } + + ++++++++++++++++++/* Initializes the device->iommu mapping for the driver */ + static int __init init_iommu_devices(struct amd_iommu *iommu) + { + u16 i; + + for (i = iommu->first_device; i <= iommu->last_device; ++i) + set_iommu_for_device(iommu, i); + + return 0; + } + + static void __init free_iommu_one(struct amd_iommu *iommu) + { + free_command_buffer(iommu); + iommu_unmap_mmio_space(iommu); + } + + static void __init free_iommu_all(void) + { + struct amd_iommu *iommu, *next; + + list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + list_del(&iommu->list); + free_iommu_one(iommu); + kfree(iommu); + } + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function clues the initialization function for one IOMMU + ++++++++++++++++++ * together and also allocates the command buffer and programs the + ++++++++++++++++++ * hardware. It does NOT enable the IOMMU. This is done afterwards. 
+ ++++++++++++++++++ */ + static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) + { + spin_lock_init(&iommu->lock); + list_add_tail(&iommu->list, &amd_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->cap_ptr = h->cap_ptr; + iommu->mmio_phys = h->mmio_phys; + iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + if (!iommu->mmio_base) + return -ENOMEM; + + iommu_set_device_table(iommu); + iommu->cmd_buf = alloc_command_buffer(iommu); + if (!iommu->cmd_buf) + return -ENOMEM; + + init_iommu_from_pci(iommu); + init_iommu_from_acpi(iommu, h); + init_iommu_devices(iommu); + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * Iterates over all IOMMU entries in the ACPI table, allocates the + ++++++++++++++++++ * IOMMU structure and initializes it with init_iommu_one() + ++++++++++++++++++ */ + static int __init init_iommu_all(struct acpi_table_header *table) + { + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + struct amd_iommu *iommu; + int ret; + - -------- --------- INIT_LIST_HEAD(&amd_iommu_list); - -------- --------- + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + switch (*p) { + case ACPI_IVHD_TYPE: + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); + if (iommu == NULL) + return -ENOMEM; + ret = init_iommu_one(iommu, h); + if (ret) + return ret; + break; + default: + break; + } + p += h->length; + + } + WARN_ON(p != end); + + return 0; + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * The next functions belong to the third pass of parsing the ACPI + ++++++++++++++++++ * table. In this last pass the memory mapping requirements are + ++++++++++++++++++ * gathered (like exclusion and unity mapping reanges). 
+ ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + static void __init free_unity_maps(void) + { + struct unity_map_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { + list_del(&entry->list); + kfree(entry); + } + } + + ++++++++++++++++++/* called when we find an exclusion range definition in ACPI */ + static int __init init_exclusion_range(struct ivmd_header *m) + { + int i; + + switch (m->type) { + case ACPI_IVMD_TYPE: + set_device_exclusion_range(m->devid, m); + break; + case ACPI_IVMD_TYPE_ALL: + for (i = 0; i < amd_iommu_last_bdf; ++i) + set_device_exclusion_range(i, m); + break; + case ACPI_IVMD_TYPE_RANGE: + for (i = m->devid; i <= m->aux; ++i) + set_device_exclusion_range(i, m); + break; + default: + break; + } + + return 0; + } + + ++++++++++++++++++/* called for unity map ACPI definition */ + static int __init init_unity_map_range(struct ivmd_header *m) + { + struct unity_map_entry *e = 0; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (e == NULL) + return -ENOMEM; + + switch (m->type) { + default: + case ACPI_IVMD_TYPE: + e->devid_start = e->devid_end = m->devid; + break; + case ACPI_IVMD_TYPE_ALL: + e->devid_start = 0; + e->devid_end = amd_iommu_last_bdf; + break; + case ACPI_IVMD_TYPE_RANGE: + e->devid_start = m->devid; + e->devid_end = m->aux; + break; + } + e->address_start = PAGE_ALIGN(m->range_start); + e->address_end = e->address_start + PAGE_ALIGN(m->range_length); + e->prot = m->flags >> 1; + + list_add_tail(&e->list, &amd_iommu_unity_map); + + return 0; + } + + ++++++++++++++++++/* iterates over all memory definitions we find in the ACPI table */ + static int __init init_memory_definitions(struct acpi_table_header *table) + { + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivmd_header *m; + - -------- --------- INIT_LIST_HEAD(&amd_iommu_unity_map); - -------- --------- + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + m = (struct ivmd_header *)p; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + else if (m->flags & IVMD_FLAG_UNITY_MAP) + init_unity_map_range(m); + + p += m->length; + } + + return 0; + } + + ++++++++++++++++++/* + ++++++++++++++++++ * This function finally enables all IOMMUs found in the system after + ++++++++++++++++++ * they have been initialized + ++++++++++++++++++ */ + static void __init enable_iommus(void) + { + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) { + iommu_set_exclusion_range(iommu); + iommu_enable(iommu); + } + } + + /* + * Suspend/Resume support + * disable suspend until real resume implemented + */ + + static int amd_iommu_resume(struct sys_device *dev) + { + return 0; + } + + static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) + { + return -EINVAL; + } + + static struct sysdev_class amd_iommu_sysdev_class = { + .name = "amd_iommu", + .suspend = amd_iommu_suspend, + .resume = amd_iommu_resume, + }; + + static struct sys_device device_amd_iommu = { + .id = 0, + .cls = &amd_iommu_sysdev_class, + }; + + ++++++++++++++++++/* + ++++++++++++++++++ * This is the core init function for AMD IOMMU hardware in the system. + ++++++++++++++++++ * This function is called from the generic x86 DMA layer initialization + ++++++++++++++++++ * code. 
+ ++++++++++++++++++ * + ++++++++++++++++++ * This function basically parses the ACPI table for AMD IOMMU (IVRS) + ++++++++++++++++++ * three times: + ++++++++++++++++++ * + ++++++++++++++++++ * 1 pass) Find the highest PCI device id the driver has to handle. + ++++++++++++++++++ * Upon this information the size of the data structures is + ++++++++++++++++++ * determined that needs to be allocated. + ++++++++++++++++++ * + ++++++++++++++++++ * 2 pass) Initialize the data structures just allocated with the + ++++++++++++++++++ * information in the ACPI table about available AMD IOMMUs + ++++++++++++++++++ * in the system. It also maps the PCI devices in the + ++++++++++++++++++ * system to specific IOMMUs + ++++++++++++++++++ * + ++++++++++++++++++ * 3 pass) After the basic data structures are allocated and + ++++++++++++++++++ * initialized we update them with information about memory + ++++++++++++++++++ * remapping requirements parsed out of the ACPI table in + ++++++++++++++++++ * this last pass. + ++++++++++++++++++ * + ++++++++++++++++++ * After that the hardware is initialized and ready to go. In the last + ++++++++++++++++++ * step we do some Linux specific things like registering the driver in + ++++++++++++++++++ * the dma_ops interface and initializing the suspend/resume support + ++++++++++++++++++ * functions. Finally it prints some information about AMD IOMMUs and + ++++++++++++++++++ * the driver state and enables the hardware. + ++++++++++++++++++ */ + int __init amd_iommu_init(void) + { + int i, ret = 0; + + + if (no_iommu) { + printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + return 0; + } + + if (!amd_iommu_detected) + return -ENODEV; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func + * we need to handle. 
Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) + return -ENODEV; + - -------- --------- dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); - -------- --------- alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); - -------- --------- rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + ++++++++++++++++++ dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + ++++++++++++++++++ alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + ++++++++++++++++++ rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + ret = -ENOMEM; + + /* Device table - directly used by all IOMMUs */ - -------- --------- amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + ++++++++++++++++++ amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(dev_table_size)); + if (amd_iommu_dev_table == NULL) + goto out; + + /* + * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the + * IOMMU see for that device + */ + amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(alias_table_size)); + if (amd_iommu_alias_table == NULL) + goto free; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(rlookup_table_size)); + if (amd_iommu_rlookup_table == NULL) + goto free; + + /* + * Protection Domain table - maps devices to protection domains + * This table has the same size as the rlookup_table + */ - -------- --------- amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + ++++++++++++++++++ amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (amd_iommu_pd_table == NULL) + goto free; + - -------- --------- amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + ++++++++++++++++++ amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( + ++++++++++++++++++ GFP_KERNEL | __GFP_ZERO, + get_order(MAX_DOMAIN_ID/8)); + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + + /* - -------- --------- * memory is allocated now; initialize the device table with all zeroes - -------- --------- * and let all alias entries point to itself + ++++++++++++++++++ * let all alias entries point to itself + */ - -------- --------- memset(amd_iommu_dev_table, 0, dev_table_size); + for (i = 0; i < amd_iommu_last_bdf; ++i) + amd_iommu_alias_table[i] = i; + - -------- --------- memset(amd_iommu_pd_table, 0, rlookup_table_size); - -------- --------- memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); - -------- --------- + /* + * never allocate domain 0 because its used as the non-allocated and + * error value placeholder + */ + amd_iommu_pd_alloc_bitmap[0] = 1; + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = -ENODEV; + if (acpi_table_parse("IVRS", init_iommu_all) != 0) + goto free; + + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) + goto free; + + ret = amd_iommu_init_dma_ops(); + if (ret) + goto free; + + ret = sysdev_class_register(&amd_iommu_sysdev_class); + if (ret) + goto free; + + ret = sysdev_register(&device_amd_iommu); + if (ret) + goto free; + + enable_iommus(); + + printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", + (1 << (amd_iommu_aperture_order-20))); + + printk(KERN_INFO "AMD IOMMU: device isolation "); + if (amd_iommu_isolate) + printk("enabled\n"); + else + printk("disabled\n"); + + out: + return ret; + + 
free: - -------- --------- if (amd_iommu_pd_alloc_bitmap) - -------- --------- free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + ++++++++++++++++++ free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + - -------- --------- if (amd_iommu_pd_table) - -------- --------- free_pages((unsigned long)amd_iommu_pd_table, - -------- --------- get_order(rlookup_table_size)); + ++++++++++++++++++ free_pages((unsigned long)amd_iommu_pd_table, + ++++++++++++++++++ get_order(rlookup_table_size)); + - -------- --------- if (amd_iommu_rlookup_table) - -------- --------- free_pages((unsigned long)amd_iommu_rlookup_table, - -------- --------- get_order(rlookup_table_size)); + ++++++++++++++++++ free_pages((unsigned long)amd_iommu_rlookup_table, + ++++++++++++++++++ get_order(rlookup_table_size)); + - -------- --------- if (amd_iommu_alias_table) - -------- --------- free_pages((unsigned long)amd_iommu_alias_table, - -------- --------- get_order(alias_table_size)); + ++++++++++++++++++ free_pages((unsigned long)amd_iommu_alias_table, + ++++++++++++++++++ get_order(alias_table_size)); + - -------- --------- if (amd_iommu_dev_table) - -------- --------- free_pages((unsigned long)amd_iommu_dev_table, - -------- --------- get_order(dev_table_size)); + ++++++++++++++++++ free_pages((unsigned long)amd_iommu_dev_table, + ++++++++++++++++++ get_order(dev_table_size)); + + free_iommu_all(); + + free_unity_maps(); + + goto out; + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * Early detect code. This code runs at IOMMU detection time in the DMA + ++++++++++++++++++ * layer. It just looks if there is an IVRS ACPI table to detect AMD + ++++++++++++++++++ * IOMMUs + ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + static int __init early_amd_iommu_detect(struct acpi_table_header *table) + { + return 0; + } + + void __init amd_iommu_detect(void) + { - -------- --------- if (swiotlb || no_iommu || iommu_detected) + ++++++++++++++++++ if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + return; + + if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { + iommu_detected = 1; + amd_iommu_detected = 1; + #ifdef CONFIG_GART_IOMMU + gart_iommu_aperture_disabled = 1; + gart_iommu_aperture = 0; + #endif + } + } + + ++++++++++++++++++/**************************************************************************** + ++++++++++++++++++ * + ++++++++++++++++++ * Parsing functions for the AMD IOMMU specific kernel command line + ++++++++++++++++++ * options. 
+ ++++++++++++++++++ * + ++++++++++++++++++ ****************************************************************************/ + ++++++++++++++++++ + static int __init parse_amd_iommu_options(char *str) + { + for (; *str; ++str) { + if (strcmp(str, "isolate") == 0) + amd_iommu_isolate = 1; + } + + return 1; + } + + static int __init parse_amd_iommu_size_options(char *str) + { - -------- --------- for (; *str; ++str) { - -------- --------- if (strcmp(str, "32M") == 0) - -------- --------- amd_iommu_aperture_order = 25; - -------- --------- if (strcmp(str, "64M") == 0) - -------- --------- amd_iommu_aperture_order = 26; - -------- --------- if (strcmp(str, "128M") == 0) - -------- --------- amd_iommu_aperture_order = 27; - -------- --------- if (strcmp(str, "256M") == 0) - -------- --------- amd_iommu_aperture_order = 28; - -------- --------- if (strcmp(str, "512M") == 0) - -------- --------- amd_iommu_aperture_order = 29; - -------- --------- if (strcmp(str, "1G") == 0) - -------- --------- amd_iommu_aperture_order = 30; - -------- --------- } + ++++++++++++++++++ unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); + ++++++++++++++++++ + ++++++++++++++++++ if ((order > 24) && (order < 31)) + ++++++++++++++++++ amd_iommu_aperture_order = order; + + return 1; + } + + __setup("amd_iommu=", parse_amd_iommu_options); + __setup("amd_iommu_size=", parse_amd_iommu_size_options); diff --cc arch/x86/kernel/apic_32.c index a437d02,a437d02,7f30c0f,a437d02,e9a00e5,a437d02,3e58b67,3e94720,a437d02,a437d02,4b99b1b,a437d02,3e58b67,a437d02,a437d02,a437d02,3e58b67,a437d02,a437d02,a437d02..d6c8983 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@@@@@@@@@@@@@@@@@@@@ -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -74,7 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 +75,17 @@@@@@@@@@@@@@@@@@@@@ char system_vectors[NR_VECTORS] = { [0 /* * Debug level, exported for io_apic.c */ -- -----------------int apic_verbosity; ++ +++++++++++++++++unsigned int apic_verbosity; + + int pic_mode; + + /* Have we found an MP table */ + int smp_found_config; + + static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, + }; static unsigned int calibration_result; @@@@@@@@@@@@@@@@@@@@@ -543,22 -543,22 -514,55 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -532,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 +514,55 @@@@@@@@@@@@@@@@@@@@@ static int __init calibrate_APIC_clock( if (!local_apic_timer_verify_ok) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); ++ +++++++++++++++++ return -1; ++ +++++++++++++++++ } ++ +++++++++++++++++ ++ +++++++++++++++++ return 0; ++ +++++++++++++++++} ++ +++++++++++++++++ ++ +++++++++++++++++/* ++ +++++++++++++++++ * Setup the boot APIC ++ +++++++++++++++++ * ++ +++++++++++++++++ * Calibrate and verify the result. ++ +++++++++++++++++ */ ++ +++++++++++++++++void __init setup_boot_APIC_clock(void) ++ +++++++++++++++++{ ++ +++++++++++++++++ /* ++ +++++++++++++++++ * The local apic timer can be disabled via the kernel ++ +++++++++++++++++ * commandline or from the CPU detection code. Register the lapic ++ +++++++++++++++++ * timer as a dummy clock event source on SMP systems, so the ++ +++++++++++++++++ * broadcast mechanism is used. On UP systems simply ignore it. ++ +++++++++++++++++ */ ++ +++++++++++++++++ if (local_apic_timer_disabled) { /* No broadcast on UP ! 
*/ -- ----------------- if (num_possible_cpus() == 1) -- ----------------- return; -- ----------------- } else { -- ----------------- /* -- ----------------- * If nmi_watchdog is set to IO_APIC, we need the -- ----------------- * PIT/HPET going. Otherwise register lapic as a dummy -- ----------------- * device. -- ----------------- */ -- ----------------- if (nmi_watchdog != NMI_IO_APIC) -- ----------------- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; -- ----------------- else -- ----------------- printk(KERN_WARNING "APIC timer registered as dummy," -- ------- --------- " due to nmi_watchdog=%d!\n", nmi_watchdog); - " due to nmi_watchdog=1!\n"); ++ +++++++++++++++++ if (num_possible_cpus() > 1) { ++ +++++++++++++++++ lapic_clockevent.mult = 1; ++ +++++++++++++++++ setup_APIC_timer(); ++ +++++++++++++++++ } ++ +++++++++++++++++ return; ++ + + + } ++ + + + ++ +++++++++++++++++ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" ++ +++++++++++++++++ "calibrating APIC timer ...\n"); ++ +++++++++++++++++ ++ +++++++++++++++++ if (calibrate_APIC_clock()) { ++ +++++++++++++++++ /* No broadcast on UP ! */ ++ +++++++++++++++++ if (num_possible_cpus() > 1) ++ +++++++++++++++++ setup_APIC_timer(); ++ +++++++++++++++++ return; ++ +++ ++ + +++ +++ } ++ +++ ++ + +++ +++ ++ +++++++++++++++++ /* ++ +++++++++++++++++ * If nmi_watchdog is set to IO_APIC, we need the ++ +++++++++++++++++ * PIT/HPET going. Otherwise register lapic as a dummy ++ +++++++++++++++++ * device. ++ +++++++++++++++++ */ ++ +++++++++++++++++ if (nmi_watchdog != NMI_IO_APIC) ++ +++++++++++++++++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ +++++++++++++++++ else ++ +++++++++++++++++ printk(KERN_WARNING "APIC timer registered as dummy," ++ +++++++++++++++++ " due to nmi_watchdog=%d!\n", nmi_watchdog); ++ +++++++++++++++++ /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@@@@@@@@@@@@@@@@@@@@ -1214,9 -1214,9 -1218,9 -1214,9 -1214,6 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 -1236,9 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 -1214,9 +1218,6 @@@@@@@@@@@@@@@@@@@@@ int apic_version[MAX_APICS] int __init APIC_init_uniprocessor(void) { ---- ----- --------- if (disable_apic) - if (enable_local_apic < 0) ---- --------------- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ---- --------------- if (!smp_found_config && !cpu_has_apic) return -1; @@@@@@@@@@@@@@@@@@@@@ -1333,17 -1333,17 -1337,17 -1333,17 -1330,17 -1333,17 -1333,13 -1333,13 -1333,17 -1333,17 -1351,13 -1333,17 -1333,13 -1333,17 -1333,17 -1333,17 -1333,13 -1333,17 -1333,17 -1333,17 +1334,17 @@@@@@@@@@@@@@@@@@@@@ void __init smp_intr_init(void * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. 
*/ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); ++ + + + ++ + + + /* IPI for single call function */ ++ + + + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, ++ + + + call_function_single_interrupt); } #endif @@@@@@@@@@@@@@@@@@@@@ -1699,8 -1699,8 -1703,8 -1699,8 -1696,8 -1699,8 -1695,8 -1695,8 -1699,8 -1699,8 -1710,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 +1700,8 @@@@@@@@@@@@@@@@@@@@@ early_param("lapic", parse_lapic) static int __init parse_nolapic(char *arg) { - enable_local_apic = -1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + disable_apic = 1; ---- ----- --------- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ++++ +++++++++++++++ setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } early_param("nolapic", parse_nolapic); diff --cc arch/x86/kernel/apic_64.c index 1e3d32e,1e3d32e,98c70f0,1e3d32e,16e586c,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e,0633cfd,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e,1e3d32e..7f1f030 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@@@@@@@@@@@@@@@@@@@@ -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,7 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 +54,10 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o /* * Debug level, exported for io_apic.c */ -- -----------------int apic_verbosity; ++ +++++++++++++++++unsigned int apic_verbosity; + + /* Have we found an MP table */ + int smp_found_config; static struct resource lapic_resource = { .name = "Local APIC", diff --cc arch/x86/kernel/cpu/common_64.c index 7b8cc72,7b8cc72,7b8cc72,2a4475b,daee611,7b8cc72,7b8cc72,7518502,7b8cc72,7b8cc72,0000000,7b8cc72,7b8cc72,7b8cc72,7b8cc72,7b8cc72,36537ab,7b8cc72,7b8cc72,736f50f..dd6e3f1 mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@@@@@@@@@@@@@@@@@@@@ -1,681 -1,681 -1,681 -1,678 -1,676 -1,681 -1,681 -1,676 -1,681 -1,681 -1,0 -1,681 -1,681 -1,681 -1,681 -1,681 -1,679 -1,681 -1,681 -1,678 +1,670 @@@@@@@@@@@@@@@@@@@@@ + #include + #include + #include + #include + #include + #include + #include + #include + #include --- ------ ---------#include + #include + #include --- ------ ---------#include + #include --- ------ ---------#include + #include + #include + #include +++++++++++++++++++ #include + #include + #include + #include + #include + #include + #ifdef CONFIG_X86_LOCAL_APIC + #include + #include + #include + #endif + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include "cpu.h" + + /* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ + /* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) 
*/ + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + } }; + EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + + /* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ + void switch_to_new_gdt(void) + { + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + } + + struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; + + static void __cpuinit default_init(struct cpuinfo_x86 *c) + { + display_cacheinfo(c); + } + + static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + }; + static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; + + int __cpuinit get_model_name(struct cpuinfo_x86 *c) + { + unsigned int *v; + + if (c->extended_cpuid_level < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + return 1; + } + + + void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) + { - - unsigned int n, dummy, eax, ebx, ecx, edx; + + + unsigned int n, dummy, ebx, ecx, edx; + + n = c->extended_cpuid_level; + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " + "D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; + } + + if (n >= 0x80000006) { + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + ecx = cpuid_ecx(0x80000006); + c->x86_cache_size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + c->x86_cache_size, ecx & 0xFF); + } - - if (n >= 0x80000008) { - - cpuid(0x80000008, &eax, &dummy, &dummy, &dummy); - - c->x86_virt_bits = (eax >> 8) & 0xff; - - c->x86_phys_bits = eax & 0xff; - - } + } + + void __cpuinit detect_ht(struct cpuinfo_x86 *c) + { + #ifdef CONFIG_SMP + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(index_msb); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + } + out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: 
Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } + + #endif + } + + static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) + { + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; + } + + static void __init early_cpu_support_print(void) + { + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } + } + + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); + + void __init early_cpu_init(void) + { + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start ; + cvdev < __x86cpuvendor_end ; + cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); + } + + /* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) + { + u32 tfms, xlvl; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + c->extended_cpuid_level = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + /* Get vendor name */ + cpuid(0x00000000, (unsigned int *)&c->cpuid_level, + (unsigned int *)&c->x86_vendor_id[0], + (unsigned int *)&c->x86_vendor_id[8], + (unsigned int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + __u32 misc; + cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; + #ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; + #endif + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if (xlvl >= 0x80000004) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + ---- ----- --------- c->extended_cpuid_level = cpuid_eax(0x80000000); + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + - /* Assume all 64-bit CPUs support 32-bit syscall */ - set_cpu_cap(c, X86_FEATURE_SYSCALL32); + + + if (c->extended_cpuid_level >= 0x80000008) { + + + u32 eax = cpuid_eax(0x80000008); + + + + + + c->x86_virt_bits = (eax >> 8) & 0xff; + + + c->x86_phys_bits = eax & 0xff; + + + } + + ------- -- ----- -- /* Assume all 64-bit CPUs support 32-bit syscall */ ------- -- ----- -- set_cpu_cap(c, X86_FEATURE_SYSCALL32); ------- -- ----- -- + if (c->x86_vendor != X86_VENDOR_UNKNOWN && + cpu_devs[c->x86_vendor]->c_early_init) + cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); ---- ----- --------- ---- ----- --------- /* early_param could clear that, but recall get it set again */ ---- ----- --------- if (disable_apic) ---- ----- --------- clear_cpu_cap(c, X86_FEATURE_APIC); + } + + /* + * This does the hard work of actually picking apart the CPU stuff... + */ + static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) + { + int i; + + early_identify_cpu(c); + + init_scattered_cpuid_features(c); + + c->apicid = phys_pkg_id(0); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! 
+ */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + detect_ht(c); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if (c != &boot_cpu_data) { + /* AND the already accumulated flags with these */ + for (i = 0; i < NCAPINTS; i++) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Clear all flags overriden by options */ + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] &= ~cleared_cpu_caps[i]; + + #ifdef CONFIG_X86_MCE + mcheck_init(c); + #endif + select_idle_routine(c); + + #ifdef CONFIG_NUMA + numa_add_cpu(smp_processor_id()); + #endif + + } + + void __cpuinit identify_boot_cpu(void) + { + identify_cpu(&boot_cpu_data); + } + + void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) + { + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + mtrr_ap_init(); + } + + static __init int setup_noclflush(char *arg) + { + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; + } + __setup("noclflush", setup_noclflush); + + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) + { + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(KERN_CONT " stepping %02x\n", c->x86_mask); + else + printk(KERN_CONT "\n"); + } + + static __init int setup_disablecpuid(char *arg) + { + int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; + return 1; + } + __setup("clearcpuid=", setup_disablecpuid); + + cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + + struct x8664_pda **_cpu_pda __read_mostly; + EXPORT_SYMBOL(_cpu_pda); + + struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + + char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + + unsigned long __supported_pte_mask __read_mostly = ~0UL; + EXPORT_SYMBOL_GPL(__supported_pte_mask); + + static int do_not_nx __cpuinitdata; + + /* noexec=on|off + Control non executable mappings for 64bit processes. + + on Enable(default) + off Disable + */ + static int __init nonx_setup(char *str) + { + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; + } + early_param("noexec", nonx_setup); + + int force_personality32; + + /* noexec32=on|off + Control non executable heap for 32bit processes. 
+ To control the stack too use noexec=off + + on PROT_READ does not imply PROT_EXEC for 32bit processes (default) + off PROT_READ implies PROT_EXEC + */ + static int __init nonx32_setup(char *str) + { + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; + } + __setup("noexec32=", nonx32_setup); + + void pda_init(int cpu) + { + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + } else { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", cpu); + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } + + pda->irqstackptr += IRQSTACKSIZE-64; + } + + char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + ---------- -------- DEBUG_STKSZ] ---------- -------- __attribute__((section(".bss.page_aligned"))); +++++++++++++++++++ DEBUG_STKSZ] __page_aligned_bss; + + extern asmlinkage void ignore_sysret(void); + + /* May not be marked __init: used by software suspend */ + void syscall_init(void) + { + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + + #ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); + #endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); + } + + void __cpuinit check_efer(void) + { + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; + } + + unsigned long kernel_eflags; + + /* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ + DEFINE_PER_CPU(struct orig_ist, orig_ist); + + /* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init. 
+ */ + void __cpuinit cpu_init(void) + { + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + + /* + * set up and load the per-CPU TSS + */ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception stack %ld %d\n", + v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + + #ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { + #endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); + #ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. 
*/ + } + #endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); + } diff --cc arch/x86/kernel/entry_32.S index 6bc07f0,6bc07f0,6bc07f0,6bc07f0,6bc07f0,6bc07f0,6bc07f0,53393c3,6bc07f0,6bc07f0,c778e4f,6bc07f0,6bc07f0,6bc07f0,6bc07f0,cadf73f,cfe28a7,6bc07f0,ad5264c,6bc07f0..cdfd94c --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@@@@@@@@@@@@@@@@@@@@ -51,8 -51,8 -51,8 -51,8 -51,8 -51,8 -51,8 -51,7 -51,8 -51,8 -51,7 -51,8 -51,8 -51,8 -51,8 -51,8 -51,7 -51,8 -51,8 -51,8 +51,8 @@@@@@@@@@@@@@@@@@@@@ #include #include #include - #include "irq_vectors.h" + + + #include + #include /* * We use macros for low-level operations which need to be overridden @@@@@@@@@@@@@@@@@@@@@ -1024,8 -1024,8 -1024,8 -1024,8 -1024,8 -1024,8 -1024,8 -1023,7 -1024,8 -1024,8 -1023,7 -1024,8 -1024,8 -1024,8 -1024,8 -1015,8 -1023,8 -1024,8 -1024,9 -1024,8 +1015,9 @@@@@@@@@@@@@@@@@@@@@ ENDPROC(kernel_thread_helper ENTRY(xen_sysenter_target) RING0_INT_FRAME addl $5*4, %esp /* remove xen-provided frame */ ++++++++++++++++++ + CFI_ADJUST_CFA_OFFSET -5*4 jmp sysenter_past_esp + + CFI_ENDPROC ENTRY(xen_hypervisor_callback) CFI_STARTPROC diff --cc arch/x86/kernel/entry_64.S index ae63e58,ae63e58,ae63e58,ae63e58,ae63e58,ae63e58,ba41bf4,466b928,ae63e58,ae63e58,556a8df5,ae63e58,ba41bf4,ae63e58,ae63e58,63001c6,466b928,ae63e58,ae63e58,80d5663..8410e26 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@@@@@@@@@@@@@@@@@@@@ -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -243,8 -349,8 -349,8 -244,7 -349,8 -349,8 -349,8 -349,8 -349,7 -243,8 -349,8 -349,8 -349,8 +349,7 @@@@@@@@@@@@@@@@@@@@@ ENTRY(system_call_after_swapgs movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) ---------- ---- ---- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ ---------- ---- ---- TI_flags(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) +++++++++++++++ ++++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys diff --cc arch/x86/kernel/nmi.c index ec024b3,ec024b3,384b49f,ec024b3,ec024b3,ec024b3,716b892,8dfe9db,ec024b3,ec024b3,0000000,e0b44b7,716b892,ec024b3,ec024b3,ec024b3,716b892,ec024b3,ec024b3,ec024b3..ac6d512 mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@@@@@@@@@@@@@@@@@@@@ -1,516 -1,516 -1,516 -1,516 -1,516 -1,516 -1,516 -1,513 -1,516 -1,516 -1,0 -1,523 -1,516 -1,516 -1,516 -1,516 -1,516 -1,516 -1,516 -1,516 +1,523 @@@@@@@@@@@@@@@@@@@@@ + /* + * NMI watchdog support on APIC systems + * + * Started by Ingo Molnar + * + * Fixes: + * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. + * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. Disable/enable API. 
+ */ + + #include + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include + #include + #include + #include + #include + #include + + #include + + #include + + int unknown_nmi_panic; + int nmi_watchdog_enabled; + + static cpumask_t backtrace_mask = CPU_MASK_NONE; + + /* nmi_active: + * >0: the lapic NMI watchdog is active, but can be disabled + * <0: the lapic NMI watchdog has not been set up, and cannot + * be enabled + * 0: the lapic NMI watchdog is disabled, but can be enabled + */ + atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ + EXPORT_SYMBOL(nmi_active); + + unsigned int nmi_watchdog = NMI_NONE; + EXPORT_SYMBOL(nmi_watchdog); + + static int panic_on_timeout; + + static unsigned int nmi_hz = HZ; + static DEFINE_PER_CPU(short, wd_enabled); + static int endflag __initdata; + + static inline unsigned int get_nmi_count(int cpu) + { + #ifdef CONFIG_X86_64 + return cpu_pda(cpu)->__nmi_count; + #else + return nmi_count(cpu); + #endif + } + + static inline int mce_in_progress(void) + { + #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; + #endif + return 0; + } + + /* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ + static inline unsigned int get_timer_irqs(int cpu) + { + #ifdef CONFIG_X86_64 + return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); + #else + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; + #endif + } + + #ifdef CONFIG_SMP + /* + * The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ + static __init void nmi_cpu_busy(void *data) + { + local_irq_enable_in_hardirq(); + /* + * Intentionally don't use cpu_relax here. This is + * to make sure that the performance counter really ticks, + * even if there is a simulator or similar that catches the + * pause instruction. On a real HT machine this is fine because + * all other CPUs are busy with "useless" delay loops and don't + * care if they get somewhat less cycles. + */ + while (endflag == 0) + mb(); + } + #endif + + int __init check_nmi_watchdog(void) + { + unsigned int *prev_nmi_count; + int cpu; + + if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) + return 0; + + prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + goto error; + + printk(KERN_INFO "Testing NMI watchdog ... 
"); + + #ifdef CONFIG_SMP + if (nmi_watchdog == NMI_LOCAL_APIC) -- - - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); ++ + + + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); + #endif + + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); + local_irq_enable(); + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ + + for_each_online_cpu(cpu) { + if (!per_cpu(wd_enabled, cpu)) + continue; + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + printk(KERN_WARNING "WARNING: CPU#%d: NMI " + "appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + get_nmi_count(cpu)); + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); + } + } + endflag = 1; + if (!atomic_read(&nmi_active)) { + kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); + goto error; + } + printk("OK.\n"); + + /* + * now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs + */ + if (nmi_watchdog == NMI_LOCAL_APIC) + nmi_hz = lapic_adjust_nmi_hz(1); + + kfree(prev_nmi_count); + return 0; + error: + if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) + disable_8259A_irq(0); + + #ifdef CONFIG_X86_32 + + timer_ack = 0; + + #endif + return -1; + } + + static int __init setup_nmi_watchdog(char *str) + { + unsigned int nmi; + + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + + get_option(&str, &nmi); + + if (nmi >= NMI_INVALID) + return 0; + + nmi_watchdog = nmi; + return 1; + } + __setup("nmi_watchdog=", setup_nmi_watchdog); + + /* + * Suspend/resume support + */ + #ifdef CONFIG_PM + + static int nmi_pm_active; /* nmi_active before suspend */ + + static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) + { + /* only CPU0 goes here, other CPUs should be offline */ + nmi_pm_active = atomic_read(&nmi_active); + stop_apic_nmi_watchdog(NULL); + BUG_ON(atomic_read(&nmi_active) != 0); + return 0; + } + + static int lapic_nmi_resume(struct sys_device *dev) + { + /* only CPU0 goes here, other CPUs should be offline */ + if (nmi_pm_active > 0) { + setup_apic_nmi_watchdog(NULL); + touch_nmi_watchdog(); + } + return 0; + } + + static struct sysdev_class nmi_sysclass = { + .name = "lapic_nmi", + .resume = lapic_nmi_resume, + .suspend = lapic_nmi_suspend, + }; + + static struct sys_device device_lapic_nmi = { + .id = 0, + .cls = &nmi_sysclass, + }; + + static int __init init_lapic_nmi_sysfs(void) + { + int error; + + /* + * should really be a BUG_ON but b/c this is an + * init call, it just doesn't work. 
-dcz + */ + if (nmi_watchdog != NMI_LOCAL_APIC) + return 0; + + if (atomic_read(&nmi_active) < 0) + return 0; + + error = sysdev_class_register(&nmi_sysclass); + if (!error) + error = sysdev_register(&device_lapic_nmi); + return error; + } + + /* must come after the local APIC's device_initcall() */ + late_initcall(init_lapic_nmi_sysfs); + + #endif /* CONFIG_PM */ + + static void __acpi_nmi_enable(void *__unused) + { -- ------- --------- apic_write_around(APIC_LVT0, APIC_DM_NMI); ++ +++++++++++++++++ apic_write(APIC_LVT0, APIC_DM_NMI); + } + + /* + * Enable timer based NMIs on all CPUs: + */ + void acpi_nmi_enable(void) + { + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) -- - - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); ++ + + + on_each_cpu(__acpi_nmi_enable, NULL, 1); + } + + static void __acpi_nmi_disable(void *__unused) + { -- ------- --------- apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); ++ +++++++++++++++++ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + } + + /* + * Disable timer based NMIs on all CPUs: + */ + void acpi_nmi_disable(void) + { + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) -- - - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); ++ + + + on_each_cpu(__acpi_nmi_disable, NULL, 1); + } + + void setup_apic_nmi_watchdog(void *unused) + { + if (__get_cpu_var(wd_enabled)) + return; + + /* cheap hack to support suspend/resume */ + /* if cpu0 is not active neither should the other cpus */ + if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) + return; + + switch (nmi_watchdog) { + case NMI_LOCAL_APIC: + /* enable it before to avoid race with handler */ + __get_cpu_var(wd_enabled) = 1; + if (lapic_watchdog_init(nmi_hz) < 0) { + __get_cpu_var(wd_enabled) = 0; + return; + } + /* FALL THROUGH */ + case NMI_IO_APIC: + __get_cpu_var(wd_enabled) = 1; + atomic_inc(&nmi_active); + } + } + + void stop_apic_nmi_watchdog(void *unused) + { + /* only support LOCAL and IO APICs for now */ + if (!nmi_watchdog_active()) + return; + if (__get_cpu_var(wd_enabled) == 0) + return; + if (nmi_watchdog == NMI_LOCAL_APIC) + lapic_watchdog_stop(); + __get_cpu_var(wd_enabled) = 0; + atomic_dec(&nmi_active); + } + + /* + * the best way to detect whether a CPU has a 'hard lockup' problem + * is to check it's local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are generated on every CPU, we only + * have to check the current processor. + * + * since NMIs don't listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up any console locks first ... + * [when there will be more tty-related locks, break them up here too!] + */ + + static DEFINE_PER_CPU(unsigned, last_irq_sum); + static DEFINE_PER_CPU(local_t, alert_counter); + static DEFINE_PER_CPU(int, nmi_touch); + + void touch_nmi_watchdog(void) + { + if (nmi_watchdog_active()) { + unsigned cpu; + + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. 
+
+ static DEFINE_PER_CPU(unsigned, last_irq_sum);
+ static DEFINE_PER_CPU(local_t, alert_counter);
+ static DEFINE_PER_CPU(int, nmi_touch);
+
+ void touch_nmi_watchdog(void)
+ {
+ 	if (nmi_watchdog_active()) {
+ 		unsigned cpu;
+
+ 		/*
+ 		 * Tell other CPUs to reset their alert counters. We cannot
+ 		 * do it ourselves because the alert count increase is not
+ 		 * atomic.
+ 		 */
+ 		for_each_present_cpu(cpu) {
+ 			if (per_cpu(nmi_touch, cpu) != 1)
+ 				per_cpu(nmi_touch, cpu) = 1;
+ 		}
+ 	}
+
+ 	/*
+ 	 * Tickle the softlockup detector too:
+ 	 */
+ 	touch_softlockup_watchdog();
+ }
+ EXPORT_SYMBOL(touch_nmi_watchdog);
+
+ notrace __kprobes int
+ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+ {
+ 	/*
+ 	 * Since current_thread_info()-> is always on the stack, and we
+ 	 * always switch the stack NMI-atomically, it's safe to use
+ 	 * smp_processor_id().
+ 	 */
+ 	unsigned int sum;
+ 	int touched = 0;
+ 	int cpu = smp_processor_id();
+ 	int rc = 0;
+
+ 	/* check for other users first */
+ 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ 			== NOTIFY_STOP) {
+ 		rc = 1;
+ 		touched = 1;
+ 	}
+
+ 	sum = get_timer_irqs(cpu);
+
+ 	if (__get_cpu_var(nmi_touch)) {
+ 		__get_cpu_var(nmi_touch) = 0;
+ 		touched = 1;
+ 	}
+
+ 	if (cpu_isset(cpu, backtrace_mask)) {
+ 		static DEFINE_SPINLOCK(lock); /* Serialise the printks */
+
+ 		spin_lock(&lock);
+ 		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+ 		dump_stack();
+ 		spin_unlock(&lock);
+ 		cpu_clear(cpu, backtrace_mask);
+ 	}
+
+ 	/* Could check oops_in_progress here too, but it's safer not to */
+ 	if (mce_in_progress())
+ 		touched = 1;
+
+ 	/* if none of the timers is firing, this cpu isn't doing much */
+ 	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
+ 		/*
+ 		 * Ayiee, looks like this CPU is stuck ...
+ 		 * wait a few IRQs (5 seconds) before doing the oops ...
+ 		 */
+ 		local_inc(&__get_cpu_var(alert_counter));
+ 		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+ 			/*
+ 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
+ 			 */
+ 			die_nmi("BUG: NMI Watchdog detected LOCKUP",
+ 				regs, panic_on_timeout);
+ 	} else {
+ 		__get_cpu_var(last_irq_sum) = sum;
+ 		local_set(&__get_cpu_var(alert_counter), 0);
+ 	}
+
+ 	/* see if the nmi watchdog went off */
+ 	if (!__get_cpu_var(wd_enabled))
+ 		return rc;
+ 	switch (nmi_watchdog) {
+ 	case NMI_LOCAL_APIC:
+ 		rc |= lapic_wd_event(nmi_hz);
+ 		break;
+ 	case NMI_IO_APIC:
+ 		/*
+ 		 * don't know how to accurately check for this.
+ 		 * just assume it was a watchdog timer interrupt
+ 		 * This matches the old behaviour.
+ 		 */
+ 		rc = 1;
+ 		break;
+ 	}
+ 	return rc;
+ }
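The cpu_isset()/cpu_clear() block in nmi_watchdog_tick() above is the consumer half of the all-CPU backtrace request; the producer, __trigger_all_cpu_backtrace() later in this file, fills backtrace_mask with every online CPU and polls it for up to ten seconds. A single-process sketch of that handshake, illustrative only and not part of the patch; a plain bitmask stands in for the kernel's cpumask, and nmi_handler() stands in for work that really happens in NMI context.

/*
 * Illustrative sketch, not part of the patch: the backtrace_mask
 * handshake.  The producer sets a bit per CPU and polls; each CPU clears
 * its own bit once it has dumped its stack.
 */
#include <stdio.h>

#define NR_CPUS 4

static unsigned int backtrace_mask;

/* consumer side: what each CPU does from its watchdog NMI */
static void nmi_handler(int cpu)
{
	if (backtrace_mask & (1u << cpu)) {
		printf("NMI backtrace for cpu %d\n", cpu);
		backtrace_mask &= ~(1u << cpu);	/* signal completion */
	}
}

int main(void)
{
	int cpu, round;

	backtrace_mask = (1u << NR_CPUS) - 1;	/* producer: all CPUs */

	/* poll until every CPU has responded (bounded, like the 10s loop) */
	for (round = 0; round < 10 && backtrace_mask; round++)
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			nmi_handler(cpu);

	printf("remaining mask: %#x\n", backtrace_mask);
	return 0;
}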
+
+ #ifdef CONFIG_SYSCTL
+
+++++++++++ ++++++++static int __init setup_unknown_nmi_panic(char *str)
+++++++++++ ++++++++{
+++++++++++ ++++++++	unknown_nmi_panic = 1;
+++++++++++ ++++++++	return 1;
+++++++++++ ++++++++}
+++++++++++ ++++++++__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+++++++++++ ++++++++
+ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
+ {
+ 	unsigned char reason = get_nmi_reason();
+ 	char buf[64];
+
+ 	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+ 	die_nmi(buf, regs, 1); /* Always panic here */
+ 	return 0;
+ }
+
+ /*
+  * proc handler for /proc/sys/kernel/nmi
+  */
+ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+ 			void __user *buffer, size_t *length, loff_t *ppos)
+ {
+ 	int old_state;
+
+ 	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+ 	old_state = nmi_watchdog_enabled;
+ 	proc_dointvec(table, write, file, buffer, length, ppos);
+ 	if (!!old_state == !!nmi_watchdog_enabled)
+ 		return 0;
+
+ 	if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
+ 		printk(KERN_WARNING
+ 			"NMI watchdog is permanently disabled\n");
+ 		return -EIO;
+ 	}
+
+ 	if (nmi_watchdog == NMI_LOCAL_APIC) {
+ 		if (nmi_watchdog_enabled)
+ 			enable_lapic_nmi_watchdog();
+ 		else
+ 			disable_lapic_nmi_watchdog();
+ 	} else {
+ 		printk(KERN_WARNING
+ 			"NMI watchdog doesn't know what hardware to touch\n");
+ 		return -EIO;
+ 	}
+ 	return 0;
+ }
+
+ #endif /* CONFIG_SYSCTL */
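proc_nmi_enabled() above snapshots the current state, lets proc_dointvec() rewrite the knob, and touches the hardware only when the boolean value actually flipped. A small user-space rendering of that act-only-on-change pattern, illustrative only and not part of the patch; set_watchdog() and write_knob() are made-up stand-ins for enable_lapic_nmi_watchdog()/disable_lapic_nmi_watchdog() and the proc write path.

/*
 * Illustrative sketch, not part of the patch: act only when the value
 * really changed, exactly as the proc handler above does.
 */
#include <stdio.h>

static int watchdog_enabled = 1;	/* the exported knob */

static void set_watchdog(int on)	/* stand-in for the lapic helpers */
{
	printf("watchdog %s\n", on ? "enabled" : "disabled");
}

static void write_knob(int new_value)	/* stand-in for the proc write */
{
	int old_state = watchdog_enabled;

	watchdog_enabled = new_value;	/* what proc_dointvec() would do */
	if (!!old_state == !!watchdog_enabled)
		return;			/* no real change, nothing to do */
	set_watchdog(watchdog_enabled);
}

int main(void)
{
	write_knob(1);	/* already on: silently ignored */
	write_knob(0);	/* turns the watchdog off */
	write_knob(0);	/* still off: ignored */
	write_knob(1);	/* back on */
	return 0;
}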
+
+ int do_nmi_callback(struct pt_regs *regs, int cpu)
+ {
+ #ifdef CONFIG_SYSCTL
+ 	if (unknown_nmi_panic)
+ 		return unknown_nmi_panic_callback(regs, cpu);
+ #endif
+ 	return 0;
+ }
+
+ void __trigger_all_cpu_backtrace(void)
+ {
+ 	int i;
+
+ 	backtrace_mask = cpu_online_map;
+ 	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+ 	for (i = 0; i < 10 * 1000; i++) {
+ 		if (cpus_empty(backtrace_mask))
+ 			break;
+ 		mdelay(1);
+ 	}
+ }
diff --cc arch/x86/kernel/paravirt.c
index e0f571d,e0f571d,5d7326a,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,74f0c5e,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,e0f571d,2963ab5..b4564d0
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@@@@@@@@@@@@@@@@@@@@ -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,7 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,8 -29,9 +29,9 @@@@@@@@@@@@@@@@@@@@@
  #include
  #include
  #include
+++++++++++++++++++ #include
  #include
+ #include
  #include
  #include
  #include
diff --cc arch/x86/kernel/pci-dma.c
index 8467ec2,8467ec2,8467ec2,702714b,8467ec2,8467ec2,8467ec2,d12945d,8467ec2,8467ec2,dc00a13,8467ec2,8467ec2,8467ec2,8467ec2,8467ec2,8467ec2,8467ec2,8467ec2,8467ec2..a4213c0
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@@@@@@@@@@@@@@@@@@@@ -5,12 -5,12 -5,12 -5,11 -5,12 -5,12 -5,12 -5,12 -5,12 -5,12 -5,11 -5,12 -5,12 -5,12 -5,12 -5,12 -5,12 -5,12 -5,12 -5,12 +5,11 @@@@@@@@@@@@@@@@@@@@@
  #include
  #include
------- ------------#include
+++++++ ++++++++++++#include
  #include
+ #include
--- ----------------int forbid_dac __read_mostly;
--- ----------------EXPORT_SYMBOL(forbid_dac);
+++ ++++++++++++++++static int forbid_dac __read_mostly;
  const struct dma_mapping_ops *dma_ops;
  EXPORT_SYMBOL(dma_ops);
@@@@@@@@@@@@@@@@@@@@@ -123,12 -123,12 -123,12 -122,12 -123,12 -123,12 -123,12 -120,9 -123,12 -123,12 -121,10 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 +119,9 @@@@@@@@@@@@@@@@@@@@@ void __init pci_iommu_alloc(void
  	detect_intel_iommu();
- #ifdef CONFIG_SWIOTLB
+ 	amd_iommu_detect();
+
------- -- ---------#ifdef CONFIG_SWIOTLB
  	pci_swiotlb_init();
------- ------------#endif
  }
  #endif
@@@@@@@@@@@@@@@@@@@@@ -505,12 -505,12 -505,12 -504,12 -505,12 -505,12 -505,12 -496,9 -505,12 -505,12 -501,10 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 +495,9 @@@@@@@@@@@@@@@@@@@@@ static int __init pci_iommu_init(void
  	intel_iommu_init();
- #ifdef CONFIG_GART_IOMMU
+ 	amd_iommu_init();
+
------- -- ---------#ifdef CONFIG_GART_IOMMU
  	gart_iommu_init();
------- ------------#endif
  	no_iommu_init();
  	return 0;
diff --cc arch/x86/kernel/process.c
index 4d629c6,4d629c6,4d629c6,9f94bb1,4d629c6,74f2d196,4061d63,4061d63,7dceea9,4d629c6,ba370dc,4d629c6,4061d63,7dceea9,4d629c6,4d629c6,4061d63,4d629c6,4d629c6,4d629c6..7fc4d5b
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@@@@@@@@@@@@@@@@@@@@ -6,15 -6,15 -6,15 -6,15 -6,15 -6,16 -6,9 -6,9 -6,9 -6,15 -6,8 -6,15 -6,9 -6,9 -6,15 -6,15 -6,9 -6,15 -6,15 -6,15 +6,16 @@@@@@@@@@@@@@@@@@@@@
  #include
  #include
  #include
+ #include
+++ + ++ + #include
+++ + ++ +
+++ + ++ + unsigned long idle_halt;
+++ + ++ + EXPORT_SYMBOL(idle_halt);
+++ + ++ + unsigned long idle_nomwait;
+++ + ++ + EXPORT_SYMBOL(idle_nomwait);
  struct kmem_cache *task_xstate_cachep;
+++++ ++++++++++++++static int force_mwait __cpuinitdata;
  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  {
@@@@@@@@@@@@@@@@@@@@@ -199,15 -199,15 -199,15 -199,16 -199,15 -200,15 -193,15 -193,15 -193,15 -199,15 -122,8 -199,15 -193,15 -193,15 -199,15 -199,15 -193,15 -199,15 -199,15 -199,15 +200,16 @@@@@@@@@@@@@@@@@@@@@ static void poll_idle(void
  *
  * idle=mwait overrides this decision and forces the usage of mwait.
  */
+++ ++++++++++++++++static int __cpuinitdata force_mwait;
+
+ #define MWAIT_INFO			0x05
+ #define MWAIT_ECX_EXTENDED_INFO		0x01
+ #define MWAIT_EDX_C1			0xf0
+
+ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+ {
+ 	u32 eax, ebx, ecx, edx;
+
+ 	if (force_mwait)
+ 		return 1;
diff --cc arch/x86/kernel/setup.c
index 531b55b,531b55b,531b55b,4064616,531b55b,531b55b,36c540d,e5d2089,36c540d,531b55b,6f80b85,531b55b,36c540d,36c540d,531b55b,531b55b,987b6fd,531b55b,531b55b,c9010f8..ec952aa5
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@@@@@@@@@@@@@@@@@@@@ -1,894 -1,894 -1,894 -1,885 -1,894 -1,894 -1,889 -1,880 -1,889 -1,894 -1,139 -1,894 -1,889 -1,889 -1,894 -1,894 -1,881 -1,894 -1,894 -1,897 +1,888 @@@@@@@@@@@@@@@@@@@@@
- #include
+ /*
+  * Copyright (C) 1995 Linus Torvalds
+  *
+  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+  *
+  * Memory region support
+  *	David Parsons , July-August 1999
+  *
+  * Added E820 sanitization routine (removes overlapping memory regions);
+  *	Brian Moyle , February 2001
+  *
+  * Moved CPU detection code to cpu/${cpu}.c
+  *	Patrick Mochel , March 2002
+  *
+  * Provisions for empty E820 memory regions (reported by certain BIOSes).
+  *	Alex Achenbach , December 2002.
+  *
+  */
+
+ /*
+  * This file handles the architecture-dependent parts of initialization
+  */
+
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
  #include
+ #include
  #include
- #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
--- ------ ---------#include
+
+ #include
--- ------ ---------#include
--- ------ ---------#include
--- ------ ---------#include
+ #include
+ #include
+ #include
+ #include
+ #include
- #include
- #include
+ #include
+
+ #include