memory-hotplug: remove memmap of sparse-vmemmap
[platform/adaptation/renesas_rcar/renesas_kernel.git] / arch / x86 / mm / init_64.c
index 75c9a6a..474e28f 100644 (file)
 #include <asm/uv/uv.h>
 #include <asm/setup.h>
 
+#include "mm_internal.h"
+
+/*
+ * Fill the PMD entries of @pmd_page that cover [addr, end) with entries
+ * built from the PMD-aligned address OR'ed with @pmd_flag.  Entries that
+ * are already present are left untouched.
+ */
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+                          unsigned long addr, unsigned long end)
+{
+       unsigned long va;
+
+       /* Start from the PMD boundary at or below addr. */
+       va = addr & PMD_MASK;
+       while (va < end) {
+               pmd_t *entry = pmd_page + pmd_index(va);
+
+               if (!pmd_present(*entry))
+                       set_pmd(entry, __pmd(va | pmd_flag));
+               va += PMD_SIZE;
+       }
+}
+/*
+ * Walk the PUD entries of @pud_page covering [addr, end) and run
+ * ident_pmd_init() on the PMD page behind each one.  A PUD entry that is
+ * not present gets a new PMD page from info->alloc_pgt_page(), which is
+ * filled first and then installed with _KERNPG_TABLE.
+ *
+ * Returns 0 on success, -ENOMEM if a PMD page could not be allocated.
+ */
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+                         unsigned long addr, unsigned long end)
+{
+       unsigned long next;
+
+       while (addr < end) {
+               pud_t *pud = pud_page + pud_index(addr);
+               pmd_t *pmd;
+
+               /* Clamp this step to the end of the current PUD entry. */
+               next = (addr & PUD_MASK) + PUD_SIZE;
+               if (next > end)
+                       next = end;
+
+               if (!pud_present(*pud)) {
+                       /* Populate the new PMD page before hooking it up. */
+                       pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+                       if (!pmd)
+                               return -ENOMEM;
+                       ident_pmd_init(info->pmd_flag, pmd, addr, next);
+                       set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+               } else {
+                       pmd = pmd_offset(pud, 0);
+                       ident_pmd_init(info->pmd_flag, pmd, addr, next);
+               }
+
+               addr = next;
+       }
+
+       return 0;
+}
+
+/*
+ * Build mappings for [addr, end) in the page table rooted at @pgd_page.
+ * Missing PUD pages come from info->alloc_pgt_page(); the lower levels
+ * are handled by ident_pud_init().  When info->kernel_mapping is set the
+ * PGD slot is offset by pgd_index(__PAGE_OFFSET).
+ *
+ * Returns 0 on success, -ENOMEM if a PUD page could not be allocated, or
+ * the non-zero result of ident_pud_init().
+ */
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+                             unsigned long addr, unsigned long end)
+{
+       int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+       unsigned long next;
+       int result;
+
+       while (addr < end) {
+               pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+               pud_t *pud;
+
+               /* Clamp this step to the end of the current PGD entry. */
+               next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+               if (next > end)
+                       next = end;
+
+               if (!pgd_present(*pgd)) {
+                       /* Populate the new PUD page before hooking it up. */
+                       pud = (pud_t *)info->alloc_pgt_page(info->context);
+                       if (!pud)
+                               return -ENOMEM;
+                       result = ident_pud_init(info, pud, addr, next);
+                       if (result)
+                               return result;
+                       set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+               } else {
+                       pud = pud_offset(pgd, 0);
+                       result = ident_pud_init(info, pud, addr, next);
+                       if (result)
+                               return result;
+               }
+
+               addr = next;
+       }
+
+       return 0;
+}
+
 static int __init parse_direct_gbpages_off(char *arg)
 {
        direct_gbpages = 0;
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
 void __init cleanup_highmap(void)
 {
        unsigned long vaddr = __START_KERNEL_map;
-       unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+       unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
        unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
        pmd_t *pmd = level2_kernel_pgt;
 
+       /*
+        * Native path, max_pfn_mapped is not set yet.
+        * Xen has valid max_pfn_mapped set in
+        *      arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
+        */
+       if (max_pfn_mapped)
+               vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+
        for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
                if (pmd_none(*pmd))
                        continue;
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
        }
 }
 
-static __ref void *alloc_low_page(unsigned long *phys)
-{
-       unsigned long pfn = pgt_buf_end++;
-       void *adr;
-
-       if (after_bootmem) {
-               adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
-               *phys = __pa(adr);
-
-               return adr;
-       }
-
-       if (pfn >= pgt_buf_top)
-               panic("alloc_low_page: ran out of memory");
-
-       adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
-       clear_page(adr);
-       *phys  = pfn * PAGE_SIZE;
-       return adr;
-}
-
-static __ref void *map_low_page(void *virt)
-{
-       void *adr;
-       unsigned long phys, left;
-
-       if (after_bootmem)
-               return virt;
-
-       phys = __pa(virt);
-       left = phys & (PAGE_SIZE - 1);
-       adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
-       adr = (void *)(((unsigned long)adr) | left);
-
-       return adr;
-}
-
-static __ref void unmap_low_page(void *adr)
-{
-       if (after_bootmem)
-               return;
-
-       early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
-}
-
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
              pgprot_t prot)
 {
-       unsigned pages = 0;
+       unsigned long pages = 0, next;
        unsigned long last_map_addr = end;
        int i;
 
        pte_t *pte = pte_page + pte_index(addr);
 
-       for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
-
+       for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
+               next = (addr & PAGE_MASK) + PAGE_SIZE;
                if (addr >= end) {
-                       if (!after_bootmem) {
-                               for(; i < PTRS_PER_PTE; i++, pte++)
-                                       set_pte(pte, __pte(0));
-                       }
-                       break;
+                       if (!after_bootmem &&
+                           !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+                               set_pte(pte, __pte(0));
+                       continue;
                }
 
                /*
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
        int i = pmd_index(address);
 
        for (; i < PTRS_PER_PMD; i++, address = next) {
-               unsigned long pte_phys;
                pmd_t *pmd = pmd_page + pmd_index(address);
                pte_t *pte;
                pgprot_t new_prot = prot;
 
+               next = (address & PMD_MASK) + PMD_SIZE;
                if (address >= end) {
-                       if (!after_bootmem) {
-                               for (; i < PTRS_PER_PMD; i++, pmd++)
-                                       set_pmd(pmd, __pmd(0));
-                       }
-                       break;
+                       if (!after_bootmem &&
+                           !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+                               set_pmd(pmd, __pmd(0));
+                       continue;
                }
 
-               next = (address & PMD_MASK) + PMD_SIZE;
-
                if (pmd_val(*pmd)) {
                        if (!pmd_large(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
-                               pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+                               pte = (pte_t *)pmd_page_vaddr(*pmd);
                                last_map_addr = phys_pte_init(pte, address,
                                                                end, prot);
-                               unmap_low_page(pte);
                                spin_unlock(&init_mm.page_table_lock);
                                continue;
                        }
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pmd,
-                               pfn_pte(address >> PAGE_SHIFT,
+                               pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                        spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = next;
                        continue;
                }
 
-               pte = alloc_low_page(&pte_phys);
+               pte = alloc_low_page();
                last_map_addr = phys_pte_init(pte, address, end, new_prot);
-               unmap_low_page(pte);
 
                spin_lock(&init_mm.page_table_lock);
-               pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+               pmd_populate_kernel(&init_mm, pmd, pte);
                spin_unlock(&init_mm.page_table_lock);
        }
        update_page_count(PG_LEVEL_2M, pages);
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
        int i = pud_index(addr);
 
        for (; i < PTRS_PER_PUD; i++, addr = next) {
-               unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;
                pgprot_t prot = PAGE_KERNEL;
 
-               if (addr >= end)
-                       break;
-
                next = (addr & PUD_MASK) + PUD_SIZE;
-
-               if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
-                       set_pud(pud, __pud(0));
+               if (addr >= end) {
+                       if (!after_bootmem &&
+                           !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+                               set_pud(pud, __pud(0));
                        continue;
                }
 
                if (pud_val(*pud)) {
                        if (!pud_large(*pud)) {
-                               pmd = map_low_page(pmd_offset(pud, 0));
+                               pmd = pmd_offset(pud, 0);
                                last_map_addr = phys_pmd_init(pmd, addr, end,
                                                         page_size_mask, prot);
-                               unmap_low_page(pmd);
                                __flush_tlb_all();
                                continue;
                        }
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pud,
-                               pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                               pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
+                                       PAGE_KERNEL_LARGE));
                        spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = next;
                        continue;
                }
 
-               pmd = alloc_low_page(&pmd_phys);
+               pmd = alloc_low_page();
                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
                                              prot);
-               unmap_low_page(pmd);
 
                spin_lock(&init_mm.page_table_lock);
-               pud_populate(&init_mm, pud, __va(pmd_phys));
+               pud_populate(&init_mm, pud, pmd);
                spin_unlock(&init_mm.page_table_lock);
        }
        __flush_tlb_all();
@@ -578,34 +610,29 @@ kernel_physical_mapping_init(unsigned long start,
 
        for (; start < end; start = next) {
                pgd_t *pgd = pgd_offset_k(start);
-               unsigned long pud_phys;
                pud_t *pud;
 
-               next = (start + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end)
-                       next = end;
+               next = (start & PGDIR_MASK) + PGDIR_SIZE;
 
                if (pgd_val(*pgd)) {
-                       pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+                       pud = (pud_t *)pgd_page_vaddr(*pgd);
                        last_map_addr = phys_pud_init(pud, __pa(start),
                                                 __pa(end), page_size_mask);
-                       unmap_low_page(pud);
                        continue;
                }
 
-               pud = alloc_low_page(&pud_phys);
-               last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+               pud = alloc_low_page();
+               last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
                                                 page_size_mask);
-               unmap_low_page(pud);
 
                spin_lock(&init_mm.page_table_lock);
-               pgd_populate(&init_mm, pgd, __va(pud_phys));
+               pgd_populate(&init_mm, pgd, pud);
                spin_unlock(&init_mm.page_table_lock);
                pgd_changed = true;
        }
 
        if (pgd_changed)
-               sync_global_pgds(addr, end);
+               sync_global_pgds(addr, end - 1);
 
        __flush_tlb_all();
 
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
-       unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
 
-       last_mapped_pfn = init_memory_mapping(start, start + size);
-       if (last_mapped_pfn > max_pfn_mapped)
-               max_pfn_mapped = last_mapped_pfn;
+       init_memory_mapping(start, start + size);
 
        ret = __add_pages(nid, zone, start_pfn, nr_pages);
        WARN_ON_ONCE(ret);
@@ -682,10 +707,357 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+/*
+ * Release the (1 << order) pages starting at @page.
+ *
+ * A page with the reserved flag set is treated as a bootmem page: the
+ * flag is cleared on @page, pages tagged SECTION_INFO or MIX_SECTION_INFO
+ * are dropped one at a time with put_page_bootmem(), and any other
+ * reserved block goes through __free_pages_bootmem().  For bootmem pages
+ * the zone's present_pages and totalram_pages are then increased by the
+ * number of pages released.  Non-reserved pages go to free_pages().
+ */
+static void __meminit free_pagetable(struct page *page, int order)
+{
+       struct zone *zone;
+       bool bootmem = false;
+       unsigned long magic;
+       unsigned int nr_pages = 1 << order;
+       unsigned int i;
+
+       /* bootmem page has reserved flag */
+       if (PageReserved(page)) {
+               __ClearPageReserved(page);
+               bootmem = true;
+
+               magic = (unsigned long)page->lru.next;
+               if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+                       /*
+                        * Walk with an index so that neither page nor
+                        * nr_pages is modified: both are needed intact
+                        * for the accounting below.  Counting nr_pages
+                        * down to zero would leave it wrapped around.
+                        */
+                       for (i = 0; i < nr_pages; i++)
+                               put_page_bootmem(page + i);
+               } else
+                       __free_pages_bootmem(page, order);
+       } else
+               free_pages((unsigned long)page_address(page), order);
+
+       /*
+        * SECTION_INFO pages and MIX_SECTION_INFO pages
+        * are all allocated by bootmem.
+        */
+       if (bootmem) {
+               zone = page_zone(page);
+               zone_span_writelock(zone);
+               zone->present_pages += nr_pages;
+               zone_span_writeunlock(zone);
+               totalram_pages += nr_pages;
+       }
+}
+
+/*
+ * Free the PTE page referenced by @pmd and clear @pmd, but only if none
+ * of the PTRS_PER_PTE entries starting at @pte_start holds a non-zero
+ * value.
+ */
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+       pte_t *cur = pte_start;
+       pte_t *limit = pte_start + PTRS_PER_PTE;
+
+       /* A single live entry keeps the whole table. */
+       for (; cur < limit; cur++) {
+               if (pte_val(*cur))
+                       return;
+       }
+
+       /* free a pte table */
+       free_pagetable(pmd_page(*pmd), 0);
+       spin_lock(&init_mm.page_table_lock);
+       pmd_clear(pmd);
+       spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * Free the PMD page referenced by @pud and clear @pud, but only if none
+ * of the PTRS_PER_PMD entries starting at @pmd_start holds a non-zero
+ * value.
+ */
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+       pmd_t *cur = pmd_start;
+       pmd_t *limit = pmd_start + PTRS_PER_PMD;
+
+       /* A single live entry keeps the whole table. */
+       for (; cur < limit; cur++) {
+               if (pmd_val(*cur))
+                       return;
+       }
+
+       /* free a pmd table */
+       free_pagetable(pud_page(*pud), 0);
+       spin_lock(&init_mm.page_table_lock);
+       pud_clear(pud);
+       spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * Free the PUD page referenced by @pgd and clear @pgd, but only if none
+ * of the PTRS_PER_PUD entries starting at @pud_start holds a non-zero
+ * value.
+ *
+ * Returns true if the pgd was changed, false otherwise.
+ */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+       pud_t *cur = pud_start;
+       pud_t *limit = pud_start + PTRS_PER_PUD;
+
+       /* A single live entry keeps the whole table. */
+       for (; cur < limit; cur++) {
+               if (pud_val(*cur))
+                       return false;
+       }
+
+       /* free a pud table */
+       free_pagetable(pgd_page(*pgd), 0);
+       spin_lock(&init_mm.page_table_lock);
+       pgd_clear(pgd);
+       spin_unlock(&init_mm.page_table_lock);
+
+       return true;
+}
+
+/*
+ * Tear down the present PTEs of the table at @pte_start that cover the
+ * virtual range [addr, end).
+ *
+ * A PTE whose whole page lies inside the range is cleared; when @direct
+ * is false the page it maps is also released with free_pagetable().  A
+ * PTE that is only partly covered has the covered bytes filled with
+ * PAGE_INUSE, and is released only once its whole page reads PAGE_INUSE.
+ *
+ * When @direct is true the 4K page count is reduced by the number of
+ * PTEs cleared.  The PTE table itself is not freed here.
+ */
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+                bool direct)
+{
+       unsigned long next, pages = 0;
+       pte_t *pte;
+       void *page_addr;
+       phys_addr_t phys_addr;
+
+       pte = pte_start + pte_index(addr);
+       for (; addr < end; addr = next, pte++) {
+               /* Step to the next page boundary, but never past end. */
+               next = (addr + PAGE_SIZE) & PAGE_MASK;
+               if (next > end)
+                       next = end;
+
+               if (!pte_present(*pte))
+                       continue;
+
+               /*
+                * We mapped [0,1G) memory as identity mapping when
+                * initializing, in arch/x86/kernel/head_64.S. These
+                * pagetables cannot be removed.
+                *
+                * NOTE(review): the sum below adds the raw PTE value to
+                * the page-aligned virtual address; it is not obvious
+                * that this yields a physical address - confirm the
+                * intended computation.  Also note that this return
+                * skips the flush_tlb_all() and the page-count update
+                * at the end of the function.
+                */
+               phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+               if (phys_addr < (phys_addr_t)0x40000000)
+                       return;
+
+               if (IS_ALIGNED(addr, PAGE_SIZE) &&
+                   IS_ALIGNED(next, PAGE_SIZE)) {
+                       /*
+                        * Do not free direct mapping pages since they were
+                        * freed when offlining, or simply not in use.
+                        */
+                       if (!direct)
+                               free_pagetable(pte_page(*pte), 0);
+
+                       spin_lock(&init_mm.page_table_lock);
+                       pte_clear(&init_mm, addr, pte);
+                       spin_unlock(&init_mm.page_table_lock);
+
+                       /* For non-direct mapping, pages means nothing. */
+                       pages++;
+               } else {
+                       /*
+                        * If we are here, we are freeing vmemmap pages since
+                        * direct mapped memory ranges to be freed are aligned.
+                        *
+                        * If we are not removing the whole page, it means
+                        * other page structs in this page are being used and
+                        * we cannot remove them. So fill the unused page_structs
+                        * with 0xFD, and remove the page when it is wholly
+                        * filled with 0xFD.
+                        */
+                       memset((void *)addr, PAGE_INUSE, next - addr);
+
+                       page_addr = page_address(pte_page(*pte));
+                       if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+                               free_pagetable(pte_page(*pte), 0);
+
+                               spin_lock(&init_mm.page_table_lock);
+                               pte_clear(&init_mm, addr, pte);
+                               spin_unlock(&init_mm.page_table_lock);
+                       }
+               }
+       }
+
+       /* Call free_pte_table() in remove_pmd_table(). */
+       flush_tlb_all();
+       if (direct)
+               update_page_count(PG_LEVEL_4K, -pages);
+}
+
+/*
+ * Tear down the present PMD entries of the table at @pmd_start that
+ * cover the virtual range [addr, end).
+ *
+ * A large PMD fully inside the range is cleared; when @direct is false
+ * the PMD_SIZE block it maps is also released with free_pagetable().  A
+ * large PMD that is only partly covered has the covered bytes filled
+ * with PAGE_INUSE and is released only once the whole block reads
+ * PAGE_INUSE.  A non-large PMD is handed to remove_pte_table(), after
+ * which free_pte_table() drops the PTE page if it has become empty.
+ *
+ * When @direct is true the 2M page count is reduced by the number of
+ * large PMDs cleared.  The PMD table itself is not freed here.
+ */
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+                bool direct)
+{
+       unsigned long next, pages = 0;
+       pte_t *pte_base;
+       pmd_t *pmd;
+       void *page_addr;
+
+       pmd = pmd_start + pmd_index(addr);
+       for (; addr < end; addr = next, pmd++) {
+               next = pmd_addr_end(addr, end);
+
+               if (!pmd_present(*pmd))
+                       continue;
+
+               if (pmd_large(*pmd)) {
+                       if (IS_ALIGNED(addr, PMD_SIZE) &&
+                           IS_ALIGNED(next, PMD_SIZE)) {
+                               /* Whole large page is going away. */
+                               if (!direct)
+                                       free_pagetable(pmd_page(*pmd),
+                                                      get_order(PMD_SIZE));
+
+                               spin_lock(&init_mm.page_table_lock);
+                               pmd_clear(pmd);
+                               spin_unlock(&init_mm.page_table_lock);
+                               pages++;
+                       } else {
+                               /* If here, we are freeing vmemmap pages. */
+                               memset((void *)addr, PAGE_INUSE, next - addr);
+
+                               /* Release only once nothing in it is live. */
+                               page_addr = page_address(pmd_page(*pmd));
+                               if (!memchr_inv(page_addr, PAGE_INUSE,
+                                               PMD_SIZE)) {
+                                       free_pagetable(pmd_page(*pmd),
+                                                      get_order(PMD_SIZE));
+
+                                       spin_lock(&init_mm.page_table_lock);
+                                       pmd_clear(pmd);
+                                       spin_unlock(&init_mm.page_table_lock);
+                               }
+                       }
+
+                       continue;
+               }
+
+               /* Not a large page: descend into the PTE table. */
+               pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+               remove_pte_table(pte_base, addr, next, direct);
+               free_pte_table(pte_base, pmd);
+       }
+
+       /* Call free_pmd_table() in remove_pud_table(). */
+       if (direct)
+               update_page_count(PG_LEVEL_2M, -pages);
+}
+
+/*
+ * Tear down the present PUD entries of the table at @pud_start that
+ * cover the virtual range [addr, end).
+ *
+ * A large PUD fully inside the range is cleared; when @direct is false
+ * the PUD_SIZE block it maps is also released with free_pagetable().  A
+ * large PUD that is only partly covered has the covered bytes filled
+ * with PAGE_INUSE and is released only once the whole block reads
+ * PAGE_INUSE.  A non-large PUD is handed to remove_pmd_table(), after
+ * which free_pmd_table() drops the PMD page if it has become empty.
+ *
+ * When @direct is true the 1G page count is reduced by the number of
+ * large PUDs cleared.  The PUD table itself is not freed here.
+ */
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+                bool direct)
+{
+       unsigned long next, pages = 0;
+       pmd_t *pmd_base;
+       pud_t *pud;
+       void *page_addr;
+
+       pud = pud_start + pud_index(addr);
+       for (; addr < end; addr = next, pud++) {
+               next = pud_addr_end(addr, end);
+
+               if (!pud_present(*pud))
+                       continue;
+
+               if (pud_large(*pud)) {
+                       if (IS_ALIGNED(addr, PUD_SIZE) &&
+                           IS_ALIGNED(next, PUD_SIZE)) {
+                               /* Whole large page is going away. */
+                               if (!direct)
+                                       free_pagetable(pud_page(*pud),
+                                                      get_order(PUD_SIZE));
+
+                               spin_lock(&init_mm.page_table_lock);
+                               pud_clear(pud);
+                               spin_unlock(&init_mm.page_table_lock);
+                               pages++;
+                       } else {
+                               /* If here, we are freeing vmemmap pages. */
+                               memset((void *)addr, PAGE_INUSE, next - addr);
+
+                               /* Release only once nothing in it is live. */
+                               page_addr = page_address(pud_page(*pud));
+                               if (!memchr_inv(page_addr, PAGE_INUSE,
+                                               PUD_SIZE)) {
+                                       free_pagetable(pud_page(*pud),
+                                                      get_order(PUD_SIZE));
+
+                                       spin_lock(&init_mm.page_table_lock);
+                                       pud_clear(pud);
+                                       spin_unlock(&init_mm.page_table_lock);
+                               }
+                       }
+
+                       continue;
+               }
+
+               /* Not a large page: descend into the PMD table. */
+               pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+               remove_pmd_table(pmd_base, addr, next, direct);
+               free_pmd_table(pmd_base, pud);
+       }
+
+       if (direct)
+               update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/*
+ * Remove the kernel page table entries covering [start, end), freeing
+ * PUD pages that become empty.  @direct is passed down unchanged to
+ * remove_pud_table().  If any PGD entry was cleared, the range is
+ * re-synchronised with sync_global_pgds() before the TLB is flushed.
+ *
+ * start and end are both virtual addresses.
+ */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+       unsigned long addr, next;
+       pgd_t *pgd;
+       pud_t *pud;
+       bool pgd_changed = false;
+
+       /*
+        * Iterate with a separate cursor.  start must keep its original
+        * value so that sync_global_pgds() below is given the range that
+        * was actually walked rather than one beginning at end.
+        */
+       for (addr = start; addr < end; addr = next) {
+               next = pgd_addr_end(addr, end);
+
+               pgd = pgd_offset_k(addr);
+               if (!pgd_present(*pgd))
+                       continue;
+
+               pud = (pud_t *)pgd_page_vaddr(*pgd);
+               remove_pud_table(pud, addr, next, direct);
+               if (free_pud_table(pud, pgd))
+                       pgd_changed = true;
+       }
+
+       if (pgd_changed)
+               sync_global_pgds(start, end - 1);
+
+       flush_tlb_all();
+}
+
+/*
+ * Remove the page table entries backing the @nr_pages struct page
+ * entries that start at @memmap (non-direct mapping).
+ */
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+       struct page *memmap_end = memmap + nr_pages;
+
+       remove_pagetable((unsigned long)memmap, (unsigned long)memmap_end,
+                        false);
+}
+
+/*
+ * Remove the direct mapping of the physical range [start, end).  Both
+ * bounds are converted with __va() before the page tables are walked.
+ */
+static void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+       unsigned long vstart = (unsigned long)__va(start);
+       unsigned long vend = (unsigned long)__va(end);
+
+       remove_pagetable(vstart, vend, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Remove @size bytes of memory starting at physical address @start:
+ * look up the zone of the first page, drop the direct mapping of the
+ * range, then hand the pages to __remove_pages().
+ *
+ * Returns the result of __remove_pages(); a non-zero result also
+ * triggers a one-time warning.
+ */
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+       unsigned long first_pfn = start >> PAGE_SHIFT;
+       unsigned long page_count = size >> PAGE_SHIFT;
+       /* Looked up before the mapping is torn down, as in the original. */
+       struct zone *zone = page_zone(pfn_to_page(first_pfn));
+       int err;
+
+       kernel_physical_mapping_remove(start, start + size);
+
+       err = __remove_pages(zone, first_pfn, page_count);
+       WARN_ON_ONCE(err);
+
+       return err;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
 
+/*
+ * With CONFIG_NUMA, call register_page_bootmem_info_node() for every
+ * online node.  Without it this function does nothing.
+ */
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NUMA
+       int nid;
+
+       for_each_online_node(nid) {
+               register_page_bootmem_info_node(NODE_DATA(nid));
+       }
+#endif
+}
+
 void __init mem_init(void)
 {
        long codesize, reservedpages, datasize, initsize;
@@ -698,11 +1070,8 @@ void __init mem_init(void)
        reservedpages = 0;
 
        /* this will put all low memory onto the freelists */
-#ifdef CONFIG_NUMA
-       totalram_pages = numa_free_all_bootmem();
-#else
+       register_page_bootmem_info();
        totalram_pages = free_all_bootmem();
-#endif
 
        absent_pages = absent_pages_in_range(0, max_pfn);
        reservedpages = max_pfn - totalram_pages - absent_pages;
@@ -772,12 +1141,11 @@ void set_kernel_text_ro(void)
 void mark_rodata_ro(void)
 {
        unsigned long start = PFN_ALIGN(_text);
-       unsigned long rodata_start =
-               ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+       unsigned long rodata_start = PFN_ALIGN(__start_rodata);
        unsigned long end = (unsigned long) &__end_rodata_hpage_align;
-       unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
-       unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
-       unsigned long data_start = (unsigned long) &_sdata;
+       unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
+       unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+       unsigned long all_end = PFN_ALIGN(&_end);
 
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);
@@ -786,10 +1154,10 @@ void mark_rodata_ro(void)
        kernel_set_to_readonly = 1;
 
        /*
-        * The rodata section (but not the kernel text!) should also be
-        * not-executable.
+        * The rodata/data/bss/brk section (but not the kernel text!)
+        * should also be not-executable.
         */
-       set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
+       set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
 
        rodata_test();
 
@@ -802,12 +1170,12 @@ void mark_rodata_ro(void)
 #endif
 
        free_init_pages("unused kernel memory",
-                       (unsigned long) page_address(virt_to_page(text_end)),
-                       (unsigned long)
-                                page_address(virt_to_page(rodata_start)));
+                       (unsigned long) __va(__pa_symbol(text_end)),
+                       (unsigned long) __va(__pa_symbol(rodata_start)));
+
        free_init_pages("unused kernel memory",
-                       (unsigned long) page_address(virt_to_page(rodata_end)),
-                       (unsigned long) page_address(virt_to_page(data_start)));
+                       (unsigned long) __va(__pa_symbol(rodata_end)),
+                       (unsigned long) __va(__pa_symbol(_sdata)));
 }
 
 #endif
@@ -984,10 +1352,70 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                }
 
        }
-       sync_global_pgds((unsigned long)start_page, end);
+       sync_global_pgds((unsigned long)start_page, end - 1);
        return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+/*
+ * Walk the kernel page tables that map the @size struct page entries
+ * starting at @start_page and call get_page_bootmem() for section
+ * @section_nr on what is found: page-table pages are tagged
+ * MIX_SECTION_INFO and the mapped pages themselves SECTION_INFO.
+ *
+ * Without cpu_has_pse the walk goes down to individual PTEs one page at
+ * a time; with it, each populated PMD is taken as one PMD_SIZE block and
+ * every page of that block is registered.
+ */
+void register_page_bootmem_memmap(unsigned long section_nr,
+                                 struct page *start_page, unsigned long size)
+{
+       unsigned long addr = (unsigned long)start_page;
+       unsigned long end = (unsigned long)(start_page + size);
+       unsigned long next;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       unsigned int nr_pages;
+       struct page *page;
+
+       for (; addr < end; addr = next) {
+               pte_t *pte = NULL;
+
+               /* Nothing mapped at this level: move on by one page. */
+               pgd = pgd_offset_k(addr);
+               if (pgd_none(*pgd)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       continue;
+               }
+               get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+               pud = pud_offset(pgd, addr);
+               if (pud_none(*pud)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       continue;
+               }
+               get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+               if (!cpu_has_pse) {
+                       /* 4K path: register the PTE page and the mapped page. */
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       pmd = pmd_offset(pud, addr);
+                       if (pmd_none(*pmd))
+                               continue;
+                       get_page_bootmem(section_nr, pmd_page(*pmd),
+                                        MIX_SECTION_INFO);
+
+                       pte = pte_offset_kernel(pmd, addr);
+                       if (pte_none(*pte))
+                               continue;
+                       get_page_bootmem(section_nr, pte_page(*pte),
+                                        SECTION_INFO);
+               } else {
+                       /* PMD-sized path: advance to the next PMD boundary. */
+                       next = pmd_addr_end(addr, end);
+
+                       pmd = pmd_offset(pud, addr);
+                       if (pmd_none(*pmd))
+                               continue;
+
+                       /* Register every 4K page of the PMD_SIZE block. */
+                       nr_pages = 1 << (get_order(PMD_SIZE));
+                       page = pmd_page(*pmd);
+                       while (nr_pages--)
+                               get_page_bootmem(section_nr, page++,
+                                                SECTION_INFO);
+               }
+       }
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
        if (p_start) {