[PATCH] freepgt: free_pgtables use vma list

author Hugh Dickins <hugh@veritas.com>

Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)

committer Linus Torvalds <torvalds@ppc970.osdl.org.(none)>

Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)
author Hugh Dickins <hugh@veritas.com>
Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org.(none)>
Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c

index 0742d54..dd81479 100644 (file)
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -255,6 +255,6 @@ void pgd_free(pgd_t *pgd)
         if (PTRS_PER_PMD > 1)
                 for (i = 0; i < USER_PTRS_PER_PGD; ++i)
                         kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-       /* in the non-PAE case, clear_page_range() clears user pgd entries */
+       /* in the non-PAE case, free_pgtables() clears user pgd entries */
         kmem_cache_free(pgd_cache, pgd);
  }
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c

index 40ad832..626258a 100644 (file)
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -187,45 +187,12 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri
  }
  
  /*
- * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
- * are hugetlb region specific.
+ * Do nothing, until we've worked out what to do!  To allow build, we
+ * must remove reference to clear_page_range since it no longer exists.
   */
  void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
         unsigned long start, unsigned long end)
  {
-       unsigned long first = start & HUGETLB_PGDIR_MASK;
-       unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
-       struct mm_struct *mm = tlb->mm;
-
-       if (!prev) {
-               prev = mm->mmap;
-               if (!prev)
-                       goto no_mmaps;
-               if (prev->vm_end > start) {
-                       if (last > prev->vm_start)
-                               last = prev->vm_start;
-                       goto no_mmaps;
-               }
-       }
-       for (;;) {
-               struct vm_area_struct *next = prev->vm_next;
-
-               if (next) {
-                       if (next->vm_start < start) {
-                               prev = next;
-                               continue;
-                       }
-                       if (last > next->vm_start)
-                               last = next->vm_start;
-               }
-               if (prev->vm_end > first)
-                       first = prev->vm_end;
-               break;
-       }
-no_mmaps:
-       if (last < first)       /* for arches with discontiguous pgd indices */
-               return;
-       clear_page_range(tlb, first, last);
  }
  
  void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 85f7d1b..c3f6c39 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -592,7 +592,8 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                 struct vm_area_struct *start_vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *);
-void clear_page_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end);
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+               unsigned long floor, unsigned long ceiling);
  int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                         struct vm_area_struct *vma);
  int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
diff --git a/mm/memory.c b/mm/memory.c

index fb6e5de..fee5dc8 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -110,87 +110,165 @@ void pmd_clear_bad(pmd_t *pmd)
   * Note: this doesn't free the actual pages themselves. That
   * has been handled earlier when unmapping all the memory regions.
   */
-static inline void clear_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
-                               unsigned long addr, unsigned long end)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
  {
-       if (!((addr | end) & ~PMD_MASK)) {
-               /* Only free fully aligned ranges */
-               struct page *page = pmd_page(*pmd);
-               pmd_clear(pmd);
-               dec_page_state(nr_page_table_pages);
-               tlb->mm->nr_ptes--;
-               pte_free_tlb(tlb, page);
-       }
+       struct page *page = pmd_page(*pmd);
+       pmd_clear(pmd);
+       pte_free_tlb(tlb, page);
+       dec_page_state(nr_page_table_pages);
+       tlb->mm->nr_ptes--;
  }
  
-static inline void clear_pmd_range(struct mmu_gather *tlb, pud_t *pud,
-                               unsigned long addr, unsigned long end)
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
  {
         pmd_t *pmd;
         unsigned long next;
-       pmd_t *empty_pmd = NULL;
+       unsigned long start;
  
+       start = addr;
         pmd = pmd_offset(pud, addr);
-
-       /* Only free fully aligned ranges */
-       if (!((addr | end) & ~PUD_MASK))
-               empty_pmd = pmd;
         do {
                 next = pmd_addr_end(addr, end);
                 if (pmd_none_or_clear_bad(pmd))
                         continue;
-               clear_pte_range(tlb, pmd, addr, next);
+               free_pte_range(tlb, pmd);
         } while (pmd++, addr = next, addr != end);
  
-       if (empty_pmd) {
-               pud_clear(pud);
-               pmd_free_tlb(tlb, empty_pmd);
+       start &= PUD_MASK;
+       if (start < floor)
+               return;
+       if (ceiling) {
+               ceiling &= PUD_MASK;
+               if (!ceiling)
+                       return;
         }
+       if (end - 1 > ceiling - 1)
+               return;
+
+       pmd = pmd_offset(pud, start);
+       pud_clear(pud);
+       pmd_free_tlb(tlb, pmd);
  }
  
-static inline void clear_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
-                               unsigned long addr, unsigned long end)
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
  {
         pud_t *pud;
         unsigned long next;
-       pud_t *empty_pud = NULL;
+       unsigned long start;
  
+       start = addr;
         pud = pud_offset(pgd, addr);
-
-       /* Only free fully aligned ranges */
-       if (!((addr | end) & ~PGDIR_MASK))
-               empty_pud = pud;
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud))
                         continue;
-               clear_pmd_range(tlb, pud, addr, next);
+               free_pmd_range(tlb, pud, addr, next, floor, ceiling);
         } while (pud++, addr = next, addr != end);
  
-       if (empty_pud) {
-               pgd_clear(pgd);
-               pud_free_tlb(tlb, empty_pud);
+       start &= PGDIR_MASK;
+       if (start < floor)
+               return;
+       if (ceiling) {
+               ceiling &= PGDIR_MASK;
+               if (!ceiling)
+                       return;
         }
+       if (end - 1 > ceiling - 1)
+               return;
+
+       pud = pud_offset(pgd, start);
+       pgd_clear(pgd);
+       pud_free_tlb(tlb, pud);
  }
  
  /*
- * This function clears user-level page tables of a process.
- * Unlike other pagetable walks, some memory layouts might give end 0.
+ * This function frees user-level page tables of a process.
+ *
   * Must be called with pagetable lock held.
   */
-void clear_page_range(struct mmu_gather *tlb,
-                               unsigned long addr, unsigned long end)
+static inline void free_pgd_range(struct mmu_gather *tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
  {
         pgd_t *pgd;
         unsigned long next;
+       unsigned long start;
  
+       /*
+        * The next few lines have given us lots of grief...
+        *
+        * Why are we testing PMD* at this top level?  Because often
+        * there will be no work to do at all, and we'd prefer not to
+        * go all the way down to the bottom just to discover that.
+        *
+        * Why all these "- 1"s?  Because 0 represents both the bottom
+        * of the address space and the top of it (using -1 for the
+        * top wouldn't help much: the masks would do the wrong thing).
+        * The rule is that addr 0 and floor 0 refer to the bottom of
+        * the address space, but end 0 and ceiling 0 refer to the top.
+        * Comparisons need to use "end - 1" and "ceiling - 1" (though
+        * that end 0 case should be mythical).
+        *
+        * Wherever addr is brought up or ceiling brought down, we must
+        * be careful to reject "the opposite 0" before it confuses the
+        * subsequent tests.  But what about where end is brought down
+        * by PMD_SIZE below? no, end can't go down to 0 there.
+        *
+        * Whereas we round start (addr) and ceiling down, by different
+        * masks at different levels, in order to test whether a table
+        * now has no other vmas using it, so can be freed, we don't
+        * bother to round floor or end up - the tests don't need that.
+        */
+
+       addr &= PMD_MASK;
+       if (addr < floor) {
+               addr += PMD_SIZE;
+               if (!addr)
+                       return;
+       }
+       if (ceiling) {
+               ceiling &= PMD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               end -= PMD_SIZE;
+       if (addr > end - 1)
+               return;
+
+       start = addr;
         pgd = pgd_offset(tlb->mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-               clear_pud_range(tlb, pgd, addr, next);
+               free_pud_range(tlb, pgd, addr, next, floor, ceiling);
         } while (pgd++, addr = next, addr != end);
+
+       if (!tlb_is_full_mm(tlb))
+               flush_tlb_pgtables(tlb->mm, start, end);
+}
+
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+                               unsigned long floor, unsigned long ceiling)
+{
+       while (vma) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long addr = vma->vm_start;
+
+               /* Optimization: gather nearby vmas into a single call down */
+               while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
+                       vma = next;
+                       next = vma->vm_next;
+               }
+               free_pgd_range(*tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               vma = next;
+       }
  }
  
  pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
diff --git a/mm/mmap.c b/mm/mmap.c

index a95ebda..926d030 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -29,6 +29,10 @@
  #include <asm/cacheflush.h>
  #include <asm/tlb.h>
  
+static void unmap_region(struct mm_struct *mm,
+               struct vm_area_struct *vma, struct vm_area_struct *prev,
+               unsigned long start, unsigned long end);
+
  /*
   * WARNING: the debugging will use recursive algorithms so never enable this
   * unless you know what you are doing.
@@ -1129,7 +1133,8 @@ unmap_and_free_vma:
         fput(file);
  
         /* Undo any partial mapping done by a device driver. */
-       zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+       charged = 0;
  free_vma:
         kmem_cache_free(vm_area_cachep, vma);
  unacct_error:
@@ -1572,66 +1577,6 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
  }
  #endif
  
-/*
- * Try to free as many page directory entries as we can,
- * without having to work very hard at actually scanning
- * the page tables themselves.
- *
- * Right now we try to free page tables if we have a nice
- * PGDIR-aligned area that got free'd up. We could be more
- * granular if we want to, but this is fast and simple,
- * and covers the bad cases.
- *
- * "prev", if it exists, points to a vma before the one
- * we just free'd - but there's no telling how much before.
- */
-static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-       unsigned long start, unsigned long end)
-{
-       unsigned long first = start & PGDIR_MASK;
-       unsigned long last = end + PGDIR_SIZE - 1;
-       struct mm_struct *mm = tlb->mm;
-
-       if (last > MM_VM_SIZE(mm) || last < end)
-               last = MM_VM_SIZE(mm);
-
-       if (!prev) {
-               prev = mm->mmap;
-               if (!prev)
-                       goto no_mmaps;
-               if (prev->vm_end > start) {
-                       if (last > prev->vm_start)
-                               last = prev->vm_start;
-                       goto no_mmaps;
-               }
-       }
-       for (;;) {
-               struct vm_area_struct *next = prev->vm_next;
-
-               if (next) {
-                       if (next->vm_start < start) {
-                               prev = next;
-                               continue;
-                       }
-                       if (last > next->vm_start)
-                               last = next->vm_start;
-               }
-               if (prev->vm_end > first)
-                       first = prev->vm_end;
-               break;
-       }
-no_mmaps:
-       if (last < first)       /* for arches with discontiguous pgd indices */
-               return;
-       if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
-               first = FIRST_USER_PGD_NR * PGDIR_SIZE;
-       /* No point trying to free anything if we're in the same pte page */
-       if ((first & PMD_MASK) < (last & PMD_MASK)) {
-               clear_page_range(tlb, first, last);
-               flush_tlb_pgtables(mm, first, last);
-       }
-}
-
  /* Normal function to fix up a mapping
   * This function is the default for when an area has no specific
   * function.  This may be used as part of a more specific routine.
@@ -1674,24 +1619,22 @@ static void unmap_vma_list(struct mm_struct *mm,
   * Called with the page table lock held.
   */
  static void unmap_region(struct mm_struct *mm,
-       struct vm_area_struct *vma,
-       struct vm_area_struct *prev,
-       unsigned long start,
-       unsigned long end)
+               struct vm_area_struct *vma, struct vm_area_struct *prev,
+               unsigned long start, unsigned long end)
  {
+       struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
         struct mmu_gather *tlb;
         unsigned long nr_accounted = 0;
  
         lru_add_drain();
+       spin_lock(&mm->page_table_lock);
         tlb = tlb_gather_mmu(mm, 0);
         unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
-
-       if (is_hugepage_only_range(mm, start, end - start))
-               hugetlb_free_pgtables(tlb, prev, start, end);
-       else
-               free_pgtables(tlb, prev, start, end);
+       free_pgtables(&tlb, vma, prev? prev->vm_end: 0,
+                                next? next->vm_start: 0);
         tlb_finish_mmu(tlb, start, end);
+       spin_unlock(&mm->page_table_lock);
  }
  
  /*
@@ -1823,9 +1766,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
          * Remove the vma's, and unmap the actual pages
          */
         detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
-       spin_lock(&mm->page_table_lock);
         unmap_region(mm, mpnt, prev, start, end);
-       spin_unlock(&mm->page_table_lock);
  
         /* Fix up all other VM information */
         unmap_vma_list(mm, mpnt);
@@ -1957,25 +1898,21 @@ EXPORT_SYMBOL(do_brk);
  void exit_mmap(struct mm_struct *mm)
  {
         struct mmu_gather *tlb;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = mm->mmap;
         unsigned long nr_accounted = 0;
  
         lru_add_drain();
  
         spin_lock(&mm->page_table_lock);
  
-       tlb = tlb_gather_mmu(mm, 1);
         flush_cache_mm(mm);
-       /* Use ~0UL here to ensure all VMAs in the mm are unmapped */
-       mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
-                                       ~0UL, &nr_accounted, NULL);
+       tlb = tlb_gather_mmu(mm, 1);
+       /* Use -1 here to ensure all VMAs in the mm are unmapped */
+       mm->map_count -= unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
-       BUG_ON(mm->map_count);  /* This is just debugging */
-       clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
-       
+       free_pgtables(&tlb, vma, 0, 0);
         tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
  
-       vma = mm->mmap;
         mm->mmap = mm->mmap_cache = NULL;
         mm->mm_rb = RB_ROOT;
         set_mm_counter(mm, rss, 0);
@@ -1993,6 +1930,9 @@ void exit_mmap(struct mm_struct *mm)
                 remove_vm_struct(vma);
                 vma = next;
         }
+
+       BUG_ON(mm->map_count);  /* This is just debugging */
+       BUG_ON(mm->nr_ptes);    /* This is just debugging */
  }
  
  /* Insert vm structure into process list sorted by address
author	Hugh Dickins <hugh@veritas.com>
	Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)
committer	Linus Torvalds <torvalds@ppc970.osdl.org.(none)>
	Tue, 19 Apr 2005 20:29:15 +0000 (13:29 -0700)
arch/i386/mm/pgtable.c		patch | blob | history
arch/ia64/mm/hugetlbpage.c		patch | blob | history
include/linux/mm.h		patch | blob | history
mm/memory.c		patch | blob | history
mm/mmap.c		patch | blob | history