unsigned long size, pgoff;
int did_readaround = 0, majmin = VM_FAULT_MINOR;
+ BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE));
+
pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
-retry_all:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (pgoff >= size)
goto outside_data_content;
* Do we have something in the page cache already?
*/
retry_find:
- page = find_get_page(mapping, pgoff);
+ page = find_lock_page(mapping, pgoff);
if (!page) {
unsigned long ra_pages;
start = pgoff - ra_pages / 2;
do_page_cache_readahead(mapping, file, start, ra_pages);
}
- page = find_get_page(mapping, pgoff);
+ page = find_lock_page(mapping, pgoff);
if (!page)
goto no_cached_page;
}
ra->mmap_hit++;
/*
- * Ok, found a page in the page cache, now we need to check
- * that it's up-to-date.
+ * We have a locked page in the page cache, now we need to check
+ * that it's up-to-date. If not, it is going to be due to an error.
*/
- if (!PageUptodate(page))
+ if (unlikely(!PageUptodate(page)))
goto page_not_uptodate;
-success:
+ /* Must recheck i_size under page lock */
+ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(pgoff >= size)) {
+ unlock_page(page);
+ goto outside_data_content;
+ }
+
/*
* Found the page and have a reference on it.
*/
return NOPAGE_SIGBUS;
page_not_uptodate:
+ /* IO error path */
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
* because there really aren't any performance issues here
* and we need to check for errors.
*/
- lock_page(page);
-
- /* Somebody truncated the page on us? */
- if (!page->mapping) {
- unlock_page(page);
- page_cache_release(page);
- goto retry_all;
- }
-
- /* Somebody else successfully read it in? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
ClearPageError(page);
error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
+ page_cache_release(page);
+
+ if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
- }
- /*
- * Things didn't work out. Return zero to tell the
- * mm layer so, possibly freeing the page cache page first.
- */
+ /* Things didn't work out. Return zero to tell the mm layer so. */
shrink_readahead_size_eio(file, ra);
- page_cache_release(page);
return NOPAGE_SIGBUS;
}
EXPORT_SYMBOL(filemap_nopage);
return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &generic_file_vm_ops;
+ vma->vm_flags |= VM_CAN_INVALIDATE;
return 0;
}
unsigned long restart_addr;
int need_break;
+ /*
+ * files that support invalidating or truncating portions of the
+ * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
+ * have their .nopage function return the page locked.
+ */
+ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
again:
restart_addr = vma->vm_truncate_count;
if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
spin_lock(&mapping->i_mmap_lock);
- /* serialize i_size write against truncate_count write */
- smp_wmb();
- /* Protect against page faults, and endless unmapping loops */
+ /* Protect against endless unmapping loops */
mapping->truncate_count++;
- /*
- * For archs where spin_lock has inclusive semantics like ia64
- * this smp_mb() will prevent to read pagetable contents
- * before the truncate_count increment is visible to
- * other cpus.
- */
- smp_mb();
if (unlikely(is_restart_addr(mapping->truncate_count))) {
if (mapping->truncate_count == 0)
reset_vma_truncate_counts(mapping);
if (IS_SWAPFILE(inode))
goto out_busy;
i_size_write(inode, offset);
+
+ /*
+ * unmap_mapping_range is called twice, first simply for efficiency
+ * so that truncate_inode_pages does fewer single-page unmaps. However
+ * after this first call, and before truncate_inode_pages finishes,
+ * it is possible for private pages to be COWed, which remain after
+ * truncate_inode_pages finishes, hence the second unmap_mapping_range
+ * call must be made for correctness.
+ */
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
+ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
goto out_truncate;
do_expand:
down_write(&inode->i_alloc_sem);
unmap_mapping_range(mapping, offset, (end - offset), 1);
truncate_inode_pages_range(mapping, offset, end);
+ unmap_mapping_range(mapping, offset, (end - offset), 1);
inode->i_op->truncate_range(inode, offset, end);
up_write(&inode->i_alloc_sem);
mutex_unlock(&inode->i_mutex);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
- lazy_mmu_prot_update(pte);
unlock:
pte_unmap_unlock(page_table, ptl);
out:
int write_access)
{
spinlock_t *ptl;
- struct page *new_page;
- struct address_space *mapping = NULL;
+ struct page *page, *nopage_page;
pte_t entry;
- unsigned int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
struct page *dirty_page = NULL;
pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP);
- if (vma->vm_file) {
- mapping = vma->vm_file->f_mapping;
- sequence = mapping->truncate_count;
- smp_rmb(); /* serializes i_size against truncate_count */
- }
-retry:
- new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
- /*
- * No smp_rmb is needed here as long as there's a full
- * spin_lock/unlock sequence inside the ->nopage callback
- * (for the pagecache lookup) that acts as an implicit
- * smp_mb() and prevents the i_size read to happen
- * after the next truncate_count read.
- */
-
+ nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
/* no page was available -- either SIGBUS, OOM or REFAULT */
- if (unlikely(new_page == NOPAGE_SIGBUS))
+ if (unlikely(nopage_page == NOPAGE_SIGBUS))
return VM_FAULT_SIGBUS;
- else if (unlikely(new_page == NOPAGE_OOM))
+ else if (unlikely(nopage_page == NOPAGE_OOM))
return VM_FAULT_OOM;
- else if (unlikely(new_page == NOPAGE_REFAULT))
+ else if (unlikely(nopage_page == NOPAGE_REFAULT))
return VM_FAULT_MINOR;
+ BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
+ /*
+ * For consistency in subsequent calls, make the nopage_page always
+ * locked.
+ */
+ if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
+ lock_page(nopage_page);
+
/*
* Should we do an early C-O-W break?
*/
+ page = nopage_page;
if (write_access) {
if (!(vma->vm_flags & VM_SHARED)) {
- struct page *page;
-
- if (unlikely(anon_vma_prepare(vma)))
- goto oom;
- page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
- vma, address);
- if (!page)
- goto oom;
- copy_user_highpage(page, new_page, address, vma);
- page_cache_release(new_page);
- new_page = page;
+ if (unlikely(anon_vma_prepare(vma))) {
+ ret = VM_FAULT_OOM;
+ goto out_error;
+ }
+ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out_error;
+ }
+ copy_user_highpage(page, nopage_page, address, vma);
anon = 1;
-
} else {
/* if the page will be shareable, see if the backing
* address space wants to know that the page is about
* to become writable */
if (vma->vm_ops->page_mkwrite &&
- vma->vm_ops->page_mkwrite(vma, new_page) < 0
- ) {
- page_cache_release(new_page);
- return VM_FAULT_SIGBUS;
+ vma->vm_ops->page_mkwrite(vma, page) < 0) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_error;
}
}
}
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
- /*
- * For a file-backed vma, someone could have truncated or otherwise
- * invalidated this page. If unmap_mapping_range got called,
- * retry getting the page.
- */
- if (mapping && unlikely(sequence != mapping->truncate_count)) {
- pte_unmap_unlock(page_table, ptl);
- page_cache_release(new_page);
- cond_resched();
- sequence = mapping->truncate_count;
- smp_rmb();
- goto retry;
- }
/*
* This silly early PAGE_DIRTY setting removes a race
* handle that later.
*/
/* Only go through if we didn't race with anybody else... */
- if (pte_none(*page_table)) {
- flush_icache_page(vma, new_page);
- entry = mk_pte(new_page, vma->vm_page_prot);
+ if (likely(pte_none(*page_table))) {
+ flush_icache_page(vma, page);
+ entry = mk_pte(page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
- inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(new_page);
- page_add_new_anon_rmap(new_page, vma, address);
+ inc_mm_counter(mm, anon_rss);
+ lru_cache_add_active(page);
+ page_add_new_anon_rmap(page, vma, address);
} else {
inc_mm_counter(mm, file_rss);
- page_add_file_rmap(new_page);
+ page_add_file_rmap(page);
if (write_access) {
- dirty_page = new_page;
+ dirty_page = page;
get_page(dirty_page);
}
}
+
+ /* no need to invalidate: a not-present page won't be cached */
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
} else {
- /* One of our sibling threads was faster, back out. */
- page_cache_release(new_page);
- goto unlock;
+ if (anon)
+ page_cache_release(page);
+ else
+ anon = 1; /* not anon, but release nopage_page */
}
- /* no need to invalidate: a not-present page shouldn't be cached */
- update_mmu_cache(vma, address, entry);
- lazy_mmu_prot_update(entry);
-unlock:
pte_unmap_unlock(page_table, ptl);
- if (dirty_page) {
+
+out:
+ unlock_page(nopage_page);
+ if (anon)
+ page_cache_release(nopage_page);
+ else if (dirty_page) {
set_page_dirty_balance(dirty_page);
put_page(dirty_page);
}
+
return ret;
-oom:
- page_cache_release(new_page);
- return VM_FAULT_OOM;
+
+out_error:
+ anon = 1; /* relase nopage_page */
+ goto out;
}
/*