* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affilates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
#include <linux/compiler.h>
#include <linux/srcu.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
return 0;
}
-static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
+ struct kmem_cache *cache)
{
while (mc->nobjs)
- kfree(mc->objects[--mc->nobjs]);
+ kmem_cache_free(cache, mc->objects[--mc->nobjs]);
}
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
- mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache);
- mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache);
+ mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
+ mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
- mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
+ mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
+ mmu_page_header_cache);
}
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
- kfree(pc);
+ kmem_cache_free(pte_chain_cache, pc);
}
static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
- kfree(rd);
+ kmem_cache_free(rmap_desc_cache, rd);
}
/*
list_del(&sp->link);
__free_page(virt_to_page(sp->spt));
__free_page(virt_to_page(sp->gfns));
- kfree(sp);
+ kmem_cache_free(mmu_page_header_cache, sp);
++kvm->arch.n_free_mmu_pages;
}
return __mmu_unsync_walk(sp, pvec);
}
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
-{
- unsigned index;
- struct hlist_head *bucket;
- struct kvm_mmu_page *sp;
- struct hlist_node *node;
-
- pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
- index = kvm_page_table_hashfn(gfn);
- bucket = &kvm->arch.mmu_page_hash[index];
- hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.direct
- && !sp->role.invalid) {
- pgprintk("%s: found role %x\n",
- __func__, sp->role.word);
- return sp;
- }
- return NULL;
-}
-
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ bool clear_unsync)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
}
- if (rmap_write_protect(vcpu->kvm, sp->gfn))
- kvm_flush_remote_tlbs(vcpu->kvm);
- kvm_unlink_unsync_page(vcpu->kvm, sp);
+ if (clear_unsync) {
+ if (rmap_write_protect(vcpu->kvm, sp->gfn))
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
+ }
+
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
return 0;
}
+static void mmu_convert_notrap(struct kvm_mmu_page *sp);
+static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp)
+{
+ int ret;
+
+ ret = __kvm_sync_page(vcpu, sp, false);
+ if (!ret)
+ mmu_convert_notrap(sp);
+ return ret;
+}
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+ return __kvm_sync_page(vcpu, sp, true);
+}
+
struct mmu_page_path {
struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
unsigned int idx[PT64_ROOT_LEVEL-1];
unsigned index;
unsigned quadrant;
struct hlist_head *bucket;
- struct kvm_mmu_page *sp;
+ struct kvm_mmu_page *sp, *unsync_sp = NULL;
struct hlist_node *node, *tmp;
role = vcpu->arch.mmu.base_role;
hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
if (sp->gfn == gfn) {
if (sp->unsync)
- if (kvm_sync_page(vcpu, sp))
- continue;
+ unsync_sp = sp;
if (sp->role.word != role.word)
continue;
+ if (!direct && unsync_sp &&
+ kvm_sync_page_transient(vcpu, unsync_sp)) {
+ unsync_sp = NULL;
+ break;
+ }
+
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
if (sp->unsync_children) {
set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
kvm_mmu_mark_parents_unsync(sp);
- }
+ } else if (sp->unsync)
+ kvm_mmu_mark_parents_unsync(sp);
+
trace_kvm_mmu_get_page(sp, false);
return sp;
}
+ if (!direct && unsync_sp)
+ kvm_sync_page(vcpu, unsync_sp);
+
++vcpu->kvm->stat.mmu_cache_miss;
sp = kvm_mmu_alloc_page(vcpu, parent_pte);
if (!sp)
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
if (!sp->root_count) {
+ /* Count self */
+ ret++;
hlist_del(&sp->hash_link);
kvm_mmu_free_page(kvm, sp);
} else {
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
used_pages -= kvm_mmu_zap_page(kvm, page);
- used_pages--;
}
kvm_nr_mmu_pages = used_pages;
kvm->arch.n_free_mmu_pages = 0;
}
EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
-static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+ trace_kvm_mmu_unsync_page(sp);
+ ++vcpu->kvm->stat.mmu_unsync;
+ sp->unsync = 1;
+
+ kvm_mmu_mark_parents_unsync(sp);
+ mmu_convert_notrap(sp);
+}
+
+static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- unsigned index;
struct hlist_head *bucket;
struct kvm_mmu_page *s;
struct hlist_node *node, *n;
+ unsigned index;
- index = kvm_page_table_hashfn(sp->gfn);
+ index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
- /* don't unsync if pagetable is shadowed with multiple roles */
+
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.direct)
+ if (s->gfn != gfn || s->role.direct || s->unsync ||
+ s->role.invalid)
continue;
- if (s->role.word != sp->role.word)
- return 1;
+ WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+ __kvm_unsync_page(vcpu, s);
}
- trace_kvm_mmu_unsync_page(sp);
- ++vcpu->kvm->stat.mmu_unsync;
- sp->unsync = 1;
-
- kvm_mmu_mark_parents_unsync(sp);
-
- mmu_convert_notrap(sp);
- return 0;
}
static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
bool can_unsync)
{
- struct kvm_mmu_page *shadow;
+ unsigned index;
+ struct hlist_head *bucket;
+ struct kvm_mmu_page *s;
+ struct hlist_node *node, *n;
+ bool need_unsync = false;
+
+ index = kvm_page_table_hashfn(gfn);
+ bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+ hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+ if (s->gfn != gfn || s->role.direct || s->role.invalid)
+ continue;
- shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
- if (shadow) {
- if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+ if (s->role.level != PT_PAGE_TABLE_LEVEL)
return 1;
- if (shadow->unsync)
- return 0;
- if (can_unsync && oos_shadow)
- return kvm_unsync_page(vcpu, shadow);
- return 1;
+
+ if (!need_unsync && !s->unsync) {
+ if (!can_unsync || !oos_shadow)
+ return 1;
+ need_unsync = true;
+ }
}
+ if (need_unsync)
+ kvm_unsync_pages(vcpu, gfn);
return 0;
}
if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level)) {
ret = 1;
+ rmap_remove(vcpu->kvm, sptep);
spte = shadow_trap_nonpresent_pte;
goto set_pte;
}
return pt_write;
}
+static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn)
+{
+ char buf[1];
+ void __user *hva;
+ int r;
+
+ /* Touch the page, so send SIGBUS */
+ hva = (void __user *)gfn_to_hva(kvm, gfn);
+ r = copy_from_user(buf, hva, 1);
+}
+
+static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
+{
+ kvm_release_pfn_clean(pfn);
+ if (is_hwpoison_pfn(pfn)) {
+ kvm_send_hwpoison_signal(kvm, gfn);
+ return 0;
+ }
+ return 1;
+}
+
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int r;
pfn = gfn_to_pfn(vcpu->kvm, gfn);
/* mmio */
- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- return 1;
- }
+ if (is_error_pfn(pfn))
+ return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
root_gfn = 0;
}
spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root_gfn = i << 30;
}
spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);
- if (is_error_pfn(pfn)) {
- kvm_release_pfn_clean(pfn);
- return 1;
- }
+ if (is_error_pfn(pfn))
+ return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
- if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+ if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ /* mmu.free() should set root_hpa = INVALID_PAGE */
vcpu->arch.mmu.free(vcpu);
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
- }
}
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
r = mmu_topup_memory_caches(vcpu);
if (r)
goto out;
- spin_lock(&vcpu->kvm->mmu_lock);
- kvm_mmu_free_some_pages(vcpu);
- spin_unlock(&vcpu->kvm->mmu_lock);
r = mmu_alloc_roots(vcpu);
spin_lock(&vcpu->kvm->mmu_lock);
mmu_sync_roots(vcpu);
return 1;
case EMULATE_DO_MMIO:
++vcpu->stat.mmio_exits;
- return 0;
+ /* fall through */
case EMULATE_FAIL:
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
return 0;
default:
BUG();
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- return kvm_mmu_zap_page(kvm, page) + 1;
+ return kvm_mmu_zap_page(kvm, page);
}
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
struct kvm *kvm;
struct kvm *kvm_freed = NULL;