KVM: MMU: reclaim the zapped-obsolete page first
authorXiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Fri, 31 May 2013 00:36:29 +0000 (08:36 +0800)
committerGleb Natapov <gleb@redhat.com>
Wed, 5 Jun 2013 09:33:33 +0000 (12:33 +0300)
As Marcelo pointed out that
| "(retention of large number of pages while zapping)
| can be fatal, it can lead to OOM and host crash"

We introduce a list, kvm->arch.zapped_obsolete_pages, to link all
the pages which are deleted from the mmu cache but not actually
freed. When page reclaiming is needed, we always zap this kind of
pages first.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu.c
arch/x86/kvm/x86.c

index bff7d46..1f98c1b 100644 (file)
@@ -536,6 +536,8 @@ struct kvm_arch {
         * Hash table of struct kvm_mmu_page.
         */
        struct list_head active_mmu_pages;
+       struct list_head zapped_obsolete_pages;
+
        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
        int iommu_flags;
index 674c044..79af88a 100644 (file)
@@ -4211,7 +4211,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
-       LIST_HEAD(invalid_list);
        int batch = 0;
 
 restart:
@@ -4244,7 +4243,8 @@ restart:
                        goto restart;
                }
 
-               ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+               ret = kvm_mmu_prepare_zap_page(kvm, sp,
+                               &kvm->arch.zapped_obsolete_pages);
                batch += ret;
 
                if (ret)
@@ -4255,7 +4255,7 @@ restart:
         * Should flush tlb before free page tables since lockless-walking
         * may use the pages.
         */
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+       kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
 /*
@@ -4306,6 +4306,11 @@ restart:
        spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+       return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct kvm *kvm;
@@ -4334,15 +4339,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
                 * want to shrink a VM that only started to populate its MMU
                 * anyway.
                 */
-               if (!kvm->arch.n_used_mmu_pages)
+               if (!kvm->arch.n_used_mmu_pages &&
+                     !kvm_has_zapped_obsolete_pages(kvm))
                        continue;
 
                idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
 
+               if (kvm_has_zapped_obsolete_pages(kvm)) {
+                       kvm_mmu_commit_zap_page(kvm,
+                             &kvm->arch.zapped_obsolete_pages);
+                       goto unlock;
+               }
+
                prepare_zap_oldest_mmu_page(kvm, &invalid_list);
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
                spin_unlock(&kvm->mmu_lock);
                srcu_read_unlock(&kvm->srcu, idx);
 
index 15e10f7..6402951 100644 (file)
@@ -6832,6 +6832,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                return -EINVAL;
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */