]> git.baikalelectronics.ru Git - kernel.git/commitdiff
KVM: x86/MMU: Zap non-leaf SPTEs when disabling dirty logging
authorBen Gardon <bgardon@google.com>
Wed, 25 May 2022 23:09:04 +0000 (23:09 +0000)
committerPaolo Bonzini <pbonzini@redhat.com>
Tue, 7 Jun 2022 15:28:49 +0000 (11:28 -0400)
Currently disabling dirty logging with the TDP MMU is extremely slow.
On a 96 vCPU / 96G VM backed with gigabyte pages, it takes ~200 seconds
to disable dirty logging with the TDP MMU, as opposed to ~4 seconds with
the shadow MMU.

When disabling dirty logging, zap non-leaf parent entries to allow
replacement with huge pages instead of recursing and zapping all of the
child, leaf entries. This reduces the number of TLB flushes required.
and reduces the disable dirty log time with the TDP MMU to ~3 seconds.

Opportunistically add a WARN() to catch GFNs that are mapped at a
higher level than their max level.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20220525230904.1584480-1-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_iter.h
arch/x86/kvm/mmu/tdp_mmu.c

index 6d3b3e5a5533b174ab30a5cc4c7ef12f3d26fb72..ee4802d7b36cd308c83c6966db67993613c1d5fc 100644 (file)
@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
        return true;
 }
 
+/*
+ * Step the iterator back up a level in the paging structure. Should only be
+ * used when the iterator is below the root level.
+ */
+void tdp_iter_step_up(struct tdp_iter *iter)
+{
+       WARN_ON(!try_step_up(iter));
+}
+
 /*
  * Step to the next SPTE in a pre-order traversal of the paging structure.
  * To get to the next SPTE, the iterator either steps down towards the goal
index f0af385c56e035e74ce9e0d01ed2f0d2ce4c5c20..adfca0cf94d3a0cb666a6e319817474f04b32362 100644 (file)
@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
                    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
 void tdp_iter_restart(struct tdp_iter *iter);
+void tdp_iter_step_up(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
index 841feaa48be5eb6425bc3c26a0af19210bfe1266..7b9265d6713101f3c1808a7eb84633a4267bbdb5 100644 (file)
@@ -1742,12 +1742,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
        gfn_t start = slot->base_gfn;
        gfn_t end = start + slot->npages;
        struct tdp_iter iter;
+       int max_mapping_level;
        kvm_pfn_t pfn;
 
        rcu_read_lock();
 
        tdp_root_for_each_pte(iter, root, start, end) {
-retry:
                if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
@@ -1755,15 +1755,41 @@ retry:
                    !is_last_spte(iter.old_spte, iter.level))
                        continue;
 
+               /*
+                * This is a leaf SPTE. Check if the PFN it maps can
+                * be mapped at a higher level.
+                */
                pfn = spte_to_pfn(iter.old_spte);
-               if (kvm_is_reserved_pfn(pfn) ||
-                   iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
-                                                           pfn, PG_LEVEL_NUM))
+
+               if (kvm_is_reserved_pfn(pfn))
                        continue;
 
+               max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+                               iter.gfn, pfn, PG_LEVEL_NUM);
+
+               WARN_ON(max_mapping_level < iter.level);
+
+               /*
+                * If this page is already mapped at the highest
+                * viable level, there's nothing more to do.
+                */
+               if (max_mapping_level == iter.level)
+                       continue;
+
+               /*
+                * The page can be remapped at a higher level, so step
+                * up to zap the parent SPTE.
+                */
+               while (max_mapping_level > iter.level)
+                       tdp_iter_step_up(&iter);
+
                /* Note, a successful atomic zap also does a remote TLB flush. */
-               if (tdp_mmu_zap_spte_atomic(kvm, &iter))
-                       goto retry;
+               tdp_mmu_zap_spte_atomic(kvm, &iter);
+
+               /*
+                * If the atomic zap fails, the iter will recurse back into
+                * the same subtree to retry.
+                */
        }
 
        rcu_read_unlock();