KVM: x86/mmu: Don't bottom out on leafs when zapping collapsible SPTEs
author    Sean Christopherson <seanjc@google.com>
          Fri, 15 Jul 2022 23:21:06 +0000 (23:21 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
          Thu, 28 Jul 2022 17:22:24 +0000 (13:22 -0400)
When zapping collapsible SPTEs in the TDP MMU, don't bottom out on a leaf
SPTE now that KVM doesn't require a PFN to compute the host mapping level,
i.e. now that there's no need to first find a leaf SPTE and then step
back up.
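
For illustration only, a minimal user-space sketch of the resulting
control flow. Everything here is a made-up stand-in (toy_iter,
zap_atomic(), can_be_huge are not kernel APIs, and the real
tdp_mmu_zap_spte_atomic() returns 0/-EBUSY rather than a bool); only
the shape of the loop mirrors the patch:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of one SPTE visited during the walk; all fields invented. */
struct toy_iter {
    int level;        /* 1 = 4K, 2 = 2M, 3 = 1G */
    bool present;
    bool leaf;
    bool can_be_huge; /* could this range be remapped as a huge page? */
};

/*
 * Stand-in for tdp_mmu_zap_spte_atomic(); pretend the first attempt
 * loses a race with a vCPU and fails, forcing one retry.
 */
static bool zap_atomic(struct toy_iter *it)
{
    static bool raced = true;

    if (raced) {
        raced = false;
        return false;  /* lost the race; caller retries this SPTE */
    }
    printf("zapped non-leaf SPTE at level %d\n", it->level);
    return true;
}

static void zap_collapsible(struct toy_iter *walk, int n)
{
    for (int i = 0; i < n; i++) {
        struct toy_iter *it = &walk[i];
retry:
        /*
         * Never bottom out on a leaf: if the leaf's range could be
         * huge, its non-leaf parent gets zapped before the iterator
         * ever steps down to the leaf.
         */
        if (!it->present || it->leaf)
            continue;
        if (!it->can_be_huge)
            continue;
        if (!zap_atomic(it))
            goto retry;  /* re-evaluate the same SPTE, no recursion */
    }
}

int main(void)
{
    struct toy_iter walk[] = {
        { .level = 2, .present = true, .leaf = false, .can_be_huge = true },
        { .level = 1, .present = true, .leaf = true,  .can_be_huge = false },
    };

    zap_collapsible(walk, 2);
    return 0;
}

The point is that leaves are skipped outright, and a failed atomic zap
re-evaluates the same SPTE instead of recursing back up through a leaf.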

Drop the now unused tdp_iter_step_up(), as it is not the safest of
helpers (using any of the low-level iterators requires some understanding
of the various side effects).
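
As a rough, purely hypothetical illustration of those side effects (a
toy iterator, not the kernel's struct tdp_iter): stepping up re-aligns
the current gfn to the parent level's granularity, so the iterator
silently stops pointing at the gfn the caller was working on.

#include <stdio.h>

/*
 * Toy iterator over a radix tree of gfns, with hypothetical 4K/2M/1G
 * levels and 512 entries per level, expressed in 4K-page units.
 */
struct toy_tdp_iter {
    unsigned long gfn;
    int level;  /* 1 = 4K, 2 = 2M, 3 = 1G */
};

static unsigned long level_mask(int level)
{
    /* A level-1 entry spans 1 gfn, level 2 spans 512, level 3 spans 512^2. */
    return ~((1UL << (9 * (level - 1))) - 1);
}

static void step_up(struct toy_tdp_iter *it)
{
    it->level++;
    it->gfn &= level_mask(it->level);  /* side effect: gfn moves! */
}

int main(void)
{
    struct toy_tdp_iter it = { .gfn = 0x805, .level = 1 };

    step_up(&it);
    printf("after step up: level=%d gfn=0x%lx\n", it.level, it.gfn);
    /*
     * Prints level=2 gfn=0x800: the iterator no longer points at the
     * gfn the caller started from, which every caller must remember.
     */
    return 0;
}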

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220715232107.3775620-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_iter.h
arch/x86/kvm/mmu/tdp_mmu.c

diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index 9c65a64a56d9b234f548ba5147088fea048d3ebe..39b48e7d7d1a8f825f8915b62f8b3c4f31e1387d 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -145,15 +145,6 @@ static bool try_step_up(struct tdp_iter *iter)
        return true;
 }
 
-/*
- * Step the iterator back up a level in the paging structure. Should only be
- * used when the iterator is below the root level.
- */
-void tdp_iter_step_up(struct tdp_iter *iter)
-{
-       WARN_ON(!try_step_up(iter));
-}
-
 /*
  * Step to the next SPTE in a pre-order traversal of the paging structure.
  * To get to the next SPTE, the iterator either steps down towards the goal
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index adfca0cf94d3a0cb666a6e319817474f04b32362..f0af385c56e035e74ce9e0d01ed2f0d2ce4c5c20 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -114,6 +114,5 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
                    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
 void tdp_iter_restart(struct tdp_iter *iter);
-void tdp_iter_step_up(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d75d93edc40a8ec77671de7f6e56979388b74b4a..40ccb5fba870afc9655aee5e7adee8fdad929224 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1721,10 +1721,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
                clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
 }
 
-/*
- * Clear leaf entries which could be replaced by large mappings, for
- * GFNs within the slot.
- */
 static void zap_collapsible_spte_range(struct kvm *kvm,
                                       struct kvm_mmu_page *root,
                                       const struct kvm_memory_slot *slot)
@@ -1736,48 +1732,49 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 
        rcu_read_lock();
 
-       tdp_root_for_each_pte(iter, root, start, end) {
+       for_each_tdp_pte_min_level(iter, root, PG_LEVEL_2M, start, end) {
+retry:
                if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
-               if (!is_shadow_present_pte(iter.old_spte) ||
-                   !is_last_spte(iter.old_spte, iter.level))
+               if (iter.level > KVM_MAX_HUGEPAGE_LEVEL ||
+                   !is_shadow_present_pte(iter.old_spte))
                        continue;
 
-               max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
-                                                             iter.gfn, PG_LEVEL_NUM);
-
-               WARN_ON(max_mapping_level < iter.level);
-
                /*
-                * If this page is already mapped at the highest
-                * viable level, there's nothing more to do.
+                * Don't zap leaf SPTEs, if a leaf SPTE could be replaced with
+                * a large page size, then its parent would have been zapped
+                * instead of stepping down.
                 */
-               if (max_mapping_level == iter.level)
+               if (is_last_spte(iter.old_spte, iter.level))
                        continue;
 
                /*
-                * The page can be remapped at a higher level, so step
-                * up to zap the parent SPTE.
+                * If iter.gfn resides outside of the slot, i.e. the page for
+                * the current level overlaps but is not contained by the slot,
+                * then the SPTE can't be made huge.  More importantly, trying
+                * to query that info from slot->arch.lpage_info will cause an
+                * out-of-bounds access.
                 */
-               while (max_mapping_level > iter.level)
-                       tdp_iter_step_up(&iter);
+               if (iter.gfn < start || iter.gfn >= end)
+                       continue;
 
-               /* Note, a successful atomic zap also does a remote TLB flush. */
-               tdp_mmu_zap_spte_atomic(kvm, &iter);
+               max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+                                                             iter.gfn, PG_LEVEL_NUM);
+               if (max_mapping_level < iter.level)
+                       continue;
 
-               /*
-                * If the atomic zap fails, the iter will recurse back into
-                * the same subtree to retry.
-                */
+               /* Note, a successful atomic zap also does a remote TLB flush. */
+               if (tdp_mmu_zap_spte_atomic(kvm, &iter))
+                       goto retry;
        }
 
        rcu_read_unlock();
 }
 
 /*
- * Clear non-leaf entries (and free associated page tables) which could
- * be replaced by large mappings, for GFNs within the slot.
+ * Zap non-leaf SPTEs (and free their associated page tables) which could
+ * be replaced by huge pages, for GFNs within the slot.
  */
 void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                       const struct kvm_memory_slot *slot)
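
To make the lpage_info out-of-bounds concern in the new comment
concrete, a small standalone example with an invented slot range: a
memslot covering gfns [0x401, 0x600) is not 2M-aligned, so the first
2M-level entry overlapping it has iter.gfn = 0x400, which lies outside
the slot and therefore must not be used to index
slot->arch.lpage_info.

#include <stdio.h>

int main(void)
{
    /* Hypothetical slot bounds, misaligned at the 2M level. */
    unsigned long start = 0x401, end = 0x600;
    unsigned long level2_span = 512;  /* gfns covered by one 2M entry */
    unsigned long iter_gfn = start & ~(level2_span - 1);

    printf("2M-level iter.gfn = 0x%lx\n", iter_gfn);
    if (iter_gfn < start || iter_gfn >= end)
        printf("outside [0x%lx, 0x%lx): skip, don't touch lpage_info\n",
               start, end);
    return 0;
}

This is exactly the case the "iter.gfn < start || iter.gfn >= end"
check in the hunk above guards against.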