KVM: x86/mmu: Defer TLB flush to caller when freeing TDP MMU shadow pages

author Sean Christopherson <seanjc@google.com>

Sat, 26 Feb 2022 00:15:37 +0000 (00:15 +0000)

committer Paolo Bonzini <pbonzini@redhat.com>

Tue, 8 Mar 2022 14:31:57 +0000 (09:31 -0500)
author Sean Christopherson <seanjc@google.com>
Sat, 26 Feb 2022 00:15:37 +0000 (00:15 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 8 Mar 2022 14:31:57 +0000 (09:31 -0500)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index c6e68ed68618f2a6e3cf7b6bfe9cb5c3c04bc5c2..fe026e5be1871c7117739908632254d1a0ba0f18 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6360,6 +6360,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
         rcu_idx = srcu_read_lock(&kvm->srcu);
         write_lock(&kvm->mmu_lock);
  
+       /*
+        * Zapping TDP MMU shadow pages, including the remote TLB flush, must
+        * be done under RCU protection, because the pages are freed via RCU
+        * callback.
+        */
+       rcu_read_lock();
+
         ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
         to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
         for ( ; to_zap; --to_zap) {
@@ -6384,12 +6391,18 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
  
                 if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                         kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+                       rcu_read_unlock();
+
                         cond_resched_rwlock_write(&kvm->mmu_lock);
                         flush = false;
+
+                       rcu_read_lock();
                 }
         }
         kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
  
+       rcu_read_unlock();
+
         write_unlock(&kvm->mmu_lock);
         srcu_read_unlock(&kvm->srcu, rcu_idx);
  }
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h

index e2a7e267a77dbada903bfbae7f2e7ead1c9cdb1c..b1eaf6ec0e0b1fe8c859a44f67f047a01db6225f 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -9,10 +9,9 @@
  
  /*
   * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
- * to be zapped while holding mmu_lock for read.  Holding RCU isn't required for
- * correctness if mmu_lock is held for write, but plumbing "struct kvm" down to
- * the lower depths of the TDP MMU just to make lockdep happy is a nightmare, so
- * all accesses to SPTEs are done under RCU protection.
+ * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
+ * batched without having to collect the list of zapped SPs.  Flows that can
+ * remove SPs must service pending TLB flushes prior to dropping RCU protection.
   */
  static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
  {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c

index 3a866fcb5ea94f3153f4821b8c76601d0f457686..5038de0c872d5946e4bf1c8c30dc1b23fe9d5e8e 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -391,9 +391,6 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
                                     shared);
         }
  
-       kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
-                                          KVM_PAGES_PER_HPAGE(level + 1));
-
         call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
  }
  
@@ -817,19 +814,13 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
         if (WARN_ON_ONCE(!sp->ptep))
                 return false;
  
-       rcu_read_lock();
-
         old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
-       if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte))) {
-               rcu_read_unlock();
+       if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
                 return false;
-       }
  
         __tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
                            sp->gfn, sp->role.level + 1, true, true);
  
-       rcu_read_unlock();
-
         return true;
  }
  
@@ -870,6 +861,11 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
         }
  
         rcu_read_unlock();
+
+       /*
+        * Because this flow zaps _only_ leaf SPTEs, the caller doesn't need
+        * to provide RCU protection as no 'struct kvm_mmu_page' will be freed.
+        */
         return flush;
  }
  
@@ -1036,6 +1032,10 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
                 ret = RET_PF_SPURIOUS;
         else if (tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
                 return RET_PF_RETRY;
+       else if (is_shadow_present_pte(iter->old_spte) &&
+                !is_last_spte(iter->old_spte, iter->level))
+               kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
+                                                  KVM_PAGES_PER_HPAGE(iter->level + 1));
  
         /*
          * If the page fault was caused by a write but the page is write
author	Sean Christopherson <seanjc@google.com>
	Sat, 26 Feb 2022 00:15:37 +0000 (00:15 +0000)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Tue, 8 Mar 2022 14:31:57 +0000 (09:31 -0500)
arch/x86/kvm/mmu/mmu.c		patch | blob | history
arch/x86/kvm/mmu/tdp_iter.h		patch | blob | history
arch/x86/kvm/mmu/tdp_mmu.c		patch | blob | history