KVM: x86/mmu: Derive shadow MMU page role from parent

author David Matlack <dmatlack@google.com>

Wed, 22 Jun 2022 19:26:51 +0000 (15:26 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Fri, 24 Jun 2022 08:51:53 +0000 (04:51 -0400)
author David Matlack <dmatlack@google.com>
Wed, 22 Jun 2022 19:26:51 +0000 (15:26 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 24 Jun 2022 08:51:53 +0000 (04:51 -0400)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 2e30398fe59f6bf3c74773807dc751c688728fd7..fd1b479bf7fc2f158b37e373d8fbc5443e5d140f 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1993,49 +1993,15 @@ static void clear_sp_write_flooding_count(u64 *spte)
         __clear_sp_write_flooding_count(sptep_to_sp(spte));
  }
  
-static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
-                                            gfn_t gfn,
-                                            gva_t gaddr,
-                                            unsigned level,
-                                            bool direct,
-                                            unsigned int access)
+static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+                                            union kvm_mmu_page_role role)
  {
-       union kvm_mmu_page_role role;
         struct hlist_head *sp_list;
-       unsigned quadrant;
         struct kvm_mmu_page *sp;
         int ret;
         int collisions = 0;
         LIST_HEAD(invalid_list);
  
-       role = vcpu->arch.mmu->root_role;
-       role.level = level;
-       role.direct = direct;
-       role.access = access;
-       if (role.has_4_byte_gpte) {
-               /*
-                * If the guest has 4-byte PTEs then that means it's using 32-bit,
-                * 2-level, non-PAE paging. KVM shadows such guests with PAE paging
-                * (i.e. 8-byte PTEs). The difference in PTE size means that KVM must
-                * shadow each guest page table with multiple shadow page tables, which
-                * requires extra bookkeeping in the role.
-                *
-                * Specifically, to shadow the guest's page directory (which covers a
-                * 4GiB address space), KVM uses 4 PAE page directories, each mapping
-                * 1GiB of the address space. @role.quadrant encodes which quarter of
-                * the address space each maps.
-                *
-                * To shadow the guest's page tables (which each map a 4MiB region), KVM
-                * uses 2 PAE page tables, each mapping a 2MiB region. For these,
-                * @role.quadrant encodes which half of the region they map.
-                */
-               quadrant = gaddr >> (PAGE_SHIFT + (SPTE_LEVEL_BITS * level));
-               quadrant &= (1 << level) - 1;
-               role.quadrant = quadrant;
-       }
-       if (level <= vcpu->arch.mmu->cpu_role.base.level)
-               role.passthrough = 0;
-
         sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
         for_each_valid_sp(vcpu->kvm, sp, sp_list) {
                 if (sp->gfn != gfn) {
@@ -2053,7 +2019,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                          * Unsync pages must not be left as is, because the new
                          * upper-level page will be write-protected.
                          */
-                       if (level > PG_LEVEL_4K && sp->unsync)
+                       if (role.level > PG_LEVEL_4K && sp->unsync)
                                 kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
                                                          &invalid_list);
                         continue;
@@ -2094,14 +2060,14 @@ trace_get_page:
  
         ++vcpu->kvm->stat.mmu_cache_miss;
  
-       sp = kvm_mmu_alloc_page(vcpu, direct);
+       sp = kvm_mmu_alloc_page(vcpu, role.direct);
  
         sp->gfn = gfn;
         sp->role = role;
         hlist_add_head(&sp->hash_link, sp_list);
         if (sp_has_gptes(sp)) {
                 account_shadowed(vcpu->kvm, sp);
-               if (level == PG_LEVEL_4K && kvm_vcpu_write_protect_gfn(vcpu, gfn))
+               if (role.level == PG_LEVEL_4K && kvm_vcpu_write_protect_gfn(vcpu, gfn))
                         kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
         }
         trace_kvm_mmu_get_page(sp, true);
@@ -2113,6 +2079,55 @@ out:
         return sp;
  }
  
+static union kvm_mmu_page_role kvm_mmu_child_role(u64 *sptep, bool direct, unsigned int access)
+{
+       struct kvm_mmu_page *parent_sp = sptep_to_sp(sptep);
+       union kvm_mmu_page_role role;
+
+       role = parent_sp->role;
+       role.level--;
+       role.access = access;
+       role.direct = direct;
+       role.passthrough = 0;
+
+       /*
+        * If the guest has 4-byte PTEs then that means it's using 32-bit,
+        * 2-level, non-PAE paging. KVM shadows such guests with PAE paging
+        * (i.e. 8-byte PTEs). The difference in PTE size means that KVM must
+        * shadow each guest page table with multiple shadow page tables, which
+        * requires extra bookkeeping in the role.
+        *
+        * Specifically, to shadow the guest's page directory (which covers a
+        * 4GiB address space), KVM uses 4 PAE page directories, each mapping
+        * 1GiB of the address space. @role.quadrant encodes which quarter of
+        * the address space each maps.
+        *
+        * To shadow the guest's page tables (which each map a 4MiB region), KVM
+        * uses 2 PAE page tables, each mapping a 2MiB region. For these,
+        * @role.quadrant encodes which half of the region they map.
+        *
+        * Note, the 4 PAE page directories are pre-allocated and the quadrant
+        * assigned in mmu_alloc_root(). So only page tables need to be handled
+        * here.
+        */
+       if (role.has_4_byte_gpte) {
+               WARN_ON_ONCE(role.level != PG_LEVEL_4K);
+               role.quadrant = (sptep - parent_sp->spt) % 2;
+       }
+
+       return role;
+}
+
+static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+                                                u64 *sptep, gfn_t gfn,
+                                                bool direct, unsigned int access)
+{
+       union kvm_mmu_page_role role;
+
+       role = kvm_mmu_child_role(sptep, direct, access);
+       return kvm_mmu_get_page(vcpu, gfn, role);
+}
+
  static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator,
                                         struct kvm_vcpu *vcpu, hpa_t root,
                                         u64 addr)
@@ -2964,8 +2979,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                 if (is_shadow_present_pte(*it.sptep))
                         continue;
  
-               sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
-                                     it.level - 1, true, ACC_ALL);
+               sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, ACC_ALL);
  
                 link_shadow_page(vcpu, it.sptep, sp);
                 if (fault->is_tdp && fault->huge_page_disallowed &&
@@ -3368,13 +3382,18 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
         return ret;
  }
  
-static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
+static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
                             u8 level)
  {
-       bool direct = vcpu->arch.mmu->root_role.direct;
+       union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
         struct kvm_mmu_page *sp;
  
-       sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
+       role.level = level;
+
+       if (role.has_4_byte_gpte)
+               role.quadrant = quadrant;
+
+       sp = kvm_mmu_get_page(vcpu, gfn, role);
         ++sp->root_count;
  
         return __pa(sp->spt);
@@ -3408,8 +3427,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                 for (i = 0; i < 4; ++i) {
                         WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
  
-                       root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
-                                             i << 30, PT32_ROOT_LEVEL);
+                       root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), i,
+                                             PT32_ROOT_LEVEL);
                         mmu->pae_root[i] = root | PT_PRESENT_MASK |
                                            shadow_me_value;
                 }
@@ -3578,8 +3597,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                         root_gfn = pdptrs[i] >> PAGE_SHIFT;
                 }
  
-               root = mmu_alloc_root(vcpu, root_gfn, i << 30,
-                                     PT32_ROOT_LEVEL);
+               root = mmu_alloc_root(vcpu, root_gfn, i, PT32_ROOT_LEVEL);
                 mmu->pae_root[i] = root | pm_mask;
         }
  
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h

index e4655056e651440125024832b8e1a09c166821ca..6ecdd7a41a826758d39ee7c5c2403c59c4c9890a 100644 (file)
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -654,8 +654,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                 if (!is_shadow_present_pte(*it.sptep)) {
                         table_gfn = gw->table_gfn[it.level - 2];
                         access = gw->pt_access[it.level - 2];
-                       sp = kvm_mmu_get_page(vcpu, table_gfn, fault->addr,
-                                             it.level-1, false, access);
+                       sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn,
+                                                 false, access);
+
                         /*
                          * We must synchronize the pagetable before linking it
                          * because the guest doesn't need to flush tlb when
@@ -711,8 +712,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
                 drop_large_spte(vcpu, it.sptep);
  
                 if (!is_shadow_present_pte(*it.sptep)) {
-                       sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
-                                             it.level - 1, true, direct_access);
+                       sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn,
+                                                 true, direct_access);
                         link_shadow_page(vcpu, it.sptep, sp);
                         if (fault->huge_page_disallowed &&
                             fault->req_level >= it.level)
author	David Matlack <dmatlack@google.com>
	Wed, 22 Jun 2022 19:26:51 +0000 (15:26 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Fri, 24 Jun 2022 08:51:53 +0000 (04:51 -0400)
arch/x86/kvm/mmu/mmu.c		patch | blob | history
arch/x86/kvm/mmu/paging_tmpl.h		patch | blob | history