KVM: x86/mmu: Cache the access bits of shadowed translations

author David Matlack <dmatlack@google.com>

Wed, 22 Jun 2022 19:27:04 +0000 (15:27 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Fri, 24 Jun 2022 08:51:58 +0000 (04:51 -0400)
author David Matlack <dmatlack@google.com>
Wed, 22 Jun 2022 19:27:04 +0000 (15:27 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 24 Jun 2022 08:51:58 +0000 (04:51 -0400)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 7e4c31b57a75b8ee058d288c4d34cd0961f3711f..64efe8c90c31fe1ce83dd4354062caa0e44fe614 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -713,7 +713,7 @@ struct kvm_vcpu_arch {
  
         struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
         struct kvm_mmu_memory_cache mmu_shadow_page_cache;
-       struct kvm_mmu_memory_cache mmu_gfn_array_cache;
+       struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
         struct kvm_mmu_memory_cache mmu_page_header_cache;
  
         /*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 7cca28d89a85d72714dc16ecdf22208794fa6d3d..13a059ad5dc7eb1edeaf73cdc2e4272dc0bbd08b 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -656,7 +656,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
         if (r)
                 return r;
         if (maybe_indirect) {
-               r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
+               r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadowed_info_cache,
                                                PT64_ROOT_MAX_LEVEL);
                 if (r)
                         return r;
@@ -669,7 +669,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
  {
         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
-       kvm_mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
+       kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
  }
  
@@ -678,34 +678,68 @@ static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
         kmem_cache_free(pte_list_desc_cache, pte_list_desc);
  }
  
+static bool sp_has_gptes(struct kvm_mmu_page *sp);
+
  static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
  {
         if (sp->role.passthrough)
                 return sp->gfn;
  
         if (!sp->role.direct)
-               return sp->gfns[index];
+               return sp->shadowed_translation[index] >> PAGE_SHIFT;
  
         return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
  }
  
-static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+/*
+ * For leaf SPTEs, fetch the *guest* access permissions being shadowed. Note
+ * that the SPTE itself may have more constrained access permissions than
+ * what the guest enforces. For example, a guest may create an executable
+ * huge PTE but KVM may disallow execution to mitigate iTLB multihit.
+ */
+static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
  {
-       if (sp->role.passthrough) {
-               WARN_ON_ONCE(gfn != sp->gfn);
-               return;
-       }
+       if (sp_has_gptes(sp))
+               return sp->shadowed_translation[index] & ACC_ALL;
  
-       if (!sp->role.direct) {
-               sp->gfns[index] = gfn;
+       /*
+        * For direct MMUs (e.g. TDP or non-paging guests) or passthrough SPs,
+        * KVM is not shadowing any guest page tables, so the "guest access
+        * permissions" are just ACC_ALL.
+        *
+        * For direct SPs in indirect MMUs (shadow paging), i.e. when KVM
+        * is shadowing a guest huge page with small pages, the guest access
+        * permissions being shadowed are the access permissions of the huge
+        * page.
+        *
+        * In both cases, sp->role.access contains the correct access bits.
+        */
+       return sp->role.access;
+}
+
+static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index, gfn_t gfn, u32 access)
+{
+       if (sp_has_gptes(sp)) {
+               sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
                 return;
         }
  
-       if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
-               pr_err_ratelimited("gfn mismatch under direct page %llx "
-                                  "(expected %llx, got %llx)\n",
-                                  sp->gfn,
-                                  kvm_mmu_page_get_gfn(sp, index), gfn);
+       WARN_ONCE(access != kvm_mmu_page_get_access(sp, index),
+                 "access mismatch under %s page %llx (expected %u, got %u)\n",
+                 sp->role.passthrough ? "passthrough" : "direct",
+                 sp->gfn, kvm_mmu_page_get_access(sp, index), access);
+
+       WARN_ONCE(gfn != kvm_mmu_page_get_gfn(sp, index),
+                 "gfn mismatch under %s page %llx (expected %llx, got %llx)\n",
+                 sp->role.passthrough ? "passthrough" : "direct",
+                 sp->gfn, kvm_mmu_page_get_gfn(sp, index), gfn);
+}
+
+static void kvm_mmu_page_set_access(struct kvm_mmu_page *sp, int index, u32 access)
+{
+       gfn_t gfn = kvm_mmu_page_get_gfn(sp, index);
+
+       kvm_mmu_page_set_translation(sp, index, gfn, access);
  }
  
  /*
@@ -1554,14 +1588,14 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
  static void __rmap_add(struct kvm *kvm,
                        struct kvm_mmu_memory_cache *cache,
                        const struct kvm_memory_slot *slot,
-                      u64 *spte, gfn_t gfn)
+                      u64 *spte, gfn_t gfn, u32 access)
  {
         struct kvm_mmu_page *sp;
         struct kvm_rmap_head *rmap_head;
         int rmap_count;
  
         sp = sptep_to_sp(spte);
-       kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
+       kvm_mmu_page_set_translation(sp, spte - sp->spt, gfn, access);
         kvm_update_page_stats(kvm, sp->role.level, 1);
  
         rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
@@ -1575,11 +1609,11 @@ static void __rmap_add(struct kvm *kvm,
  }
  
  static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
-                    u64 *spte, gfn_t gfn)
+                    u64 *spte, gfn_t gfn, u32 access)
  {
         struct kvm_mmu_memory_cache *cache = &vcpu->arch.mmu_pte_list_desc_cache;
  
-       __rmap_add(vcpu->kvm, cache, slot, spte, gfn);
+       __rmap_add(vcpu->kvm, cache, slot, spte, gfn, access);
  }
  
  bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1643,7 +1677,7 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
         list_del(&sp->link);
         free_page((unsigned long)sp->spt);
         if (!sp->role.direct)
-               free_page((unsigned long)sp->gfns);
+               free_page((unsigned long)sp->shadowed_translation);
         kmem_cache_free(mmu_page_header_cache, sp);
  }
  
@@ -2070,7 +2104,7 @@ out:
  struct shadow_page_caches {
         struct kvm_mmu_memory_cache *page_header_cache;
         struct kvm_mmu_memory_cache *shadow_page_cache;
-       struct kvm_mmu_memory_cache *gfn_array_cache;
+       struct kvm_mmu_memory_cache *shadowed_info_cache;
  };
  
  static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
@@ -2084,7 +2118,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
         sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
         sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
         if (!role.direct)
-               sp->gfns = kvm_mmu_memory_cache_alloc(caches->gfn_array_cache);
+               sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
  
         set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
  
@@ -2136,7 +2170,7 @@ static struct kvm_mmu_page *kvm_mmu_get_shadow_page(struct kvm_vcpu *vcpu,
         struct shadow_page_caches caches = {
                 .page_header_cache = &vcpu->arch.mmu_page_header_cache,
                 .shadow_page_cache = &vcpu->arch.mmu_shadow_page_cache,
-               .gfn_array_cache = &vcpu->arch.mmu_gfn_array_cache,
+               .shadowed_info_cache = &vcpu->arch.mmu_shadowed_info_cache,
         };
  
         return __kvm_mmu_get_shadow_page(vcpu->kvm, vcpu, &caches, gfn, role);
@@ -2785,7 +2819,10 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
  
         if (!was_rmapped) {
                 WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
-               rmap_add(vcpu, slot, sptep, gfn);
+               rmap_add(vcpu, slot, sptep, gfn, pte_access);
+       } else {
+               /* Already rmapped but the pte_access bits may have changed. */
+               kvm_mmu_page_set_access(sp, sptep - sp->spt, pte_access);
         }
  
         return ret;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h

index bb9d12ac0db3f5899c48e3d039afda1766d74e4d..ae2d660e2dab936a45aac6f8339a7d8652a01f83 100644 (file)
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -67,8 +67,21 @@ struct kvm_mmu_page {
         gfn_t gfn;
  
         u64 *spt;
-       /* hold the gfn of each spte inside spt */
-       gfn_t *gfns;
+
+       /*
+        * Stores the result of the guest translation being shadowed by each
+        * SPTE.  KVM shadows two types of guest translations: nGPA -> GPA
+        * (shadow EPT/NPT) and GVA -> GPA (traditional shadow paging). In both
+        * cases the result of the translation is a GPA and a set of access
+        * constraints.
+        *
+        * The GFN is stored in the upper bits (bit PAGE_SHIFT and up) and the
+        * shadowed access permissions are stored in the lower bits. Note, for
+        * convenience and uniformity across guests, the access permissions are
+        * stored in KVM format (e.g.  ACC_EXEC_MASK) not the raw guest format.
+        */
+       u64 *shadowed_translation;
+
         /* Currently serving as active root */
         union {
                 int root_count;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h

index 6ecdd7a41a826758d39ee7c5c2403c59c4c9890a..24f292f3f93fb59c3643838485c44e5bfa35553c 100644 (file)
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -985,7 +985,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
  }
  
  /*
- * Using the cached information from sp->gfns is safe because:
+ * Using the information in sp->shadowed_translation (kvm_mmu_page_get_gfn()) is
+ * safe because:
   * - The spte has a reference to the struct page, so the pfn for a given gfn
   *   can't change unless all sptes pointing to it are nuked first.
   *
@@ -1067,12 +1068,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
                  * "present" bit, as all other paging modes will create a
                  * read-only SPTE if pte_access is zero.
                  */
-               if ((!pte_access && !shadow_present_mask) || gfn != sp->gfns[i]) {
+               if ((!pte_access && !shadow_present_mask) ||
+                   gfn != kvm_mmu_page_get_gfn(sp, i)) {
                         drop_spte(vcpu->kvm, &sp->spt[i]);
                         flush = true;
                         continue;
                 }
  
+               /* Update the shadowed access bits in case they changed. */
+               kvm_mmu_page_set_access(sp, i, pte_access);
+
                 sptep = &sp->spt[i];
                 spte = *sptep;
                 host_writable = spte & shadow_host_writable_mask;
author	David Matlack <dmatlack@google.com>
	Wed, 22 Jun 2022 19:27:04 +0000 (15:27 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Fri, 24 Jun 2022 08:51:58 +0000 (04:51 -0400)
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/mmu/mmu.c		patch \| blob \| history
arch/x86/kvm/mmu/mmu_internal.h		patch \| blob \| history
arch/x86/kvm/mmu/paging_tmpl.h		patch \| blob \| history