git.baikalelectronics.ru Git - kernel.git/commitdiff
RISC-V: KVM: Implement MMU notifiers
author Anup Patel <anup.patel@wdc.com>
Mon, 27 Sep 2021 11:40:10 +0000 (17:10 +0530)
committer Anup Patel <anup@brainfault.org>
Mon, 4 Oct 2021 10:33:39 +0000 (16:03 +0530)
This patch implements MMU notifiers for KVM RISC-V so that the Guest
physical address space stays in sync with the Host physical address space.

This will allow swapping, page migration, etc. to work transparently
with KVM RISC-V.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>
arch/riscv/include/asm/kvm_host.h
arch/riscv/kvm/Kconfig
arch/riscv/kvm/mmu.c
arch/riscv/kvm/vm.c

index 2e71a353395e62a341330a5de8490e1b46d1edb0..17ed90a4798e7520d63a35531a120a0e9d9ed909 100644 (file)
@@ -196,6 +196,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
 void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa, unsigned long vmid);
 void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
 void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
index 633063edaee85beb191860fcb6d980d5748c550f..a712bb910cda8a9a5ffc8c072b36346d3ac37d0a 100644 (file)
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
        depends on RISCV_SBI && MMU
+       select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        select KVM_MMIO
index fa9a4f9b9542a67faf76383caf6240226950e6cf..3a00c2df7640a27527372849a9e46e20227bcad5 100644 (file)
@@ -300,7 +300,8 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
        }
 }
 
-static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
+static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+                              gpa_t size, bool may_block)
 {
        int ret;
        pte_t *ptep;
@@ -325,6 +326,13 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start, gpa_t size)
 
 next:
                addr += page_size;
+
+               /*
+                * If the range is too large, release the kvm->mmu_lock
+                * to prevent starvation and lockup detector warnings.
+                */
+               if (may_block && addr < end)
+                       cond_resched_lock(&kvm->mmu_lock);
        }
 }
 
@@ -405,7 +413,6 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
 out:
        stage2_cache_flush(&pcache);
        return ret;
-
 }
 
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
@@ -547,7 +554,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
        spin_lock(&kvm->mmu_lock);
        if (ret)
                stage2_unmap_range(kvm, mem->guest_phys_addr,
-                                  mem->memory_size);
+                                  mem->memory_size, false);
        spin_unlock(&kvm->mmu_lock);
 
 out:
@@ -555,6 +562,73 @@ out:
        return ret;
 }
 
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       if (!kvm->arch.pgd)     /* this VM has no stage2 pgd: nothing to do */
+               return 0;
+       /*
+        * Hand the gfn range [range->start, range->end) to
+        * stage2_unmap_range(). The start gfn and the page count are both
+        * shifted by PAGE_SHIFT to turn them into a byte address and a byte
+        * size. range->may_block is passed through unchanged; in
+        * stage2_unmap_range() it gates the cond_resched_lock() call on
+        * kvm->mmu_lock between pages.
+        * NOTE(review): presumably called by the generic KVM MMU-notifier
+        * code with kvm->mmu_lock held -- confirm against the caller.
+        */
+       stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+                          (range->end - range->start) << PAGE_SHIFT,
+                          range->may_block);
+       return 0;       /* 0 is returned on every path */
+}
+
+bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       int ret;
+       kvm_pfn_t pfn = pte_pfn(range->pte);    /* pfn taken from the supplied PTE */
+       /*
+        * Map the guest page at gfn range->start onto the page frame named
+        * by range->pte, using stage2_map_page(). Returns 1 if that call
+        * fails and 0 otherwise, including when the VM has no stage2 pgd.
+        */
+       if (!kvm->arch.pgd)
+               return 0;
+       /* A range of exactly one gfn is expected; anything else only warns. */
+       WARN_ON(range->end - range->start != 1);
+       /*
+        * The mapping is PAGE_SIZE bytes at guest address
+        * range->start << PAGE_SHIFT. NULL is passed as the second argument
+        * and the two trailing 'true' flags are interpreted by
+        * stage2_map_page(), which is not visible in this hunk.
+        * NOTE(review): confirm their meaning there.
+        */
+       ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+                             __pfn_to_phys(pfn), PAGE_SIZE, true, true);
+       if (ret) {
+               kvm_debug("Failed to map stage2 page (error %d)\n", ret);
+               return 1;
+       }
+       /* stage2_map_page() reported success */
+       return 0;
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       pte_t *ptep;
+       u32 ptep_level = 0;
+       u64 size = (range->end - range->start) << PAGE_SHIFT;   /* range length in bytes */
+       /*
+        * Test and clear the "young" state of the leaf entry that
+        * stage2_get_leaf_entry() finds for range->start. Returns 0 when the
+        * VM has no stage2 pgd or no leaf entry is found; otherwise returns
+        * the result of ptep_test_and_clear_young() on that entry.
+        */
+       if (!kvm->arch.pgd)
+               return 0;
+       /* Warn, but carry on, unless the range is PAGE_SIZE, PMD_SIZE or PGDIR_SIZE long. */
+       WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+       /* Only range->start is looked up; ptep_level is filled in but not read here. */
+       if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+                                  &ptep, &ptep_level))
+               return 0;
+       /* NULL and 0 are passed as the helper's first two arguments. */
+       return ptep_test_and_clear_young(NULL, 0, ptep);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       pte_t *ptep;
+       u32 ptep_level = 0;
+       u64 size = (range->end - range->start) << PAGE_SHIFT;   /* range length in bytes */
+       /*
+        * Report the "young" state of the leaf entry that
+        * stage2_get_leaf_entry() finds for range->start, reading it through
+        * pte_young() without writing the entry back. Returns 0 when the VM
+        * has no stage2 pgd or no leaf entry is found.
+        */
+       if (!kvm->arch.pgd)
+               return 0;
+       /* Warn, but carry on, unless the range is PAGE_SIZE, PMD_SIZE or PGDIR_SIZE long. */
+       WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
+       /* Only range->start is looked up; ptep_level is filled in but not read here. */
+       if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+                                  &ptep, &ptep_level))
+               return 0;
+       /* The entry is read by value, so nothing is cleared on this path. */
+       return pte_young(*ptep);
+}
+
 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
                         struct kvm_memory_slot *memslot,
                         gpa_t gpa, unsigned long hva, bool is_write)
@@ -569,7 +643,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
        struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
        bool logging = (memslot->dirty_bitmap &&
                        !(memslot->flags & KVM_MEM_READONLY)) ? true : false;
-       unsigned long vma_pagesize;
+       unsigned long vma_pagesize, mmu_seq;
 
        mmap_read_lock(current->mm);
 
@@ -608,6 +682,8 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
                return ret;
        }
 
+       mmu_seq = kvm->mmu_notifier_seq;
+
        hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
        if (hfn == KVM_PFN_ERR_HWPOISON) {
                send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
@@ -626,6 +702,9 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
 
        spin_lock(&kvm->mmu_lock);
 
+       if (mmu_notifier_retry(kvm, mmu_seq))
+               goto out_unlock;
+
        if (writeable) {
                kvm_set_pfn_dirty(hfn);
                mark_page_dirty(kvm, gfn);
@@ -639,6 +718,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
        if (ret)
                kvm_err("Failed to map in stage2\n");
 
+out_unlock:
        spin_unlock(&kvm->mmu_lock);
        kvm_set_pfn_accessed(hfn);
        kvm_release_pfn_clean(hfn);
@@ -675,7 +755,7 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
 
        spin_lock(&kvm->mmu_lock);
        if (kvm->arch.pgd) {
-               stage2_unmap_range(kvm, 0UL, stage2_gpa_size);
+               stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
                pgd = READ_ONCE(kvm->arch.pgd);
                kvm->arch.pgd = NULL;
                kvm->arch.pgd_phys = 0;
index e2834ab9044c6427dd43bb0fbe4d449b78f0a49e..892d020674c06cd5d306c0a394e19bb61441bdbf 100644 (file)
@@ -65,6 +65,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_USER_MEMORY:
+       case KVM_CAP_SYNC_MMU:
        case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_READONLY_MEM: