KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU
author		Nicholas Piggin <npiggin@gmail.com>
		Fri, 28 May 2021 09:07:41 +0000 (19:07 +1000)
committer	Michael Ellerman <mpe@ellerman.id.au>
		Thu, 10 Jun 2021 12:12:14 +0000 (22:12 +1000)
Rather than working around the POWER9 radix prefetch bug by
partitioning the guest PID space and flushing rogue guest PIDs, fix it
by always disabling the MMU when switching into or out of the guest
MMU context in HV mode.

This may be a bit less efficient, but it is a lot less complicated and
allows the P9 path to trivially implement the workaround too. Newer CPUs
are not subject to this issue.
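
On affected CPUs, the guest entry/exit path now brackets the guest MMU
context switch with the MMU disabled. In outline (a condensed sketch of
the kvmhv_vcpu_entry_p9() changes in the book3s_hv_p9_entry.c hunks
below, not the verbatim code; SPR save/restore, timing and exit
handling are elided):

  int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit,
                          unsigned long lpcr)
  {
          ...
          if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
                  /* MMU (and RI) off before touching the guest context */
                  __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

          switch_mmu_to_guest_radix(kvm, vcpu, lpcr);

          ...                     /* enter the guest, handle the exit */

          switch_mmu_to_host_radix(kvm, host_pidr);

          /*
           * Only now is it safe to turn the host MMU back on: the
           * hardware can no longer prefetch translations under the
           * guest PID.
           */
          __mtmsrd(msr, 0);
          ...
  }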

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-22-npiggin@gmail.com
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/mm/book3s64/radix_tlb.c
arch/powerpc/mm/mmu_context.c

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0e8a8d0fb02e7342fecbf76d70ee4..84e192aa54fdd99df83f20ab60d6be8c09529f01 100644
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif
 
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
 extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 12c35b0561d370fa04729b2a890a1a0e5cb0b849..eb25605e23b9b3fa9371997f7f8a992ac6a95c85 100644
@@ -807,7 +807,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
                 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
                 * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
                 */
                if (mflags != 0 && mflags != 3)
                        return H_UNSUPPORTED_FLAG_START;
+               if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+                               kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
+                       return H_UNSUPPORTED_FLAG_START;
                return H_TOO_HARD;
        default:
@@ -1677,6 +1680,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
                lpcr &= ~LPCR_AIL;
        if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
                lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+       /*
+        * On some POWER9s we force AIL off for radix guests to prevent
+        * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+        * guest, which can result in Q0 translations with LPID=0 PID=PIDR
+        * being cached, which the host TLB management does not expect.
+        */
+       if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               lpcr &= ~LPCR_AIL;
 
        /*
         * On POWER9, allow userspace to enable large decrementer for the
@@ -4360,12 +4371,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
        do {
-               /*
-                * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
-                * path, which also handles hash and dependent threads mode.
-                */
-               if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
-                   !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               if (kvm->arch.threads_indep && kvm_is_radix(kvm))
                        r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
                                                  vcpu->arch.vcore->lpcr);
                else
@@ -4995,6 +5001,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
                        pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
                        kvm->arch.threads_indep = true;
+               } else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+                       pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
+                       kvm->arch.threads_indep = true;
                } else {
                        kvm->arch.threads_indep = indep_threads_mode;
                }
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 0b5bd00c9d0fcfa14f2b7d6e4a30b070fa512bb7..178f771e299c6963c008ad8e7b75746615753fcf 100644
@@ -218,6 +218,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        mtspr(SPRN_AMOR, ~0UL);
 
+       if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
        switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
 
        /*
@@ -226,7 +229,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
-       __mtmsrd(0, 1); /* clear RI */
+       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               __mtmsrd(0, 1); /* clear RI */
 
        mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
        mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
@@ -341,8 +345,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        radix_clear_slb();
 
-       __mtmsrd(msr, 0);
-
        accumulate_time(vcpu, &vcpu->arch.rm_exit);
 
        /* Advance host PURR/SPURR by the amount used by guest */
@@ -408,6 +410,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        switch_mmu_to_host_radix(kvm, host_pidr);
 
+       /*
+        * If we are in real mode, switch the MMU back on only after the
+        * host MMU context is restored, to avoid the P9_RADIX_PREFETCH_BUG.
+        */
+       __mtmsrd(msr, 0);
+
        end_timing(vcpu);
 
        return trap;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d742233dec55c161d4dfc3ba20cb9845e3e5f88d..3b8fd4bd2419952243829e2a51bfec764212798f 100644
@@ -1717,40 +1717,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        eieio
        tlbsync
        ptesync
-
-BEGIN_FTR_SECTION
-       /* Radix: Handle the case where the guest used an illegal PID */
-       LOAD_REG_ADDR(r4, mmu_base_pid)
-       lwz     r3, VCPU_GUEST_PID(r9)
-       lwz     r5, 0(r4)
-       cmpw    cr0,r3,r5
-       blt     2f
-
-       /*
-        * Illegal PID, the HW might have prefetched and cached in the TLB
-        * some translations for the  LPID 0 / guest PID combination which
-        * Linux doesn't know about, so we need to flush that PID out of
-        * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
-        * the right context.
-       */
-       li      r0,0
-       mtspr   SPRN_LPID,r0
-       isync
-
-       /* Then do a congruence class local flush */
-       ld      r6,VCPU_KVM(r9)
-       lwz     r0,KVM_TLB_SETS(r6)
-       mtctr   r0
-       li      r7,0x400                /* IS field = 0b01 */
-       ptesync
-       sldi    r0,r3,32                /* RS has PID */
-1:     PPC_TLBIEL(7,0,2,1,1)           /* RIC=2, PRS=1, R=1 */
-       addi    r7,r7,0x1000
-       bdnz    1b
-       ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
 #endif /* CONFIG_PPC_RADIX_MMU */
 
        /*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b46342d4f9c3cc77bc021d5758b49150..fe236c38ce00faa5ca9bfc09b424535071c1127c 100644
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
        }
 
        /* Find out how many PID bits are supported */
-       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
-               if (!mmu_pid_bits)
-                       mmu_pid_bits = 20;
-               mmu_base_pid = 1;
-       } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
-               if (!mmu_pid_bits)
-                       mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+                       cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
                /*
-                * When KVM is possible, we only use the top half of the
-                * PID space to avoid collisions between host and guest PIDs
-                * which can cause problems due to prefetch when exiting the
-                * guest with AIL=3
+                * Older versions of KVM on these machines prefer that the
+                * guest use only the low 19 PID bits.
                 */
-               mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
-               mmu_base_pid = 1;
-#endif
-       } else {
-               /* The guest uses the bottom half of the PID space */
                if (!mmu_pid_bits)
                        mmu_pid_bits = 19;
-               mmu_base_pid = 1;
+       } else {
+               if (!mmu_pid_bits)
+                       mmu_pid_bits = 20;
        }
+       mmu_base_pid = 1;
 
        /*
         * Allocate Partition table and process table for the
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e612107892c64061ce7ecfa3dfa830b5c5f65..312236a6b08550e9ad81471b690657b830241330 100644
@@ -1336,49 +1336,3 @@ void radix__flush_tlb_all(void)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
-{
-       unsigned long pid = mm->context.id;
-
-       if (unlikely(pid == MMU_NO_CONTEXT))
-               return;
-
-       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-               return;
-
-       /*
-        * If this context hasn't run on that CPU before and KVM is
-        * around, there's a slim chance that the guest on another
-        * CPU just brought in obsolete translation into the TLB of
-        * this CPU due to a bad prefetch using the guest PID on
-        * the way into the hypervisor.
-        *
-        * We work around this here. If KVM is possible, we check if
-        * any sibling thread is in KVM. If it is, the window may exist
-        * and thus we flush that PID from the core.
-        *
-        * A potential future improvement would be to mark which PIDs
-        * have never been used on the system and avoid it if the PID
-        * is new and the process has no other cpumask bit set.
-        */
-       if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
-               int cpu = smp_processor_id();
-               int sib = cpu_first_thread_sibling(cpu);
-               bool flush = false;
-
-               for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
-                       if (sib == cpu)
-                               continue;
-                       if (!cpu_possible(sib))
-                               continue;
-                       if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
-                               flush = true;
-               }
-               if (flush)
-                       _tlbiel_pid(pid, RIC_FLUSH_ALL);
-       }
-}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738fdf883a49501818af9691a4cd139..74246536b83263514b110d0b6d3ba5e8472a51fc 100644
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                asm volatile ("dssall");
 
-       if (new_on_cpu)
-               radix_kvm_prefetch_workaround(next);
-       else
+       if (!new_on_cpu)
                membarrier_arch_switch_mm(prev, next, tsk);
 
        /*