KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path

author Nicholas Piggin <npiggin@gmail.com>

Fri, 28 May 2021 09:07:33 +0000 (19:07 +1000)

committer Michael Ellerman <mpe@ellerman.id.au>

Thu, 10 Jun 2021 12:12:13 +0000 (22:12 +1000)
author Nicholas Piggin <npiggin@gmail.com>
Fri, 28 May 2021 09:07:33 +0000 (19:07 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Thu, 10 Jun 2021 12:12:13 +0000 (22:12 +1000)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h

index 8c10c342716667add84849f1b0528fb64cabb23f..cb9e3c85c6052a5bd02c745e2041a99f1c9e04bf 100644 (file)
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
  extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
  extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
  extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
  extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
  extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
  extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
  extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
  extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
  extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
  extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
  extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
  extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
  static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
  static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
         { return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+       { return 0; }
  #endif
  
  #ifdef CONFIG_KVM_XIVE
@@ -672,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                                int level, bool line_status);
  extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
  extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
  
  static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
  {
@@ -713,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
                                       int level, bool line_status) { return -ENODEV; }
  static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
  static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
  
  static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
         { return 0; }
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c

index 2b691f4d1f26c56b0c03ff88f1e9da062fd9b30e..d69560d5bf16cf6c01d1c7c36cbd5d44071a071e 100644 (file)
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
  }
  EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
  
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+       kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
  void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
  {
         /* might as well deliver this straight away */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 3ec1dc1bad1604994dc84592178ec26416084899..8df02be9be72f833d44ec854cb76313dd4a60efd 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -899,6 +899,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
          * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
          * have useful work to do and should not confer) so we don't
          * recheck that here.
+        *
+        * In the P9 single-vcpu-per-vcore case, the real-mode handler
+        * is not called, but there are no other threads in the source
+        * vcore.
          */
  
         spin_lock(&vcore->lock);
@@ -1142,12 +1146,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  }
  
  /*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
   * This has to be done early, not in kvmppc_pseries_do_hcall(), so
   * that the cede logic in kvmppc_run_single_vcpu() works properly.
   */
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
  {
         vcpu->arch.shregs.msr |= MSR_EE;
         vcpu->arch.ceded = 1;
@@ -1400,13 +1405,29 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
         }
         case BOOK3S_INTERRUPT_SYSCALL:
         {
-               /* hcall - punt to userspace */
                 int i;
  
-               /* hypercall with MSR_PR has already been handled in rmode,
-                * and never reaches here.
-                */
+               if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+                       /*
+                        * Guest userspace executed sc 1. This can only be
+                        * reached by the P9 path because the old path
+                        * handles this case in realmode hcall handlers.
+                        *
+                        * Radix guests can not run PR KVM or nested HV hash
+                        * guests which might run PR KVM, so this is always
+                        * a privilege fault. Send a program check to guest
+                        * kernel.
+                        */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+                       r = RESUME_GUEST;
+                       break;
+               }
  
+               /*
+                * hcall - gather args and set exit_reason. This will next be
+                * handled by kvmppc_pseries_do_hcall which may be able to deal
+                * with it and resume guest, or may punt to userspace.
+                */
                 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
                 for (i = 0; i < 9; ++i)
                         run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -3664,6 +3685,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
         return trap;
  }
  
+static inline bool hcall_is_xics(unsigned long req)
+{
+       return req == H_EOI || req == H_CPPR || req == H_IPI ||
+               req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
+}
+
  /*
   * Virtual-mode guest entry for POWER9 and later when the host and
   * guest are both using the radix MMU.  The LPIDR has already been set.
@@ -3787,15 +3814,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                 /* H_CEDE has to be handled now, not later */
                 if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
                     kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
-                       kvmppc_nested_cede(vcpu);
+                       kvmppc_cede(vcpu);
                         kvmppc_set_gpr(vcpu, 3, 0);
                         trap = 0;
                 }
         } else {
                 kvmppc_xive_push_vcpu(vcpu);
                 trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+               if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+                   !(vcpu->arch.shregs.msr & MSR_PR)) {
+                       unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+                       /* H_CEDE has to be handled now, not later */
+                       if (req == H_CEDE) {
+                               kvmppc_cede(vcpu);
+                               kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+                               kvmppc_set_gpr(vcpu, 3, 0);
+                               trap = 0;
+
+                       /* XICS hcalls must be handled before xive is pulled */
+                       } else if (hcall_is_xics(req)) {
+                               int ret;
+
+                               ret = kvmppc_xive_xics_hcall(vcpu, req);
+                               if (ret != H_TOO_HARD) {
+                                       kvmppc_set_gpr(vcpu, 3, ret);
+                                       trap = 0;
+                               }
+                       }
+               }
                 kvmppc_xive_pull_vcpu(vcpu);
-
         }
  
         vcpu->arch.slb_max = 0;
@@ -4461,8 +4509,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
                 else
                         r = kvmppc_run_vcpu(vcpu);
  
-               if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
-                   !(vcpu->arch.shregs.msr & MSR_PR)) {
+               if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+                       if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+                               /*
+                                * These should have been caught and reflected
+                                * into the guest by now. Final sanity check:
+                                * don't allow userspace to execute hcalls in
+                                * the hypervisor.
+                                */
+                               r = RESUME_GUEST;
+                               continue;
+                       }
                         trace_kvm_hcall_enter(vcpu);
                         r = kvmppc_pseries_do_hcall(vcpu);
                         trace_kvm_hcall_exit(vcpu, r);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 55d4d5495f5d076c9e9fc154f250b2045002b020..0637126be21ed73cb3af2f01fe50d03b4e0541ce 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1400,9 +1400,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         mr      r4,r9
         bge     fast_guest_return
  2:
+       /* If we came in through the P9 short path, no real mode hcalls */
+       lwz     r0, STACK_SLOT_SHORT_PATH(r1)
+       cmpwi   r0, 0
+       bne     no_try_real
         /* See if this is an hcall we can handle in real mode */
         cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
         beq     hcall_try_real_mode
+no_try_real:
  
         /* Hypervisor doorbell - exit only if host IPI flag set */
         cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c

index 741bf1f4387a5ed7b22a13a33ffa9fab70ace474..24c07094651a340e9af7fbd475fbcfe35ad01d4c 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -158,6 +158,40 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
  }
  EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
  
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+       void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+       if (!esc_vaddr)
+               return;
+
+       /* we are using XIVE with single escalation */
+
+       if (vcpu->arch.xive_esc_on) {
+               /*
+                * If we still have a pending escalation, abort the cede,
+                * and we must set PQ to 10 rather than 00 so that we don't
+                * potentially end up with two entries for the escalation
+                * interrupt in the XIVE interrupt queue.  In that case
+                * we also don't want to set xive_esc_on to 1 here in
+                * case we race with xive_esc_irq().
+                */
+               vcpu->arch.ceded = 0;
+               /*
+                * The escalation interrupts are special as we don't EOI them.
+                * There is no need to use the load-after-store ordering offset
+                * to set PQ to 10 as we won't use StoreEOI.
+                */
+               __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+       } else {
+               vcpu->arch.xive_esc_on = true;
+               mb();
+               __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+       }
+       mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
  /*
   * This is a simple trigger for a generic XIVE IRQ. This must
   * only be called for interrupts that support a trigger page
@@ -2106,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
         return 0;
  }
  
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+       /* The VM should have configured XICS mode before doing XICS hcalls. */
+       if (!kvmppc_xics_enabled(vcpu))
+               return H_TOO_HARD;
+
+       switch (req) {
+       case H_XIRR:
+               return xive_vm_h_xirr(vcpu);
+       case H_CPPR:
+               return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+       case H_EOI:
+               return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+       case H_IPI:
+               return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+                                         kvmppc_get_gpr(vcpu, 5));
+       case H_IPOLL:
+               return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+       case H_XIRR_X:
+               xive_vm_h_xirr(vcpu);
+               kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+               return H_SUCCESS;
+       }
+
+       return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
  int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
  {
         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
author	Nicholas Piggin <npiggin@gmail.com>
	Fri, 28 May 2021 09:07:33 +0000 (19:07 +1000)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Thu, 10 Jun 2021 12:12:13 +0000 (22:12 +1000)
arch/powerpc/include/asm/kvm_ppc.h		patch \| blob \| history
arch/powerpc/kvm/book3s.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rmhandlers.S		patch \| blob \| history
arch/powerpc/kvm/book3s_xive.c		patch \| blob \| history