KVM: x86/xen: handle PV spinlocks slowpath

author Boris Ostrovsky <boris.ostrovsky@oracle.com>

Thu, 3 Mar 2022 15:41:26 +0000 (15:41 +0000)

committer Paolo Bonzini <pbonzini@redhat.com>

Sat, 2 Apr 2022 09:41:17 +0000 (05:41 -0400)
author Boris Ostrovsky <boris.ostrovsky@oracle.com>
Thu, 3 Mar 2022 15:41:26 +0000 (15:41 +0000)
committer Paolo Bonzini <pbonzini@redhat.com>
Sat, 2 Apr 2022 09:41:17 +0000 (05:41 -0400)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 998caf7a3ce96f4860a91ec1af9aec5e7a62c7f2..5370744b789cc26e148bdad262b64fe4b8ca4e18 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -619,6 +619,8 @@ struct kvm_vcpu_xen {
         u64 timer_expires; /* In guest epoch */
         atomic_t timer_pending;
         struct hrtimer timer;
+       int poll_evtchn;
+       struct timer_list poll_timer;
  };
  
  struct kvm_vcpu_arch {
@@ -1032,6 +1034,7 @@ struct kvm_xen {
         u8 upcall_vector;
         struct gfn_to_pfn_cache shinfo_cache;
         struct idr evtchn_ports;
+       unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
  };
  
  enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c

index 53232c9ff89c7c67bb821e49c6da24c364a5cb03..7e7c8a5bff52a3f5d003941261e7576726549ff1 100644 (file)
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -10,6 +10,7 @@
  #include "xen.h"
  #include "lapic.h"
  #include "hyperv.h"
+#include "lapic.h"
  
  #include <linux/eventfd.h>
  #include <linux/kvm_host.h>
@@ -954,9 +955,146 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
         return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
  }
  
-static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param, u64 *r)
+/*
+ * Report whether any of the @nr_ports event channels listed in @ports is
+ * already set in the evtchn_pending bitmap of the guest's shared_info page.
+ *
+ * Returns true as well when kvm_gfn_to_pfn_cache_check() rejects the
+ * shared_info cache, so that the caller does not go on to halt the vCPU.
+ */
+static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
+                              evtchn_port_t *ports)
+{
+       struct kvm *kvm = vcpu->kvm;
+       struct gfn_to_pfn_cache *cache = &kvm->arch.xen.shinfo_cache;
+       unsigned long *pending;
+       unsigned long irqflags;
+       bool found = true;
+       int srcu_idx, n;
+
+       read_lock_irqsave(&cache->lock, irqflags);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+
+       if (kvm_gfn_to_pfn_cache_check(kvm, cache, cache->gpa, PAGE_SIZE)) {
+               /* 64-bit and compat guests use different shared_info types. */
+               if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+                       struct shared_info *shinfo = cache->khva;
+                       pending = (unsigned long *)&shinfo->evtchn_pending;
+               } else {
+                       struct compat_shared_info *shinfo = cache->khva;
+                       pending = (unsigned long *)&shinfo->evtchn_pending;
+               }
+
+               /* Stop scanning at the first port found pending. */
+               found = false;
+               for (n = 0; n < nr_ports && !found; n++)
+                       found = test_bit(ports[n], pending);
+       }
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+       read_unlock_irqrestore(&cache->lock, irqflags);
+
+       return found;
+}
+
+/* Defined later in this file; declared here so ports can be range-checked. */
+static inline int max_evtchn_port(struct kvm *kvm);
+
+/*
+ * In-kernel handling of SCHEDOP_poll.
+ *
+ * @param is the guest virtual address of a struct sched_poll. The listed
+ * ports are copied in from guest memory and, unless wait_pending_event()
+ * reports one of them pending, the vCPU is halted with kvm_vcpu_halt().
+ * kvm_xen_check_poller() and the poll timer both raise KVM_REQ_UNBLOCK to
+ * end that wait.
+ *
+ * Returns false, leaving *r untouched, when the hypercall is not handled
+ * here (guest not in long mode, no in-kernel LAPIC, or
+ * KVM_XEN_HVM_CONFIG_EVTCHN_SEND not set). Otherwise returns true with *r
+ * set to 0, or to -EFAULT, -EINVAL or -ENOMEM on failure.
+ */
+static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
+                                u64 param, u64 *r)
+{
+       int idx, i;
+       struct sched_poll sched_poll;
+       evtchn_port_t port, *ports;
+       gpa_t gpa;
+
+       if (!longmode || !lapic_in_kernel(vcpu) ||
+           !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
+               return false;
+
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+       if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
+                                       sizeof(sched_poll))) {
+               *r = -EFAULT;
+               return true;
+       }
+
+       if (unlikely(sched_poll.nr_ports > 1)) {
+               /* Xen (unofficially) limits number of pollers to 128 */
+               if (sched_poll.nr_ports > 128) {
+                       *r = -EINVAL;
+                       return true;
+               }
+
+               ports = kmalloc_array(sched_poll.nr_ports,
+                                     sizeof(*ports), GFP_KERNEL);
+               if (!ports) {
+                       *r = -ENOMEM;
+                       return true;
+               }
+       } else {
+               /* Zero or one port: use the on-stack slot, nothing to free. */
+               ports = &port;
+       }
+
+       for (i = 0; i < sched_poll.nr_ports; i++) {
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
+               gpa = kvm_mmu_gva_to_gpa_system(vcpu,
+                                               (gva_t)(sched_poll.ports + i),
+                                               NULL);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+               if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
+                                               &ports[i], sizeof(port))) {
+                       *r = -EFAULT;
+                       goto out;
+               }
+
+               /*
+                * The port number is guest-controlled and is later used as
+                * a bit index into the evtchn_pending bitmap, so reject
+                * anything that lies beyond the bitmap.
+                */
+               if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
+                       *r = -EINVAL;
+                       goto out;
+               }
+       }
+
+       /*
+        * Record what is being polled before looking at the pending bits;
+        * kvm_xen_check_poller() consults both poll_evtchn and poll_mask.
+        * -1 stands for "not exactly one port".
+        */
+       if (sched_poll.nr_ports == 1)
+               vcpu->arch.xen.poll_evtchn = port;
+       else
+               vcpu->arch.xen.poll_evtchn = -1;
+
+       set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+       if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
+               vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+
+               /* A zero timeout arms no timer. */
+               if (sched_poll.timeout)
+                       mod_timer(&vcpu->arch.xen.poll_timer,
+                                 jiffies + nsecs_to_jiffies(sched_poll.timeout));
+
+               kvm_vcpu_halt(vcpu);
+
+               if (sched_poll.timeout)
+                       del_timer(&vcpu->arch.xen.poll_timer);
+
+               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+       }
+
+       vcpu->arch.xen.poll_evtchn = 0;
+       *r = 0;
+out:
+       /* Really, this is only needed in case of timeout */
+       clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+       /* ports points at the on-stack slot unless nr_ports > 1. */
+       if (unlikely(sched_poll.nr_ports > 1))
+               kfree(ports);
+       return true;
+}
+
+/*
+ * Callback for the per-vCPU poll_timer: raise KVM_REQ_UNBLOCK on the vCPU
+ * that embeds timer @t and kick it.
+ */
+static void cancel_evtchn_poll(struct timer_list *t)
+{
+       struct kvm_vcpu *poller = from_timer(poller, t, arch.xen.poll_timer);
+
+       kvm_make_request(KVM_REQ_UNBLOCK, poller);
+       kvm_vcpu_kick(poller);
+}
+
+static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
+                                  int cmd, u64 param, u64 *r)
  {
         switch (cmd) {
+       case SCHEDOP_poll:
+               if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
+                       return true;
+               fallthrough;
         case SCHEDOP_yield:
                 kvm_vcpu_on_spin(vcpu, true);
                 *r = 0;
@@ -1121,7 +1259,8 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
                         handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
                 break;
         case __HYPERVISOR_sched_op:
-               handled = kvm_xen_hcall_sched_op(vcpu, params[0], params[1], &r);
+               handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
+                                                params[1], &r);
                 break;
         case __HYPERVISOR_vcpu_op:
                 handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
@@ -1168,6 +1307,17 @@ static inline int max_evtchn_port(struct kvm *kvm)
                 return COMPAT_EVTCHN_2L_NR_CHANNELS;
  }
  
+/*
+ * Unblock @vcpu if it is polling on @port.
+ *
+ * A poll_evtchn value of -1 matches every port. The vCPU is only kicked if
+ * its bit in the VM-wide poll_mask was still set; that bit is cleared here.
+ */
+static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+{
+       int watched = vcpu->arch.xen.poll_evtchn;
+
+       /* Not polling on this port, and not the -1 wildcard either. */
+       if (watched != port && watched != -1)
+               return;
+
+       /* Bit already clear: nothing to wake. */
+       if (!test_and_clear_bit(kvm_vcpu_get_idx(vcpu),
+                               vcpu->kvm->arch.xen.poll_mask))
+               return;
+
+       kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+       kvm_vcpu_kick(vcpu);
+}
+
  /*
   * The return value from this function is propagated to kvm_set_irq() API,
   * so it returns:
@@ -1235,6 +1385,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                 rc = 0; /* It was already raised */
         } else if (test_bit(xe->port, mask_bits)) {
                 rc = -ENOTCONN; /* Masked */
+               kvm_xen_check_poller(vcpu, xe->port);
         } else {
                 rc = 1; /* Delivered to the bitmap in shared_info. */
                 /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
@@ -1665,6 +1816,8 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
  void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
  {
         vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
+       vcpu->arch.xen.poll_evtchn = 0; /* same value SCHEDOP_poll leaves on exit */
+       timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
  }
  
  void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1678,6 +1831,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
                                      &vcpu->arch.xen.vcpu_info_cache);
         kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
                                      &vcpu->arch.xen.vcpu_time_info_cache);
+       del_timer_sync(&vcpu->arch.xen.poll_timer);
  }
  
  void kvm_xen_init_vm(struct kvm *kvm)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

index 865e17146815a6585d801a31abf4a63c853d1dff..376c611443cd33bfa0ba4a302ef753a5b313b9b2 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -233,6 +233,12 @@ int main(int argc, char *argv[])
                 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
                 .msr = XEN_HYPERCALL_MSR,
         };
+
+       /* Let the kernel know that we *will* use it for sending all
+        * event channels, which lets it intercept SCHEDOP_poll */
+       if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
+               hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
         vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
  
         struct kvm_xen_hvm_attr lm = {
author	Boris Ostrovsky <boris.ostrovsky@oracle.com>
	Thu, 3 Mar 2022 15:41:26 +0000 (15:41 +0000)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 2 Apr 2022 09:41:17 +0000 (05:41 -0400)
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/xen.c		patch \| blob \| history
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c		patch \| blob \| history