KVM: x86/xen: intercept EVTCHNOP_send from guests
author Joao Martins <joao.m.martins@oracle.com> Thu, 3 Mar 2022 15:41:19 +0000 (15:41 +0000)
committer Paolo Bonzini <pbonzini@redhat.com> Sat, 2 Apr 2022 09:41:15 +0000 (05:41 -0400)
Userspace registers a sending @port whose events are delivered either
to an @eventfd or looped back directly to a local event channel port.
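
As an illustration, a minimal userspace sketch of such a registration
(vm_fd, evt_fd and the EVTCHNSTAT_* constants from the Xen interface
headers are assumptions here; error handling omitted):

    struct kvm_xen_hvm_attr attr = {
            .type = KVM_XEN_ATTR_TYPE_EVTCHN,
            .u.evtchn.send_port = 3,
            .u.evtchn.type = EVTCHNSTAT_interdomain,
            /* deliver.eventfd.port == 0 selects eventfd delivery;
             * a nonzero deliver.port.port loops back into the guest */
            .u.evtchn.deliver.eventfd.port = 0,
            .u.evtchn.deliver.eventfd.fd = evt_fd,
    };

    ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);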

After binding an event channel, the guest or host may wish to re-bind
it to a particular vcpu; this is usually done for unbound and
interdomain events. Such update requests are handled via the
KVM_XEN_EVTCHN_UPDATE flag.
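
For a channel registered with local-port delivery, an update may only
change the target vcpu and priority; the type and destination port
must match the original registration. A sketch, reusing attr above but
for a channel that was assigned with .deliver.port rather than an
eventfd:

    attr.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
    attr.u.evtchn.deliver.port.port = 3;    /* must stay unchanged */
    attr.u.evtchn.deliver.port.vcpu = 1;    /* new target vCPU */
    attr.u.evtchn.deliver.port.priority =
            KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);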

Sends to unregistered ports are still handled by the userspace
emulator: the hypercall exits to the VMM as KVM_EXIT_XEN with type
KVM_EXIT_XEN_HCALL.
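
In the VMM this shows up as a hypercall exit; a sketch of the run-loop
handling, where run is the vcpu's mmap'd kvm_run structure and
handle_xen_hcall() is a hypothetical VMM helper:

    if (run->exit_reason == KVM_EXIT_XEN &&
        run->xen.type == KVM_EXIT_XEN_HCALL) {
            /* input and params[] carry the hypercall and its arguments */
            run->xen.u.hcall.result =
                    handle_xen_hcall(run->xen.u.hcall.input,
                                     run->xen.u.hcall.params);
    }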

Co-developed-by: Ankur Arora <ankur.a.arora@oracle.com>
Co-developed-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-10-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/xen.c
include/uapi/linux/kvm.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7808491ebba8ad766083115d09eda106031312e8..b20f7d99d7022ff9cb5ee2825d115025c7d368d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1024,6 +1024,7 @@ struct kvm_xen {
        bool long_mode;
        u8 upcall_vector;
        struct gfn_to_pfn_cache shinfo_cache;
+       struct idr evtchn_ports;
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index f0f0011c4617dd8184527b3099a3af41d102b587..3d95167028ba97be5af5d9a9e485af2ef725a958 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -11,6 +11,7 @@
 #include "lapic.h"
 #include "hyperv.h"
 
+#include <linux/eventfd.h>
 #include <linux/kvm_host.h>
 #include <linux/sched/stat.h>
 
@@ -21,6 +22,9 @@
 
 #include "trace.h"
 
+static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
+static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r);
+
 DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
 
 static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
@@ -365,36 +369,44 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
        int r = -ENOENT;
 
-       mutex_lock(&kvm->lock);
 
        switch (data->type) {
        case KVM_XEN_ATTR_TYPE_LONG_MODE:
                if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
                        r = -EINVAL;
                } else {
+                       mutex_lock(&kvm->lock);
                        kvm->arch.xen.long_mode = !!data->u.long_mode;
+                       mutex_unlock(&kvm->lock);
                        r = 0;
                }
                break;
 
        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+               mutex_lock(&kvm->lock);
                r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
+               mutex_unlock(&kvm->lock);
                break;
 
        case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                if (data->u.vector && data->u.vector < 0x10)
                        r = -EINVAL;
                else {
+                       mutex_lock(&kvm->lock);
                        kvm->arch.xen.upcall_vector = data->u.vector;
+                       mutex_unlock(&kvm->lock);
                        r = 0;
                }
                break;
 
+       case KVM_XEN_ATTR_TYPE_EVTCHN:
+               r = kvm_xen_setattr_evtchn(kvm, data);
+               break;
+
        default:
                break;
        }
 
-       mutex_unlock(&kvm->lock);
        return r;
 }
 
@@ -770,18 +782,6 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
        return 0;
 }
 
-void kvm_xen_init_vm(struct kvm *kvm)
-{
-}
-
-void kvm_xen_destroy_vm(struct kvm *kvm)
-{
-       kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
-
-       if (kvm->arch.xen_hvm_config.msr)
-               static_branch_slow_dec_deferred(&kvm_xen_enabled);
-}
-
 static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 {
        kvm_rax_write(vcpu, result);
@@ -801,7 +801,8 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 {
        bool longmode;
-       u64 input, params[6];
+       u64 input, params[6], r = -ENOSYS;
+       bool handled = false;
 
        input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
 
@@ -832,6 +833,19 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
        trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
                                params[3], params[4], params[5]);
 
+       switch (input) {
+       case __HYPERVISOR_event_channel_op:
+               if (params[0] == EVTCHNOP_send)
+                       handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
+               break;
+
+       default:
+               break;
+       }
+
+       if (handled)
+               return kvm_xen_hypercall_set_result(vcpu, r);
+
        vcpu->run->exit_reason = KVM_EXIT_XEN;
        vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
        vcpu->run->xen.u.hcall.longmode = longmode;
@@ -1118,6 +1132,234 @@ int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *
        return ret;
 }
 
+/*
+ * Support for *outbound* event channel events via the EVTCHNOP_send hypercall.
+ */
+struct evtchnfd {
+       u32 send_port;
+       u32 type;
+       union {
+               struct kvm_xen_evtchn port;
+               struct {
+                       u32 port; /* zero */
+                       struct eventfd_ctx *ctx;
+               } eventfd;
+       } deliver;
+};
+
+/*
+ * Update target vCPU or priority for a registered sending channel.
+ */
+static int kvm_xen_eventfd_update(struct kvm *kvm,
+                                 struct kvm_xen_hvm_attr *data)
+{
+       u32 port = data->u.evtchn.send_port;
+       struct evtchnfd *evtchnfd;
+
+       if (!port || port >= max_evtchn_port(kvm))
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
+       mutex_unlock(&kvm->lock);
+
+       if (!evtchnfd)
+               return -ENOENT;
+
+       /* For an UPDATE, nothing may change except the priority/vcpu */
+       if (evtchnfd->type != data->u.evtchn.type)
+               return -EINVAL;
+
+       /*
+        * Port cannot change, and if it's zero that was an eventfd
+        * which can't be changed either.
+        */
+       if (!evtchnfd->deliver.port.port ||
+           evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
+               return -EINVAL;
+
+       /* We only support 2 level event channels for now */
+       if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
+       if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
+               evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
+               evtchnfd->deliver.port.vcpu_idx = -1;
+       }
+       mutex_unlock(&kvm->lock);
+       return 0;
+}
+
+/*
+ * Configure the target (eventfd or local port delivery) for sending on
+ * a given event channel.
+ */
+static int kvm_xen_eventfd_assign(struct kvm *kvm,
+                                 struct kvm_xen_hvm_attr *data)
+{
+       u32 port = data->u.evtchn.send_port;
+       struct eventfd_ctx *eventfd = NULL;
+       struct evtchnfd *evtchnfd = NULL;
+       int ret = -EINVAL;
+
+       if (!port || port >= max_evtchn_port(kvm))
+               return -EINVAL;
+
+       evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
+       if (!evtchnfd)
+               return -ENOMEM;
+
+       switch (data->u.evtchn.type) {
+       case EVTCHNSTAT_ipi:
+               /* IPI must map back to the same port# */
+               if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
+                       goto out; /* -EINVAL */
+               break;
+
+       case EVTCHNSTAT_interdomain:
+               if (data->u.evtchn.deliver.port.port) {
+                       if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
+                               goto out; /* -EINVAL */
+               } else {
+                       eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
+                       if (IS_ERR(eventfd)) {
+                               ret = PTR_ERR(eventfd);
+                               goto out;
+                       }
+               }
+               break;
+
+       case EVTCHNSTAT_virq:
+       case EVTCHNSTAT_closed:
+       case EVTCHNSTAT_unbound:
+       case EVTCHNSTAT_pirq:
+       default: /* Unknown event channel type */
+               goto out; /* -EINVAL */
+       }
+
+       evtchnfd->send_port = data->u.evtchn.send_port;
+       evtchnfd->type = data->u.evtchn.type;
+       if (eventfd) {
+               evtchnfd->deliver.eventfd.ctx = eventfd;
+       } else {
+               /* We only support 2 level event channels for now */
+               if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
+                       goto out; /* -EINVAL; */
+
+               evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port;
+               evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
+               evtchnfd->deliver.port.vcpu_idx = -1;
+               evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
+       }
+
+       mutex_lock(&kvm->lock);
+       ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
+                       GFP_KERNEL);
+       mutex_unlock(&kvm->lock);
+       if (ret >= 0)
+               return 0;
+
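+       /* With range [port, port + 1), -ENOSPC means the port is already in use */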
+       if (ret == -ENOSPC)
+               ret = -EEXIST;
+out:
+       if (eventfd)
+               eventfd_ctx_put(eventfd);
+       kfree(evtchnfd);
+       return ret;
+}
+
+static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
+{
+       struct evtchnfd *evtchnfd;
+
+       mutex_lock(&kvm->lock);
+       evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
+       mutex_unlock(&kvm->lock);
+
+       if (!evtchnfd)
+               return -ENOENT;
+
+       if (kvm)
+               synchronize_srcu(&kvm->srcu);
+       if (!evtchnfd->deliver.port.port)
+               eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
+       kfree(evtchnfd);
+       return 0;
+}
+
+static int kvm_xen_eventfd_reset(struct kvm *kvm)
+{
+       struct evtchnfd *evtchnfd;
+       int i;
+
+       mutex_lock(&kvm->lock);
+       idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
+               idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
+               synchronize_srcu(&kvm->srcu);
+               if (!evtchnfd->deliver.port.port)
+                       eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
+               kfree(evtchnfd);
+       }
+       mutex_unlock(&kvm->lock);
+
+       return 0;
+}
+
+static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
+{
+       u32 port = data->u.evtchn.send_port;
+
+       if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET)
+               return kvm_xen_eventfd_reset(kvm);
+
+       if (!port || port >= max_evtchn_port(kvm))
+               return -EINVAL;
+
+       if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN)
+               return kvm_xen_eventfd_deassign(kvm, port);
+       if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE)
+               return kvm_xen_eventfd_update(kvm, data);
+       if (data->u.evtchn.flags)
+               return -EINVAL;
+
+       return kvm_xen_eventfd_assign(kvm, data);
+}
+
+static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
+{
+       struct evtchnfd *evtchnfd;
+       struct evtchn_send send;
+       gpa_t gpa;
+       int idx;
+
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+       if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) {
+               *r = -EFAULT;
+               return true;
+       }
+
+       /* The evtchn_ports idr is protected by vcpu->kvm->srcu */
+       evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
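+       /* Unregistered port: fall back to the userspace emulator via KVM_EXIT_XEN */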
+       if (!evtchnfd)
+               return false;
+
+       if (evtchnfd->deliver.port.port) {
+               int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm);
+               if (ret < 0 && ret != -ENOTCONN)
+                       return false;
+       } else {
+               eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
+       }
+
+       *r = 0;
+       return true;
+}
+
 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 {
        kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
@@ -1127,3 +1369,26 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
        kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
                                     &vcpu->arch.xen.vcpu_time_info_cache);
 }
+
+void kvm_xen_init_vm(struct kvm *kvm)
+{
+       idr_init(&kvm->arch.xen.evtchn_ports);
+}
+
+void kvm_xen_destroy_vm(struct kvm *kvm)
+{
+       struct evtchnfd *evtchnfd;
+       int i;
+
+       kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+
+       idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
+               if (!evtchnfd->deliver.port.port)
+                       eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
+               kfree(evtchnfd);
+       }
+       idr_destroy(&kvm->arch.xen.evtchn_ports);
+
+       if (kvm->arch.xen_hvm_config.msr)
+               static_branch_slow_dec_deferred(&kvm_xen_enabled);
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 49cd2e9e0f6ac8f07b8f86f3abe2dc486147251c..623ed2cb228f7bf1f884267ec8868b3e254c167e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1686,6 +1686,32 @@ struct kvm_xen_hvm_attr {
                struct {
                        __u64 gfn;
                } shared_info;
+               struct {
+                       __u32 send_port;
+                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+                       __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN                (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE          (1 << 1)
+#define KVM_XEN_EVTCHN_RESET           (1 << 2)
+                       /*
+                        * Events sent by the guest are either looped back to
+                        * the guest itself (potentially on a different port#)
+                        * or signalled via an eventfd.
+                        */
+                       union {
+                               struct {
+                                       __u32 port;
+                                       __u32 vcpu;
+                                       __u32 priority;
+                               } port;
+                               struct {
+                                       __u32 port; /* Zero for eventfd */
+                                       __s32 fd;
+                               } eventfd;
+                               __u32 padding[4];
+                       } deliver;
+               } evtchn;
+
                __u64 pad[8];
        } u;
 };
@@ -1694,6 +1720,8 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)