git.baikalelectronics.ru Git - kernel.git/commitdiff
xen/events: defer eoi in case of excessive number of events
author Juergen Gross <jgross@suse.com>
Mon, 7 Sep 2020 13:47:30 +0000 (15:47 +0200)
committer Juergen Gross <jgross@suse.com>
Tue, 20 Oct 2020 08:22:16 +0000 (10:22 +0200)
In case rogue guests are sending events at high frequency, it might
happen that xen_evtchn_do_upcall() won't stop processing events in
dom0. As this is done in irq handling, a crash might be the result.

In order to avoid that, delay further inter-domain events after some
time in xen_evtchn_do_upcall() by forcing eoi processing into a
worker on the same cpu, thus inhibiting new events coming in.

The time after which eoi processing is to be delayed is configurable
via a new module parameter "event_loop_timeout", which specifies the
maximum event loop time in jiffies (default: 2; this value was chosen
after tests showed that 2 was the lowest value causing only a slight
drop of dom0 network throughput while multiple guests performed an
event storm).

How long eoi processing will be delayed can be specified via another
parameter "event_eoi_delay" (again in jiffies, default 10, again the
value was chosen after testing with different delay values).

This is part of XSA-332.

Cc: stable@vger.kernel.org
Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Wei Liu <wl@xen.org>
Documentation/admin-guide/kernel-parameters.txt
drivers/xen/events/events_2l.c
drivers/xen/events/events_base.c
drivers/xen/events/events_fifo.c
drivers/xen/events/events_internal.h

index a1068742a6df11375e2b47135a19c2c6befa59be..89d977f0b78605afcc4739f44465b5f803545876 100644 (file)
                        improve timer resolution at the expense of processing
                        more timer interrupts.
 
+       xen.event_eoi_delay=    [XEN]
+                       How long to delay EOI handling in case of event
+                       storms (jiffies). Default is 10.
+
+       xen.event_loop_timeout= [XEN]
+                       After which time (jiffies) the event handling loop
+                       should start to delay EOI handling. Default is 2.
+
        nopv=           [X86,XEN,KVM,HYPER_V,VMWARE]
                        Disables the PV optimizations forcing the guest to run
                        as generic guest with no PV drivers. Currently support
index e1af5e093ff4550c199290f0e496b6593192c155..fe5ad0e89cd8a7006449a1c71d38c921c116a27d 100644 (file)
@@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
  * a bitset of words which contain pending event bits.  The second
  * level is a bitset of pending events themselves.
  */
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
 {
        int irq;
        xen_ulong_t pending_words;
@@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
 
                        /* Process port. */
                        port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-                       irq = get_evtchn_to_irq(port);
-
-                       if (irq != -1)
-                               generic_handle_irq(irq);
+                       handle_irq_for_port(port, ctrl);
 
                        bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
 
index 9cbfea5e9a084a9f39e440f4bf9fcfbaaef32295..cde096a6f11d0a8807d3b6e699ba40f18d020cbb 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
 
 #ifdef CONFIG_X86
 #include <asm/desc.h>
 
 #include "events_internal.h"
 
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
 const struct evtchn_ops *evtchn_ops;
 
 /*
@@ -88,6 +99,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
  * irq_mapping_update_lock
  *   evtchn_rwlock
  *     IRQ-desc lock
+ *       percpu eoi_list_lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
@@ -120,6 +132,8 @@ static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
 static void disable_dynirq(struct irq_data *data);
 
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
 static void clear_evtchn_to_irq_row(unsigned row)
 {
        unsigned col;
@@ -399,17 +413,120 @@ void notify_remote_via_irq(int irq)
 }
 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
 
+struct lateeoi_work {
+       struct delayed_work delayed;
+       spinlock_t eoi_list_lock;
+       struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+       unsigned long flags;
+
+       spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+       list_del_init(&info->eoi_list);
+       spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+       struct irq_info *elem;
+       u64 now = get_jiffies_64();
+       unsigned long delay;
+       unsigned long flags;
+
+       if (now < info->eoi_time)
+               delay = info->eoi_time - now;
+       else
+               delay = 1;
+
+       spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+       if (list_empty(&eoi->eoi_list)) {
+               list_add(&info->eoi_list, &eoi->eoi_list);
+               mod_delayed_work_on(info->eoi_cpu, system_wq,
+                                   &eoi->delayed, delay);
+       } else {
+               list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+                       if (elem->eoi_time <= info->eoi_time)
+                               break;
+               }
+               list_add(&info->eoi_list, &elem->eoi_list);
+       }
+
+       spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
 static void xen_irq_lateeoi_locked(struct irq_info *info)
 {
        evtchn_port_t evtchn;
+       unsigned int cpu;
 
        evtchn = info->evtchn;
-       if (!VALID_EVTCHN(evtchn))
+       if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
                return;
 
+       cpu = info->eoi_cpu;
+       if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
+               lateeoi_list_add(info);
+               return;
+       }
+
+       info->eoi_time = 0;
        unmask_evtchn(evtchn);
 }
 
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+       struct lateeoi_work *eoi;
+       struct irq_info *info;
+       u64 now = get_jiffies_64();
+       unsigned long flags;
+
+       eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+       read_lock_irqsave(&evtchn_rwlock, flags);
+
+       while (true) {
+               spin_lock(&eoi->eoi_list_lock);
+
+               info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+                                               eoi_list);
+
+               if (info == NULL || now < info->eoi_time) {
+                       spin_unlock(&eoi->eoi_list_lock);
+                       break;
+               }
+
+               list_del_init(&info->eoi_list);
+
+               spin_unlock(&eoi->eoi_list_lock);
+
+               info->eoi_time = 0;
+
+               xen_irq_lateeoi_locked(info);
+       }
+
+       if (info)
+               mod_delayed_work_on(info->eoi_cpu, system_wq,
+                                   &eoi->delayed, info->eoi_time - now);
+
+       read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+       INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+       spin_lock_init(&eoi->eoi_list_lock);
+       INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 {
        struct irq_info *info;
@@ -429,6 +546,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
 static void xen_irq_init(unsigned irq)
 {
        struct irq_info *info;
+
 #ifdef CONFIG_SMP
        /* By default all event channels notify CPU#0. */
        cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -443,6 +561,7 @@ static void xen_irq_init(unsigned irq)
 
        set_info_for_irq(irq, info);
 
+       INIT_LIST_HEAD(&info->eoi_list);
        list_add_tail(&info->list, &xen_irq_list_head);
 }
 
@@ -498,6 +617,9 @@ static void xen_free_irq(unsigned irq)
 
        write_lock_irqsave(&evtchn_rwlock, flags);
 
+       if (!list_empty(&info->eoi_list))
+               lateeoi_list_del(info);
+
        list_del(&info->list);
 
        set_info_for_irq(irq, NULL);
@@ -1358,17 +1480,66 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
        notify_remote_via_irq(irq);
 }
 
+struct evtchn_loop_ctrl {
+       ktime_t timeout;
+       unsigned count;
+       bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+       int irq;
+       struct irq_info *info;
+
+       irq = get_evtchn_to_irq(port);
+       if (irq == -1)
+               return;
+
+       /*
+        * Check for timeout every 256 events.
+        * We are setting the timeout value only after the first 256
+        * events in order to not hurt the common case of few loop
+        * iterations. The 256 is basically an arbitrary value.
+        *
+        * In case we are hitting the timeout we need to defer all further
+        * EOIs in order to ensure to leave the event handling loop rather
+        * sooner than later.
+        */
+       if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+               ktime_t kt = ktime_get();
+
+               if (!ctrl->timeout) {
+                       kt = ktime_add_ms(kt,
+                                         jiffies_to_msecs(event_loop_timeout));
+                       ctrl->timeout = kt;
+               } else if (kt > ctrl->timeout) {
+                       ctrl->defer_eoi = true;
+               }
+       }
+
+       info = info_for_irq(irq);
+
+       if (ctrl->defer_eoi) {
+               info->eoi_cpu = smp_processor_id();
+               info->irq_epoch = __this_cpu_read(irq_epoch);
+               info->eoi_time = get_jiffies_64() + event_eoi_delay;
+       }
+
+       generic_handle_irq(irq);
+}
+
 static void __xen_evtchn_do_upcall(void)
 {
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
        int cpu = smp_processor_id();
+       struct evtchn_loop_ctrl ctrl = { 0 };
 
        read_lock(&evtchn_rwlock);
 
        do {
                vcpu_info->evtchn_upcall_pending = 0;
 
-               xen_evtchn_handle_events(cpu);
+               xen_evtchn_handle_events(cpu, &ctrl);
 
                BUG_ON(!irqs_disabled());
 
@@ -1377,6 +1548,13 @@ static void __xen_evtchn_do_upcall(void)
        } while (vcpu_info->evtchn_upcall_pending);
 
        read_unlock(&evtchn_rwlock);
+
+       /*
+        * Increment irq_epoch only now to defer EOIs only for
+        * xen_irq_lateeoi() invocations occurring from inside the loop
+        * above.
+        */
+       __this_cpu_inc(irq_epoch);
 }
 
 void xen_evtchn_do_upcall(struct pt_regs *regs)
@@ -1825,9 +2003,6 @@ void xen_setup_callback_vector(void) {}
 static inline void xen_alloc_callback_vector(void) {}
 #endif
 
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
 static bool fifo_events = true;
 module_param(fifo_events, bool, 0);
 
@@ -1835,6 +2010,8 @@ static int xen_evtchn_cpu_prepare(unsigned int cpu)
 {
        int ret = 0;
 
+       xen_cpu_init_eoi(cpu);
+
        if (evtchn_ops->percpu_init)
                ret = evtchn_ops->percpu_init(cpu);
 
@@ -1861,6 +2038,8 @@ void __init xen_init_IRQ(void)
        if (ret < 0)
                xen_evtchn_2l_init();
 
+       xen_cpu_init_eoi(smp_processor_id());
+
        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
                                  "xen/evtchn:prepare",
                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
index 40e4ca1685aa1be76626229a6241e06bb1ba432d..6085a808da95c938e7b0f37d1710ba37453df992 100644 (file)
@@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
        return w & EVTCHN_FIFO_LINK_MASK;
 }
 
-static void handle_irq_for_port(evtchn_port_t port)
-{
-       int irq;
-
-       irq = get_evtchn_to_irq(port);
-       if (irq != -1)
-               generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
                              struct evtchn_fifo_control_block *control_block,
-                             unsigned priority, unsigned long *ready,
-                             bool drop)
+                             unsigned priority, unsigned long *ready)
 {
        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
        uint32_t head;
@@ -320,16 +310,17 @@ static void consume_one_event(unsigned cpu,
                clear_bit(priority, ready);
 
        if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
-               if (unlikely(drop))
+               if (unlikely(!ctrl))
                        pr_warn("Dropping pending event for port %u\n", port);
                else
-                       handle_irq_for_port(port);
+                       handle_irq_for_port(port, ctrl);
        }
 
        q->head[priority] = head;
 }
 
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+                                       struct evtchn_loop_ctrl *ctrl)
 {
        struct evtchn_fifo_control_block *control_block;
        unsigned long ready;
@@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
 
        while (ready) {
                q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
-               consume_one_event(cpu, control_block, q, &ready, drop);
+               consume_one_event(cpu, ctrl, control_block, q, &ready);
                ready |= xchg(&control_block->ready, 0);
        }
 }
 
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+                                     struct evtchn_loop_ctrl *ctrl)
 {
-       __evtchn_fifo_handle_events(cpu, false);
+       __evtchn_fifo_handle_events(cpu, ctrl);
 }
 
 static void evtchn_fifo_resume(void)
@@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsigned int cpu)
 
 static int evtchn_fifo_percpu_deinit(unsigned int cpu)
 {
-       __evtchn_fifo_handle_events(cpu, true);
+       __evtchn_fifo_handle_events(cpu, NULL);
        return 0;
 }
 
index 558abea19d0dd8d7a4e40b8783971d19dc89a9ab..aac05cf52ced8592fb0d9bf1e9fad3f7fdb7bd35 100644 (file)
@@ -30,11 +30,15 @@ enum xen_irq_type {
  */
 struct irq_info {
        struct list_head list;
+       struct list_head eoi_list;
        int refcnt;
        enum xen_irq_type type; /* type */
        unsigned irq;
        evtchn_port_t evtchn;   /* event channel */
        unsigned short cpu;     /* cpu bound */
+       unsigned short eoi_cpu; /* EOI must happen on this cpu */
+       unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+       u64 eoi_time;           /* Time in jiffies when to EOI. */
 
        union {
                unsigned short virq;
@@ -53,6 +57,8 @@ struct irq_info {
 #define PIRQ_SHAREABLE (1 << 1)
 #define PIRQ_MSI_GROUP (1 << 2)
 
+struct evtchn_loop_ctrl;
+
 struct evtchn_ops {
        unsigned (*max_channels)(void);
        unsigned (*nr_channels)(void);
@@ -67,7 +73,7 @@ struct evtchn_ops {
        void (*mask)(evtchn_port_t port);
        void (*unmask)(evtchn_port_t port);
 
-       void (*handle_events)(unsigned cpu);
+       void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
        void (*resume)(void);
 
        int (*percpu_init)(unsigned int cpu);
@@ -78,6 +84,7 @@ extern const struct evtchn_ops *evtchn_ops;
 
 extern int **evtchn_to_irq;
 int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
 
 struct irq_info *info_for_irq(unsigned irq);
 unsigned cpu_from_irq(unsigned irq);
@@ -135,9 +142,10 @@ static inline void unmask_evtchn(evtchn_port_t port)
        return evtchn_ops->unmask(port);
 }
 
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+                                           struct evtchn_loop_ctrl *ctrl)
 {
-       return evtchn_ops->handle_events(cpu);
+       return evtchn_ops->handle_events(cpu, ctrl);
 }
 
 static inline void xen_evtchn_resume(void)