From: Paolo Bonzini Date: Tue, 18 Jan 2022 09:45:32 +0000 (-0500) Subject: Merge branch 'kvm-pi-raw-spinlock' into HEAD X-Git-Tag: baikal/aarch64/sdk6.1~4811^2~24 X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=7fea5002e0c4b4d11435b6e616561bc7e52e30f2;p=kernel.git Merge branch 'kvm-pi-raw-spinlock' into HEAD Bring in fix for VT-d posted interrupts before further changing the code in 5.17. Signed-off-by: Paolo Bonzini --- 7fea5002e0c4b4d11435b6e616561bc7e52e30f2 diff --cc arch/x86/kvm/vmx/posted_intr.c index 88c53c521094a,21ea58d25771f..a82a15aa1982f --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@@ -11,23 -11,11 +11,23 @@@ #include "vmx.h" /* - * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we - * can find which vCPU should be waken up. + * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler() + * when a WAKEUP_VECTOR interrupted is posted. vCPUs are added to the list when + * the vCPU is scheduled out and is blocking (e.g. in HLT) with IRQs enabled. + * The vCPUs posted interrupt descriptor is updated at the same time to set its + * notification vector to WAKEUP_VECTOR, so that posted interrupt from devices + * wake the target vCPUs. vCPUs are removed from the list and the notification + * vector is reset when the vCPU is scheduled in. */ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +/* + * Protect the per-CPU list with a per-CPU spinlock to handle task migration. + * When a blocking vCPU is awakened _and_ migrated to a different pCPU, the + * ->sched_in() path will need to take the vCPU off the list of the _previous_ + * CPU. IRQs must be disabled when taking this lock, otherwise deadlock will + * occur if a wakeup IRQ arrives and attempts to acquire the lock. + */ - static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + static DEFINE_PER_CPU(raw_spinlock_t, blocked_vcpu_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { @@@ -129,25 -103,17 +129,25 @@@ static void __pi_post_block(struct kvm_ struct pi_desc old, new; unsigned int dest; - do { - old.control = new.control = pi_desc->control; - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the VCPU is blocked\n"); + /* + * Remove the vCPU from the wakeup list of the _previous_ pCPU, which + * will not be the same as the current pCPU if the task was migrated. + */ - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - dest = cpu_physical_id(vcpu->cpu); + dest = cpu_physical_id(vcpu->cpu); + if (!x2apic_mode) + dest = (dest << 8) & 0xFF00; - if (x2apic_mode) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; + WARN(pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR, + "Wakeup handler not enabled while the vCPU was blocking"); + + do { + old.control = new.control = READ_ONCE(pi_desc->control); + + new.ndst = dest; /* set 'NV' to 'notification vector' */ new.nv = POSTED_INTR_VECTOR; @@@ -170,27 -143,45 +170,27 @@@ */ int pi_pre_block(struct kvm_vcpu *vcpu) { - unsigned int dest; struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + unsigned long flags; - if (!vmx_can_use_vtd_pi(vcpu->kvm)) + if (!vmx_can_use_vtd_pi(vcpu->kvm) || + vmx_interrupt_blocked(vcpu)) return 0; - WARN_ON(irqs_disabled()); - local_irq_disable(); - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { - vcpu->pre_pcpu = vcpu->cpu; - raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, - vcpu->pre_pcpu)); - raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - } + local_irq_save(flags); + + vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); ++ raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, vcpu->cpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); ++ raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); + + WARN(pi_desc->sn == 1, + "Posted Interrupt Suppress Notification set before blocking"); do { - old.control = new.control = pi_desc->control; - - WARN((pi_desc->sn == 1), - "Warning: SN field of posted-interrupts " - "is set before blocking\n"); - - /* - * Since vCPU can be preempted during this process, - * vcpu->cpu could be different with pre_pcpu, we - * need to set pre_pcpu as the destination of wakeup - * notification event, then we can find the right vCPU - * to wakeup in wakeup handler if interrupts happen - * when the vCPU is in blocked state. - */ - dest = cpu_physical_id(vcpu->pre_pcpu); - - if (x2apic_mode) - new.ndst = dest; - else - new.ndst = (dest << 8) & 0xFF00; + old.control = new.control = READ_ONCE(pi_desc->control); /* set 'NV' to 'wakeup vector' */ new.nv = POSTED_INTR_WAKEUP_VECTOR; @@@ -229,10 -220,10 +229,10 @@@ void pi_wakeup_handler(void blocked_vcpu_list) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (pi_test_on(pi_desc) == 1) + if (pi_test_on(pi_desc)) kvm_vcpu_kick(vcpu); } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } void __init pi_init_cpu(int cpu)