]> git.baikalelectronics.ru Git - kernel.git/commitdiff
KVM: Add helpers to wrap vcpu->srcu_idx and yell if it's abused
authorSean Christopherson <seanjc@google.com>
Fri, 15 Apr 2022 00:43:43 +0000 (00:43 +0000)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 21 Apr 2022 17:16:11 +0000 (13:16 -0400)
Add wrappers to acquire/release KVM's SRCU lock when stashing the index
in vcpu->src_idx, along with rudimentary detection of illegal usage,
e.g. re-acquiring SRCU and thus overwriting vcpu->src_idx.  Because the
SRCU index is (currently) either 0 or 1, illegal nesting bugs can go
unnoticed for quite some time and only cause problems when the nested
lock happens to get a different index.

Wrap the WARNs in PROVE_RCU=y, and make them ONCE, otherwise KVM will
likely yell so loudly that it will bring the kernel to its knees.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Fabiano Rosas <farosas@linux.ibm.com>
Message-Id: <20220415004343.2203171-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/powerpc.c
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_exit.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/vsie.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h

index e4ce2a35483f6fcb32f00b50c633c563375e1a1d..42851c32ff3bee0eb576b3d90c768fa7313f247c 100644 (file)
@@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
                        return -EINVAL;
                /* Read the entry from guest memory */
                addr = base + (index * sizeof(rpte));
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
+               kvm_vcpu_srcu_read_lock(vcpu);
                ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (ret) {
                        if (pte_ret_p)
                                *pte_ret_p = addr;
@@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
 
        /* Read the table to find the root of the radix tree */
        ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
-       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
-       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (ret)
                return ret;
 
index 9d373f8963ee98a9977f4a796f200c6343b5d67b..c943a051c6e700c2b58416aef857b9e310563678 100644 (file)
@@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        /* copy parameters in */
        hv_ptr = kvmppc_get_gpr(vcpu, 4);
        regs_ptr = kvmppc_get_gpr(vcpu, 5);
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                              hv_ptr, regs_ptr);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_PARAMETER;
 
@@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
                byteswap_hv_regs(&l2_hv);
                byteswap_pt_regs(&l2_regs);
        }
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
                                               hv_ptr, regs_ptr);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (err)
                return H_AUTHORITY;
 
@@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
                        goto not_found;
 
                /* Write what was loaded into our buffer back to the L1 guest */
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
        } else {
                /* Load the data to be stored from the L1 guest into our buf */
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (rc)
                        goto not_found;
 
index 0f847f1e5ddd0ba6590642548fe799f6335ca015..6808bda0dbc10c114a9b1d2fab1b4d63d24a6224 100644 (file)
@@ -229,9 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
         */
        args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (rc)
                goto fail;
 
index 875c30c12db046957bed254f5519a279023e2e8e..533c4232e5abfd926c859fcdfb45c7fee658682a 100644 (file)
@@ -425,9 +425,9 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                return EMULATE_DONE;
        }
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        if (rc)
                return EMULATE_DO_MMIO;
 
index 4a52fda6417bbd14d47917ee185eb999cae57f42..7461f964d20a92e579e80c2485a41be9da2b051a 100644 (file)
@@ -727,13 +727,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        /* Mark this VCPU ran at least once */
        vcpu->arch.ran_atleast_once = true;
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        /* Process MMIO value returned from user-space */
        if (run->exit_reason == KVM_EXIT_MMIO) {
                ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
                if (ret) {
-                       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        return ret;
                }
        }
@@ -742,13 +742,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
                ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
                if (ret) {
-                       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        return ret;
                }
        }
 
        if (run->immediate_exit) {
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                return -EINTR;
        }
 
@@ -787,7 +787,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 */
                vcpu->mode = IN_GUEST_MODE;
 
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                smp_mb__after_srcu_read_unlock();
 
                /*
@@ -805,7 +805,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        vcpu->mode = OUTSIDE_GUEST_MODE;
                        local_irq_enable();
                        preempt_enable();
-                       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+                       kvm_vcpu_srcu_read_lock(vcpu);
                        continue;
                }
 
@@ -849,7 +849,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
                preempt_enable();
 
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
        }
@@ -858,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
        vcpu_put(vcpu);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        return ret;
 }
index 2d56faddb9d1cf68b2df75fddf81839cc74dac4d..a72c15d4b42a599a2b640b27af8eb699b4f4724e 100644 (file)
@@ -456,9 +456,9 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
 void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
 {
        if (!kvm_arch_vcpu_runnable(vcpu)) {
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_halt(vcpu);
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
        }
 }
index 9b30beac904db866ba23fecb890784fb8e8eb4a9..af96dc0549a4b57ccce07688cad056c1e3f67fe1 100644 (file)
@@ -1334,11 +1334,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
        hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
 no_timer:
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        kvm_vcpu_halt(vcpu);
        vcpu->valid_wakeup = false;
        __unset_cpu_idle(vcpu);
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        hrtimer_cancel(&vcpu->arch.ckc_timer);
        return 0;
index 156d1c25a3c1ec9a225669013153bf44c90d5363..da3dabda1a12623a2ee09bb7c1e7abc855793fc0 100644 (file)
@@ -4237,14 +4237,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
         * ning the guest), so that memslots (and other stuff) are protected
         */
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;
 
-               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                /*
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
@@ -4281,12 +4281,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
        return rc;
 }
 
index acda4b6fc851824d22c3b7b68715c0905a8edfd1..dada78b92691fa01f8d6a96081743a8ca2dd59fe 100644 (file)
@@ -1091,7 +1091,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
        handle_last_fault(vcpu, vsie_page);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        /* save current guest state of bp isolation override */
        guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1133,7 +1133,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
        if (!guest_bp_isolation)
                clear_thread_flag(TIF_ISOLATE_BP_GUEST);
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        if (rc == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
index 867c0fd8d187a825d4a00cc39d5c011af9fae2c4..51eb27824452949041e83a0e845a22c3ea7199ec 100644 (file)
@@ -10097,7 +10097,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        /* Store vcpu->apicv_active before vcpu->mode.  */
        smp_store_release(&vcpu->mode, IN_GUEST_MODE);
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        /*
         * 1) We should set ->mode before checking ->requests.  Please see
@@ -10128,7 +10128,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                smp_wmb();
                local_irq_enable();
                preempt_enable();
-               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
                r = 1;
                goto cancel_injection;
        }
@@ -10254,7 +10254,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        local_irq_enable();
        preempt_enable();
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
 
        /*
         * Profile KVM exit RIPs:
@@ -10284,7 +10284,7 @@ out:
 }
 
 /* Called within kvm->srcu read side.  */
-static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+static inline int vcpu_block(struct kvm_vcpu *vcpu)
 {
        bool hv_timer;
 
@@ -10300,12 +10300,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
                if (hv_timer)
                        kvm_lapic_switch_to_sw_timer(vcpu);
 
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
                        kvm_vcpu_halt(vcpu);
                else
                        kvm_vcpu_block(vcpu);
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                if (hv_timer)
                        kvm_lapic_switch_to_hv_timer(vcpu);
@@ -10347,7 +10347,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
-       struct kvm *kvm = vcpu->kvm;
 
        vcpu->arch.l1tf_flush_l1d = true;
 
@@ -10355,7 +10354,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                if (kvm_vcpu_running(vcpu)) {
                        r = vcpu_enter_guest(vcpu);
                } else {
-                       r = vcpu_block(kvm, vcpu);
+                       r = vcpu_block(vcpu);
                }
 
                if (r <= 0)
@@ -10374,9 +10373,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                }
 
                if (__xfer_to_guest_mode_work_pending()) {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+                       kvm_vcpu_srcu_read_unlock(vcpu);
                        r = xfer_to_guest_mode_handle_work(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+                       kvm_vcpu_srcu_read_lock(vcpu);
                        if (r)
                                return r;
                }
@@ -10479,7 +10478,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
-       struct kvm *kvm = vcpu->kvm;
        int r;
 
        vcpu_load(vcpu);
@@ -10487,7 +10485,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        kvm_run->flags = 0;
        kvm_load_guest_fpu(vcpu);
 
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       kvm_vcpu_srcu_read_lock(vcpu);
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -10499,9 +10497,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 */
                WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
 
-               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_block(vcpu);
-               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               kvm_vcpu_srcu_read_lock(vcpu);
 
                if (kvm_apic_accept_events(vcpu) < 0) {
                        r = 0;
@@ -10562,7 +10560,7 @@ out:
        if (kvm_run->kvm_valid_regs)
                store_regs(vcpu);
        post_kvm_run_save(vcpu);
-       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+       kvm_vcpu_srcu_read_unlock(vcpu);
 
        kvm_sigset_deactivate(vcpu);
        vcpu_put(vcpu);
index 3f9b22c4983a85704667a89e274c364326980574..2dab4b696682e62e2babeabd42c77a3d75e6b7cd 100644 (file)
@@ -315,7 +315,10 @@ struct kvm_vcpu {
        int cpu;
        int vcpu_id; /* id given by userspace at creation */
        int vcpu_idx; /* index in kvm->vcpus array */
-       int srcu_idx;
+       int ____srcu_idx; /* Don't use this directly.  You've been warned. */
+#ifdef CONFIG_PROVE_RCU
+       int srcu_depth;
+#endif
        int mode;
        u64 requests;
        unsigned long guest_debug;
@@ -840,6 +843,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
        unlikely(__ret);                                        \
 })
 
+static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PROVE_RCU
+       WARN_ONCE(vcpu->srcu_depth++,
+                 "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1);
+#endif
+       vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+}
+
+static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
+{
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx);
+
+#ifdef CONFIG_PROVE_RCU
+       WARN_ONCE(--vcpu->srcu_depth,
+                 "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth);
+#endif
+}
+
 static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
 {
        return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);