KVM: PPC: Book3S HV Nested: L2 must not run with L1 xive context
author     Nicholas Piggin <npiggin@gmail.com>
Thu, 3 Mar 2022 05:33:14 +0000 (15:33 +1000)
committer  Michael Ellerman <mpe@ellerman.id.au>
Fri, 13 May 2022 11:34:33 +0000 (21:34 +1000)
The PowerNV L0 currently pushes the OS xive context when running a vCPU,
regardless of whether it is running a nested guest. The problem is that
xive OS ring interrupts will be delivered while the L2 is running.

At the moment, by default, the L2 guest runs with LPCR[LPES]=0, which
actually makes external interrupts go to the L0. That causes the L2 to
exit and the interrupt to be taken or injected into the L1, so in some
respects this behaves like an escalation. It's not clear whether this was
deliberate: there is no comment about it, and the L1 is actually
allowed to clear LPES in the L2, so it's confusing at best.

When the L2 is running, the L1 is essentially in a ceded state with
respect to external interrupts (it can't respond to them directly and
won't get scheduled again absent some additional event). So the natural
way to solve this is, when the L0 handles an H_ENTER_NESTED hypercall to
run the L2, to have it arm the escalation interrupt and not push the L1
context while running the L2.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220303053315.1056880-6-npiggin@gmail.com
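
A rough, self-contained C sketch of the flow described above (not the kernel
code itself; the authoritative change is the diff below). It models the L0's
handling of H_ENTER_NESTED under the new scheme: if the escalation interrupt
is already pending, the hcall is aborted so the L1 can take the interrupt;
otherwise the escalation is armed and the L2 is entered without the L1 xive
OS context pushed. All names here (sketch_vcpu, sketch_rearm_escalation, and
so on) are illustrative stand-ins, not the kernel's helpers.

/* Toy model of the H_ENTER_NESTED flow described above; illustrative only. */
#include <stdbool.h>
#include <stdio.h>

struct sketch_vcpu {
	bool esc_pending;	/* an escalation interrupt has already fired */
	bool l1_xive_pushed;	/* L1 OS xive context is on the HW thread */
};

/* Arm the escalation interrupt; fails if one is already pending. */
static bool sketch_rearm_escalation(struct sketch_vcpu *vcpu)
{
	return !vcpu->esc_pending;
}

/* Take the L1 OS xive context off the HW thread. */
static void sketch_pull_l1_context(struct sketch_vcpu *vcpu)
{
	vcpu->l1_xive_pushed = false;
}

/* Returns true if the L2 was entered, false if H_ENTER_NESTED was aborted. */
static bool sketch_enter_nested(struct sketch_vcpu *vcpu)
{
	if (!sketch_rearm_escalation(vcpu)) {
		/* Pending escalation: abort so the L1 can take the interrupt. */
		return false;
	}
	/* The L2 must not run with the L1 xive context pushed. */
	sketch_pull_l1_context(vcpu);
	/*
	 * ... run the L2 here; if an external interrupt arrives for the L1,
	 * the armed escalation fires and kicks the vCPU out of the L2 ...
	 */
	return true;
}

int main(void)
{
	struct sketch_vcpu vcpu = { .esc_pending = false, .l1_xive_pushed = true };

	printf("entered L2: %d\n", sketch_enter_nested(&vcpu));	/* 1 */

	vcpu.esc_pending = true;
	printf("entered L2: %d\n", sketch_enter_nested(&vcpu));	/* 0 */
	return 0;
}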
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_xive.c

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f14520506b61f7c612c70c89811a95cfaeeb85b9..3d37448ddf445c41bc26936f3e54e4482adad9b1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4058,14 +4058,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                }
 
        } else if (nested) {
-               kvmppc_xive_push_vcpu(vcpu);
-
                __this_cpu_write(cpu_in_guest, kvm);
                trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
                __this_cpu_write(cpu_in_guest, NULL);
 
-               kvmppc_xive_pull_vcpu(vcpu);
-
        } else {
                kvmppc_xive_push_vcpu(vcpu);
 
@@ -4077,8 +4073,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        unsigned long req = kvmppc_get_gpr(vcpu, 3);
 
-                       /* H_CEDE has to be handled now */
+                       /*
+                        * XIVE rearm and XICS hcalls must be handled
+                        * before xive context is pulled (is this
+                        * true?)
+                        */
                        if (req == H_CEDE) {
+                               /* H_CEDE has to be handled now */
                                kvmppc_cede(vcpu);
                                if (!kvmppc_xive_rearm_escalation(vcpu)) {
                                        /*
@@ -4090,7 +4091,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                                kvmppc_set_gpr(vcpu, 3, 0);
                                trap = 0;
 
-                       /* XICS hcalls must be handled before xive is pulled */
+                       } else if (req == H_ENTER_NESTED) {
+                               /*
+                                * L2 should not run with the L1
+                                * context so rearm and pull it.
+                                */
+                               if (!kvmppc_xive_rearm_escalation(vcpu)) {
+                                       /*
+                                        * Pending escalation so abort
+                                        * H_ENTER_NESTED.
+                                        */
+                                       kvmppc_set_gpr(vcpu, 3, 0);
+                                       trap = 0;
+                               }
+
                        } else if (hcall_is_xics(req)) {
                                int ret;
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 4e7b9f0a21c7927b1dfd2ab06198366ff928ef46..ee4be73649e5b44518f75965e017217917e7e9b0 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -241,7 +241,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 
        vcpu->arch.irq_pending = 1;
        smp_mb();
-       if (vcpu->arch.ceded)
+       if (vcpu->arch.ceded || vcpu->arch.nested)
                kvmppc_fast_vcpu_kick(vcpu);
 
        /* Since we have the no-EOI flag, the interrupt is effectively
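
For context on the book3s_xive.c hunk above: xive_esc_irq() is the escalation
interrupt handler, and with this change it also kicks a vCPU that is currently
running an L2 (vcpu->arch.nested set), not just a ceded one, so the L2 exits
and the L1 can take the external interrupt. A minimal sketch of that kick
decision, with stand-in names rather than the kernel's structures:

/* Illustrative model of the escalation handler's kick decision. */
#include <stdbool.h>
#include <stdio.h>

struct sketch_vcpu {
	bool irq_pending;
	bool ceded;		/* vCPU has ceded, waiting for an interrupt */
	bool running_l2;	/* vCPU is currently running a nested (L2) guest */
};

static void sketch_esc_irq(struct sketch_vcpu *vcpu)
{
	vcpu->irq_pending = true;
	/*
	 * Previously only a ceded vCPU was kicked; with this change a vCPU
	 * running an L2 is kicked too, so the L2 exits and the L1 can take
	 * the external interrupt.
	 */
	if (vcpu->ceded || vcpu->running_l2)
		printf("kick vCPU\n");
}

int main(void)
{
	struct sketch_vcpu vcpu = { .running_l2 = true };

	sketch_esc_irq(&vcpu);	/* prints "kick vCPU" */
	return 0;
}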