git.baikalelectronics.ru Git - kernel.git/commitdiff
x86/entry: Switch page fault exception to IDTENTRY_RAW
author: Thomas Gleixner <tglx@linutronix.de>
Thu, 21 May 2020 20:05:28 +0000 (22:05 +0200)
committer: Thomas Gleixner <tglx@linutronix.de>
Thu, 11 Jun 2020 13:15:09 +0000 (15:15 +0200)
Convert page fault exceptions to IDTENTRY_RAW:

  - Implement the C entry point with DEFINE_IDTENTRY_RAW
  - Add the CR2 read into the exception handler
  - Add the idtentry_enter/exit_cond_rcu() invocations in
    the regular page fault handler and in the async PF
    part.
  - Emit the ASM stub with DECLARE_IDTENTRY_RAW
  - Remove the ASM idtentry in 64-bit
  - Remove the CR2 read from 64-bit
  - Remove the open coded ASM entry code in 32-bit
  - Fix up the XEN/PV code
  - Remove the old prototypes

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20200521202118.238455120@linutronix.de
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/traps.h
arch/x86/kernel/idt.c
arch/x86/kernel/kvm.c
arch/x86/mm/fault.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/xen-asm_64.S

index 3ab04dca9aab029545fe2de7e44d86a3525e6021..660ed3ed37dc504dcf44e23a96dd0288854be5a3 100644 (file)
@@ -1398,36 +1398,6 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 
 #endif /* CONFIG_HYPERV */
 
-SYM_CODE_START(page_fault)
-       ASM_CLAC
-       pushl   $do_page_fault
-       jmp     common_exception_read_cr2
-SYM_CODE_END(page_fault)
-
-SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
-       /* the function address is in %gs's slot on the stack */
-       SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
-
-       ENCODE_FRAME_POINTER
-
-       /* fixup %gs */
-       GS_TO_REG %ecx
-       movl    PT_GS(%esp), %edi
-       REG_TO_PTGS %ecx
-       SET_KERNEL_GS %ecx
-
-       GET_CR2_INTO(%ecx)                      # might clobber %eax
-
-       /* fixup orig %eax */
-       movl    PT_ORIG_EAX(%esp), %edx         # get the error code
-       movl    $-1, PT_ORIG_EAX(%esp)          # no syscall to restart
-
-       TRACE_IRQS_OFF
-       movl    %esp, %eax                      # pt_regs pointer
-       CALL_NOSPEC edi
-       jmp     ret_from_exception
-SYM_CODE_END(common_exception_read_cr2)
-
 SYM_CODE_START_LOCAL_NOALIGN(common_exception)
        /* the function address is in %gs's slot on the stack */
        SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
index b70c7788ef08940831d84433e140425f5430b1d1..5789f76932b6cbe91122720a890e617808e3048a 100644 (file)
@@ -506,15 +506,6 @@ SYM_CODE_END(spurious_entries_start)
        call    error_entry
        UNWIND_HINT_REGS
 
-       .if \vector == X86_TRAP_PF
-               /*
-                * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
-                * intermediate storage as RDX can be clobbered in enter_from_user_mode().
-                * GET_CR2_INTO can clobber RAX.
-                */
-               GET_CR2_INTO(%r12);
-       .endif
-
        .if \sane == 0
        TRACE_IRQS_OFF
 
@@ -533,10 +524,6 @@ SYM_CODE_END(spurious_entries_start)
                movq    $-1, ORIG_RAX(%rsp)     /* no syscall to restart */
        .endif
 
-       .if \vector == X86_TRAP_PF
-               movq    %r12, %rdx              /* Move CR2 into 3rd argument */
-       .endif
-
        call    \cfunc
 
        .if \sane == 0
@@ -1059,12 +1046,6 @@ apicinterrupt SPURIOUS_APIC_VECTOR               spurious_interrupt              smp_spurious_interrupt
 apicinterrupt IRQ_WORK_VECTOR                  irq_work_interrupt              smp_irq_work_interrupt
 #endif
 
-/*
- * Exception entry points.
- */
-
-idtentry       X86_TRAP_PF             page_fault              do_page_fault                   has_error_code=1
-
 /*
  * Reload gs selector with exception handling
  * edi:  new selector
index b2a5fe02dcf09e07d737eca5289720a217bf934c..9ec5466e4c051500d90033ce3ad9580c2d949be8 100644 (file)
@@ -387,7 +387,8 @@ DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP,     exc_general_protection);
 DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC,        exc_alignment_check);
 
 /* Raw exception entries which need extra work */
-DECLARE_IDTENTRY_RAW(X86_TRAP_BP,      exc_int3);
+DECLARE_IDTENTRY_RAW(X86_TRAP_BP,              exc_int3);
+DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,    exc_page_fault);
 
 #ifdef CONFIG_X86_MCE
 DECLARE_IDTENTRY_MCE(X86_TRAP_MC,      exc_machine_check);
index f5a2e438a8784d59d741ba2a76999c1d3e3ca2e5..d7de360eec74cda8f251875f74d19a0ac5f24f51 100644 (file)
@@ -9,17 +9,6 @@
 #include <asm/idtentry.h>
 #include <asm/siginfo.h>                       /* TRAP_TRACE, ... */
 
-#define dotraplinkage __visible
-
-asmlinkage void page_fault(void);
-asmlinkage void async_page_fault(void);
-
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
-asmlinkage void xen_page_fault(void);
-#endif
-
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
-
 #ifdef CONFIG_X86_64
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
index ec55479e1dd154cd77a6d0092fcbb9e12f56e462..ddb11154aeeeb3de4408f6daab51cf4629c80e98 100644 (file)
@@ -62,7 +62,7 @@ static const __initconst struct idt_data early_idts[] = {
        INTG(X86_TRAP_DB,               asm_exc_debug),
        SYSG(X86_TRAP_BP,               asm_exc_int3),
 #ifdef CONFIG_X86_32
-       INTG(X86_TRAP_PF,               page_fault),
+       INTG(X86_TRAP_PF,               asm_exc_page_fault),
 #endif
 };
 
@@ -156,7 +156,7 @@ static const __initconst struct idt_data apic_idts[] = {
  * stacks work only after cpu_init().
  */
 static const __initconst struct idt_data early_pf_idts[] = {
-       INTG(X86_TRAP_PF,               page_fault),
+       INTG(X86_TRAP_PF,               asm_exc_page_fault),
 };
 
 /*
index d6f22a3a1f7da4bd0ab1d5118577a2ae4e27cf8a..d00f7c430e65a91cd8a6d4efe5eb4d8e30ceae46 100644 (file)
@@ -218,7 +218,7 @@ again:
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
 
-u32 kvm_read_and_reset_apf_flags(void)
+noinstr u32 kvm_read_and_reset_apf_flags(void)
 {
        u32 flags = 0;
 
@@ -230,11 +230,11 @@ u32 kvm_read_and_reset_apf_flags(void)
        return flags;
 }
 EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
-NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags);
 
-bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
+noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
        u32 reason = kvm_read_and_reset_apf_flags();
+       bool rcu_exit;
 
        switch (reason) {
        case KVM_PV_REASON_PAGE_NOT_PRESENT:
@@ -244,6 +244,9 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
                return false;
        }
 
+       rcu_exit = idtentry_enter_cond_rcu(regs);
+       instrumentation_begin();
+
        /*
         * If the host managed to inject an async #PF into an interrupt
         * disabled region, then die hard as this is not going to end well
@@ -258,13 +261,13 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
                /* Page is swapped out by the host. */
                kvm_async_pf_task_wait_schedule(token);
        } else {
-               rcu_irq_enter();
                kvm_async_pf_task_wake(token);
-               rcu_irq_exit();
        }
+
+       instrumentation_end();
+       idtentry_exit_cond_rcu(regs, rcu_exit);
        return true;
 }
-NOKPROBE_SYMBOL(__kvm_handle_async_pf);
 
 static void __init paravirt_ops_setup(void)
 {
index d7b52a2a1bce49fe73373bb34f4e8232d604e7d5..eef29bb53cd0b922311d698be35b234259c6f19d 100644 (file)
@@ -1357,11 +1357,38 @@ trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
                trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void
-do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
-               unsigned long address)
+static __always_inline void
+handle_page_fault(struct pt_regs *regs, unsigned long error_code,
+                             unsigned long address)
+{
+       trace_page_fault_entries(regs, error_code, address);
+
+       if (unlikely(kmmio_fault(regs, address)))
+               return;
+
+       /* Was the fault on kernel-controlled part of the address space? */
+       if (unlikely(fault_in_kernel_space(address))) {
+               do_kern_addr_fault(regs, error_code, address);
+       } else {
+               do_user_addr_fault(regs, error_code, address);
+               /*
+                * User address page fault handling might have reenabled
+                * interrupts. Fixing up all potential exit points of
+                * do_user_addr_fault() and its leaf functions is just not
+                * doable w/o creating an unholy mess or turning the code
+                * upside down.
+                */
+               local_irq_disable();
+       }
+}
+
+DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 {
+       unsigned long address = read_cr2();
+       bool rcu_exit;
+
        prefetchw(&current->mm->mmap_lock);
+
        /*
         * KVM has two types of events that are, logically, interrupts, but
         * are unfortunately delivered using the #PF vector.  These events are
@@ -1376,28 +1403,28 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
         * getting values from real and async page faults mixed up.
         *
         * Fingers crossed.
+        *
+        * The async #PF handling code takes care of idtentry handling
+        * itself.
         */
        if (kvm_handle_async_pf(regs, (u32)address))
                return;
 
-       trace_page_fault_entries(regs, hw_error_code, address);
+       /*
+        * Entry handling for valid #PF from kernel mode is slightly
+        * different: RCU is already watching and rcu_irq_enter() must not
+        * be invoked because a kernel fault on a user space address might
+        * sleep.
+        *
+        * In case the fault hit a RCU idle region the conditional entry
+        * code reenabled RCU to avoid subsequent wreckage which helps
+        * debugability.
+        */
+       rcu_exit = idtentry_enter_cond_rcu(regs);
 
-       if (unlikely(kmmio_fault(regs, address)))
-               return;
+       instrumentation_begin();
+       handle_page_fault(regs, error_code, address);
+       instrumentation_end();
 
-       /* Was the fault on kernel-controlled part of the address space? */
-       if (unlikely(fault_in_kernel_space(address))) {
-               do_kern_addr_fault(regs, hw_error_code, address);
-       } else {
-               do_user_addr_fault(regs, hw_error_code, address);
-               /*
-                * User address page fault handling might have reenabled
-                * interrupts. Fixing up all potential exit points of
-                * do_user_addr_fault() and its leaf functions is just not
-                * doable w/o creating an unholy mess or turning the code
-                * upside down.
-                */
-               local_irq_disable();
-       }
+       idtentry_exit_cond_rcu(regs, rcu_exit);
 }
-NOKPROBE_SYMBOL(do_page_fault);
index 008291121cb4eebd4e02675ec2a8542f9124e5e2..33b309d659559e4276886c7efed82b03c8a1252d 100644 (file)
@@ -626,7 +626,7 @@ static struct trap_array_entry trap_array[] = {
 #ifdef CONFIG_IA32_EMULATION
        { entry_INT80_compat,          xen_entry_INT80_compat,          false },
 #endif
-       { page_fault,                  xen_page_fault,                  false },
+       TRAP_ENTRY(exc_page_fault,                      false ),
        TRAP_ENTRY(exc_divide_error,                    false ),
        TRAP_ENTRY(exc_bounds,                          false ),
        TRAP_ENTRY(exc_invalid_op,                      false ),
index 19fbbdbcbde9207ca2be67ab138c1ef8d64b007d..5d252aaeade8b3a327bc5b80addec37222ed66b7 100644 (file)
@@ -43,7 +43,7 @@ xen_pv_trap asm_exc_invalid_tss
 xen_pv_trap asm_exc_segment_not_present
 xen_pv_trap asm_exc_stack_segment
 xen_pv_trap asm_exc_general_protection
-xen_pv_trap page_fault
+xen_pv_trap asm_exc_page_fault
 xen_pv_trap asm_exc_spurious_interrupt_bug
 xen_pv_trap asm_exc_coprocessor_error
 xen_pv_trap asm_exc_alignment_check