]> git.baikalelectronics.ru Git - kernel.git/commitdiff
x86/unwind: Recover kretprobe trampoline entry
authorMasami Hiramatsu <mhiramat@kernel.org>
Tue, 14 Sep 2021 14:42:31 +0000 (23:42 +0900)
committerSteven Rostedt (VMware) <rostedt@goodmis.org>
Fri, 1 Oct 2021 01:24:07 +0000 (21:24 -0400)
Since the kretprobe replaces the function return address with
the kretprobe_trampoline on the stack, x86 unwinders cannot
continue the stack unwinding at that point, or they record
kretprobe_trampoline instead of the correct return address.

To fix this issue, find the correct return address from the task's
kretprobe_instances, just as the function-graph tracer does.

With this fix, the unwinder can correctly unwind the stack
from a kretprobe event on x86, as shown below.

           <...>-135     [003] ...1     6.722338: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read)
           <...>-135     [003] ...1     6.722377: <stack trace>
 => kretprobe_trace_func+0x209/0x2f0
 => kretprobe_dispatcher+0x4a/0x70
 => __kretprobe_trampoline_handler+0xca/0x150
 => trampoline_handler+0x44/0x70
 => kretprobe_trampoline+0x2a/0x50
 => vfs_read+0xab/0x1a0
 => ksys_read+0x5f/0xe0
 => do_syscall_64+0x33/0x40
 => entry_SYSCALL_64_after_hwframe+0x44/0xae

Link: https://lkml.kernel.org/r/163163055130.489837.5161749078833497255.stgit@devnote2
Reported-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
arch/x86/include/asm/unwind.h
arch/x86/kernel/unwind_frame.c
arch/x86/kernel/unwind_guess.c
arch/x86/kernel/unwind_orc.c

index 70fc159ebe6959fead369c46d8a143812a1bc058..fca2e783e3ce8d79c1b70f30d4685bb8ba45f2c4 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/ftrace.h>
+#include <linux/kprobes.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
@@ -15,6 +16,7 @@ struct unwind_state {
        unsigned long stack_mask;
        struct task_struct *task;
        int graph_idx;
+       struct llist_node *kr_cur;
        bool error;
 #if defined(CONFIG_UNWINDER_ORC)
        bool signal, full_regs;
@@ -99,6 +101,27 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
                        void *orc, size_t orc_size) {}
 #endif
 
+static inline
+unsigned long unwind_recover_kretprobe(struct unwind_state *state,
+                                      unsigned long addr, unsigned long *addr_p)
+{
+       return is_kretprobe_trampoline(addr) ?
+               kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) :
+               addr;
+}
+
+/* Recover the return address modified by kretprobe and ftrace_graph. */
+static inline
+unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+                                    unsigned long addr, unsigned long *addr_p)
+{
+       unsigned long ret;
+
+       ret = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+                                   addr, addr_p);
+       return unwind_recover_kretprobe(state, ret, addr_p);
+}
+
 /*
  * This disables KASAN checking when reading a value from another task's stack,
  * since the other task could be running on another CPU and could have poisoned
index d7c44b257f7f4ea66bf4a49333fccb32f2c55d08..8e1c50c86e5db161c38cd1c0a630f0372c828ecf 100644 (file)
@@ -240,8 +240,7 @@ static bool update_stack_state(struct unwind_state *state,
        else {
                addr_p = unwind_get_return_address_ptr(state);
                addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
-               state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-                                                 addr, addr_p);
+               state->ip = unwind_recover_ret_addr(state, addr, addr_p);
        }
 
        /* Save the original stack pointer for unwind_dump(): */
index c49f10ffd8cded385e9c0e26cc65406de78a925b..884d68a6e714e16b2a4dc2a8cb12301de6d8402a 100644 (file)
@@ -15,8 +15,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 
        addr = READ_ONCE_NOCHECK(*state->sp);
 
-       return ftrace_graph_ret_addr(state->task, &state->graph_idx,
-                                    addr, state->sp);
+       return unwind_recover_ret_addr(state, addr, state->sp);
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
index a1202536fc57cc2b4af2b0bfde3f08fb6f9c034a..e6f7592790af951a6e656f4a8892d5e570ae67e5 100644 (file)
@@ -534,9 +534,8 @@ bool unwind_next_frame(struct unwind_state *state)
                if (!deref_stack_reg(state, ip_p, &state->ip))
                        goto err;
 
-               state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-                                                 state->ip, (void *)ip_p);
-
+               state->ip = unwind_recover_ret_addr(state, state->ip,
+                                                   (unsigned long *)ip_p);
                state->sp = sp;
                state->regs = NULL;
                state->prev_regs = NULL;
@@ -549,7 +548,18 @@ bool unwind_next_frame(struct unwind_state *state)
                                         (void *)orig_ip);
                        goto err;
                }
-
+               /*
+                * There is a small chance of being interrupted at the entry
+                * of __kretprobe_trampoline(), where no ORC info exists.
+                * That point is right after the RET to __kretprobe_trampoline(),
+                * which was the modified return address.
+                * At that point, the @addr_p of unwind_recover_kretprobe()
+                * (this has to point to the address of the stack entry storing
+                * the modified return address) must be "SP - (a stack entry)"
+                * because SP is incremented by the RET.
+                */
+               state->ip = unwind_recover_kretprobe(state, state->ip,
+                               (unsigned long *)(state->sp - sizeof(long)));
                state->regs = (struct pt_regs *)sp;
                state->prev_regs = NULL;
                state->full_regs = true;
@@ -562,6 +572,9 @@ bool unwind_next_frame(struct unwind_state *state)
                                         (void *)orig_ip);
                        goto err;
                }
+               /* See UNWIND_HINT_TYPE_REGS case comment. */
+               state->ip = unwind_recover_kretprobe(state, state->ip,
+                               (unsigned long *)(state->sp - sizeof(long)));
 
                if (state->full_regs)
                        state->prev_regs = state->regs;