Since the kretprobe replaces the function return address with
the kretprobe_trampoline on the stack, x86 unwinders can not
continue the stack unwinding at that point, or record
kretprobe_trampoline instead of correct return address.
To fix this issue, find the correct return address from task's
kretprobe_instances as like as function-graph tracer does.
With this fix, the unwinder can correctly unwind the stack
from kretprobe event on x86, as below.
<...>-135 [003] ...1 6.722338: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read)
<...>-135 [003] ...1 6.722377: <stack trace>
=> kretprobe_trace_func+0x209/0x2f0
=> kretprobe_dispatcher+0x4a/0x70
=> __kretprobe_trampoline_handler+0xca/0x150
=> trampoline_handler+0x44/0x70
=> kretprobe_trampoline+0x2a/0x50
=> vfs_read+0xab/0x1a0
=> ksys_read+0x5f/0xe0
=> do_syscall_64+0x33/0x40
=> entry_SYSCALL_64_after_hwframe+0x44/0xae
Link: https://lkml.kernel.org/r/163163055130.489837.5161749078833497255.stgit@devnote2
Reported-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
#include <linux/sched.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
+ struct llist_node *kr_cur;
bool error;
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
void *orc, size_t orc_size) {}
#endif
+static inline
+unsigned long unwind_recover_kretprobe(struct unwind_state *state,
+ unsigned long addr, unsigned long *addr_p)
+{
+ return is_kretprobe_trampoline(addr) ?
+ kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) :
+ addr;
+}
+
+/* Recover the return address modified by kretprobe and ftrace_graph. */
+static inline
+unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+ unsigned long addr, unsigned long *addr_p)
+{
+ unsigned long ret;
+
+ ret = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ addr, addr_p);
+ return unwind_recover_kretprobe(state, ret, addr_p);
+}
+
/*
* This disables KASAN checking when reading a value from another task's stack,
* since the other task could be running on another CPU and could have poisoned
else {
addr_p = unwind_get_return_address_ptr(state);
addr = READ_ONCE_TASK_STACK(state->task, *addr_p);
- state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, addr_p);
+ state->ip = unwind_recover_ret_addr(state, addr, addr_p);
}
/* Save the original stack pointer for unwind_dump(): */
addr = READ_ONCE_NOCHECK(*state->sp);
- return ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, state->sp);
+ return unwind_recover_ret_addr(state, addr, state->sp);
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
if (!deref_stack_reg(state, ip_p, &state->ip))
goto err;
- state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- state->ip, (void *)ip_p);
-
+ state->ip = unwind_recover_ret_addr(state, state->ip,
+ (unsigned long *)ip_p);
state->sp = sp;
state->regs = NULL;
state->prev_regs = NULL;
(void *)orig_ip);
goto err;
}
-
+ /*
+ * There is a small chance to interrupt at the entry of
+ * __kretprobe_trampoline() where the ORC info doesn't exist.
+ * That point is right after the RET to __kretprobe_trampoline()
+ * which was modified return address.
+ * At that point, the @addr_p of the unwind_recover_kretprobe()
+ * (this has to point the address of the stack entry storing
+ * the modified return address) must be "SP - (a stack entry)"
+ * because SP is incremented by the RET.
+ */
+ state->ip = unwind_recover_kretprobe(state, state->ip,
+ (unsigned long *)(state->sp - sizeof(long)));
state->regs = (struct pt_regs *)sp;
state->prev_regs = NULL;
state->full_regs = true;
(void *)orig_ip);
goto err;
}
+ /* See UNWIND_HINT_TYPE_REGS case comment. */
+ state->ip = unwind_recover_kretprobe(state, state->ip,
+ (unsigned long *)(state->sp - sizeof(long)));
if (state->full_regs)
state->prev_regs = state->regs;