]> git.baikalelectronics.ru Git - kernel.git/commitdiff
Yama: fix double-spinlock and user access in atomic context
authorJann Horn <jann@thejh.net>
Sun, 22 May 2016 04:01:34 +0000 (06:01 +0200)
committerJames Morris <james.l.morris@oracle.com>
Wed, 25 May 2016 23:56:18 +0000 (09:56 +1000)
Commit 8a56038c2aef ("Yama: consolidate error reporting") causes lockups
when someone hits a Yama denial. Call chain:

process_vm_readv -> process_vm_rw -> process_vm_rw_core -> mm_access
-> ptrace_may_access
task_lock(...) is taken
__ptrace_may_access -> security_ptrace_access_check
-> yama_ptrace_access_check -> report_access -> kstrdup_quotable_cmdline
-> get_cmdline -> access_process_vm -> get_task_mm
task_lock(...) is taken again

task_lock(p) just calls spin_lock(&p->alloc_lock), so at this point,
spin_lock() is called on a lock that is already held by the current
process.

Also: Since the alloc_lock is a spinlock, sleeping inside
security_ptrace_access_check hooks is probably not allowed at all? So it's
not even possible to print the cmdline from in there because that might
involve paging in userspace memory.

It would be tempting to rewrite ptrace_may_access() to drop the alloc_lock
before calling the LSM, but even then, ptrace_may_access() itself might be
called from various contexts in which you're not allowed to sleep; for
example, as far as I understand, to be able to hold a reference to another
task, usually an RCU read lock will be taken (see e.g. kcmp() and
get_robust_list()), so that also prohibits sleeping. (And using e.g. FUSE,
a user can cause pagefault handling to take arbitrary amounts of time -
see https://bugs.chromium.org/p/project-zero/issues/detail?id=808.)

Therefore, AFAIK, in order to print the name of a process below
security_ptrace_access_check(), you'd have to either grab a reference to
the mm_struct and defer the access violation reporting or just use the
"comm" value that's stored in kernelspace and accessible without big
complications. (Or you could try to use some kind of atomic remote VM
access that fails if the memory isn't paged in, similar to
copy_from_user_inatomic(), and if necessary fall back to comm, but
that'd be kind of ugly because the comm/cmdline choice would look
pretty random to the user.)

Fix it by deferring reporting of the access violation until current
exits kernelspace the next time.

v2: Don't oops on PTRACE_TRACEME, call report_access under
task_lock(current). Also fix nonsensical comment. And don't use
GPF_ATOMIC for memory allocation with no locks held.
This patch is tested both for ptrace attach and ptrace traceme.

Fixes: 8a56038c2aef ("Yama: consolidate error reporting")
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.l.morris@oracle.com>
security/yama/yama_lsm.c

index 9b756b1f3dc5241cc11eb8ccb3755e0bd126b3c8..0309f2111c703b499134d3217d0a3735cdb5f2a0 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/string_helpers.h>
+#include <linux/task_work.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
 
 #define YAMA_SCOPE_DISABLED    0
 #define YAMA_SCOPE_RELATIONAL  1
@@ -42,20 +45,71 @@ static DEFINE_SPINLOCK(ptracer_relations_lock);
 static void yama_relation_cleanup(struct work_struct *work);
 static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
 
-static void report_access(const char *access, struct task_struct *target,
-                         struct task_struct *agent)
+struct access_report_info {
+       struct callback_head work;
+       const char *access;
+       struct task_struct *target;
+       struct task_struct *agent;
+};
+
+static void __report_access(struct callback_head *work)
 {
+       struct access_report_info *info =
+               container_of(work, struct access_report_info, work);
        char *target_cmd, *agent_cmd;
 
-       target_cmd = kstrdup_quotable_cmdline(target, GFP_ATOMIC);
-       agent_cmd = kstrdup_quotable_cmdline(agent, GFP_ATOMIC);
+       target_cmd = kstrdup_quotable_cmdline(info->target, GFP_KERNEL);
+       agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_KERNEL);
 
        pr_notice_ratelimited(
                "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
-               access, target_cmd, target->pid, agent_cmd, agent->pid);
+               info->access, target_cmd, info->target->pid, agent_cmd,
+               info->agent->pid);
 
        kfree(agent_cmd);
        kfree(target_cmd);
+
+       put_task_struct(info->agent);
+       put_task_struct(info->target);
+       kfree(info);
+}
+
+/* defers execution because cmdline access can sleep */
+static void report_access(const char *access, struct task_struct *target,
+                               struct task_struct *agent)
+{
+       struct access_report_info *info;
+       char agent_comm[sizeof(agent->comm)];
+
+       assert_spin_locked(&target->alloc_lock); /* for target->comm */
+
+       if (current->flags & PF_KTHREAD) {
+               /* I don't think kthreads call task_work_run() before exiting.
+                * Imagine angry ranting about procfs here.
+                */
+               pr_notice_ratelimited(
+                   "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
+                   access, target->comm, target->pid,
+                   get_task_comm(agent_comm, agent), agent->pid);
+               return;
+       }
+
+       info = kmalloc(sizeof(*info), GFP_ATOMIC);
+       if (!info)
+               return;
+       init_task_work(&info->work, __report_access);
+       get_task_struct(target);
+       get_task_struct(agent);
+       info->access = access;
+       info->target = target;
+       info->agent = agent;
+       if (task_work_add(current, &info->work, true) == 0)
+               return; /* success */
+
+       WARN(1, "report_access called from exiting task");
+       put_task_struct(target);
+       put_task_struct(agent);
+       kfree(info);
 }
 
 /**
@@ -351,8 +405,11 @@ int yama_ptrace_traceme(struct task_struct *parent)
                break;
        }
 
-       if (rc)
+       if (rc) {
+               task_lock(current);
                report_access("traceme", current, parent);
+               task_unlock(current);
+       }
 
        return rc;
 }