]> git.baikalelectronics.ru Git - kernel.git/commitdiff
mm/hugetlb: fix a race between hugetlb sysctl handlers
authorMuchun Song <songmuchun@bytedance.com>
Fri, 4 Sep 2020 23:36:13 +0000 (16:36 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Sep 2020 19:14:30 +0000 (12:14 -0700)
There is a race between the assignment of `table->data` and write value
to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on
the other thread.

  CPU0:                                 CPU1:
                                        proc_sys_write
  hugetlb_sysctl_handler                  proc_sys_call_handler
  hugetlb_sysctl_handler_common             hugetlb_sysctl_handler
    table->data = &tmp;                       hugetlb_sysctl_handler_common
                                                table->data = &tmp;
      proc_doulongvec_minmax
        do_proc_doulongvec_minmax           sysctl_head_finish
          __do_proc_doulongvec_minmax         unuse_table
            i = table->data;
            *i = val;  // corrupt CPU1's stack

Fix this by duplicating the `table`, and only update the duplicate of
it.  And introduce a helper of proc_hugetlb_doulongvec_minmax() to
simplify the code.

The following oops was seen:

    BUG: kernel NULL pointer dereference, address: 0000000000000000
    #PF: supervisor instruction fetch in kernel mode
    #PF: error_code(0x0010) - not-present page
    Code: Bad RIP value.
    ...
    Call Trace:
     ? set_max_huge_pages+0x3da/0x4f0
     ? alloc_pool_huge_page+0x150/0x150
     ? proc_doulongvec_minmax+0x46/0x60
     ? hugetlb_sysctl_handler_common+0x1c7/0x200
     ? nr_hugepages_store+0x20/0x20
     ? copy_fd_bitmaps+0x170/0x170
     ? hugetlb_sysctl_handler+0x1e/0x20
     ? proc_sys_call_handler+0x2f1/0x300
     ? unregister_sysctl_table+0xb0/0xb0
     ? __fd_install+0x78/0x100
     ? proc_sys_write+0x14/0x20
     ? __vfs_write+0x4d/0x90
     ? vfs_write+0xef/0x240
     ? ksys_write+0xc0/0x160
     ? __ia32_sys_read+0x50/0x50
     ? __close_fd+0x129/0x150
     ? __x64_sys_write+0x43/0x50
     ? do_syscall_64+0x6c/0x200
     ? entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 5815a4fff30f ("hugetlb: multiple hstates for multiple page sizes")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/hugetlb.c

index 5957dc80ebb1856ec19608b885a9cb0a98ffa984..67fc6383995b430c7b6c6ebb0c01dc2128dbe2b4 100644 (file)
@@ -3465,6 +3465,22 @@ static unsigned int allowed_mems_nr(struct hstate *h)
 }
 
 #ifdef CONFIG_SYSCTL
+static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
+                                         void *buffer, size_t *length,
+                                         loff_t *ppos, unsigned long *out)
+{
+       struct ctl_table dup_table;
+
+       /*
+        * In order to avoid races with __do_proc_doulongvec_minmax(), we
+        * can duplicate the @table and alter the duplicate of it.
+        */
+       dup_table = *table;
+       dup_table.data = out;
+
+       return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
+}
+
 static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                         struct ctl_table *table, int write,
                         void *buffer, size_t *length, loff_t *ppos)
@@ -3476,9 +3492,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
        if (!hugepages_supported())
                return -EOPNOTSUPP;
 
-       table->data = &tmp;
-       table->maxlen = sizeof(unsigned long);
-       ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+       ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+                                            &tmp);
        if (ret)
                goto out;
 
@@ -3521,9 +3536,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
        if (write && hstate_is_gigantic(h))
                return -EINVAL;
 
-       table->data = &tmp;
-       table->maxlen = sizeof(unsigned long);
-       ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+       ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+                                            &tmp);
        if (ret)
                goto out;