git.baikalelectronics.ru Git - kernel.git/commitdiff
mm/userfaultfd: fix uffd-wp special cases for fork()
authorPeter Xu <peterx@redhat.com>
Thu, 1 Jul 2021 01:49:02 +0000 (18:49 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Jul 2021 03:47:27 +0000 (20:47 -0700)
We tried to do something similar in a4df862084ab ("userfaultfd: wp: drop
_PAGE_UFFD_WP properly when fork") previously, but it did not get it all
right.  A few fixes around the code path:

1. We were referencing VM_UFFD_WP vm_flags on the _old_ vma rather
   than the new vma.  That's overlooked in a4df862084ab, so it won't work
   as expected.  Thanks to the recent rework on fork code
   (fefff6922ac58fff), we can easily get the new vma now, so switch the
   checks to that.

2. Dropping the uffd-wp bit in copy_huge_pmd() could be wrong if the
   huge pmd is a migration huge pmd.  When that happens, instead of using
   pmd_uffd_wp(), we should use pmd_swp_uffd_wp().  The fix is simply to
   handle them separately.

3. We forgot to carry over the uffd-wp bit for a write migration huge
   pmd entry.  This also happens in copy_huge_pmd(), where we converted a
   write huge migration entry into a read one.

4. In copy_nonpresent_pte(), drop uffd-wp if necessary for swap ptes.

5. In copy_present_page() when COW is enforced when fork(), we also
   need to pass over the uffd-wp bit if VM_UFFD_WP is armed on the new
   vma, and when the pte to be copied has uffd-wp bit set.

Remove the comment in copy_present_pte() about this.  It won't help a huge
lot to only comment there, but comment everywhere would be an overkill.
Let's assume the commit messages would help.

[peterx@redhat.com: fix a few thp pmd missing uffd-wp bit]
Link: https://lkml.kernel.org/r/20210428225030.9708-4-peterx@redhat.com
Link: https://lkml.kernel.org/r/20210428225030.9708-3-peterx@redhat.com
Fixes: a4df862084abb ("userfaultfd: wp: drop _PAGE_UFFD_WP properly when fork")
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Wang Qing <wangqing@vivo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/huge_mm.h
include/linux/swapops.h
mm/huge_memory.c
mm/memory.c

index b4e1ebaae825aafb947550e571652589ca3b837a..939f21b69ead3f8b7b4e12fffaa219ffd9eb3c95 100644 (file)
@@ -10,7 +10,7 @@
 vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
-                 struct vm_area_struct *vma);
+                 struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
 void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
 int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
index 5907205c712c4554df6d1897e6951671325c2466..708fbeb21dd397c8a3511d1368f389cc5124d280 100644 (file)
@@ -265,6 +265,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
 
        if (pmd_swp_soft_dirty(pmd))
                pmd = pmd_swp_clear_soft_dirty(pmd);
+       if (pmd_swp_uffd_wp(pmd))
+               pmd = pmd_swp_clear_uffd_wp(pmd);
        arch_entry = __pmd_to_swp_entry(pmd);
        return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
index b72d919ab13a25a481e51a2084e45890990beb76..40a90ff181801a8e716901ee367d8d0931ceae70 100644 (file)
@@ -1026,7 +1026,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
-                 struct vm_area_struct *vma)
+                 struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 {
        spinlock_t *dst_ptl, *src_ptl;
        struct page *src_page;
@@ -1035,7 +1035,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        int ret = -ENOMEM;
 
        /* Skip if can be re-fill on fault */
-       if (!vma_is_anonymous(vma))
+       if (!vma_is_anonymous(dst_vma))
                return 0;
 
        pgtable = pte_alloc_one(dst_mm);
@@ -1049,14 +1049,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        ret = -EAGAIN;
        pmd = *src_pmd;
 
-       /*
-        * Make sure the _PAGE_UFFD_WP bit is cleared if the new VMA
-        * does not have the VM_UFFD_WP, which means that the uffd
-        * fork event is not enabled.
-        */
-       if (!(vma->vm_flags & VM_UFFD_WP))
-               pmd = pmd_clear_uffd_wp(pmd);
-
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
        if (unlikely(is_swap_pmd(pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(pmd);
@@ -1067,11 +1059,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                        pmd = swp_entry_to_pmd(entry);
                        if (pmd_swp_soft_dirty(*src_pmd))
                                pmd = pmd_swp_mksoft_dirty(pmd);
+                       if (pmd_swp_uffd_wp(*src_pmd))
+                               pmd = pmd_swp_mkuffd_wp(pmd);
                        set_pmd_at(src_mm, addr, src_pmd, pmd);
                }
                add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
                mm_inc_nr_ptes(dst_mm);
                pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+               if (!userfaultfd_wp(dst_vma))
+                       pmd = pmd_swp_clear_uffd_wp(pmd);
                set_pmd_at(dst_mm, addr, dst_pmd, pmd);
                ret = 0;
                goto out_unlock;
@@ -1107,11 +1103,11 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         * best effort that the pinned pages won't be replaced by another
         * random page during the coming copy-on-write.
         */
-       if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
+       if (unlikely(page_needs_cow_for_dma(src_vma, src_page))) {
                pte_free(dst_mm, pgtable);
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
-               __split_huge_pmd(vma, src_pmd, addr, false, NULL);
+               __split_huge_pmd(src_vma, src_pmd, addr, false, NULL);
                return -EAGAIN;
        }
 
@@ -1121,8 +1117,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 out_zero_page:
        mm_inc_nr_ptes(dst_mm);
        pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
-
        pmdp_set_wrprotect(src_mm, addr, src_pmd);
+       if (!userfaultfd_wp(dst_vma))
+               pmd = pmd_clear_uffd_wp(pmd);
        pmd = pmd_mkold(pmd_wrprotect(pmd));
        set_pmd_at(dst_mm, addr, dst_pmd, pmd);
 
@@ -1835,6 +1832,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        newpmd = swp_entry_to_pmd(entry);
                        if (pmd_swp_soft_dirty(*pmd))
                                newpmd = pmd_swp_mksoft_dirty(newpmd);
+                       if (pmd_swp_uffd_wp(*pmd))
+                               newpmd = pmd_swp_mkuffd_wp(newpmd);
                        set_pmd_at(mm, addr, pmd, newpmd);
                }
                goto unlock;
@@ -3245,6 +3244,8 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
                pmde = pmd_mksoft_dirty(pmde);
        if (is_write_migration_entry(entry))
                pmde = maybe_pmd_mkwrite(pmde, vma);
+       if (pmd_swp_uffd_wp(*pvmw->pmd))
+               pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
 
        flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
        if (PageAnon(new))
index 48c4576df89812675e7715cddb3cec70e0102b66..07a71a016c182fe66a9d510bf4c8deae5896f479 100644 (file)
@@ -707,10 +707,10 @@ out:
 
 static unsigned long
 copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-               unsigned long addr, int *rss)
+               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *dst_vma,
+               struct vm_area_struct *src_vma, unsigned long addr, int *rss)
 {
-       unsigned long vm_flags = vma->vm_flags;
+       unsigned long vm_flags = dst_vma->vm_flags;
        pte_t pte = *src_pte;
        struct page *page;
        swp_entry_t entry = pte_to_swp_entry(pte);
@@ -779,6 +779,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                        set_pte_at(src_mm, addr, src_pte, pte);
                }
        }
+       if (!userfaultfd_wp(dst_vma))
+               pte = pte_swp_clear_uffd_wp(pte);
        set_pte_at(dst_mm, addr, dst_pte, pte);
        return 0;
 }
@@ -844,6 +846,9 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
        /* All done, just insert the new page copy in the child */
        pte = mk_pte(new_page, dst_vma->vm_page_prot);
        pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma);
+       if (userfaultfd_pte_wp(dst_vma, *src_pte))
+               /* Uffd-wp needs to be delivered to dest pte as well */
+               pte = pte_wrprotect(pte_mkuffd_wp(pte));
        set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
        return 0;
 }
@@ -893,12 +898,7 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
                pte = pte_mkclean(pte);
        pte = pte_mkold(pte);
 
-       /*
-        * Make sure the _PAGE_UFFD_WP bit is cleared if the new VMA
-        * does not have the VM_UFFD_WP, which means that the uffd
-        * fork event is not enabled.
-        */
-       if (!(vm_flags & VM_UFFD_WP))
+       if (!userfaultfd_wp(dst_vma))
                pte = pte_clear_uffd_wp(pte);
 
        set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
@@ -973,7 +973,8 @@ again:
                if (unlikely(!pte_present(*src_pte))) {
                        entry.val = copy_nonpresent_pte(dst_mm, src_mm,
                                                        dst_pte, src_pte,
-                                                       src_vma, addr, rss);
+                                                       dst_vma, src_vma,
+                                                       addr, rss);
                        if (entry.val)
                                break;
                        progress += 8;
@@ -1050,8 +1051,8 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
                        || pmd_devmap(*src_pmd)) {
                        int err;
                        VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma);
-                       err = copy_huge_pmd(dst_mm, src_mm,
-                                           dst_pmd, src_pmd, addr, src_vma);
+                       err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd,
+                                           addr, dst_vma, src_vma);
                        if (err == -ENOMEM)
                                return -ENOMEM;
                        if (!err)