git.baikalelectronics.ru Git - kernel.git/commitdiff
mm/shmem: persist uffd-wp bit across zapping for file-backed
author Peter Xu <peterx@redhat.com>
Fri, 13 May 2022 03:22:53 +0000 (20:22 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 13 May 2022 14:20:10 +0000 (07:20 -0700)
File-backed memory is prone to being unmapped at any time.  It means all
information in the pte will be dropped, including the uffd-wp flag.

To persist the uffd-wp flag, we'll use the pte markers.  This patch
teaches the zap code to understand uffd-wp and know when to keep or drop
the uffd-wp bit.

Add a new flag ZAP_FLAG_DROP_MARKER and set it in zap_details when we
don't want to persist such information, for example, when destroying
the whole vma, or punching a hole in a shmem file.  In all other cases we
should never drop the uffd-wp bit, or the wr-protect information will get
lost.

The new ZAP_FLAG_DROP_MARKER needs to be put into mm.h rather than
memory.c because it'll be further referenced in hugetlb files later.

Link: https://lkml.kernel.org/r/20220405014847.14295-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h
include/linux/mm_inline.h
mm/memory.c
mm/rmap.c

index d63ba0e0e06836abecd45e50b616d50d15c11871..61786259e52a617f4d506908437f28cc56489cbe 100644 (file)
@@ -3428,4 +3428,14 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 }
 #endif
 
+typedef unsigned int __bitwise zap_flags_t;
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory.  This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma.  By
+ * default, the flag is not set.
+ */
+#define  ZAP_FLAG_DROP_MARKER        ((__force zap_flags_t) BIT(0))
+
 #endif /* _LINUX_MM_H */
index ac32125745abc03f2373dd6930ec29783047b6cc..7b25b53c474a7f17d6ce5cc378a8f1d226afdc00 100644 (file)
@@ -6,6 +6,8 @@
 #include <linux/huge_mm.h>
 #include <linux/swap.h>
 #include <linux/string.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/swapops.h>
 
 /**
  * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
@@ -316,5 +318,46 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
        return atomic_read(&mm->tlb_flush_pending) > 1;
 }
 
+/*
+ * If @pteval was wr-protected by uffd-wp in any form, arm a uffd-wp pte marker
+ * to replace the none pte.  NOTE!  This should only be called when *pte is
+ * already cleared so we will never accidentally replace something valuable.
+ * A none pte also means we are not demoting the pte, so no tlb flush is needed
+ * here: the caller that cleared the pte should have taken care of the flush.
+ *
+ * Must be called with the pgtable lock held so that no thread acts on the none
+ * pte: anyone who sees it will fault and serialize at the pgtable lock.
+ *
+ * This function is a no-op if CONFIG_PTE_MARKER_UFFD_WP is not enabled.
+ */
+static inline void
+pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *pte, pte_t pteval)
+{
+#ifdef CONFIG_PTE_MARKER_UFFD_WP
+       bool arm_uffd_pte = false;
+
+       /* The pte must already be cleared (none) by the time we are called */
+       WARN_ON_ONCE(!pte_none(*pte));
+
+       if (vma_is_anonymous(vma) || !userfaultfd_wp(vma))
+               return;
+
+       /* The old value was a present pte carrying the uffd-wp bit */
+       if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval)))
+               arm_uffd_pte = true;
+
+       /*
+        * A uffd-wp wr-protected swap pte.  Note: this should even cover an
+        * existing pte marker with the uffd-wp bit set.
+        */
+       if (unlikely(pte_swp_uffd_wp_any(pteval)))
+               arm_uffd_pte = true;
+
+       if (unlikely(arm_uffd_pte))
+               set_pte_at(vma->vm_mm, addr, pte,
+                          make_pte_marker(PTE_MARKER_UFFD_WP));
+#endif
+}
 
 #endif
index c16f873373a25dda45c483dcedcbd0c8456752c0..ecb1d58dcaef61918050d0db33c8b58abf653237 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
 
 #include <trace/events/kmem.h>
 
@@ -1306,6 +1307,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
 struct zap_details {
        struct folio *single_folio;     /* Locked folio to be unmapped */
        bool even_cows;                 /* Zap COWed private pages too? */
+       zap_flags_t zap_flags;          /* ZAP_FLAG_* modifiers for this zap */
 };
 
 /* Whether we should zap all COWed (private) pages too */
@@ -1334,6 +1336,29 @@ static inline bool should_zap_page(struct zap_details *details, struct page *pag
        return !PageAnon(page);
 }
 
+static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
+{
+       if (!details)   /* no zap_details given: keep pte markers */
+               return false;
+
+       return details->zap_flags & ZAP_FLAG_DROP_MARKER;
+}
+
+/*
+ * Re-install a uffd-wp pte marker over the just-cleared pte when necessary,
+ * unless the zap requested dropping markers.  Must hold the pgtable lock.
+ */
+static inline void
+zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
+                             unsigned long addr, pte_t *pte,
+                             struct zap_details *details, pte_t pteval)
+{
+       if (zap_drop_file_uffd_wp(details))
+               return;
+
+       pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+}
+
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end,
@@ -1371,6 +1396,8 @@ again:
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
+                       zap_install_uffd_wp_if_needed(vma, addr, pte, details,
+                                                     ptent);
                        if (unlikely(!page))
                                continue;
 
@@ -1401,6 +1428,13 @@ again:
                        page = pfn_swap_entry_to_page(entry);
                        if (unlikely(!should_zap_page(details, page)))
                                continue;
+                       /*
+                        * Both device private/exclusive mappings should only
+                        * work with anonymous page so far, so we don't need to
+                        * consider uffd-wp bit when zap. For more information,
+                        * see zap_install_uffd_wp_if_needed().
+                        */
+                       WARN_ON_ONCE(!vma_is_anonymous(vma));
                        rss[mm_counter(page)]--;
                        if (is_device_private_entry(entry))
                                page_remove_rmap(page, vma, false);
@@ -1417,8 +1451,10 @@ again:
                        if (!should_zap_page(details, page))
                                continue;
                        rss[mm_counter(page)]--;
-               } else if (is_pte_marker_entry(entry)) {
-                       /* By default, simply drop all pte markers when zap */
+               } else if (pte_marker_entry_uffd_wp(entry)) {
+                       /* Only drop the uffd-wp marker if explicitly requested */
+                       if (!zap_drop_file_uffd_wp(details))
+                               continue;
                } else if (is_hwpoison_entry(entry)) {
                        if (!should_zap_cows(details))
                                continue;
@@ -1427,6 +1463,7 @@ again:
                        WARN_ON_ONCE(1);
                }
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+               zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
        } while (pte++, addr += PAGE_SIZE, addr != end);
 
        add_mm_rss_vec(mm, rss);
@@ -1637,12 +1674,17 @@ void unmap_vmas(struct mmu_gather *tlb,
                unsigned long end_addr)
 {
        struct mmu_notifier_range range;
+       struct zap_details details = {
+               .zap_flags = ZAP_FLAG_DROP_MARKER,
+               /* Careful - we need to zap private pages too! */
+               .even_cows = true,
+       };
 
        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
                                start_addr, end_addr);
        mmu_notifier_invalidate_range_start(&range);
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
-               unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+               unmap_single_vma(tlb, vma, start_addr, end_addr, &details);
        mmu_notifier_invalidate_range_end(&range);
 }
 
@@ -3438,6 +3480,7 @@ void unmap_mapping_folio(struct folio *folio)
 
        details.even_cows = false;
        details.single_folio = folio;
+       details.zap_flags = ZAP_FLAG_DROP_MARKER;
 
        i_mmap_lock_read(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
index 72464378e1a69e71afaa7955926a87c62eaacd06..94d6b24a1ac2d9ca3dea887f8f22799ba288ae7e 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -73,6 +73,7 @@
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
 
 #include <asm/tlbflush.h>
 
@@ -1585,6 +1586,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                        pteval = ptep_clear_flush(vma, address, pvmw.pte);
                }
 
+               /*
+                * Now the pte is cleared. If this pte was uffd-wp armed,
+                * we may want to replace a none pte with a marker pte if
+                * it's file-backed, so we don't lose the tracking info.
+                */
+               pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
                /* Set the dirty flag on the folio now the pte is gone. */
                if (pte_dirty(pteval))
                        folio_mark_dirty(folio);