mm/shmem: handle uffd-wp special pte in page fault handler

author Peter Xu <peterx@redhat.com>

Fri, 13 May 2022 03:22:53 +0000 (20:22 -0700)

committer Andrew Morton <akpm@linux-foundation.org>

Fri, 13 May 2022 14:20:10 +0000 (07:20 -0700)
author Peter Xu <peterx@redhat.com>
Fri, 13 May 2022 03:22:53 +0000 (20:22 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 13 May 2022 14:20:10 +0000 (07:20 -0700)
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h

index 3354d9ddc35f5109b245f91cce54317f93763b33..e7afcdfd4b46f53dfc57a173c6869b4a4e37812e 100644 (file)
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -96,6 +96,18 @@ static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
         return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
  }
  
+/*
+ * Don't do fault around for either WP or MINOR registered uffd range.  For
+ * MINOR registered range, fault around will be a total disaster and ptes can
+ * be installed without notifications; for WP it should mostly be fine as long
+ * as the fault around checks for pte_none() before the installation, however
+ * to be super safe we just forbid it.
+ */
+static inline bool uffd_disable_fault_around(struct vm_area_struct *vma)
+{
+       return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
+}
+
  static inline bool userfaultfd_missing(struct vm_area_struct *vma)
  {
         return vma->vm_flags & VM_UFFD_MISSING;
@@ -236,6 +248,11 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
  {
  }
  
+static inline bool uffd_disable_fault_around(struct vm_area_struct *vma)
+{
+       return false;
+}
+
  #endif /* CONFIG_USERFAULTFD */
  
  static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry)
diff --git a/mm/memory.c b/mm/memory.c

index 878da420d97b8525b576c25c0df7d3327154bdcf..c16f873373a25dda45c483dcedcbd0c8456752c0 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3559,6 +3559,39 @@ static inline bool should_try_to_free_swap(struct page *page,
                 page_count(page) == 2;
  }
  
+static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
+{
+       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+                                      vmf->address, &vmf->ptl);
+       /*
+        * Be careful so that we will only recover a special uffd-wp pte into a
+        * none pte.  Otherwise it means the pte could have changed, so retry.
+        */
+       if (is_pte_marker(*vmf->pte))
+               pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte);
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
+       return 0;
+}
+
+/*
+ * This is actually a page-missing access, but with uffd-wp special pte
+ * installed.  It means this pte was wr-protected before being unmapped.
+ */
+static vm_fault_t pte_marker_handle_uffd_wp(struct vm_fault *vmf)
+{
+       /*
+        * Just in case there're leftover special ptes even after the region
+        * got unregistered - we can simply clear them.  We can also do that
+        * proactively when e.g. when we do UFFDIO_UNREGISTER upon some uffd-wp
+        * ranges, but it should be more efficient to be done lazily here.
+        */
+       if (unlikely(!userfaultfd_wp(vmf->vma) || vma_is_anonymous(vmf->vma)))
+               return pte_marker_clear(vmf);
+
+       /* do_fault() can handle pte markers too like none pte */
+       return do_fault(vmf);
+}
+
  static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
  {
         swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte);
@@ -3572,8 +3605,11 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
         if (WARN_ON_ONCE(vma_is_anonymous(vmf->vma) || !marker))
                 return VM_FAULT_SIGBUS;
  
-       /* TODO: handle pte markers */
-       return 0;
+       if (pte_marker_entry_uffd_wp(entry))
+               return pte_marker_handle_uffd_wp(vmf);
+
+       /* This is an unknown pte marker */
+       return VM_FAULT_SIGBUS;
  }
  
  /*
@@ -4157,6 +4193,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
  void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
  {
         struct vm_area_struct *vma = vmf->vma;
+       bool uffd_wp = pte_marker_uffd_wp(vmf->orig_pte);
         bool write = vmf->flags & FAULT_FLAG_WRITE;
         bool prefault = vmf->address != addr;
         pte_t entry;
@@ -4171,6 +4208,8 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
  
         if (write)
                 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       if (unlikely(uffd_wp))
+               entry = pte_mkuffd_wp(pte_wrprotect(entry));
         /* copy-on-write page */
         if (write && !(vma->vm_flags & VM_SHARED)) {
                 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
@@ -4352,9 +4391,21 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
         return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
  }
  
+/* Return true if we should do read fault-around, false otherwise */
+static inline bool should_fault_around(struct vm_fault *vmf)
+{
+       /* No ->map_pages?  No way to fault around... */
+       if (!vmf->vma->vm_ops->map_pages)
+               return false;
+
+       if (uffd_disable_fault_around(vmf->vma))
+               return false;
+
+       return fault_around_bytes >> PAGE_SHIFT > 1;
+}
+
  static vm_fault_t do_read_fault(struct vm_fault *vmf)
  {
-       struct vm_area_struct *vma = vmf->vma;
         vm_fault_t ret = 0;
  
         /*
@@ -4362,12 +4413,10 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
          * if page by the offset is not ready to be mapped (cold cache or
          * something).
          */
-       if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
-               if (likely(!userfaultfd_minor(vmf->vma))) {
-                       ret = do_fault_around(vmf);
-                       if (ret)
-                               return ret;
-               }
+       if (should_fault_around(vmf)) {
+               ret = do_fault_around(vmf);
+               if (ret)
+                       return ret;
         }
  
         ret = __do_fault(vmf);
author	Peter Xu <peterx@redhat.com>
	Fri, 13 May 2022 03:22:53 +0000 (20:22 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 13 May 2022 14:20:10 +0000 (07:20 -0700)
include/linux/userfaultfd_k.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history