mm/hugetlb: only drop uffd-wp special pte if required

author Peter Xu <peterx@redhat.com>
Fri, 13 May 2022 03:22:55 +0000 (20:22 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 13 May 2022 14:20:11 +0000 (07:20 -0700)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 591599829e2a6659e355d5ca7e2da1e9474e6007..5945caccf003ef96e91574ded30ef1fa86359185 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -405,7 +405,8 @@ static void remove_huge_page(struct page *page)
  }
  
  static void
-hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
+                     zap_flags_t zap_flags)
  {
         struct vm_area_struct *vma;
  
@@ -439,7 +440,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
                 }
  
                 unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
-                                                                       NULL);
+                                    NULL, zap_flags);
         }
  }
  
@@ -517,7 +518,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
                                 hugetlb_vmdelete_list(&mapping->i_mmap,
                                         index * pages_per_huge_page(h),
-                                       (index + 1) * pages_per_huge_page(h));
+                                       (index + 1) * pages_per_huge_page(h),
+                                       ZAP_FLAG_DROP_MARKER);
                                 i_mmap_unlock_write(mapping);
                         }
  
@@ -583,7 +585,8 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
         i_mmap_lock_write(mapping);
         i_size_write(inode, offset);
         if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
-               hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
+               hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
+                                     ZAP_FLAG_DROP_MARKER);
         i_mmap_unlock_write(mapping);
         remove_inode_hugepages(inode, offset, LLONG_MAX);
  }
@@ -616,8 +619,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                 i_mmap_lock_write(mapping);
                 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
                         hugetlb_vmdelete_list(&mapping->i_mmap,
-                                               hole_start >> PAGE_SHIFT,
-                                               hole_end  >> PAGE_SHIFT);
+                                             hole_start >> PAGE_SHIFT,
+                                             hole_end >> PAGE_SHIFT, 0);
                 i_mmap_unlock_write(mapping);
                 remove_inode_hugepages(inode, hole_start, hole_end);
                 inode_unlock(inode);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index f1143f1fb4443bedf4615b1406843f7f87b45e40..19cec415f54685191f8e809de3928ff6462d832a 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -143,11 +143,12 @@ long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
                          unsigned long *, unsigned long *, long, unsigned int,
                          int *);
  void unmap_hugepage_range(struct vm_area_struct *,
-                         unsigned long, unsigned long, struct page *);
+                         unsigned long, unsigned long, struct page *,
+                         zap_flags_t);
  void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                           struct vm_area_struct *vma,
                           unsigned long start, unsigned long end,
-                         struct page *ref_page);
+                         struct page *ref_page, zap_flags_t zap_flags);
  void hugetlb_report_meminfo(struct seq_file *);
  int hugetlb_report_node_meminfo(char *buf, int len, int nid);
  void hugetlb_show_meminfo(void);
@@ -406,7 +407,8 @@ static inline unsigned long hugetlb_change_protection(
  
  static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                         struct vm_area_struct *vma, unsigned long start,
-                       unsigned long end, struct page *ref_page)
+                       unsigned long end, struct page *ref_page,
+                       zap_flags_t zap_flags)
  {
         BUG();
  }
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 61786259e52a617f4d506908437f28cc56489cbe..de32c038338751bbc155793dff7a65e369d5a996 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3428,8 +3428,6 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
  }
  #endif
  
-typedef unsigned int __bitwise zap_flags_t;
-
  /*
   * Whether to drop the pte markers, for example, the uffd-wp information for
   * file-backed memory.  This should only be specified when we will completely
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index dd382270ae409f1fa0a61b5a8aea0e2c13f6d5d9..b34ff2cdbc4face79515b1881188c3e89f36034f 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -863,4 +863,6 @@ enum fault_flag {
         FAULT_FLAG_ORIG_PTE_VALID =     1 << 11,
  };
  
+typedef unsigned int __bitwise zap_flags_t;
+
  #endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index ec9774ed84c018ae67994d9e62e485682a476559..99281aecbd289a3495822c8e38288c7e9c7a8e34 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4973,7 +4973,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
  
  static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                    unsigned long start, unsigned long end,
-                                  struct page *ref_page)
+                                  struct page *ref_page, zap_flags_t zap_flags)
  {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
@@ -5029,7 +5029,18 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                  * unmapped and its refcount is dropped, so just clear pte here.
                  */
                 if (unlikely(!pte_present(pte))) {
-                       huge_pte_clear(mm, address, ptep, sz);
+                       /*
+                        * If the pte was wr-protected by uffd-wp in any of the
+                        * swap forms, meanwhile the caller does not want to
+                        * drop the uffd-wp bit in this zap, then replace the
+                        * pte with a marker.
+                        */
+                       if (pte_swp_uffd_wp_any(pte) &&
+                           !(zap_flags & ZAP_FLAG_DROP_MARKER))
+                               set_huge_pte_at(mm, address, ptep,
+                                               make_pte_marker(PTE_MARKER_UFFD_WP));
+                       else
+                               huge_pte_clear(mm, address, ptep, sz);
                         spin_unlock(ptl);
                         continue;
                 }
@@ -5057,7 +5068,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                 tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
                 if (huge_pte_dirty(pte))
                         set_page_dirty(page);
-
+               /* Leave a uffd-wp pte marker if needed */
+               if (huge_pte_uffd_wp(pte) &&
+                   !(zap_flags & ZAP_FLAG_DROP_MARKER))
+                       set_huge_pte_at(mm, address, ptep,
+                                       make_pte_marker(PTE_MARKER_UFFD_WP));
                 hugetlb_count_sub(pages_per_huge_page(h), mm);
                 page_remove_rmap(page, vma, true);
  
@@ -5091,9 +5106,10 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
  
  void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                           struct vm_area_struct *vma, unsigned long start,
-                         unsigned long end, struct page *ref_page)
+                         unsigned long end, struct page *ref_page,
+                         zap_flags_t zap_flags)
  {
-       __unmap_hugepage_range(tlb, vma, start, end, ref_page);
+       __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
  
         /*
          * Clear this flag so that x86's huge_pmd_share page_table_shareable
@@ -5109,12 +5125,13 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
  }
  
  void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-                         unsigned long end, struct page *ref_page)
+                         unsigned long end, struct page *ref_page,
+                         zap_flags_t zap_flags)
  {
         struct mmu_gather tlb;
  
         tlb_gather_mmu(&tlb, vma->vm_mm);
-       __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+       __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
         tlb_finish_mmu(&tlb);
  }
  
@@ -5169,7 +5186,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                  */
                 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
                         unmap_hugepage_range(iter_vma, address,
-                                            address + huge_page_size(h), page);
+                                            address + huge_page_size(h), page, 0);
         }
         i_mmap_unlock_write(mapping);
  }
diff --git a/mm/memory.c b/mm/memory.c

index 8827157cf3928d76894c49035c95557a8888e4c8..82adda885605c83c80dc450fdaaf70f5c5d58ba9 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1675,8 +1675,11 @@ static void unmap_single_vma(struct mmu_gather *tlb,
                          * safe to do nothing in this case.
                          */
                         if (vma->vm_file) {
+                               zap_flags_t zap_flags = details ?
+                                   details->zap_flags : 0;
                                 i_mmap_lock_write(vma->vm_file->f_mapping);
-                               __unmap_hugepage_range_final(tlb, vma, start, end, NULL);
+                               __unmap_hugepage_range_final(tlb, vma, start, end,
+                                                            NULL, zap_flags);
                                 i_mmap_unlock_write(vma->vm_file->f_mapping);
                         }
                 } else
author	Peter Xu <peterx@redhat.com>
	Fri, 13 May 2022 03:22:55 +0000 (20:22 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 13 May 2022 14:20:11 +0000 (07:20 -0700)
fs/hugetlbfs/inode.c		patch | blob | history
include/linux/hugetlb.h		patch | blob | history
include/linux/mm.h		patch | blob | history
include/linux/mm_types.h		patch | blob | history
mm/hugetlb.c		patch | blob | history
mm/memory.c		patch | blob | history