Pull folio updates from Matthew Wilcox:
- Rewrite how munlock works to massively reduce the contention on
i_mmap_rwsem (Hugh Dickins):
https://lore.kernel.org/linux-mm/
8e4356d-9622-a7f0-b2c-
f116b5f2efea@google.com/
- Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
Hellwig):
https://lore.kernel.org/linux-mm/
20220210072828.
2930359-1-hch@lst.de/
- Convert GUP to use folios and make pincount available for order-1
pages. (Matthew Wilcox)
- Convert a few more truncation functions to use folios (Matthew
Wilcox)
- Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
Wilcox)
- Convert rmap_walk to use folios (Matthew Wilcox)
- Convert most of shrink_page_list() to use a folio (Matthew Wilcox)
- Add support for creating large folios in readahead (Matthew Wilcox)
* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
mm/damon: minor cleanup for damon_pa_young
selftests/vm/transhuge-stress: Support file-backed PMD folios
mm/filemap: Support VM_HUGEPAGE for file mappings
mm/readahead: Switch to page_cache_ra_order
mm/readahead: Align file mappings for non-DAX
mm/readahead: Add large folio readahead
mm: Support arbitrary THP sizes
mm: Make large folios depend on THP
mm: Fix READ_ONLY_THP warning
mm/filemap: Allow large folios to be added to the page cache
mm: Turn can_split_huge_page() into can_split_folio()
mm/vmscan: Convert pageout() to take a folio
mm/vmscan: Turn page_check_references() into folio_check_references()
mm/vmscan: Account large folios correctly
mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
mm/vmscan: Free non-shmem folios without splitting them
mm/rmap: Constify the rmap_walk_control argument
mm/rmap: Convert rmap_walk() to take a folio
mm: Turn page_anon_vma() into folio_anon_vma()
mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
...
#include <linux/swap.h>
#include "../internal.h"
-#include "prmtv-common.h"
+#include "ops-common.h"
- static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+ static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, void *arg)
{
- struct page_vma_mapped_walk pvmw = {
- .page = page,
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
while (page_vma_mapped_walk(&pvmw)) {
addr = pvmw.address;
if (PageAnon(new))
page_add_anon_rmap(new, vma, mmun_start, true);
else
- page_add_file_rmap(new, true);
+ page_add_file_rmap(new, vma, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
- if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
- mlock_vma_page(new);
+
+ /* No need to invalidate - it was non-present before */
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
#endif
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);
+ void free_zone_device_page(struct page *page);
+
+ /*
+ * mm/gup.c
+ */
+ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
+
+DECLARE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
+
#endif /* __MM_INTERNAL_H */
* important here to have the interrupts disabled because it is the
* only synchronisation we have for updating the per-CPU variables.
*/
- VM_BUG_ON(!irqs_disabled());
+ memcg_stats_lock();
mem_cgroup_charge_statistics(memcg, -nr_entries);
- memcg_check_events(memcg, page_to_nid(page));
+ memcg_stats_unlock();
+ memcg_check_events(memcg, folio_nid(folio));
css_put(&memcg->css);
}
if (kill)
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
- if (!PageHuge(hpage)) {
- try_to_unmap(folio, ttu);
+ if (PageHuge(hpage) && !PageAnon(hpage)) {
+ /*
+ * For hugetlb pages in shared mappings, try_to_unmap
+ * could potentially call huge_pmd_unshare. Because of
+ * this, take semaphore in write mode here and set
+ * TTU_RMAP_LOCKED to indicate we have taken the lock
+ * at this higher level.
+ */
+ mapping = hugetlb_page_mapping_lock_write(hpage);
+ if (mapping) {
- try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
++ try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
+ i_mmap_unlock_write(mapping);
+ } else
+ pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
} else {
- try_to_unmap(hpage, ttu);
- if (!PageAnon(hpage)) {
- /*
- * For hugetlb pages in shared mappings, try_to_unmap
- * could potentially call huge_pmd_unshare. Because of
- * this, take semaphore in write mode here and set
- * TTU_RMAP_LOCKED to indicate we have taken the lock
- * at this higher level.
- */
- mapping = hugetlb_page_mapping_lock_write(hpage);
- if (mapping) {
- try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
- i_mmap_unlock_write(mapping);
- } else
- pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
- } else {
- try_to_unmap(folio, ttu);
- }
++ try_to_unmap(folio, ttu);
}
unmap_success = !page_mapped(hpage);
entry = pte_to_swp_entry(ptent);
if (is_device_private_entry(entry) ||
is_device_exclusive_entry(entry)) {
- struct page *page = pfn_swap_entry_to_page(entry);
-
- if (unlikely(zap_skip_check_mapping(details, page)))
+ page = pfn_swap_entry_to_page(entry);
+ if (unlikely(!should_zap_page(details, page)))
continue;
- pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
rss[mm_counter(page)]--;
if (is_device_private_entry(entry))
- page_remove_rmap(page, false);
+ page_remove_rmap(page, vma, false);
put_page(page);
- continue;
- }
-
- /* If details->check_mapping, we leave swap entries. */
- if (unlikely(details))
- continue;
-
- if (!non_swap_entry(entry))
+ } else if (!non_swap_entry(entry)) {
+ /* Genuine swap entry, hence a private anon page */
+ if (!should_zap_cows(details))
+ continue;
rss[MM_SWAPENTS]--;
- else if (is_migration_entry(entry)) {
- struct page *page;
-
+ if (unlikely(!free_swap_and_cache(entry)))
+ print_bad_pte(vma, addr, ptent, NULL);
+ } else if (is_migration_entry(entry)) {
page = pfn_swap_entry_to_page(entry);
+ if (!should_zap_page(details, page))
+ continue;
rss[mm_counter(page)]--;
+ } else if (is_hwpoison_entry(entry)) {
+ if (!should_zap_cows(details))
+ continue;
+ } else {
+ /* We should have covered all the swap entry types */
+ WARN_ON_ONCE(1);
}
- if (unlikely(!free_swap_and_cache(entry)))
- print_bad_pte(vma, addr, ptent, NULL);
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
* that isolated the page, the page cache and optional buffer
* heads at page->private.
*/
- int page_cache_pins = thp_nr_pages(page);
- return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
+ return folio_ref_count(folio) - folio_test_private(folio) ==
+ 1 + folio_nr_pages(folio);
}
-static int may_write_to_inode(struct inode *inode)
-{
- if (current->flags & PF_SWAPWRITE)
- return 1;
- if (!inode_write_congested(inode))
- return 1;
- if (inode_to_bdi(inode) == current->backing_dev_info)
- return 1;
- return 0;
-}
-
/*
- * We detected a synchronous write error writing a page out. Probably
+ * We detected a synchronous write error writing a folio out. Probably
* -ENOSPC. We need to propagate that into the address_space for a subsequent
* fsync(), msync() or close().
*
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
- if (!may_write_to_inode(mapping->host))
- return PAGE_KEEP;
- if (clear_page_dirty_for_io(page)) {
+ if (folio_clear_dirty_for_io(folio)) {
int res;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
if (referenced_ptes) {
/*
- * All mapped pages start out with page table
+ * All mapped folios start out with page table
* references from the instantiating fault, so we need
- * to look twice if a mapped file/anon page is used more
- * to look twice if a mapped file folio is used more
++ * to look twice if a mapped file/anon folio is used more
* than once.
*
* Mark it and spare it for another trip around the
* end of the LRU a second time.
*/
mapping = page_mapping(page);
- if (((dirty || writeback) && mapping &&
- inode_write_congested(mapping->host)) ||
- (writeback && PageReclaim(page)))
+ if (writeback && PageReclaim(page))
- stat->nr_congested++;
+ stat->nr_congested += nr_pages;
/*
* If a page at the tail of the LRU is under writeback, there