mm: multi-gen LRU: groundwork

author Yu Zhao <yuzhao@google.com>

Sun, 18 Sep 2022 08:00:02 +0000 (02:00 -0600)

committer Andrew Morton <akpm@linux-foundation.org>

Tue, 27 Sep 2022 02:46:09 +0000 (19:46 -0700)
author Yu Zhao <yuzhao@google.com>
Sun, 18 Sep 2022 08:00:02 +0000 (02:00 -0600)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 27 Sep 2022 02:46:09 +0000 (19:46 -0700)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c

index 51897427a5346ed2377ec92857154601c24917e3..b4a6e0a1b945aaf82eb3f141a2b188307379da7e 100644 (file)
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *page)
                1 << PG_active |
                1 << PG_workingset |
                1 << PG_reclaim |
-              1 << PG_waiters))) {
+              1 << PG_waiters |
+              LRU_GEN_MASK | LRU_REFS_MASK))) {
                 dump_page(page, "fuse: trying to steal weird page");
                 return 1;
         }
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h

index fb8aadb81cd6102412ac955b145cf43b14bc8e00..2ff703900fd09e10c97860bf785fda2d16e6dded 100644 (file)
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -40,6 +40,9 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec,
  {
         struct pglist_data *pgdat = lruvec_pgdat(lruvec);
  
+       lockdep_assert_held(&lruvec->lru_lock);
+       WARN_ON_ONCE(nr_pages != (int)nr_pages);
+
         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
         __mod_zone_page_state(&pgdat->node_zones[zid],
                                 NR_ZONE_LRU_BASE + lru, nr_pages);
@@ -101,11 +104,177 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio)
         return lru;
  }
  
+#ifdef CONFIG_LRU_GEN
+/* Always true here: this variant is compiled only when CONFIG_LRU_GEN is set. */
+static inline bool lru_gen_enabled(void)
+{
+       return true;
+}
+/* True while current handles a fault that lru_gen_enter_fault() marked LRU-eligible. */
+static inline bool lru_gen_in_fault(void)
+{
+       return current->in_lru_fault;
+}
+/* Folds a sequence number into a generation index in [0, MAX_NR_GENS). */
+static inline int lru_gen_from_seq(unsigned long seq)
+{
+       return seq % MAX_NR_GENS;
+}
+/* Returns the folio's generation index, or -1 when the gen counter in folio->flags is 0. */
+static inline int folio_lru_gen(struct folio *folio)
+{
+       unsigned long flags = READ_ONCE(folio->flags);
+
+       return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+/* Tells whether gen is one of the two youngest generations (max_seq or max_seq - 1). */
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+{
+       unsigned long max_seq = lruvec->lrugen.max_seq;
+
+       VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+
+       /* see the comment on MIN_NR_GENS */
+       return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
+}
+/* Moves a folio's page count from old_gen to new_gen; -1 on either side means "not listed". */
+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio,
+                                      int old_gen, int new_gen)
+{
+       int type = folio_is_file_lru(folio);
+       int zone = folio_zonenum(folio);
+       int delta = folio_nr_pages(folio);
+       enum lru_list lru = type * LRU_INACTIVE_FILE;
+       struct lru_gen_struct *lrugen = &lruvec->lrugen;
+       /* sanity checks: a generation is -1 or a valid index, and both can't be -1 */
+       VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
+       VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
+       VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
+       /* per-generation counts first; a gen-to-gen move changes nothing else here */
+       if (old_gen >= 0)
+               WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
+                          lrugen->nr_pages[old_gen][type][zone] - delta);
+       if (new_gen >= 0)
+               WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
+                          lrugen->nr_pages[new_gen][type][zone] + delta);
+
+       /* addition: also grow the active or inactive LRU size, depending on new_gen */
+       if (old_gen < 0) {
+               if (lru_gen_is_active(lruvec, new_gen))
+                       lru += LRU_ACTIVE;
+               __update_lru_size(lruvec, lru, zone, delta);
+               return;
+       }
+
+       /* deletion: also shrink the active or inactive LRU size, depending on old_gen */
+       if (new_gen < 0) {
+               if (lru_gen_is_active(lruvec, old_gen))
+                       lru += LRU_ACTIVE;
+               __update_lru_size(lruvec, lru, zone, -delta);
+               return;
+       }
+}
+/* Lists an evictable folio on a generation; false leaves it to the caller's regular LRU path. */
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+       unsigned long seq;
+       unsigned long flags;
+       int gen = folio_lru_gen(folio);
+       int type = folio_is_file_lru(folio);
+       int zone = folio_zonenum(folio);
+       struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+       VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
+       /* unevictable folios never go on the generation lists */
+       if (folio_test_unevictable(folio))
+               return false;
+       /*
+        * There are three common cases for this page:
+        * 1. If it's hot, e.g., freshly faulted in or previously hot and
+        *    migrated, add it to the youngest generation.
+        * 2. If it's cold but can't be evicted immediately, i.e., an anon page
+        *    not in swapcache or a dirty page pending writeback, add it to the
+        *    second oldest generation.
+        * 3. Everything else (clean, cold) is added to the oldest generation.
+        */
+       if (folio_test_active(folio))
+               seq = lrugen->max_seq;
+       else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
+                (folio_test_reclaim(folio) &&
+                 (folio_test_dirty(folio) || folio_test_writeback(folio))))
+               seq = lrugen->min_seq[type] + 1;
+       else
+               seq = lrugen->min_seq[type];
+       /* translate the chosen seq to a list index and encode it as gen+1 */
+       gen = lru_gen_from_seq(seq);
+       flags = (gen + 1UL) << LRU_GEN_PGOFF;
+       /* see the comment on MIN_NR_GENS about PG_active */
+       set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
+
+       lru_gen_update_size(lruvec, folio, -1, gen);
+       /* for folio_rotate_reclaimable() */
+       if (reclaiming)
+               list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
+       else
+               list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
+
+       return true;
+}
+/* Takes a folio off its generation list; false means it isn't on one (gen counter is 0). */
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+       unsigned long flags;
+       int gen = folio_lru_gen(folio);
+
+       if (gen < 0)
+               return false;
+
+       VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
+       VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
+       /* an active-generation folio gets PG_active back unless it leaves for reclaim */
+       /* for folio_migrate_flags() */
+       flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
+       flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
+       gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+       /* gen was re-read from the value set_mask_bits() returned; account with that */
+       lru_gen_update_size(lruvec, folio, gen, -1);
+       list_del(&folio->lru);
+
+       return true;
+}
+
+#else /* !CONFIG_LRU_GEN */
+/* Stubs for !CONFIG_LRU_GEN: all return false, so callers continue on their regular path. */
+static inline bool lru_gen_enabled(void)
+{
+       return false;
+}
+
+static inline bool lru_gen_in_fault(void)
+{
+       return false;
+}
+
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+       return false;
+}
+
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+       return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
  static __always_inline
  void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
  {
         enum lru_list lru = folio_lru_list(folio);
  
+       if (lru_gen_add_folio(lruvec, folio, false))
+               return;
+
         update_lru_size(lruvec, lru, folio_zonenum(folio),
                         folio_nr_pages(folio));
         if (lru != LRU_UNEVICTABLE)
@@ -123,6 +292,9 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
  {
         enum lru_list lru = folio_lru_list(folio);
  
+       if (lru_gen_add_folio(lruvec, folio, true))
+               return;
+
         update_lru_size(lruvec, lru, folio_zonenum(folio),
                         folio_nr_pages(folio));
         /* This is not expected to be used on LRU_UNEVICTABLE */
@@ -140,6 +312,9 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
  {
         enum lru_list lru = folio_lru_list(folio);
  
+       if (lru_gen_del_folio(lruvec, folio, false))
+               return;
+
         if (lru != LRU_UNEVICTABLE)
                 list_del(&folio->lru);
         update_lru_size(lruvec, lru, folio_zonenum(folio),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 18cf0fc5ce670327c8a72bd5efd42992038ac82c..6f4ea078d90ff572b710db43fe4fc8d8f4bd7664 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -317,6 +317,102 @@ enum lruvec_flags {
                                          */
  };
  
+#endif /* !__GENERATING_BOUNDS_H */
+
+/*
+ * Evictable pages are divided into multiple generations. The youngest and the
+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
+ *
+ * A page is added to the youngest generation on faulting. The aging needs to
+ * check the accessed bit at least twice before handing this page over to the
+ * eviction. The first check takes care of the accessed bit set on the initial
+ * fault; the second check makes sure this page hasn't been used since then.
+ * This process, AKA second chance, requires a minimum of two generations,
+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
+ * rest of generations, if they exist, are considered inactive. See
+ * lru_gen_is_active().
+ *
+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
+ * the aging need not worry about it. And it's set again when a page
+ * considered active is isolated for non-reclaiming purposes, e.g., migration.
+ * See lru_gen_add_folio() and lru_gen_del_folio().
+ *
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
+ * number of categories of the active/inactive LRU when keeping track of
+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
+ * in folio->flags.
+ */
+#define MIN_NR_GENS            2U
+#define MAX_NR_GENS            4U
+
+#ifndef __GENERATING_BOUNDS_H
+
+struct lruvec;
+/* Masks for the generation counter and the refs field kept in folio->flags. */
+#define LRU_GEN_MASK           ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+#define LRU_REFS_MASK          ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+
+#ifdef CONFIG_LRU_GEN
+/* Folio types; the values index the ANON_AND_FILE dimension of the lrugen arrays. */
+enum {
+       LRU_GEN_ANON,
+       LRU_GEN_FILE,
+};
+
+/*
+ * The youngest generation number is stored in max_seq for both anon and file
+ * types as they are aged on an equal footing. The oldest generation numbers are
+ * stored in min_seq[] separately for anon and file types as clean file pages
+ * can be evicted regardless of swap constraints.
+ *
+ * Normally anon and file min_seq are in sync. But if swapping is constrained,
+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
+ * min_seq behind.
+ *
+ * The number of pages in each generation is eventually consistent and therefore
+ * can be transiently negative.
+ */
+struct lru_gen_struct {
+       /* the aging increments the youngest generation number; starts at MIN_NR_GENS + 1 */
+       unsigned long max_seq;
+       /* the eviction increments the oldest generation numbers; one per type (anon, file) */
+       unsigned long min_seq[ANON_AND_FILE];
+       /* the multi-gen LRU lists, lazily sorted on eviction; indexed [gen][type][zone] */
+       struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+       /* the multi-gen LRU sizes, eventually consistent; same indexing as lists[] */
+       long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+};
+/* Defined in mm/vmscan.c when CONFIG_LRU_GEN is set. */
+void lru_gen_init_lruvec(struct lruvec *lruvec);
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
+void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+#endif
+
+#else /* !CONFIG_LRU_GEN */
+/* Empty stand-ins so callers need no #ifdef when the multi-gen LRU is compiled out. */
+static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+{
+}
+#endif
+
+#endif /* CONFIG_LRU_GEN */
+
  struct lruvec {
         struct list_head                lists[NR_LRU_LISTS];
         /* per lruvec lru_lock for memcg */
@@ -334,6 +430,10 @@ struct lruvec {
         unsigned long                   refaults[ANON_AND_FILE];
         /* Various lruvec state flags (enum lruvec_flags) */
         unsigned long                   flags;
+#ifdef CONFIG_LRU_GEN
+       /* evictable pages divided into generations */
+       struct lru_gen_struct           lrugen;
+#endif
  #ifdef CONFIG_MEMCG
         struct pglist_data *pgdat;
  #endif
@@ -749,6 +849,8 @@ static inline bool zone_is_empty(struct zone *zone)
  #define ZONES_PGOFF            (NODES_PGOFF - ZONES_WIDTH)
  #define LAST_CPUPID_PGOFF      (ZONES_PGOFF - LAST_CPUPID_WIDTH)
  #define KASAN_TAG_PGOFF                (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+#define LRU_GEN_PGOFF          (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
+#define LRU_REFS_PGOFF         (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
  
  /*
   * Define the bit shifts to access each section.  For non-existent
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h

index ef1e3e736e1483e1f6070d99dfac2c163069c902..240905407a188fb02106c55338734b30b7b28881 100644 (file)
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -55,7 +55,8 @@
  #define SECTIONS_WIDTH         0
  #endif
  
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
+       <= BITS_PER_LONG - NR_PAGEFLAGS
  #define NODES_WIDTH            NODES_SHIFT
  #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
  #error "Vmemmap: No space for nodes field in page flags"
@@ -89,8 +90,8 @@
  #define LAST_CPUPID_SHIFT 0
  #endif
  
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
-       <= BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+       KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
  #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
  #else
  #define LAST_CPUPID_WIDTH 0
@@ -100,10 +101,12 @@
  #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
  #endif
  
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
-       > BITS_PER_LONG - NR_PAGEFLAGS
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
+       KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
  #error "Not enough bits in page flags"
  #endif
  
+#define LRU_REFS_WIDTH 0
+
  #endif
  #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h

index 465ff35a8c00a80c4540cdd7e8a1441b97137fda..0b0ae5084e60c7f8b784a239c718e2a418566f89 100644 (file)
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
          1UL << PG_private      | 1UL << PG_private_2   |       \
          1UL << PG_writeback    | 1UL << PG_reserved    |       \
          1UL << PG_slab         | 1UL << PG_active      |       \
-        1UL << PG_unevictable  | __PG_MLOCKED)
+        1UL << PG_unevictable  | __PG_MLOCKED | LRU_GEN_MASK)
  
  /*
   * Flags checked when a page is prepped for return by the page allocator.
@@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
   * alloc-free cycle to prevent from reusing the page.
   */
  #define PAGE_FLAGS_CHECK_AT_PREP       \
-       (PAGEFLAGS_MASK & ~__PG_HWPOISON)
+       ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
  
  #define PAGE_FLAGS_PRIVATE                             \
         (1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index d9a2466664f77ce2eed5591b6b6c69536cf11d56..a2dcfb91df0326f4423598a7279bf744820b8cdf 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -914,6 +914,10 @@ struct task_struct {
  #ifdef CONFIG_MEMCG
         unsigned                        in_user_fault:1;
  #endif
+#ifdef CONFIG_LRU_GEN
+       /* whether the LRU algorithm may apply to this access */
+       unsigned                        in_lru_fault:1;
+#endif
  #ifdef CONFIG_COMPAT_BRK
         unsigned                        brk_randomized:1;
  #endif
diff --git a/kernel/bounds.c b/kernel/bounds.c

index 9795d75b09b2323306ad6a058a6350a87a251443..5ee60777d8e4624b87938429142bbf0963e985a8 100644 (file)
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -22,6 +22,11 @@ int main(void)
         DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
  #endif
         DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
+#ifdef CONFIG_LRU_GEN
+       DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
+#else
+       DEFINE(LRU_GEN_WIDTH, 0);
+#endif
         /* End of constants */
  
         return 0;
diff --git a/mm/Kconfig b/mm/Kconfig

index e3fbd0788878479a8231bce0e961cdf5e6d564bb..378306aee6227416424da105aeed3ff78ecfc273 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1118,6 +1118,14 @@ config PTE_MARKER_UFFD_WP
           purposes.  It is required to enable userfaultfd write protection on
           file-backed memory types like shmem and hugetlbfs.
  
+config LRU_GEN
+       bool "Multi-Gen LRU"
+       depends on MMU
+       # make sure folio->flags has enough spare bits
+       depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
+       help
+         A high performance LRU implementation to overcommit memory.
+
  source "mm/damon/Kconfig"
  
  endmenu
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index f4a656b279b1b0f49280bd3cce1962052351a0e6..949d7c325133c83a366319848a3cdf7259eca13f 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2444,7 +2444,8 @@ static void __split_huge_page_tail(struct page *head, int tail,
  #ifdef CONFIG_64BIT
                          (1L << PG_arch_2) |
  #endif
-                        (1L << PG_dirty)));
+                        (1L << PG_dirty) |
+                        LRU_GEN_MASK | LRU_REFS_MASK));
  
         /* ->mapping in first tail page is compound_mapcount */
         VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 403af5f7a2b90006d6405ca72f9cd736c324e124..937141d482211fba3abf13f8842203a8d07e6a6d 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5175,6 +5175,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
  
  static void mem_cgroup_free(struct mem_cgroup *memcg)
  {
+       lru_gen_exit_memcg(memcg);
         memcg_wb_domain_exit(memcg);
         __mem_cgroup_free(memcg);
  }
@@ -5233,6 +5234,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
         memcg->deferred_split_queue.split_queue_len = 0;
  #endif
         idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+       lru_gen_init_memcg(memcg);
         return memcg;
  fail:
         mem_cgroup_id_remove(memcg);
diff --git a/mm/memory.c b/mm/memory.c

index 3a9b00c765c2bfafe2bef671fefbfe6fa52276a8..63832dab15d366a18fe96eac0aa467e3c82bd4ef 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5117,6 +5117,27 @@ static inline void mm_account_fault(struct pt_regs *regs,
                 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
  }
  
+#ifdef CONFIG_LRU_GEN
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+       /* the LRU algorithm doesn't apply to sequential or random reads, so don't flag those */
+       current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+}
+/* Drops the mark again; handle_mm_fault() calls this right after the fault is handled. */
+static void lru_gen_exit_fault(void)
+{
+       current->in_lru_fault = false;
+}
+#else /* !CONFIG_LRU_GEN: the fault hooks below compile to nothing */
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+}
+
+static void lru_gen_exit_fault(void)
+{
+}
+#endif /* CONFIG_LRU_GEN */
+
  /*
   * By the time we get here, we already hold the mm semaphore
   *
@@ -5148,11 +5169,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
         if (flags & FAULT_FLAG_USER)
                 mem_cgroup_enter_user_fault();
  
+       lru_gen_enter_fault(vma);
+
         if (unlikely(is_vm_hugetlb_page(vma)))
                 ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
         else
                 ret = __handle_mm_fault(vma, address, flags);
  
+       lru_gen_exit_fault();
+
         if (flags & FAULT_FLAG_USER) {
                 mem_cgroup_exit_user_fault();
                 /*
diff --git a/mm/mm_init.c b/mm/mm_init.c

index 9ddaf0e1b0ab95fba2a5d7eee30899ccbd7de701..0d7b2bd2454a1f36b7d82976fda0ae10f4ab5dff 100644 (file)
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layout(void)
  
         shift = 8 * sizeof(unsigned long);
         width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
-               - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
+               - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-               "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
+               "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
                 SECTIONS_WIDTH,
                 NODES_WIDTH,
                 ZONES_WIDTH,
                 LAST_CPUPID_WIDTH,
                 KASAN_TAG_WIDTH,
+               LRU_GEN_WIDTH,
+               LRU_REFS_WIDTH,
                 NR_PAGEFLAGS);
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
                 "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
diff --git a/mm/mmzone.c b/mm/mmzone.c

index 0ae7571e35abb07795037c510ec1c161ce1e8b82..68e1511be12de6052b91eed6c86287042dd69d73 100644 (file)
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -88,6 +88,8 @@ void lruvec_init(struct lruvec *lruvec)
          * Poison its list head, so that any operations on it would crash.
          */
         list_del(&lruvec->lists[LRU_UNEVICTABLE]);
+
+       lru_gen_init_lruvec(lruvec);
  }
  
  #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
diff --git a/mm/swap.c b/mm/swap.c

index 9cee7f6a380942e9bb60e783b4abdea07e9aaa69..0e423b7d458b6ee0b9cceab2bf99199194f1a0fc 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -484,6 +484,11 @@ void folio_add_lru(struct folio *folio)
                         folio_test_unevictable(folio), folio);
         VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
  
+       /* see the comment in lru_gen_add_folio() */
+       if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
+           lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
+               folio_set_active(folio);
+
         folio_get(folio);
         local_lock(&cpu_fbatches.lock);
         fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
@@ -575,7 +580,7 @@ static void lru_deactivate_file_fn(struct lruvec *lruvec, struct folio *folio)
  
  static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
  {
-       if (folio_test_active(folio) && !folio_test_unevictable(folio)) {
+       if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) {
                 long nr_pages = folio_nr_pages(folio);
  
                 lruvec_del_folio(lruvec, folio);
@@ -688,8 +693,8 @@ void deactivate_page(struct page *page)
  {
         struct folio *folio = page_folio(page);
  
-       if (folio_test_lru(folio) && folio_test_active(folio) &&
-           !folio_test_unevictable(folio)) {
+       if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
+           (folio_test_active(folio) || lru_gen_enabled())) {
                 struct folio_batch *fbatch;
  
                 folio_get(folio);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 9c77df1a711ce837faca0c534d5a39c64ce1a20f..680ad52090e1f0becd0651f84fb81fc0f7491ab1 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3050,6 +3050,81 @@ static bool can_age_anon_pages(struct pglist_data *pgdat,
         return can_demote(pgdat->node_id, sc);
  }
  
+#ifdef CONFIG_LRU_GEN
+
+/******************************************************************************
+ *                          shorthand helpers
+ ******************************************************************************/
+/* Visits every (gen, type, zone) triple; the three loop variables are supplied by the caller. */
+#define for_each_gen_type_zone(gen, type, zone)                                \
+       for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++)                   \
+               for ((type) = 0; (type) < ANON_AND_FILE; (type)++)      \
+                       for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
+/* Returns the lruvec of memcg on node nid, or the node's own lruvec when memcg is NULL. */
+static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
+{
+       struct pglist_data *pgdat = NODE_DATA(nid);
+
+#ifdef CONFIG_MEMCG
+       if (memcg) {
+               struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
+
+               /* for hotadd_new_pgdat() */
+               if (!lruvec->pgdat)
+                       lruvec->pgdat = pgdat;
+
+               return lruvec;
+       }
+#endif
+       VM_WARN_ON_ONCE(!mem_cgroup_disabled());
+       /* no memcg given: use the node-wide lruvec, or NULL if the node has no pgdat */
+       return pgdat ? &pgdat->__lruvec : NULL;
+}
+
+/******************************************************************************
+ *                          initialization
+ ******************************************************************************/
+/* Sets the starting max_seq and initializes every generation list head of the lruvec. */
+void lru_gen_init_lruvec(struct lruvec *lruvec)
+{
+       int gen, type, zone;
+       struct lru_gen_struct *lrugen = &lruvec->lrugen;
+       /* NOTE(review): min_seq[] isn't set here; presumably zeroed by the allocator - confirm */
+       lrugen->max_seq = MIN_NR_GENS + 1;
+
+       for_each_gen_type_zone(gen, type, zone)
+               INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+}
+
+#ifdef CONFIG_MEMCG
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
+{ /* intentionally empty: this patch has nothing to set up per memcg */
+}
+/* Teardown check: warns once if any per-generation page count of the memcg is non-zero. */
+void lru_gen_exit_memcg(struct mem_cgroup *memcg)
+{
+       int nid;
+
+       for_each_node(nid) {
+               struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+               VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
+                                          sizeof(lruvec->lrugen.nr_pages)));
+       }
+}
+#endif
+/* Compile-time sanity checks on the generation constants; does no runtime work yet. */
+static int __init init_lru_gen(void)
+{
+       BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+       BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+
+       return 0;
+}
+late_initcall(init_lru_gen);
+
+#endif /* CONFIG_LRU_GEN */
+
  static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
  {
         unsigned long nr[NR_LRU_LISTS];
author	Yu Zhao <yuzhao@google.com>
	Sun, 18 Sep 2022 08:00:02 +0000 (02:00 -0600)
committer	Andrew Morton <akpm@linux-foundation.org>
	Tue, 27 Sep 2022 02:46:09 +0000 (19:46 -0700)
fs/fuse/dev.c		patch \| blob \| history
include/linux/mm_inline.h		patch \| blob \| history
include/linux/mmzone.h		patch \| blob \| history
include/linux/page-flags-layout.h		patch \| blob \| history
include/linux/page-flags.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
kernel/bounds.c		patch \| blob \| history
mm/Kconfig		patch \| blob \| history
mm/huge_memory.c		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mm_init.c		patch \| blob \| history
mm/mmzone.c		patch \| blob \| history
mm/swap.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history