]> git.baikalelectronics.ru Git - kernel.git/commitdiff
mm: disable LRU pagevec during the migration temporarily
authorMinchan Kim <minchan@kernel.org>
Wed, 5 May 2021 01:36:54 +0000 (18:36 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 5 May 2021 18:27:24 +0000 (11:27 -0700)
LRU pagevec holds refcount of pages until the pagevec are drained.  It
could prevent migration since the refcount of the page is greater than
the expectation in migration logic.  To mitigate the issue, callers of
migrate_pages drains LRU pagevec via migrate_prep or lru_add_drain_all
before migrate_pages call.

However, it's not enough because pages coming into pagevec after the
draining call still could stay at the pagevec so it could keep
preventing page migration.  Since some callers of migrate_pages have
retrial logic with LRU draining, the page would migrate at next trial
but it is still fragile in that it doesn't close the fundamental race
between upcoming LRU pages into pagevec and migration so the migration
failure could cause contiguous memory allocation failure in the end.

To close the race, this patch disables LRU caches (i.e., pagevec) during
ongoing migration until migrate is done.

Since it's really hard to reproduce, I measured how many times
migrate_pages retried with force mode (it is about a fallback to a sync
migration) with below debug code.

int migrate_pages(struct list_head *from, new_page_t get_new_page,
..
..

  if (rc && reason == MR_CONTIG_RANGE && pass > 2) {
         printk(KERN_ERR, "pfn 0x%lx reason %d", page_to_pfn(page), rc);
         dump_page(page, "fail to migrate");
  }

The test was repeating android apps launching with cma allocation in
background every five seconds.  Total cma allocation count was about 500
during the testing.  With this patch, the dump_page count was reduced
from 400 to 30.

The new interface is also useful for memory hotplug which currently
drains lru pcp caches after each migration failure.  This is rather
suboptimal as it has to disrupt others running during the operation.
With the new interface the operation happens only once.  This is also in
line with pcp allocator cache which are disabled for the offlining as
well.

Link: https://lkml.kernel.org/r/20210319175127.886124-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Chris Goldsworthy <cgoldswo@codeaurora.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: John Dias <joaodias@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/migrate.h
include/linux/swap.h
mm/memory_hotplug.c
mm/mempolicy.c
mm/migrate.c
mm/page_alloc.c
mm/swap.c

index 3a389633b68ff9598bad0d13245dc068c106b65a..9e4a2dc8622c1a8da7f5ae1ff665355fe4bfb1ac 100644 (file)
@@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
 extern void putback_movable_page(struct page *page);
 
 extern void migrate_prep(void);
+extern void migrate_finish(void);
 extern void migrate_prep_local(void);
 extern void migrate_page_states(struct page *newpage, struct page *page);
 extern void migrate_page_copy(struct page *newpage, struct page *page);
@@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
        { return -EBUSY; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
+static inline int migrate_finish(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
 
 static inline void migrate_page_states(struct page *newpage, struct page *page)
index 42191da1bdc9bd80442973396a38e3663c29314d..f69e0f67651dfd4f7040ba48de39ecd5c797a96e 100644 (file)
@@ -340,6 +340,20 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file,
 extern void lru_note_cost_page(struct page *);
 extern void lru_cache_add(struct page *);
 extern void mark_page_accessed(struct page *);
+
+extern atomic_t lru_disable_count;
+
+static inline bool lru_cache_disabled(void)
+{
+       return atomic_read(&lru_disable_count);
+}
+
+static inline void lru_cache_enable(void)
+{
+       atomic_dec(&lru_disable_count);
+}
+
+extern void lru_cache_disable(void);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
index 0cdbbfbc57572e4e12f71b660af93e29bb9022fe..729fba144c718b21be8131d1e468d0c874cb7bd4 100644 (file)
@@ -1611,6 +1611,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
         * in a way that pages from isolated pageblock are left on pcplists.
         */
        zone_pcp_disable(zone);
+       lru_cache_disable();
 
        /* set above range as isolated */
        ret = start_isolate_page_range(start_pfn, end_pfn,
@@ -1642,7 +1643,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
                        }
 
                        cond_resched();
-                       lru_add_drain_all();
 
                        ret = scan_movable_pages(pfn, end_pfn, &pfn);
                        if (!ret) {
@@ -1687,6 +1687,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
        zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
        spin_unlock_irqrestore(&zone->lock, flags);
 
+       lru_cache_enable();
        zone_pcp_enable(zone);
 
        /* removal success */
index cd0295567a042c75e300fcd8bc5952d58240c2fd..3b95e169e97de313c32fcd3e2148a99845903a80 100644 (file)
@@ -1208,6 +1208,8 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
                        break;
        }
        mmap_read_unlock(mm);
+
+       migrate_finish();
        if (err < 0)
                return err;
        return busy;
@@ -1371,6 +1373,8 @@ up_out:
        mmap_write_unlock(mm);
 mpol_out:
        mpol_put(new);
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+               migrate_finish();
        return err;
 }
 
index 47df0df8f21ad2b0a133766b516a4ae83123d34d..5b09567dc293da8450f3afc1ae684c31bd3a030b 100644 (file)
@@ -66,11 +66,13 @@ void migrate_prep(void)
 {
        /*
         * Clear the LRU lists so pages can be isolated.
-        * Note that pages may be moved off the LRU after we have
-        * drained them. Those pages will fail to migrate like other
-        * pages that may be busy.
         */
-       lru_add_drain_all();
+       lru_cache_disable();
+}
+
+void migrate_finish(void)
+{
+       lru_cache_enable();
 }
 
 /* Do the necessary work of migrate_prep but not if it involves other CPUs */
@@ -1838,6 +1840,7 @@ out_flush:
        if (err >= 0)
                err = err1;
 out:
+       migrate_finish();
        return err;
 }
 
index 64d4aae2a78abcd7903ba9c13fffcd8ac3198b39..2cefb634e0d6456328135eabe1dba1d549a413f1 100644 (file)
@@ -8715,6 +8715,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                if (ret == -ENOMEM)
                        break;
        }
+
+       migrate_finish();
        if (ret < 0) {
                alloc_contig_dump_pages(&cc->migratepages);
                putback_movable_pages(&cc->migratepages);
index 31b844d4ed94eebe98dcad0824f8ecad69bff0f5..c94f55e7b6493e695d5e9d2105512a4c2c53286c 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -235,6 +235,18 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
        }
 }
 
+/* return true if pagevec needs to drain */
+static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page)
+{
+       bool ret = false;
+
+       if (!pagevec_add(pvec, page) || PageCompound(page) ||
+                       lru_cache_disabled())
+               ret = true;
+
+       return ret;
+}
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
@@ -252,7 +264,7 @@ void rotate_reclaimable_page(struct page *page)
                get_page(page);
                local_lock_irqsave(&lru_rotate.lock, flags);
                pvec = this_cpu_ptr(&lru_rotate.pvec);
-               if (!pagevec_add(pvec, page) || PageCompound(page))
+               if (pagevec_add_and_need_flush(pvec, page))
                        pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
                local_unlock_irqrestore(&lru_rotate.lock, flags);
        }
@@ -343,7 +355,7 @@ static void activate_page(struct page *page)
                local_lock(&lru_pvecs.lock);
                pvec = this_cpu_ptr(&lru_pvecs.activate_page);
                get_page(page);
-               if (!pagevec_add(pvec, page) || PageCompound(page))
+               if (pagevec_add_and_need_flush(pvec, page))
                        pagevec_lru_move_fn(pvec, __activate_page);
                local_unlock(&lru_pvecs.lock);
        }
@@ -458,7 +470,7 @@ void lru_cache_add(struct page *page)
        get_page(page);
        local_lock(&lru_pvecs.lock);
        pvec = this_cpu_ptr(&lru_pvecs.lru_add);
-       if (!pagevec_add(pvec, page) || PageCompound(page))
+       if (pagevec_add_and_need_flush(pvec, page))
                __pagevec_lru_add(pvec);
        local_unlock(&lru_pvecs.lock);
 }
@@ -654,7 +666,7 @@ void deactivate_file_page(struct page *page)
                local_lock(&lru_pvecs.lock);
                pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
-               if (!pagevec_add(pvec, page) || PageCompound(page))
+               if (pagevec_add_and_need_flush(pvec, page))
                        pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
                local_unlock(&lru_pvecs.lock);
        }
@@ -676,7 +688,7 @@ void deactivate_page(struct page *page)
                local_lock(&lru_pvecs.lock);
                pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
                get_page(page);
-               if (!pagevec_add(pvec, page) || PageCompound(page))
+               if (pagevec_add_and_need_flush(pvec, page))
                        pagevec_lru_move_fn(pvec, lru_deactivate_fn);
                local_unlock(&lru_pvecs.lock);
        }
@@ -698,7 +710,7 @@ void mark_page_lazyfree(struct page *page)
                local_lock(&lru_pvecs.lock);
                pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
                get_page(page);
-               if (!pagevec_add(pvec, page) || PageCompound(page))
+               if (pagevec_add_and_need_flush(pvec, page))
                        pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
                local_unlock(&lru_pvecs.lock);
        }
@@ -735,7 +747,7 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
  * Calling this function with cpu hotplug locks held can actually lead
  * to obscure indirect dependencies via WQ context.
  */
-void lru_add_drain_all(void)
+inline void __lru_add_drain_all(bool force_all_cpus)
 {
        /*
         * lru_drain_gen - Global pages generation number
@@ -780,7 +792,7 @@ void lru_add_drain_all(void)
         * (C) Exit the draining operation if a newer generation, from another
         * lru_add_drain_all(), was already scheduled for draining. Check (A).
         */
-       if (unlikely(this_gen != lru_drain_gen))
+       if (unlikely(this_gen != lru_drain_gen && !force_all_cpus))
                goto done;
 
        /*
@@ -810,7 +822,8 @@ void lru_add_drain_all(void)
        for_each_online_cpu(cpu) {
                struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
-               if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+               if (force_all_cpus ||
+                   pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
                    data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
                    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
                    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
@@ -828,6 +841,11 @@ void lru_add_drain_all(void)
 done:
        mutex_unlock(&lock);
 }
+
+void lru_add_drain_all(void)
+{
+       __lru_add_drain_all(false);
+}
 #else
 void lru_add_drain_all(void)
 {
@@ -835,6 +853,34 @@ void lru_add_drain_all(void)
 }
 #endif /* CONFIG_SMP */
 
+atomic_t lru_disable_count = ATOMIC_INIT(0);
+
+/*
+ * lru_cache_disable() needs to be called before we start compiling
+ * a list of pages to be migrated using isolate_lru_page().
+ * It drains pages on LRU cache and then disable on all cpus until
+ * lru_cache_enable is called.
+ *
+ * Must be paired with a call to lru_cache_enable().
+ */
+void lru_cache_disable(void)
+{
+       atomic_inc(&lru_disable_count);
+#ifdef CONFIG_SMP
+       /*
+        * lru_add_drain_all in the force mode will schedule draining on
+        * all online CPUs so any calls of lru_cache_disabled wrapped by
+        * local_lock or preemption disabled would be ordered by that.
+        * The atomic operation doesn't need to have stronger ordering
+        * requirements because that is enforeced by the scheduling
+        * guarantees.
+        */
+       __lru_add_drain_all(true);
+#else
+       lru_add_drain();
+#endif
+}
+
 /**
  * release_pages - batched put_page()
  * @pages: array of pages to release