mm/page_alloc: fix race condition between build_all_zonelists and page allocation

author Mel Gorman <mgorman@techsingularity.net>

Wed, 24 Aug 2022 11:14:50 +0000 (12:14 +0100)

committer Andrew Morton <akpm@linux-foundation.org>

Sun, 11 Sep 2022 23:22:29 +0000 (16:22 -0700)
author Mel Gorman <mgorman@techsingularity.net>
Wed, 24 Aug 2022 11:14:50 +0000 (12:14 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Sun, 11 Sep 2022 23:22:29 +0000 (16:22 -0700)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index e5486d47406e81c3c1f96551c1eded4551e1a38b..1678431fb4c42a9eacb862cbd41d92169c148ea5 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4708,6 +4708,30 @@ void fs_reclaim_release(gfp_t gfp_mask)
  EXPORT_SYMBOL_GPL(fs_reclaim_release);
  #endif
  
+/*
+ * Zonelists may change due to hotplug during allocation. Detect when zonelists
+ * have been rebuilt so the allocation can be retried. The reader side does not
+ * lock and retries the allocation if the zonelists change. The writer side is
+ * protected by the embedded spin_lock.
+ */
+static DEFINE_SEQLOCK(zonelist_update_seq);
+
+static unsigned int zonelist_iter_begin(void)
+{
+       if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+               return read_seqbegin(&zonelist_update_seq);
+
+       return 0;
+}
+
+static unsigned int check_retry_zonelist(unsigned int seq)
+{
+       if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+               return read_seqretry(&zonelist_update_seq, seq);
+
+       return seq;
+}
+
  /* Perform direct synchronous page reclaim */
  static unsigned long
  __perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -5001,6 +5025,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         int compaction_retries;
         int no_progress_loops;
         unsigned int cpuset_mems_cookie;
+       unsigned int zonelist_iter_cookie;
         int reserve_flags;
  
         /*
@@ -5011,11 +5036,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
                 gfp_mask &= ~__GFP_ATOMIC;
  
-retry_cpuset:
+restart:
         compaction_retries = 0;
         no_progress_loops = 0;
         compact_priority = DEF_COMPACT_PRIORITY;
         cpuset_mems_cookie = read_mems_allowed_begin();
+       zonelist_iter_cookie = zonelist_iter_begin();
  
         /*
          * The fast path uses conservative alloc_flags to succeed only until
@@ -5187,9 +5213,13 @@ retry:
                 goto retry;
  
  
-       /* Deal with possible cpuset update races before we start OOM killing */
-       if (check_retry_cpuset(cpuset_mems_cookie, ac))
-               goto retry_cpuset;
+       /*
+        * Deal with possible cpuset update races or zonelist updates to avoid
+        * an unnecessary OOM kill.
+        */
+       if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
+           check_retry_zonelist(zonelist_iter_cookie))
+               goto restart;
  
         /* Reclaim has failed us, start killing things */
         page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
@@ -5209,9 +5239,13 @@ retry:
         }
  
  nopage:
-       /* Deal with possible cpuset update races before we fail */
-       if (check_retry_cpuset(cpuset_mems_cookie, ac))
-               goto retry_cpuset;
+       /*
+        * Deal with possible cpuset update races or zonelist updates to avoid
+        * an unnecessary OOM kill.
+        */
+       if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
+           check_retry_zonelist(zonelist_iter_cookie))
+               goto restart;
  
         /*
          * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
@@ -6514,9 +6548,8 @@ static void __build_all_zonelists(void *data)
         int nid;
         int __maybe_unused cpu;
         pg_data_t *self = data;
-       static DEFINE_SPINLOCK(lock);
  
-       spin_lock(&lock);
+       write_seqlock(&zonelist_update_seq);
  
  #ifdef CONFIG_NUMA
         memset(node_load, 0, sizeof(node_load));
@@ -6553,7 +6586,7 @@ static void __build_all_zonelists(void *data)
  #endif
         }
  
-       spin_unlock(&lock);
+       write_sequnlock(&zonelist_update_seq);
  }
  
  static noinline void __init
author	Mel Gorman <mgorman@techsingularity.net>
	Wed, 24 Aug 2022 11:14:50 +0000 (12:14 +0100)
committer	Andrew Morton <akpm@linux-foundation.org>
	Sun, 11 Sep 2022 23:22:29 +0000 (16:22 -0700)