mm, oom: rework oom detection

author Michal Hocko <mhocko@suse.com>

Fri, 20 May 2016 23:57:00 +0000 (16:57 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
author Michal Hocko <mhocko@suse.com>
Fri, 20 May 2016 23:57:00 +0000 (16:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
diff --git a/include/linux/swap.h b/include/linux/swap.h

index ad220359f1b072cfbf29456871312bbf07d5cceb..0af2bb2028fd51c56b319b264e1d2f4991d7b073 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -316,6 +316,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
                                                 struct vm_area_struct *vma);
  
  /* linux/mm/vmscan.c */
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                         gfp_t gfp_mask, nodemask_t *mask);
  extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 8bcc10616fabdae756f0a9d246177b242c963e43..fa39efc3a692e7040924af6781678e6c1dda036f 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3386,6 +3386,77 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
         return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
  }
  
+/*
+ * Maximum number of reclaim retries without any progress before OOM killer
+ * is considered as the only way to move forward.
+ */
+#define MAX_RECLAIM_RETRIES 16
+
+/*
+ * Checks whether it makes sense to retry the reclaim to make forward progress
+ * for the given allocation request.
+ * The reclaim feedback represented by did_some_progress (any progress during
+ * the last reclaim round), pages_reclaimed (cumulative number of reclaimed
+ * pages) and no_progress_loops (number of reclaim rounds without any progress
+ * in a row) is considered as well as the reclaimable pages on the applicable
+ * zone list (with a backoff mechanism which is a function of no_progress_loops).
+ *
+ * Returns true if a retry is viable or false to enter the oom path.
+ */
+static inline bool
+should_reclaim_retry(gfp_t gfp_mask, unsigned order,
+                    struct alloc_context *ac, int alloc_flags,
+                    bool did_some_progress, unsigned long pages_reclaimed,
+                    int no_progress_loops)
+{
+       struct zone *zone;
+       struct zoneref *z;
+
+       /*
+        * Make sure we converge to OOM if we cannot make any progress
+        * several times in a row.
+        */
+       if (no_progress_loops > MAX_RECLAIM_RETRIES)
+               return false;
+
+       if (order > PAGE_ALLOC_COSTLY_ORDER) {
+               if (pages_reclaimed >= (1<<order))
+                       return false;
+
+               if (did_some_progress)
+                       return true;
+       }
+
+       /*
+        * Keep reclaiming pages while there is a chance this will lead somewhere.
+        * If none of the target zones can satisfy our allocation request even
+        * if all reclaimable pages are considered then we are screwed and have
+        * to go OOM.
+        */
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
+                                       ac->nodemask) {
+               unsigned long available;
+
+               available = zone_reclaimable_pages(zone);
+               available -= DIV_ROUND_UP(no_progress_loops * available,
+                                         MAX_RECLAIM_RETRIES);
+               available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+               /*
+                * Would the allocation succeed if we reclaimed all
+                * available pages?
+                */
+               if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
+                               ac->high_zoneidx, alloc_flags, available)) {
+                       /* Wait for some write requests to complete then retry */
+                       wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
+                       return true;
+               }
+       }
+
+       return false;
+}
+
  static inline struct page *
  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                                 struct alloc_context *ac)
@@ -3397,6 +3468,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         unsigned long did_some_progress;
         enum migrate_mode migration_mode = MIGRATE_ASYNC;
         enum compact_result compact_result;
+       int no_progress_loops = 0;
  
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3525,23 +3597,35 @@ retry:
         if (gfp_mask & __GFP_NORETRY)
                 goto noretry;
  
-       /* Keep reclaiming pages as long as there is reasonable progress */
-       pages_reclaimed += did_some_progress;
-       if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
-           ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
-               /* Wait for some write requests to complete then retry */
-               wait_iff_congested(ac->preferred_zoneref->zone, BLK_RW_ASYNC, HZ/50);
-               goto retry;
+       /*
+        * Do not retry costly high order allocations unless they are
+        * __GFP_REPEAT
+        */
+       if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+               goto noretry;
+
+       if (did_some_progress) {
+               no_progress_loops = 0;
+               pages_reclaimed += did_some_progress;
+       } else {
+               no_progress_loops++;
         }
  
+       if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
+                                did_some_progress > 0, pages_reclaimed,
+                                no_progress_loops))
+               goto retry;
+
         /* Reclaim has failed us, start killing things */
         page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
         if (page)
                 goto got_pg;
  
         /* Retry as long as the OOM killer is making progress */
-       if (did_some_progress)
+       if (did_some_progress) {
+               no_progress_loops = 0;
                 goto retry;
+       }
  
  noretry:
         /*
diff --git a/mm/vmscan.c b/mm/vmscan.c

index a386454c015aa69fc9c1116e13500e3905b850e2..c4a2f4512fcaccf00e5ec6f41eddcdce0e18fb7d 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -191,7 +191,7 @@ static bool sane_reclaim(struct scan_control *sc)
  }
  #endif
  
-static unsigned long zone_reclaimable_pages(struct zone *zone)
+unsigned long zone_reclaimable_pages(struct zone *zone)
  {
         unsigned long nr;
  
@@ -2507,10 +2507,8 @@ static inline bool compaction_ready(struct zone *zone, int order, int classzone_
   *
   * If a zone is deemed to be full of pinned pages then just give it a light
   * scan then give up on it.
- *
- * Returns true if a zone was reclaimable.
   */
-static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
+static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
  {
         struct zoneref *z;
         struct zone *zone;
@@ -2518,7 +2516,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         unsigned long nr_soft_scanned;
         gfp_t orig_mask;
         enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
-       bool reclaimable = false;
  
         /*
          * If the number of buffer_heads in the machine exceeds the maximum
@@ -2583,17 +2580,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                 &nr_soft_scanned);
                         sc->nr_reclaimed += nr_soft_reclaimed;
                         sc->nr_scanned += nr_soft_scanned;
-                       if (nr_soft_reclaimed)
-                               reclaimable = true;
                         /* need some check for avoid more shrink_zone() */
                 }
  
-               if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
-                       reclaimable = true;
-
-               if (global_reclaim(sc) &&
-                   !reclaimable && zone_reclaimable(zone))
-                       reclaimable = true;
+               shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
         }
  
         /*
@@ -2601,8 +2591,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
          * promoted it to __GFP_HIGHMEM.
          */
         sc->gfp_mask = orig_mask;
-
-       return reclaimable;
  }
  
  /*
@@ -2627,7 +2615,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
         int initial_priority = sc->priority;
         unsigned long total_scanned = 0;
         unsigned long writeback_threshold;
-       bool zones_reclaimable;
  retry:
         delayacct_freepages_start();
  
@@ -2638,7 +2625,7 @@ retry:
                 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                 sc->priority);
                 sc->nr_scanned = 0;
-               zones_reclaimable = shrink_zones(zonelist, sc);
+               shrink_zones(zonelist, sc);
  
                 total_scanned += sc->nr_scanned;
                 if (sc->nr_reclaimed >= sc->nr_to_reclaim)
@@ -2685,10 +2672,6 @@ retry:
                 goto retry;
         }
  
-       /* Any of the zones still reclaimable?  Don't OOM. */
-       if (zones_reclaimable)
-               return 1;
-
         return 0;
  }
author	Michal Hocko <mhocko@suse.com>
	Fri, 20 May 2016 23:57:00 +0000 (16:57 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 21 May 2016 00:58:30 +0000 (17:58 -0700)
include/linux/swap.h		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history