git.baikalelectronics.ru Git - kernel.git/commitdiff
mm/demotion: update node_is_toptier to work with memory tiers
authorAneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Thu, 18 Aug 2022 13:10:41 +0000 (18:40 +0530)
committerAndrew Morton <akpm@linux-foundation.org>
Tue, 27 Sep 2022 02:46:12 +0000 (19:46 -0700)
With memory tier support we can have memory-only NUMA nodes in the top
tier from which we want to avoid promotion-tracking NUMA faults.  Update
node_is_toptier to work with memory tiers.  All NUMA nodes are by default
top tier nodes.  With lower (slower) memory tiers added, we consider all
memory tiers above a memory tier having CPU NUMA nodes as a top memory
tier.

[sj@kernel.org: include missed header file, memory-tiers.h]
Link: https://lkml.kernel.org/r/20220820190720.248704-1-sj@kernel.org
[akpm@linux-foundation.org: mm/memory.c needs linux/memory-tiers.h]
[aneesh.kumar@linux.ibm.com: make toptier_distance inclusive upper bound of toptiers]
Link: https://lkml.kernel.org/r/20220830081457.118960-1-aneesh.kumar@linux.ibm.com
Link: https://lkml.kernel.org/r/20220818131042.113280-10-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Wei Xu <weixugc@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hesham Almatary <hesham.almatary@huawei.com>
Cc: Jagdish Gediya <jvgediya.oss@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memory-tiers.h
include/linux/node.h
kernel/sched/fair.c
mm/huge_memory.c
mm/memory-tiers.c
mm/memory.c
mm/migrate.c
mm/mprotect.c

index 42791554b9b97e386c7c4769b06d880c4d5b7af4..965009aa01d72b73c66d046a0ff4816e376e852b 100644 (file)
@@ -40,6 +40,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype);
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
+bool node_is_toptier(int node);
 #else
 static inline int next_demotion_node(int node)
 {
@@ -50,6 +51,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
 {
        *targets = NODE_MASK_NONE;
 }
+
+static inline bool node_is_toptier(int node)
+{
+       return true;
+}
 #endif
 
 #else
@@ -87,5 +93,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
 {
        *targets = NODE_MASK_NONE;
 }
+
+static inline bool node_is_toptier(int node)
+{
+       return true;
+}
 #endif /* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
index 40d641a8bfb0d4cfba3cc232f6f837458f7e5c76..9ec680dd607f7515d0692ba65f7b69e8d12dd30c 100644 (file)
@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
 
 #define to_node(device) container_of(device, struct node, dev)
 
-static inline bool node_is_toptier(int node)
-{
-       return node_state(node, N_CPU);
-}
-
 #endif /* _LINUX_NODE_H_ */
index d642e9ff28292072157b7f9d99a9b6168389806e..0e3e08a093d497319bcc8aef941fa1a6bac946d4 100644 (file)
@@ -40,6 +40,7 @@
 
 #include <linux/cpuidle.h>
 #include <linux/interrupt.h>
+#include <linux/memory-tiers.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex_api.h>
 #include <linux/profile.h>
index 949d7c325133c83a366319848a3cdf7259eca13f..534d30cff9d75dad07a8d7d369178b26e0b67238 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/numa.h>
 #include <linux/page_owner.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
index 45dd6fa4e2d169d2696ade491177804e9c01c94b..c82eb0111383bbb1d869ad7397d156cf017b0eb3 100644 (file)
@@ -37,6 +37,7 @@ static LIST_HEAD(memory_tiers);
 static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
 static struct memory_dev_type *default_dram_type;
 #ifdef CONFIG_MIGRATION
+static int top_tier_adistance;
 /*
  * node_demotion[] examples:
  *
@@ -162,6 +163,31 @@ static struct memory_tier *__node_get_memory_tier(int node)
 }
 
 #ifdef CONFIG_MIGRATION
+bool node_is_toptier(int node)
+{
+       bool toptier;
+       pg_data_t *pgdat;
+       struct memory_tier *memtier;
+
+       pgdat = NODE_DATA(node);
+       if (!pgdat)
+               return false;
+
+       rcu_read_lock();
+       memtier = rcu_dereference(pgdat->memtier);
+       if (!memtier) {
+               toptier = true;
+               goto out;
+       }
+       if (memtier->adistance_start <= top_tier_adistance)
+               toptier = true;
+       else
+               toptier = false;
+out:
+       rcu_read_unlock();
+       return toptier;
+}
+
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
 {
        struct memory_tier *memtier;
@@ -319,6 +345,27 @@ static void establish_demotion_targets(void)
                        }
                } while (1);
        }
+       /*
+        * Promotion is allowed from a memory tier to higher
+        * memory tier only if the memory tier doesn't include
+        * compute. We want to skip promotion from a memory tier,
+        * if any node that is part of the memory tier have CPUs.
+        * Once we detect such a memory tier, we consider that tier
+        * as top tiper from which promotion is not allowed.
+        */
+       list_for_each_entry_reverse(memtier, &memory_tiers, list) {
+               tier_nodes = get_memtier_nodemask(memtier);
+               nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
+               if (!nodes_empty(tier_nodes)) {
+                       /*
+                        * abstract distance below the max value of this memtier
+                        * is considered toptier.
+                        */
+                       top_tier_adistance = memtier->adistance_start +
+                                               MEMTIER_CHUNK_SIZE - 1;
+                       break;
+               }
+       }
        /*
         * Now build the lower_tier mask for each node collecting node mask from
         * all memory tier below it. This allows us to fallback demotion page
index 63832dab15d366a18fe96eac0aa467e3c82bd4ef..cb955c0b77382191b55fd967afcb29bfcbca53b5 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/memory-tiers.h>
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
index 2a2329bf7c1a3f27a8f6bf0d81e281f11d45ee46..d74573c36573b45f68404ffc6d93bc2fbebe0cad 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/memory.h>
 #include <linux/random.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlbflush.h>
 
index ed013f836b4a53414d06b7d22d4541d230b99441..55ed4a889990fb4effd6ca044d966e39c68c76bc 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/pgtable.h>
 #include <linux/sched/sysctl.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/memory-tiers.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>