powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings
author     Parth Shah <parth@linux.ibm.com>
           Wed, 28 Jul 2021 17:56:07 +0000 (23:26 +0530)
committer  Michael Ellerman <mpe@ellerman.id.au>
           Wed, 4 Aug 2021 00:53:39 +0000 (10:53 +1000)
On POWER10 systems, the "ibm,thread-groups" property value "2" indicates that
the CPUs in a thread-group share both the L2 and the L3 cache. Hence, use
cache_property = 2 itself to find both the L2 and the L3 cache siblings.

Create a new thread_group_l3_cache_map to keep the list of L3 siblings, but
fill the mask using the same property "2" array.
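
For illustration only (not part of the patch below), here is a minimal,
standalone C sketch of the idea: a single flattened "ibm,thread-groups"-style
list whose property field is 2 is walked once, and both the L2 and the L3
sibling masks are derived from the same group data. The array layout and CPU
numbers are made up for the example.

#include <stdio.h>
#include <stdint.h>

#define THREAD_GROUP_SHARE_L2_L3 2

int main(void)
{
	/*
	 * property, nr_groups, threads_per_group, then the CPU ids of
	 * each group -- layout and values are illustrative only.
	 */
	const uint32_t tg[] = {
		THREAD_GROUP_SHARE_L2_L3,	/* property */
		2,				/* nr_groups */
		4,				/* threads per group */
		0, 2, 4, 6,			/* group 0 */
		1, 3, 5, 7,			/* group 1 */
	};
	const uint32_t nr_groups = tg[1], per_group = tg[2];
	const uint32_t *ids = &tg[3];

	for (uint32_t g = 0; g < nr_groups; g++) {
		uint64_t l2_mask = 0, l3_mask;

		for (uint32_t t = 0; t < per_group; t++)
			l2_mask |= 1ULL << ids[g * per_group + t];

		/* Property "2" means the L3 siblings are the same CPUs. */
		l3_mask = l2_mask;

		printf("group %u: l2_mask=0x%llx l3_mask=0x%llx\n", g,
		       (unsigned long long)l2_mask,
		       (unsigned long long)l3_mask);
	}
	return 0;
}

The patch does the per-CPU equivalent in the kernel:
init_thread_group_cache_map() now fills both thread_group_l2_cache_map and
thread_group_l3_cache_map from the same thread_groups entry via
update_mask_from_threadgroup().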

Signed-off-by: Parth Shah <parth@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210728175607.591679-4-parth@linux.ibm.com
arch/powerpc/include/asm/smp.h
arch/powerpc/kernel/cacheinfo.c
arch/powerpc/kernel/smp.c

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 1259040cc3a4adf3c2b8c568a1320f0f34524b9e..7ef1cd8168a055cd4a9c6211446df9ef4c80f4a2 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -35,6 +35,7 @@ extern int *chip_id_lookup_table;
 
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
 
 #ifdef CONFIG_SMP
 
@@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()                get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2  0
+#define thread_group_shares_l3 0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 20d91693eac1ee7c7ee80c039740215e51cf050e..cf1be75b7833a9f1dfe7245ef128e9575840d8cf 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
        else if (thread_group_shares_l2 && level == 2)
                return cpumask_first(per_cpu(thread_group_l2_cache_map,
                                             cpu_id));
+       else if (thread_group_shares_l3 && level == 3)
+               return cpumask_first(per_cpu(thread_group_l3_cache_map,
+                                            cpu_id));
        return -1;
 }
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a7fcac44a8e29b8801f70862566c5f276ea02640..f2abd88e0c254dda4b9f56a77c3679d7a1b8a166 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -78,6 +78,7 @@ struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@ enum {
 
 #define MAX_THREAD_LIST_SIZE   8
 #define THREAD_GROUP_SHARE_L1   1
-#define THREAD_GROUP_SHARE_L2   2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
        unsigned int property;
        unsigned int nr_groups;
@@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  */
 DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
 
@@ -889,19 +896,41 @@ out:
        return tg;
 }
 
+static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+{
+       int first_thread = cpu_first_thread_sibling(cpu);
+       int i;
+
+       zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
+
+       for (i = first_thread; i < first_thread + threads_per_core; i++) {
+               int i_group_start = get_cpu_thread_group_start(i, tg);
+
+               if (unlikely(i_group_start == -1)) {
+                       WARN_ON_ONCE(1);
+                       return -ENODATA;
+               }
+
+               if (i_group_start == cpu_group_start)
+                       cpumask_set_cpu(i, *mask);
+       }
+
+       return 0;
+}
+
 static int __init init_thread_group_cache_map(int cpu, int cache_property)
 
 {
-       int first_thread = cpu_first_thread_sibling(cpu);
-       int i, cpu_group_start = -1, err = 0;
+       int cpu_group_start = -1, err = 0;
        struct thread_groups *tg = NULL;
        cpumask_var_t *mask = NULL;
 
        if (cache_property != THREAD_GROUP_SHARE_L1 &&
-           cache_property != THREAD_GROUP_SHARE_L2)
+           cache_property != THREAD_GROUP_SHARE_L2_L3)
                return -EINVAL;
 
        tg = get_thread_groups(cpu, cache_property, &err);
+
        if (!tg)
                return err;
 
@@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
                return -ENODATA;
        }
 
-       if (cache_property == THREAD_GROUP_SHARE_L1)
+       if (cache_property == THREAD_GROUP_SHARE_L1) {
                mask = &per_cpu(thread_group_l1_cache_map, cpu);
-       else if (cache_property == THREAD_GROUP_SHARE_L2)
+               update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+       }
+       else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
                mask = &per_cpu(thread_group_l2_cache_map, cpu);
-
-       zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
-
-       for (i = first_thread; i < first_thread + threads_per_core; i++) {
-               int i_group_start = get_cpu_thread_group_start(i, tg);
-
-               if (unlikely(i_group_start == -1)) {
-                       WARN_ON_ONCE(1);
-                       return -ENODATA;
-               }
-
-               if (i_group_start == cpu_group_start)
-                       cpumask_set_cpu(i, *mask);
+               update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+               mask = &per_cpu(thread_group_l3_cache_map, cpu);
+               update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
        }
 
+
        return 0;
 }
 
@@ -1020,14 +1042,16 @@ static int __init init_big_cores(void)
        has_big_cores = true;
 
        for_each_possible_cpu(cpu) {
-               int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+               int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
                if (err)
                        return err;
        }
 
        thread_group_shares_l2 = true;
-       pr_debug("L2 cache only shared by the threads in the small core\n");
+       thread_group_shares_l3 = true;
+       pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
        return 0;
 }
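
To observe the effect from user space, the cacheinfo sysfs files that
get_group_id() feeds can be read. A small hedged sketch follows; which indexN
directory corresponds to the L3 cache varies between systems, so the path
below is an assumption and the "level" attribute under each indexN should be
checked first.

#include <stdio.h>

int main(void)
{
	/*
	 * Assumed path: the index that holds the L3 cache differs
	 * between systems; verify via the "level" file first.
	 */
	const char *path =
		"/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("CPUs sharing this cache with cpu0: %s", buf);
	fclose(f);
	return 0;
}

On a POWER10 system with this patch, the L3 entry should report the same
siblings as the L2 entry, since thread_group_l3_cache_map is filled from the
same property "2" data as thread_group_l2_cache_map.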