srcu: Make expedited RCU grace periods block even less frequently

author Neeraj Upadhyay <quic_neeraju@quicinc.com>

Fri, 1 Jul 2022 03:15:45 +0000 (08:45 +0530)

committer Paul E. McKenney <paulmck@kernel.org>

Tue, 19 Jul 2022 18:39:59 +0000 (11:39 -0700)
author Neeraj Upadhyay <quic_neeraju@quicinc.com>
Fri, 1 Jul 2022 03:15:45 +0000 (08:45 +0530)
committer Paul E. McKenney <paulmck@kernel.org>
Tue, 19 Jul 2022 18:39:59 +0000 (11:39 -0700)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 2522b11e593f2397840d59541f9befcebed505b1..c3245baf588f517c872eab203f274a0b3b74ddb1 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5771,6 +5771,24 @@
                         expediting.  Set to zero to disable automatic
                         expediting.
  
+       srcutree.srcu_max_nodelay [KNL]
+                       Specifies the number of no-delay instances
+                       per jiffy for which the SRCU grace period
+                       worker thread will be rescheduled with zero
+                       delay. Beyond this limit, the worker thread
+                       will be rescheduled with a sleep delay of one jiffy.
+
+       srcutree.srcu_max_nodelay_phase [KNL]
+                       Specifies the number of non-sleeping polls of
+                       readers per grace-period phase. Beyond this limit,
+                       the grace-period worker thread will be rescheduled
+                       with a sleep delay of one jiffy between each
+                       rescan of the readers for that grace-period phase.
+
+       srcutree.srcu_retry_check_delay [KNL]
+                       Specifies the number of microseconds of non-sleeping
+                       delay between each non-sleeping poll of readers.
+
         srcutree.small_contention_lim [KNL]
                         Specifies the number of update-side contention
                         events per jiffy will be tolerated before
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c

index 0db7873f4e95bb76b452403b3301518f896a2280..1c304fec89c02df9121b8cfa60087a843bf5f49c 100644 (file)
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
         return sum;
  }
  
-#define SRCU_INTERVAL          1       // Base delay if no expedited GPs pending.
-#define SRCU_MAX_INTERVAL      10      // Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE 3       // Maximum per-GP-phase consecutive no-delay instances.
-#define SRCU_MAX_NODELAY       100     // Maximum consecutive no-delay instances.
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited().  We spin for a fixed time period
+ * (defined below, boot time configurable) to allow SRCU readers to exit
+ * their read-side critical sections.  If there are still some readers
+ * after one jiffy, we repeatedly block for one-jiffy time periods.
+ * The blocking time is increased as the grace-period age increases,
+ * with max blocking time capped at 10 jiffies.
+ */
+#define SRCU_DEFAULT_RETRY_CHECK_DELAY         5
+
+static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
+module_param(srcu_retry_check_delay, ulong, 0444);
+
+#define SRCU_INTERVAL          1               // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL      10              // Maximum incremental delay from slow readers.
+
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO      3UL     // Lowmark on default per-GP-phase
+                                                       // no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI      1000UL  // Highmark on default per-GP-phase
+                                                       // no-delay instances.
+
+#define SRCU_UL_CLAMP_LO(val, low)     ((val) > (low) ? (val) : (low))
+#define SRCU_UL_CLAMP_HI(val, high)    ((val) < (high) ? (val) : (high))
+#define SRCU_UL_CLAMP(val, low, high)  SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
+// Per-GP-phase no-delay instances, adjusted to allow non-sleeping polls for up
+// to one jiffy. The multiplication by 2 accounts for the srcu_get_delay()
+// call made from process_srcu().
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED        \
+       (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
+
+// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
+       SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED,  \
+                     SRCU_DEFAULT_MAX_NODELAY_PHASE_LO,        \
+                     SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
+
+static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
+module_param(srcu_max_nodelay_phase, ulong, 0444);
+
+// Maximum consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY       (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
+                                        SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
+
+static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
+module_param(srcu_max_nodelay, ulong, 0444);
  
  /*
   * Return grace-period delay, zero if there are expedited grace
@@ -535,7 +577,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
                         jbase += j - gpstart;
                 if (!jbase) {
                         WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
-                       if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+                       if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
                                 jbase = 1;
                 }
         }
@@ -612,15 +654,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
  }
  EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
- */
-#define SRCU_RETRY_CHECK_DELAY         5
-
  /*
   * Start an SRCU grace period.
   */
@@ -706,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp
   */
  static void srcu_gp_end(struct srcu_struct *ssp)
  {
-       unsigned long cbdelay;
+       unsigned long cbdelay = 1;
         bool cbs;
         bool last_lvl;
         int cpu;
@@ -726,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
         spin_lock_irq_rcu_node(ssp);
         idx = rcu_seq_state(ssp->srcu_gp_seq);
         WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-       cbdelay = !!srcu_get_delay(ssp);
+       if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+               cbdelay = 0;
+
         WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
         rcu_seq_end(&ssp->srcu_gp_seq);
         gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -927,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
   */
  static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
  {
+       unsigned long curdelay;
+
+       curdelay = !srcu_get_delay(ssp);
+
         for (;;) {
                 if (srcu_readers_active_idx_check(ssp, idx))
                         return true;
-               if (--trycount + !srcu_get_delay(ssp) <= 0)
+               if ((--trycount + curdelay) <= 0)
                         return false;
-               udelay(SRCU_RETRY_CHECK_DELAY);
+               udelay(srcu_retry_check_delay);
         }
  }
  
@@ -1588,7 +1627,7 @@ static void process_srcu(struct work_struct *work)
                 j = jiffies;
                 if (READ_ONCE(ssp->reschedule_jiffies) == j) {
                         WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
-                       if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+                       if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
                                 curdelay = 1;
                 } else {
                         WRITE_ONCE(ssp->reschedule_count, 1);
@@ -1680,6 +1719,11 @@ static int __init srcu_bootup_announce(void)
         pr_info("Hierarchical SRCU implementation.\n");
         if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
                 pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+       if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
+               pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
+       if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
+               pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
+       pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
         return 0;
  }
  early_initcall(srcu_bootup_announce);
author	Neeraj Upadhyay <quic_neeraju@quicinc.com>
	Fri, 1 Jul 2022 03:15:45 +0000 (08:45 +0530)
committer	Paul E. McKenney <paulmck@kernel.org>
	Tue, 19 Jul 2022 18:39:59 +0000 (11:39 -0700)
Documentation/admin-guide/kernel-parameters.txt		patch \| blob \| history
kernel/rcu/srcutree.c		patch \| blob \| history