workqueue: fix possible idle worker depletion across CPU hotplug

author Lai Jiangshan <laijs@cn.fujitsu.com>

Mon, 10 Sep 2012 17:03:44 +0000 (10:03 -0700)

committer Tejun Heo <tj@kernel.org>

Mon, 10 Sep 2012 17:05:54 +0000 (10:05 -0700)
author Lai Jiangshan <laijs@cn.fujitsu.com>
Mon, 10 Sep 2012 17:03:44 +0000 (10:03 -0700)
committer Tejun Heo <tj@kernel.org>
Mon, 10 Sep 2012 17:05:54 +0000 (10:05 -0700)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 383548ed0b54ad1d12b84e0934e542b7b432622d..1e1373bcb3e3125f72baf77cf396689d18de9bdd 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1825,10 +1825,45 @@ static bool manage_workers(struct worker *worker)
         struct worker_pool *pool = worker->pool;
         bool ret = false;
  
-       if (!mutex_trylock(&pool->manager_mutex))
+       if (pool->flags & POOL_MANAGING_WORKERS)
                 return ret;
  
         pool->flags |= POOL_MANAGING_WORKERS;
+
+       /*
+        * To simplify both worker management and CPU hotplug, hold off
+        * management while hotplug is in progress.  CPU hotplug path can't
+        * grab %POOL_MANAGING_WORKERS to achieve this because that can
+        * lead to idle worker depletion (all become busy thinking someone
+        * else is managing) which in turn can result in deadlock under
+        * extreme circumstances.  Use @pool->manager_mutex to synchronize
+        * manager against CPU hotplug.
+        *
+        * manager_mutex would always be free unless CPU hotplug is in
+        * progress.  trylock first without dropping @gcwq->lock.
+        */
+       if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+               spin_unlock_irq(&pool->gcwq->lock);
+               mutex_lock(&pool->manager_mutex);
+               /*
+                * CPU hotplug could have happened while we were waiting
+                * for manager_mutex.  Hotplug itself can't handle us
+                * because the manager is on neither the idle nor the busy
+                * list, and @gcwq's state and ours could have deviated.
+                *
+                * As hotplug is now excluded via manager_mutex, we can
+                * simply try to bind.  It will succeed or fail depending
+                * on @gcwq's current state.  Try it and adjust
+                * %WORKER_UNBOUND accordingly.
+                */
+               if (worker_maybe_bind_and_lock(worker))
+                       worker->flags &= ~WORKER_UNBOUND;
+               else
+                       worker->flags |= WORKER_UNBOUND;
+
+               ret = true;
+       }
+
         pool->flags &= ~POOL_MANAGE_WORKERS;
  
         /*
author	Lai Jiangshan <laijs@cn.fujitsu.com>
	Mon, 10 Sep 2012 17:03:44 +0000 (10:03 -0700)
committer	Tejun Heo <tj@kernel.org>
	Mon, 10 Sep 2012 17:05:54 +0000 (10:05 -0700)