perf/core: Fix sys_perf_event_open() vs. hotplug

author Peter Zijlstra <peterz@infradead.org>

Fri, 9 Dec 2016 13:59:00 +0000 (14:59 +0100)

committer Ingo Molnar <mingo@kernel.org>

Sat, 14 Jan 2017 09:56:10 +0000 (10:56 +0100)
author Peter Zijlstra <peterz@infradead.org>
Fri, 9 Dec 2016 13:59:00 +0000 (14:59 +0100)
committer Ingo Molnar <mingo@kernel.org>
Sat, 14 Jan 2017 09:56:10 +0000 (10:56 +0100)
diff --git a/kernel/events/core.c b/kernel/events/core.c

index ab15509fab8c0659c3f422036f5649718a9e4437..72ce7d63e561c050e14dfb423a5626efa50bc4cb 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2249,7 +2249,7 @@ static int  __perf_install_in_context(void *info)
         struct perf_event_context *ctx = event->ctx;
         struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
         struct perf_event_context *task_ctx = cpuctx->task_ctx;
-       bool activate = true;
+       bool reprogram = true;
         int ret = 0;
  
         raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int  __perf_install_in_context(void *info)
                 raw_spin_lock(&ctx->lock);
                 task_ctx = ctx;
  
-               /* If we're on the wrong CPU, try again */
-               if (task_cpu(ctx->task) != smp_processor_id()) {
-                       ret = -ESRCH;
-                       goto unlock;
-               }
+               reprogram = (ctx->task == current);
  
                 /*
-                * If we're on the right CPU, see if the task we target is
-                * current, if not we don't have to activate the ctx, a future
-                * context switch will do that for us.
+                * If the task is running, it must be running on this CPU,
+                * otherwise we cannot reprogram things.
+                *
+                * If it's not running, we don't care, ctx->lock will
+                * serialize against it becoming runnable.
                  */
-               if (ctx->task != current)
-                       activate = false;
-               else
-                       WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+               if (task_curr(ctx->task) && !reprogram) {
+                       ret = -ESRCH;
+                       goto unlock;
+               }
  
+               WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
         } else if (task_ctx) {
                 raw_spin_lock(&task_ctx->lock);
         }
  
-       if (activate) {
+       if (reprogram) {
                 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
                 add_event_to_ctx(event, ctx);
                 ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
         /*
          * Installing events is tricky because we cannot rely on ctx->is_active
          * to be set in case this is the nr_events 0 -> 1 transition.
+        *
+        * Instead we use task_curr(), which tells us if the task is running.
+        * However, since we use task_curr() outside of rq::lock, we can race
+        * against the actual state. This means the result can be wrong.
+        *
+        * If we get a false positive, we retry, this is harmless.
+        *
+        * If we get a false negative, things are complicated. If we are after
+        * perf_event_context_sched_in() ctx::lock will serialize us, and the
+        * value must be correct. If we're before, it doesn't matter since
+        * perf_event_context_sched_in() will program the counter.
+        *
+        * However, this hinges on the remote context switch having observed
+        * our task->perf_event_ctxp[] store, such that it will in fact take
+        * ctx::lock in perf_event_context_sched_in().
+        *
+        * We do this by task_function_call(), if the IPI fails to hit the task
+        * we know any future context switch of task must see the
+        * perf_event_ctxp[] store.
          */
-again:
+
         /*
-        * Cannot use task_function_call() because we need to run on the task's
-        * CPU regardless of whether its current or not.
+        * This smp_mb() orders the task->perf_event_ctxp[] store with the
+        * task_cpu() load, such that if the IPI then does not find the task
+        * running, a future context switch of that task must observe the
+        * store.
          */
-       if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+       smp_mb();
+again:
+       if (!task_function_call(task, __perf_install_in_context, event))
                 return;
  
         raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
                 raw_spin_unlock_irq(&ctx->lock);
                 return;
         }
-       raw_spin_unlock_irq(&ctx->lock);
         /*
-        * Since !ctx->is_active doesn't mean anything, we must IPI
-        * unconditionally.
+        * If the task is not running, ctx->lock will avoid it becoming so,
+        * thus we can safely install the event.
          */
-       goto again;
+       if (task_curr(task)) {
+               raw_spin_unlock_irq(&ctx->lock);
+               goto again;
+       }
+       add_event_to_ctx(event, ctx);
+       raw_spin_unlock_irq(&ctx->lock);
  }
  
  /*
author	Peter Zijlstra <peterz@infradead.org>
	Fri, 9 Dec 2016 13:59:00 +0000 (14:59 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Sat, 14 Jan 2017 09:56:10 +0000 (10:56 +0100)