git.baikalelectronics.ru Git - kernel.git/commitdiff
intel_idle: Disable IBRS during long idle
author Peter Zijlstra <peterz@infradead.org>
Mon, 3 Oct 2022 13:10:18 +0000 (10:10 -0300)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 7 Oct 2022 07:16:55 +0000 (09:16 +0200)
commit 9f8be7f6268ceeb2f4322edcea2df4f5990b8f96 upstream.

Having IBRS enabled while the SMT sibling is idle unnecessarily slows
down the running sibling. OTOH, disabling IBRS around idle takes two
MSR writes, which will increase the idle latency.

Therefore, only disable IBRS around deeper idle states. Shallow idle
states are bounded by the tick in duration, since NOHZ is not allowed
for them by virtue of their short target residency.

Only do this for mwait-driven idle, since that keeps interrupts disabled
across idle, which makes disabling IBRS vs IRQ-entry a non-issue.

Note: C6 is a random threshold, most importantly C1 probably shouldn't
disable IBRS, benchmarking needed.

Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
[cascardo: no CPUIDLE_FLAG_IRQ_ENABLE]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[cascardo: context adjustments]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/include/asm/nospec-branch.h
arch/x86/kernel/cpu/bugs.c
drivers/idle/intel_idle.c

index 2de7c08fa36ee931a4b9ede51d6ff26fab9aa6d2..0b1630c2c46b298a4bc76c2b371aa4aac1cc7dbe 100644 (file)
@@ -299,6 +299,7 @@ static inline void indirect_branch_prediction_barrier(void)
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
 extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
 
 /*
  * With retpoline, we must use IBRS to restrict branch prediction
index d0baeb89f5ac8c9b96e27cda2e46333d96648f46..5f8ea848d30b0f66d63cc2139efefe72e008ab7c 100644 (file)
@@ -77,6 +77,12 @@ void write_spec_ctrl_current(u64 val, bool force)
                wrmsrl(MSR_IA32_SPEC_CTRL, val);
 }
 
+u64 spec_ctrl_current(void)
+{
+       return this_cpu_read(x86_spec_ctrl_current);    /* this CPU's copy */
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
+
 /*
  * The vendor and possibly platform specific bits which can be modified in
  * x86_spec_ctrl_base.
index 347b08b56042f707203fd625e6e757b7b186ae6a..63b22122626185a0ee45f7676de6f2b611863643 100644 (file)
 #include <linux/tick.h>
 #include <trace/events/power.h>
 #include <linux/sched.h>
+#include <linux/sched/smt.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
 
@@ -97,6 +99,12 @@ static struct cpuidle_state *cpuidle_state_table;
  */
 #define CPUIDLE_FLAG_TLB_FLUSHED       0x10000
 
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS). Must not share a bit with
+ * CPUIDLE_FLAG_TLB_FLUSHED (0x10000 == BIT(16)) or the MWAIT hint (31:24).
+ */
+#define CPUIDLE_FLAG_IBRS              BIT(17)
+
 /*
  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
  * the C-state (top nibble) and sub-state (bottom nibble)
@@ -107,6 +115,24 @@ static struct cpuidle_state *cpuidle_state_table;
 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
 #define MWAIT2flg(eax) ((eax & 0xFF) << 24)
 
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+                                    struct cpuidle_driver *drv, int index)
+{
+       bool smt_active = sched_smt_active();
+       u64 spec_ctrl = spec_ctrl_current();    /* value to restore on wakeup */
+       int ret;
+
+       if (smt_active)         /* IBRS only slows a running SMT sibling */
+               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+       ret = intel_idle(dev, drv, index);      /* default .enter handler */
+
+       if (smt_active)         /* undo the clear done before idling */
+               wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+       return ret;
+}
+
 /*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
@@ -605,7 +631,7 @@ static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C6",
                .desc = "MWAIT 0x20",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 85,
                .target_residency = 200,
                .enter = &intel_idle,
@@ -613,7 +639,7 @@ static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C7s",
                .desc = "MWAIT 0x33",
-               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 124,
                .target_residency = 800,
                .enter = &intel_idle,
@@ -621,7 +647,7 @@ static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C8",
                .desc = "MWAIT 0x40",
-               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 200,
                .target_residency = 800,
                .enter = &intel_idle,
@@ -629,7 +655,7 @@ static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C9",
                .desc = "MWAIT 0x50",
-               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 480,
                .target_residency = 5000,
                .enter = &intel_idle,
@@ -637,7 +663,7 @@ static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C10",
                .desc = "MWAIT 0x60",
-               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 890,
                .target_residency = 5000,
                .enter = &intel_idle,
@@ -666,7 +692,7 @@ static struct cpuidle_state skx_cstates[] = {
        {
                .name = "C6",
                .desc = "MWAIT 0x20",
-               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
                .exit_latency = 133,
                .target_residency = 600,
                .enter = &intel_idle,
@@ -1370,6 +1396,11 @@ static void __init intel_idle_cpuidle_driver_init(void)
                drv->states[drv->state_count] = /* structure copy */
                        cpuidle_state_table[cstate];
 
+               if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+                   cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+                       drv->states[drv->state_count].enter = intel_idle_ibrs;
+               }
+
                drv->state_count += 1;
        }