author    Linus Torvalds <torvalds@linux-foundation.org>  2016-12-12 14:27:49 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-12-12 14:27:49 -0800
commit    518bacf5a569d111e256d58b9fbc8d7b80ec42ea (patch)
tree      53aa3297fbd3cf98caa592dec5b3be4e01646ff4 /arch/x86/kernel/fpu
parent    535b2f73f6f60fb227b700136c134c5d7c8f8ad3 (diff)
parent    064e6a8ba61a751625478f656c6f76a6f37a009e (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar:
 "The main changes in this cycle were:

   - do a large round of simplifications after all CPUs do 'eager' FPU
     context switching in v4.9: remove CR0 twiddling, remove leftover
     eager/lazy bts, etc (Andy Lutomirski)

   - more FPU code simplifications: remove struct fpu::counter, clarify
     nomenclature, remove unnecessary arguments/functions and better
     structure the code (Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/fpu: Remove clts()
  x86/fpu: Remove stts()
  x86/fpu: Handle #NM without FPU emulation as an error
  x86/fpu, lguest: Remove CR0.TS support
  x86/fpu, kvm: Remove host CR0.TS manipulation
  x86/fpu: Remove irq_ts_save() and irq_ts_restore()
  x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
  x86/fpu: Get rid of two redundant clts() calls
  x86/fpu: Finish excising 'eagerfpu'
  x86/fpu: Split old_fpu & new_fpu handling into separate functions
  x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
  x86/fpu: Split old & new FPU code paths
  x86/fpu: Remove __fpregs_(de)activate()
  x86/fpu: Rename lazy restore functions to "register state valid"
  x86/fpu, kvm: Remove KVM vcpu->fpu_counter
  x86/fpu: Remove struct fpu::counter
  x86/fpu: Remove use_eager_fpu()
  x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
  x86/fpu: Hard-disable lazy FPU mode
  x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
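The practical meaning of 'eager' switching: the FPU registers are saved and
restored around every context switch, rather than trapping on first use via
CR0.TS. A minimal sketch using the helpers that appear in the diff below
(scheduler glue omitted; not a literal excerpt):

	/* On switch-out: save the outgoing task's registers to its fpstate. */
	copy_fpregs_to_fpstate(&prev->thread.fpu);	/* XSAVE(OPT)/FXSAVE */
	/* On switch-in: load the incoming task's saved state into the registers. */
	copy_kernel_to_fpregs(&next->thread.fpu.state);	/* XRSTOR/FXRSTOR */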
Diffstat (limited to 'arch/x86/kernel/fpu')
 arch/x86/kernel/fpu/bugs.c   |   7
 arch/x86/kernel/fpu/core.c   |  74
 arch/x86/kernel/fpu/init.c   | 107
 arch/x86/kernel/fpu/signal.c |   8
 arch/x86/kernel/fpu/xstate.c |   9
 5 files changed, 12 insertions(+), 193 deletions(-)
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0;
*/
void __init fpu__init_check_bugs(void)
{
- u32 cr0_saved;
s32 fdiv_bug;
/* kernel_fpu_begin/end() relies on patched alternative instructions. */
if (!boot_cpu_has(X86_FEATURE_FPU))
return;
- /* We might have CR0::TS set already, clear it: */
- cr0_saved = read_cr0();
- write_cr0(cr0_saved & ~X86_CR0_TS);
-
kernel_fpu_begin();
/*
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void)
kernel_fpu_end();
- write_cr0(cr0_saved);
-
if (fdiv_bug) {
set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
pr_warn("Hmm, FPU with FDIV bug\n");
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
return this_cpu_read(in_kernel_fpu);
}
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
static bool interrupted_kernel_fpu_idle(void)
{
- if (kernel_fpu_disabled())
- return false;
-
- if (use_eager_fpu())
- return true;
-
- return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+ return !kernel_fpu_disabled();
}
/*
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void)
*/
copy_fpregs_to_fpstate(fpu);
} else {
- this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- __fpregs_activate_hw();
+ __cpu_invalidate_fpregs_state();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)
if (fpu->fpregs_active)
copy_kernel_to_fpregs(&fpu->state);
- else
- __fpregs_deactivate_hw();
kernel_fpu_enable();
}
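The calling convention around these hunks is unchanged: kernel code that wants
to use SIMD still brackets it the same way, only the internal CR0.TS
bookkeeping is gone. A minimal usage sketch:

	kernel_fpu_begin();	/* disables preemption, saves the task's live FPU registers */
	/* ... SSE/AVX/MMX instructions may be used here ... */
	kernel_fpu_end();	/* restores the registers if they were live, re-enables preemption */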
@@ -159,35 +138,6 @@ void kernel_fpu_end(void)
EXPORT_SYMBOL_GPL(kernel_fpu_end);
/*
- * CR0::TS save/restore functions:
- */
-int irq_ts_save(void)
-{
- /*
- * If in process context and not atomic, we can take a spurious DNA fault.
- * Otherwise, doing clts() in process context requires disabling preemption
- * or some heavy lifting like kernel_fpu_begin()
- */
- if (!in_atomic())
- return 0;
-
- if (read_cr0() & X86_CR0_TS) {
- clts();
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(irq_ts_save);
-
-void irq_ts_restore(int TS_state)
-{
- if (TS_state)
- stts();
-}
-EXPORT_SYMBOL_GPL(irq_ts_restore);
-
-/*
* Save the FPU state (mark it for reload if necessary):
*
* This only ever gets called for the current task.
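With CR0.TS never set, the driver-side pattern built on these helpers becomes
a no-op, which is why the whole pair can go. The now-obsolete call sequence
looked like this (used by callers such as the VIA PadLock crypto driver):

	int ts_state = irq_ts_save();	/* in atomic context: clear TS if it was set */
	/* ... touch FPU-related state ... */
	irq_ts_restore(ts_state);	/* re-set TS if we cleared it */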
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&fpu->state);
- else
- fpregs_deactivate(fpu);
+ copy_kernel_to_fpregs(&fpu->state);
}
}
trace_x86_fpu_after_save(fpu);
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Don't let 'init optimized' areas of the XSAVE area
* leak into the child task:
*/
- if (use_eager_fpu())
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/*
* Save current FPU registers directly into the child
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memcpy(&src_fpu->state, &dst_fpu->state,
fpu_kernel_xstate_size);
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&src_fpu->state);
- else
- fpregs_deactivate(src_fpu);
+ copy_kernel_to_fpregs(&src_fpu->state);
}
preempt_enable();
@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
if (fpu->fpstate_active) {
/* Invalidate any lazy state: */
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void)
* ensures we will not be lazy and skip a XRSTOR in the
* future.
*/
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
}
/*
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu)
trace_x86_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
- fpu->counter++;
trace_x86_fpu_after_restore(fpu);
kernel_fpu_enable();
}
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- fpu->counter = 0;
if (fpu->fpregs_active) {
/* Ignore delayed exceptions from user space */
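The invalidation helpers that replace the open-coded writes above are
one-liners; their effect can be read off the replaced lines. A sketch (the
real definitions live in the FPU internal header):

	/* Per-task: force the next activation to reload state from memory. */
	static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
	{
		fpu->last_cpu = -1;
	}

	/* Per-CPU: this CPU's registers no longer cache any task's state. */
	static inline void __cpu_invalidate_fpregs_state(void)
	{
		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
	}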
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@
#include <linux/init.h>
/*
- * Initialize the TS bit in CR0 according to the style of context-switches
- * we are using:
- */
-static void fpu__init_cpu_ctx_switch(void)
-{
- if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
- stts();
- else
- clts();
-}
-
-/*
* Initialize the registers found in all CPUs, CR0 and CR4:
*/
static void fpu__init_cpu_generic(void)
@@ -58,7 +46,6 @@ void fpu__init_cpu(void)
{
fpu__init_cpu_generic();
fpu__init_cpu_xstate();
- fpu__init_cpu_ctx_switch();
}
/*
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void)
}
/*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- * to use 'eager' restores, if we detect that a task is using the FPU
- * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
-/*
* Find supported xfeatures based on cpu features and command-line input.
* This must be called after fpu__init_parse_early_param() is called and
* xfeatures_mask is enumerated.
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- /* Support all xfeatures known to us */
- if (eagerfpu != DISABLE)
- return XCNTXT_MASK;
-
- /* Warning of xfeatures being disabled for no eagerfpu mode */
- if (xfeatures_mask & XFEATURE_MASK_EAGER) {
- pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
- xfeatures_mask & XFEATURE_MASK_EAGER);
- }
-
- /* Return a mask that masks out all features requiring eagerfpu mode */
- return ~XFEATURE_MASK_EAGER;
+ return XCNTXT_MASK;
}
-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_MPX);
-}
-
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
- * the following is true:
- *
- * (1) the cpu has xsaveopt, as it has the optimization and doing eager
- * FPU switching has a relatively low cost compared to a plain xsave;
- * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
- * switching. Should the kernel boot with noxsaveopt, we support MPX
- * with eager FPU switching at a higher cost.
- */
+/* Legacy code to initialize eager fpu mode. */
static void __init fpu__init_system_ctx_switch(void)
{
static bool on_boot_cpu __initdata = 1;
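Mechanically, the 'lazy' strategy described in the comment removed above
worked by setting CR0.TS at context switch so that the first FPU instruction
raised #NM (device-not-available), at which point the handler restored state.
A sketch of that trap-driven flow, for historical reference only (all of it is
gone after this series):

	/* Context switch away from an FPU-using task: */
	stts();					/* set CR0.TS; next FPU insn will fault */
	/* ... task later executes an FPU instruction -> #NM trap ... */
	/* #NM handler: */
	clts();					/* clear CR0.TS */
	copy_kernel_to_fpregs(&fpu->state);	/* lazily restore the task's state */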
@@ -317,17 +238,6 @@ static void __init fpu__init_system_ctx_switch(void)
on_boot_cpu = 0;
WARN_ON_FPU(current->thread.fpu.fpstate_active);
-
- if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
- eagerfpu = ENABLE;
-
- if (xfeatures_mask & XFEATURE_MASK_EAGER)
- eagerfpu = ENABLE;
-
- if (eagerfpu == ENABLE)
- setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
- printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
}
/*
@@ -336,11 +246,6 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
- eagerfpu = DISABLE;
- fpu__clear_eager_fpu_features();
- }
-
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);
@@ -375,14 +280,6 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
*/
fpu__init_cpu();
- /*
- * But don't leave CR0::TS set yet, as some of the FPU setup
- * methods depend on being able to execute FPU instructions
- * that will fault on a set TS, such as the FXSAVE in
- * fpu__init_system_mxcsr().
- */
- clts();
-
fpu__init_system_generic();
fpu__init_system_xstate_size_legacy();
fpu__init_system_xstate();
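After these removals the boot-time sequence in this file is linear; roughly
(simplified from the surrounding file):

	/* fpu__init_system(), post-series: */
	fpu__init_cpu();			/* CR0/CR4 + xstate setup, no TS twiddling */
	fpu__init_system_generic();
	fpu__init_system_xstate_size_legacy();
	fpu__init_system_xstate();
	fpu__init_system_ctx_switch();		/* reduced to a one-time sanity WARN */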
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c210efba..83c23c230b4c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
fpu->fpstate_active = 1;
- if (use_eager_fpu()) {
- preempt_disable();
- fpu__restore(fpu);
- preempt_enable();
- }
+ preempt_disable();
+ fpu__restore(fpu);
+ preempt_enable();
return err;
} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index ce47452879fd..1d7770447b3e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -892,15 +892,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
- /*
- * For most XSAVE components, this would be an arduous task:
- * bringing fpstate up to date with fpregs, updating fpstate,
- * then re-populating fpregs. But, for components that are
- * never lazily managed, we can just access the fpregs
- * directly. PKRU is never managed lazily, so we can just
- * manipulate it directly. Make sure it stays that way.
- */
- WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)