author     Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 14:27:49 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-12-12 14:27:49 -0800
commit     518bacf5a569d111e256d58b9fbc8d7b80ec42ea (patch)
tree       53aa3297fbd3cf98caa592dec5b3be4e01646ff4 /arch/x86/kernel/fpu
parent     535b2f73f6f60fb227b700136c134c5d7c8f8ad3 (diff)
parent     064e6a8ba61a751625478f656c6f76a6f37a009e (diff)
download   linux-518bacf5a569d111e256d58b9fbc8d7b80ec42ea.tar.gz
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar:
"The main changes in this cycle were:
- do a large round of simplifications after all CPUs do 'eager' FPU
context switching in v4.9: remove CR0 twiddling, remove leftover
eager/lazy bits, etc (Andy Lutomirski)
- more FPU code simplifications: remove struct fpu::counter, clarify
nomenclature, remove unnecessary arguments/functions and better
structure the code (Rik van Riel)"
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/fpu: Remove clts()
x86/fpu: Remove stts()
x86/fpu: Handle #NM without FPU emulation as an error
x86/fpu, lguest: Remove CR0.TS support
x86/fpu, kvm: Remove host CR0.TS manipulation
x86/fpu: Remove irq_ts_save() and irq_ts_restore()
x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
x86/fpu: Get rid of two redundant clts() calls
x86/fpu: Finish excising 'eagerfpu'
x86/fpu: Split old_fpu & new_fpu handling into separate functions
x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
x86/fpu: Split old & new FPU code paths
x86/fpu: Remove __fpregs_(de)activate()
x86/fpu: Rename lazy restore functions to "register state valid"
x86/fpu, kvm: Remove KVM vcpu->fpu_counter
x86/fpu: Remove struct fpu::counter
x86/fpu: Remove use_eager_fpu()
x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
x86/fpu: Hard-disable lazy FPU mode
x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
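For readers new to this area: the series retires 'lazy' CR0.TS-based FPU switching in favor of unconditional 'eager' switching. A minimal user-space model of the two strategies is sketched below; every name in it (struct task, cpu_fpregs, switch_fpu_eager(), handle_nm_trap()) is an illustrative stand-in, not the kernel's actual API:

	#include <stdbool.h>
	#include <string.h>

	struct task {
		unsigned char fpu_state[512];	/* stand-in for the task's XSAVE area */
		bool fpregs_loaded;		/* do the CPU registers hold this task's state? */
	};

	static unsigned char cpu_fpregs[512];	/* models the hardware FPU register file */
	static bool cr0_ts;			/* models CR0.TS: FPU insns fault while set */

	/*
	 * Eager switching (the only mode left after this series): save and
	 * restore unconditionally on every context switch; CR0.TS is never
	 * touched. XSAVEOPT/XSAVES keep this cheap by skipping unused and
	 * untouched state in hardware.
	 */
	void switch_fpu_eager(struct task *prev, struct task *next)
	{
		memcpy(prev->fpu_state, cpu_fpregs, sizeof(cpu_fpregs));	/* XSAVE  */
		memcpy(cpu_fpregs, next->fpu_state, sizeof(cpu_fpregs));	/* XRSTOR */
		next->fpregs_loaded = true;
	}

	/*
	 * Lazy switching (the removed mode): the save still happens eagerly,
	 * but the restore is deferred by setting CR0.TS, so the next FPU
	 * instruction traps with #NM and the handler reloads on first use.
	 */
	void switch_fpu_lazy(struct task *prev, struct task *next)
	{
		memcpy(prev->fpu_state, cpu_fpregs, sizeof(cpu_fpregs));	/* XSAVE */
		next->fpregs_loaded = false;
		cr0_ts = true;							/* stts() */
	}

	void handle_nm_trap(struct task *current_task)
	{
		cr0_ts = false;							/* clts() */
		memcpy(cpu_fpregs, current_task->fpu_state, sizeof(cpu_fpregs));
		current_task->fpregs_loaded = true;
	}

The long comment removed from init.c in the diff below records why 'lazy' lost: only restores were ever deferred, and on hardware with optimized XSAVE variants the deferral is almost never a performance win.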
Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r--  arch/x86/kernel/fpu/bugs.c   |   7
-rw-r--r--  arch/x86/kernel/fpu/core.c   |  74
-rw-r--r--  arch/x86/kernel/fpu/init.c   | 107
-rw-r--r--  arch/x86/kernel/fpu/signal.c |   8
-rw-r--r--  arch/x86/kernel/fpu/xstate.c |   9
5 files changed, 12 insertions, 193 deletions
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0;
  */
 void __init fpu__init_check_bugs(void)
 {
-	u32 cr0_saved;
 	s32 fdiv_bug;
 
 	/* kernel_fpu_begin/end() relies on patched alternative instructions. */
 	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return;
 
-	/* We might have CR0::TS set already, clear it: */
-	cr0_saved = read_cr0();
-	write_cr0(cr0_saved & ~X86_CR0_TS);
-
 	kernel_fpu_begin();
 
 	/*
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void)
 
 	kernel_fpu_end();
 
-	write_cr0(cr0_saved);
-
 	if (fdiv_bug) {
 		set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
 		pr_warn("Hmm, FPU with FDIV bug\n");
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
 	return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-	if (kernel_fpu_disabled())
-		return false;
-
-	if (use_eager_fpu())
-		return true;
-
-	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+	return !kernel_fpu_disabled();
 }
 
 /*
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void)
 		 */
 		copy_fpregs_to_fpstate(fpu);
 	} else {
-		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-		__fpregs_activate_hw();
+		__cpu_invalidate_fpregs_state();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)
 
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
-	else
-		__fpregs_deactivate_hw();
 
 	kernel_fpu_enable();
 }
@@ -159,35 +138,6 @@ void kernel_fpu_end(void)
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
 
 /*
- * CR0::TS save/restore functions:
- */
-int irq_ts_save(void)
-{
-	/*
-	 * If in process context and not atomic, we can take a spurious DNA fault.
-	 * Otherwise, doing clts() in process context requires disabling preemption
-	 * or some heavy lifting like kernel_fpu_begin()
-	 */
-	if (!in_atomic())
-		return 0;
-
-	if (read_cr0() & X86_CR0_TS) {
-		clts();
-		return 1;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(irq_ts_save);
-
-void irq_ts_restore(int TS_state)
-{
-	if (TS_state)
-		stts();
-}
-EXPORT_SYMBOL_GPL(irq_ts_restore);
-
-/*
  * Save the FPU state (mark it for reload if necessary):
  *
  * This only ever gets called for the current task.
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu)
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			if (use_eager_fpu())
-				copy_kernel_to_fpregs(&fpu->state);
-			else
-				fpregs_deactivate(fpu);
+			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
 	trace_x86_fpu_after_save(fpu);
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->counter = 0;
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 		memcpy(&src_fpu->state, &dst_fpu->state,
 		       fpu_kernel_xstate_size);
 
-		if (use_eager_fpu())
-			copy_kernel_to_fpregs(&src_fpu->state);
-		else
-			fpregs_deactivate(src_fpu);
+		copy_kernel_to_fpregs(&src_fpu->state);
 	}
 	preempt_enable();
 
@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
 	if (fpu->fpstate_active) {
 		/* Invalidate any lazy state: */
-		fpu->last_cpu = -1;
+		__fpu_invalidate_fpregs_state(fpu);
 	} else {
 		fpstate_init(&fpu->state);
 		trace_x86_fpu_init_state(fpu);
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void)
 	 * ensures we will not be lazy and skip a XRSTOR in the
 	 * future.
 	 */
-	fpu->last_cpu = -1;
+	__fpu_invalidate_fpregs_state(fpu);
 }
 
 /*
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu)
 	trace_x86_fpu_before_restore(fpu);
 	fpregs_activate(fpu);
 	copy_kernel_to_fpregs(&fpu->state);
-	fpu->counter++;
 	trace_x86_fpu_after_restore(fpu);
 	kernel_fpu_enable();
 }
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
 void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
-	fpu->counter = 0;
 
 	if (fpu->fpregs_active) {
 		/* Ignore delayed exceptions from user space */
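The irq_ts_save()/irq_ts_restore() helpers removed above let atomic-context code (the VIA PadLock crypto drivers were among the users) clear CR0.TS around FPU instructions. With TS permanently clear, the plain kernel_fpu_begin()/kernel_fpu_end() bracket covers that case. A sketch of the calling pattern, where do_padlock_sha() is a hypothetical placeholder for real SSE-using work:

	#include <linux/types.h>
	#include <asm/fpu/api.h>	/* kernel_fpu_begin() / kernel_fpu_end() */

	static void do_padlock_sha(const u8 *buf, size_t len);	/* hypothetical SSE-using helper */

	static void sha_update(const u8 *buf, size_t len)
	{
		/*
		 * Old pattern, needed because a set CR0.TS would fault:
		 *
		 *	int ts = irq_ts_save();
		 *	do_padlock_sha(buf, len);
		 *	irq_ts_restore(ts);
		 *
		 * New pattern, now that CR0.TS is never set:
		 */
		kernel_fpu_begin();
		do_padlock_sha(buf, len);
		kernel_fpu_end();
	}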
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@
 #include <linux/init.h>
 
 /*
- * Initialize the TS bit in CR0 according to the style of context-switches
- * we are using:
- */
-static void fpu__init_cpu_ctx_switch(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
-		stts();
-	else
-		clts();
-}
-
-/*
  * Initialize the registers found in all CPUs, CR0 and CR4:
  */
 static void fpu__init_cpu_generic(void)
@@ -58,7 +46,6 @@ void fpu__init_cpu(void)
 {
 	fpu__init_cpu_generic();
 	fpu__init_cpu_xstate();
-	fpu__init_cpu_ctx_switch();
 }
 
 /*
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 }
 
 /*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- *   to use 'eager' restores, if we detect that a task is using the FPU
- *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
-/*
  * Find supported xfeatures based on cpu features and command-line input.
  * This must be called after fpu__init_parse_early_param() is called and
  * xfeatures_mask is enumerated.
  */
 u64 __init fpu__get_supported_xfeatures_mask(void)
 {
-	/* Support all xfeatures known to us */
-	if (eagerfpu != DISABLE)
-		return XCNTXT_MASK;
-
-	/* Warning of xfeatures being disabled for no eagerfpu mode */
-	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
-		pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
-			xfeatures_mask & XFEATURE_MASK_EAGER);
-	}
-
-	/* Return a mask that masks out all features requiring eagerfpu mode */
-	return ~XFEATURE_MASK_EAGER;
+	return XCNTXT_MASK;
 }
 
-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
-}
-
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of
- * the following is true:
- *
- * (1) the cpu has xsaveopt, as it has the optimization and doing eager
- *     FPU switching has a relatively low cost compared to a plain xsave;
- * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
- *     switching. Should the kernel boot with noxsaveopt, we support MPX
- *     with eager FPU switching at a higher cost.
- */
+/* Legacy code to initialize eager fpu mode. */
 static void __init fpu__init_system_ctx_switch(void)
 {
 	static bool on_boot_cpu __initdata = 1;
@@ -317,17 +238,6 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 
 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
-
-	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
-		eagerfpu = ENABLE;
-
-	if (xfeatures_mask & XFEATURE_MASK_EAGER)
-		eagerfpu = ENABLE;
-
-	if (eagerfpu == ENABLE)
-		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
-
-	printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
"eager" : "lazy"); } /* @@ -336,11 +246,6 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { - eagerfpu = DISABLE; - fpu__clear_eager_fpu_features(); - } - if (cmdline_find_option_bool(boot_command_line, "no387")) setup_clear_cpu_cap(X86_FEATURE_FPU); @@ -375,14 +280,6 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) */ fpu__init_cpu(); - /* - * But don't leave CR0::TS set yet, as some of the FPU setup - * methods depend on being able to execute FPU instructions - * that will fault on a set TS, such as the FXSAVE in - * fpu__init_system_mxcsr(). - */ - clts(); - fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a184c210efba..83c23c230b4c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } fpu->fpstate_active = 1; - if (use_eager_fpu()) { - preempt_disable(); - fpu__restore(fpu); - preempt_enable(); - } + preempt_disable(); + fpu__restore(fpu); + preempt_enable(); return err; } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ce47452879fd..1d7770447b3e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -892,15 +892,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; - /* - * For most XSAVE components, this would be an arduous task: - * brining fpstate up to date with fpregs, updating fpstate, - * then re-populating fpregs. But, for components that are - * never lazily managed, we can just access the fpregs - * directly. PKRU is never managed lazily, so we can just - * manipulate it directly. Make sure it stays that way. - */ - WARN_ON_ONCE(!use_eager_fpu()); /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) |