aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/alternative-asm.h53
-rw-r--r--arch/x86/include/asm/alternative.h73
-rw-r--r--arch/x86/include/asm/apic.h69
-rw-r--r--arch/x86/include/asm/barrier.h76
-rw-r--r--arch/x86/include/asm/cacheflush.h59
-rw-r--r--arch/x86/include/asm/calling.h285
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h39
-rw-r--r--arch/x86/include/asm/debugreg.h5
-rw-r--r--arch/x86/include/asm/desc.h27
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/dma.h2
-rw-r--r--arch/x86/include/asm/dwarf2.h24
-rw-r--r--arch/x86/include/asm/efi.h30
-rw-r--r--arch/x86/include/asm/elf.h8
-rw-r--r--arch/x86/include/asm/fb.h6
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/fpu-internal.h140
-rw-r--r--arch/x86/include/asm/ftrace.h33
-rw-r--r--arch/x86/include/asm/hash.h7
-rw-r--r--arch/x86/include/asm/highmem.h25
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h85
-rw-r--r--arch/x86/include/asm/i387.h6
-rw-r--r--arch/x86/include/asm/imr.h60
-rw-r--r--arch/x86/include/asm/insn.h12
-rw-r--r--arch/x86/include/asm/intel-mid.h3
-rw-r--r--arch/x86/include/asm/io.h10
-rw-r--r--arch/x86/include/asm/io_apic.h40
-rw-r--r--arch/x86/include/asm/iommu_table.h11
-rw-r--r--arch/x86/include/asm/irq_remapping.h4
-rw-r--r--arch/x86/include/asm/irq_vectors.h6
-rw-r--r--arch/x86/include/asm/irqflags.h49
-rw-r--r--arch/x86/include/asm/jump_label.h5
-rw-r--r--arch/x86/include/asm/kasan.h31
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h120
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/lguest_hcall.h1
-rw-r--r--arch/x86/include/asm/livepatch.h46
-rw-r--r--arch/x86/include/asm/mce.h22
-rw-r--r--arch/x86/include/asm/microcode.h75
-rw-r--r--arch/x86/include/asm/microcode_amd.h4
-rw-r--r--arch/x86/include/asm/microcode_intel.h15
-rw-r--r--arch/x86/include/asm/mmu.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h76
-rw-r--r--arch/x86/include/asm/mpx.h103
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/include/asm/page_32_types.h1
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h23
-rw-r--r--arch/x86/include/asm/paravirt.h27
-rw-r--r--arch/x86/include/asm/pat.h7
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/percpu.h61
-rw-r--r--arch/x86/include/asm/perf_event.h3
-rw-r--r--arch/x86/include/asm/pgtable-2level.h38
-rw-r--r--arch/x86/include/asm/pgtable-3level.h12
-rw-r--r--arch/x86/include/asm/pgtable.h92
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h11
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h137
-rw-r--r--arch/x86/include/asm/platform_sst_audio.h62
-rw-r--r--arch/x86/include/asm/pmc_atom.h22
-rw-r--r--arch/x86/include/asm/preempt.h3
-rw-r--r--arch/x86/include/asm/processor.h185
-rw-r--r--arch/x86/include/asm/ptrace.h45
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/segment.h299
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sigcontext.h6
-rw-r--r--arch/x86/include/asm/sighandling.h4
-rw-r--r--arch/x86/include/asm/smap.h30
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h68
-rw-r--r--arch/x86/include/asm/special_insns.h30
-rw-r--r--arch/x86/include/asm/spinlock.h106
-rw-r--r--arch/x86/include/asm/string_64.h18
-rw-r--r--arch/x86/include/asm/switch_to.h12
-rw-r--r--arch/x86/include/asm/thread_info.h95
-rw-r--r--arch/x86/include/asm/tlbflush.h77
-rw-r--r--arch/x86/include/asm/traps.h7
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h4
-rw-r--r--arch/x86/include/asm/vgtod.h21
-rw-r--r--arch/x86/include/asm/virtext.h5
-rw-r--r--arch/x86/include/asm/vmx.h7
-rw-r--r--arch/x86/include/asm/vsyscall.h33
-rw-r--r--arch/x86/include/asm/vvar.h2
-rw-r--r--arch/x86/include/asm/x86_init.h3
-rw-r--r--arch/x86/include/asm/xen/cpuid.h91
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h4
-rw-r--r--arch/x86/include/asm/xen/page.h89
-rw-r--r--arch/x86/include/asm/xsave.h29
98 files changed, 2191 insertions, 1376 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 0ab4f9fd2687..3a45668f6dc3 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16);
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity);
+extern void (*__acpi_unregister_gsi)(u32 gsi);
static inline void disable_acpi(void)
{
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
.endm
#endif
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
+ .byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+ \oldinstr
+141:
+ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr
+144:
+ .popsection
+.endm
+
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+ \oldinstr
+141:
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr1
+144:
+ \newinstr2
+145:
+ .popsection
.endm
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
s32 repl_offset; /* offset to replacement instruction */
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction, <= instrlen */
-};
+ u8 replacementlen; /* length of new instruction */
+ u8 padlen; /* length of build-time padding */
+} __packed;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num) "664"#num
+#define e_replacement(num) "665"#num
-#define b_replacement(number) "663"#number
-#define e_replacement(number) "664"#number
+#define alt_end_marker "663"
+#define alt_slen "662b-661b"
+#define alt_pad_len alt_end_marker"b-662b"
+#define alt_total_slen alt_end_marker"b-661b"
+#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
-#define ALTINSTR_ENTRY(feature, number) \
+#define OLDINSTR(oldinstr, num) \
+ __OLDINSTR(oldinstr, num) \
+ alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
+ alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \
- " .long " b_replacement(number)"f - .\n" /* new instruction */ \
+ " .long " b_replacement(num)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
- " .byte " alt_slen "\n" /* source len */ \
- " .byte " alt_rlen(number) "\n" /* replacement len */
-
-#define DISCARD_ENTRY(number) /* rlen <= slen */ \
- " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+ " .byte " alt_total_slen "\n" /* source len */ \
+ " .byte " alt_rlen(num) "\n" /* replacement len */ \
+ " .byte " alt_pad_len "\n" /* pad len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
- b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+ b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
- OLDINSTR(oldinstr) \
+ OLDINSTR(oldinstr, 1) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature, 1) \
".popsection\n" \
- ".pushsection .discard,\"aw\",@progbits\n" \
- DISCARD_ENTRY(1) \
- ".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
".popsection"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
- OLDINSTR(oldinstr) \
+ OLDINSTR_2(oldinstr, 1, 2) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature1, 1) \
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
- ".pushsection .discard,\"aw\",@progbits\n" \
- DISCARD_ENTRY(1) \
- DISCARD_ENTRY(2) \
- ".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative(oldinstr, newinstr, feature) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+ asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
/*
* Alternative inline assembly with input.
*
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 465b309af254..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+ alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
ASM_OUTPUT2("0" (v), "m" (*addr)));
}
@@ -106,7 +106,14 @@ extern u32 native_safe_apic_wait_icr_idle(void);
extern void native_apic_icr_write(u32 low, u32 id);
extern u64 native_apic_icr_read(void);
-extern int x2apic_mode;
+static inline bool apic_is_x2apic_enabled(void)
+{
+ u64 msr;
+
+ if (rdmsrl_safe(MSR_IA32_APICBASE, &msr))
+ return false;
+ return msr & X2APIC_ENABLE;
+}
#ifdef CONFIG_X86_X2APIC
/*
@@ -169,48 +176,23 @@ static inline u64 native_x2apic_icr_read(void)
return val;
}
+extern int x2apic_mode;
extern int x2apic_phys;
-extern int x2apic_preenabled;
-extern void check_x2apic(void);
-extern void enable_x2apic(void);
+extern void __init check_x2apic(void);
+extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
{
- u64 msr;
-
- if (!cpu_has_x2apic)
- return 0;
-
- rdmsrl(MSR_IA32_APICBASE, msr);
- if (msr & X2APIC_ENABLE)
- return 1;
- return 0;
+ return cpu_has_x2apic && apic_is_x2apic_enabled();
}
#define x2apic_supported() (cpu_has_x2apic)
-static inline void x2apic_force_phys(void)
-{
- x2apic_phys = 1;
-}
#else
-static inline void disable_x2apic(void)
-{
-}
-static inline void check_x2apic(void)
-{
-}
-static inline void enable_x2apic(void)
-{
-}
-static inline int x2apic_enabled(void)
-{
- return 0;
-}
-static inline void x2apic_force_phys(void)
-{
-}
+static inline void check_x2apic(void) { }
+static inline void x2apic_setup(void) { }
+static inline int x2apic_enabled(void) { return 0; }
-#define x2apic_preenabled 0
-#define x2apic_supported() 0
+#define x2apic_mode (0)
+#define x2apic_supported() (0)
#endif
extern void enable_IR_x2apic(void);
@@ -219,22 +201,29 @@ extern int get_physical_broadcast(void);
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
-extern void connect_bsp_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void setup_local_APIC(void);
-extern void end_local_APIC_setup(void);
-extern void bsp_end_local_APIC_setup(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+ return -1;
+}
+#else
extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
/*
* On 32bit this is mach-xxx local
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0f4460b5636d..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,78 +24,28 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb() rmb()
+#define dma_rmb() rmb()
#else
-# define smp_rmb() barrier()
+#define dma_rmb() barrier()
#endif
+#define dma_wmb() barrier()
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
+#define read_barrier_depends() do { } while (0)
+#define smp_read_barrier_depends() do { } while (0)
+
#if defined(CONFIG_X86_PPRO_FENCE)
/*
@@ -145,13 +95,11 @@ do { \
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
* code region.
- *
- * (Could use an alternative three way for this if there was one.)
*/
static __always_inline void rdtsc_barrier(void)
{
- alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
- alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+ alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+ "lfence", X86_FEATURE_LFENCE_RDTSC);
}
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 9863ee3747da..47c8e32f621a 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -5,65 +5,6 @@
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
-#ifdef CONFIG_X86_PAT
-/*
- * X86 PAT uses page flags WC and Uncached together to keep track of
- * memory type of pages that have backing page struct. X86 PAT supports 3
- * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and
- * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
- * been changed from its default (value of -1 used to denote this).
- * Note we do not support _PAGE_CACHE_UC here.
- */
-
-#define _PGMT_DEFAULT 0
-#define _PGMT_WC (1UL << PG_arch_1)
-#define _PGMT_UC_MINUS (1UL << PG_uncached)
-#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
-
-static inline unsigned long get_page_memtype(struct page *pg)
-{
- unsigned long pg_flags = pg->flags & _PGMT_MASK;
-
- if (pg_flags == _PGMT_DEFAULT)
- return -1;
- else if (pg_flags == _PGMT_WC)
- return _PAGE_CACHE_WC;
- else if (pg_flags == _PGMT_UC_MINUS)
- return _PAGE_CACHE_UC_MINUS;
- else
- return _PAGE_CACHE_WB;
-}
-
-static inline void set_page_memtype(struct page *pg, unsigned long memtype)
-{
- unsigned long memtype_flags = _PGMT_DEFAULT;
- unsigned long old_flags;
- unsigned long new_flags;
-
- switch (memtype) {
- case _PAGE_CACHE_WC:
- memtype_flags = _PGMT_WC;
- break;
- case _PAGE_CACHE_UC_MINUS:
- memtype_flags = _PGMT_UC_MINUS;
- break;
- case _PAGE_CACHE_WB:
- memtype_flags = _PGMT_WB;
- break;
- }
-
- do {
- old_flags = pg->flags;
- new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
- } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
-}
-#else
-static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
-static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
-#endif
-
/*
* The set_memory_* API can be used to change various attributes of a virtual
* address range. The attributes include:
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b67fd11..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,144 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-
-#define ARGOFFSET R11
-#define SWFRAME ORIG_RAX
-
- .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
- subq $9*8+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET 9*8+\addskip
- movq_cfi rdi, 8*8
- movq_cfi rsi, 7*8
- movq_cfi rdx, 6*8
-
- .if \save_rcx
- movq_cfi rcx, 5*8
- .endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15 0*8
+#define R14 1*8
+#define R13 2*8
+#define R12 3*8
+#define RBP 4*8
+#define RBX 5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11 6*8
+#define R10 7*8
+#define R9 8*8
+#define R8 9*8
+#define RAX 10*8
+#define RCX 11*8
+#define RDX 12*8
+#define RSI 13*8
+#define RDI 14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 15*8
+/* Return frame for iretq */
+#define RIP 16*8
+#define CS 17*8
+#define EFLAGS 18*8
+#define RSP 19*8
+#define SS 20*8
+
+#define SIZEOF_PTREGS 21*8
+
+ .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+ subq $15*8+\addskip, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+ .endm
- .if \rax_enosys
- movq $-ENOSYS, 4*8(%rsp)
- .else
- movq_cfi rax, 4*8
+ .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+ .if \r11
+ movq_cfi r11, 6*8+\offset
.endif
-
- .if \save_r891011
- movq_cfi r8, 3*8
- movq_cfi r9, 2*8
- movq_cfi r10, 1*8
- movq_cfi r11, 0*8
+ .if \r8910
+ movq_cfi r10, 7*8+\offset
+ movq_cfi r9, 8*8+\offset
+ movq_cfi r8, 9*8+\offset
+ .endif
+ .if \rax
+ movq_cfi rax, 10*8+\offset
+ .endif
+ .if \rcx
+ movq_cfi rcx, 11*8+\offset
.endif
+ movq_cfi rdx, 12*8+\offset
+ movq_cfi rsi, 13*8+\offset
+ movq_cfi rdi, 14*8+\offset
+ .endm
+ .macro SAVE_C_REGS offset=0
+ SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+ SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_R891011
+ SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RCX_R891011
+ SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+ SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+ .endm
+
+ .macro SAVE_EXTRA_REGS offset=0
+ movq_cfi r15, 0*8+\offset
+ movq_cfi r14, 1*8+\offset
+ movq_cfi r13, 2*8+\offset
+ movq_cfi r12, 3*8+\offset
+ movq_cfi rbp, 4*8+\offset
+ movq_cfi rbx, 5*8+\offset
+ .endm
+ .macro SAVE_EXTRA_REGS_RBP offset=0
+ movq_cfi rbp, 4*8+\offset
+ .endm
+ .macro RESTORE_EXTRA_REGS offset=0
+ movq_cfi_restore 0*8+\offset, r15
+ movq_cfi_restore 1*8+\offset, r14
+ movq_cfi_restore 2*8+\offset, r13
+ movq_cfi_restore 3*8+\offset, r12
+ movq_cfi_restore 4*8+\offset, rbp
+ movq_cfi_restore 5*8+\offset, rbx
.endm
-#define ARG_SKIP (9*8)
+ .macro ZERO_EXTRA_REGS
+ xorl %r15d, %r15d
+ xorl %r14d, %r14d
+ xorl %r13d, %r13d
+ xorl %r12d, %r12d
+ xorl %ebp, %ebp
+ xorl %ebx, %ebx
+ .endm
- .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
- rstor_r8910=1, rstor_rdx=1
+ .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
- movq_cfi_restore 0*8, r11
+ movq_cfi_restore 6*8, r11
.endif
-
.if \rstor_r8910
- movq_cfi_restore 1*8, r10
- movq_cfi_restore 2*8, r9
- movq_cfi_restore 3*8, r8
+ movq_cfi_restore 7*8, r10
+ movq_cfi_restore 8*8, r9
+ movq_cfi_restore 9*8, r8
.endif
-
.if \rstor_rax
- movq_cfi_restore 4*8, rax
+ movq_cfi_restore 10*8, rax
.endif
-
.if \rstor_rcx
- movq_cfi_restore 5*8, rcx
+ movq_cfi_restore 11*8, rcx
.endif
-
.if \rstor_rdx
- movq_cfi_restore 6*8, rdx
- .endif
-
- movq_cfi_restore 7*8, rsi
- movq_cfi_restore 8*8, rdi
-
- .if ARG_SKIP+\addskip > 0
- addq $ARG_SKIP+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
+ movq_cfi_restore 12*8, rdx
.endif
+ movq_cfi_restore 13*8, rsi
+ movq_cfi_restore 14*8, rdi
.endm
-
- .macro LOAD_ARGS offset, skiprax=0
- movq \offset(%rsp), %r11
- movq \offset+8(%rsp), %r10
- movq \offset+16(%rsp), %r9
- movq \offset+24(%rsp), %r8
- movq \offset+40(%rsp), %rcx
- movq \offset+48(%rsp), %rdx
- movq \offset+56(%rsp), %rsi
- movq \offset+64(%rsp), %rdi
- .if \skiprax
- .else
- movq \offset+72(%rsp), %rax
- .endif
+ .macro RESTORE_C_REGS
+ RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
-
-#define REST_SKIP (6*8)
-
- .macro SAVE_REST
- subq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET REST_SKIP
- movq_cfi rbx, 5*8
- movq_cfi rbp, 4*8
- movq_cfi r12, 3*8
- movq_cfi r13, 2*8
- movq_cfi r14, 1*8
- movq_cfi r15, 0*8
+ .macro RESTORE_C_REGS_EXCEPT_RAX
+ RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
-
- .macro RESTORE_REST
- movq_cfi_restore 0*8, r15
- movq_cfi_restore 1*8, r14
- movq_cfi_restore 2*8, r13
- movq_cfi_restore 3*8, r12
- movq_cfi_restore 4*8, rbp
- movq_cfi_restore 5*8, rbx
- addq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
+ .macro RESTORE_C_REGS_EXCEPT_RCX
+ RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
-
- .macro SAVE_ALL
- SAVE_ARGS
- SAVE_REST
+ .macro RESTORE_C_REGS_EXCEPT_R11
+ RESTORE_C_REGS_HELPER 1,1,0,1,1
+ .endm
+ .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+ RESTORE_C_REGS_HELPER 1,0,0,1,1
+ .endm
+ .macro RESTORE_RSI_RDI
+ RESTORE_C_REGS_HELPER 0,0,0,0,0
+ .endm
+ .macro RESTORE_RSI_RDI_RDX
+ RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm
- .macro RESTORE_ALL addskip=0
- RESTORE_REST
- RESTORE_ARGS 1, \addskip
+ .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+ addq $15*8+\addskip, %rsp
+ CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
.endm
.macro icebp
@@ -211,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro SAVE_ALL
- pushl_cfi %eax
- CFI_REL_OFFSET eax, 0
- pushl_cfi %ebp
- CFI_REL_OFFSET ebp, 0
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %edx
- CFI_REL_OFFSET edx, 0
- pushl_cfi %ecx
- CFI_REL_OFFSET ecx, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg eax
+ pushl_cfi_reg ebp
+ pushl_cfi_reg edi
+ pushl_cfi_reg esi
+ pushl_cfi_reg edx
+ pushl_cfi_reg ecx
+ pushl_cfi_reg ebx
.endm
.macro RESTORE_ALL
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %ecx
- CFI_RESTORE ecx
- popl_cfi %edx
- CFI_RESTORE edx
- popl_cfi %esi
- CFI_RESTORE esi
- popl_cfi %edi
- CFI_RESTORE edi
- popl_cfi %ebp
- CFI_RESTORE ebp
- popl_cfi %eax
- CFI_RESTORE eax
+ popl_cfi_reg ebx
+ popl_cfi_reg ecx
+ popl_cfi_reg edx
+ popl_cfi_reg esi
+ popl_cfi_reg edi
+ popl_cfi_reg ebp
+ popl_cfi_reg eax
.endm
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
sp = task_pt_regs(current)->sp;
} else {
/* -128 for the x32 ABI redzone */
- sp = this_cpu_read(old_rsp) - 128;
+ sp = task_pt_regs(current)->sp - 128;
}
return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0bb1335313b2..854c04b3c9c2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -174,6 +174,7 @@
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
/*
@@ -189,6 +190,11 @@
#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */
+#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
+#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
+#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
+#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -225,7 +231,9 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -383,6 +391,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
+#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
#if __GNUC__ >= 4
extern void warn_pre_alternatives(void);
@@ -411,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* 1: do replace */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -425,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
@@ -450,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -476,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
- asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ asm_volatile_goto("1: jmp %l[t_dynamic]\n"
"2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
- " .long 3f - .\n" /* repl offset */
+ " .long 4f - .\n" /* repl offset */
" .word %P1\n" /* always replace */
- " .byte 2b - 1b\n" /* src len */
- " .byte 4f - 3f\n" /* repl len */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
- "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
- "4:\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
- " .byte 2b - 1b\n" /* src len */
+ " .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
: : t_dynamic, t_no);
@@ -520,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P2\n" /* always replace */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -534,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 4b - 3b\n" /* src len */
" .byte 6f - 5f\n" /* repl len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 61fd18b83b6c..12cb66f6d3a5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
+#ifdef CONFIG_CPU_SUP_AMD
+extern void set_dr_addr_mask(unsigned long mask, int dr);
+#else
+static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
+#endif
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 50d033a8947d..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}
-#define _LDT_empty(info) \
+/* This intentionally ignores lm, since 32-bit apps don't have that field. */
+#define LDT_empty(info) \
((info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
@@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
(info)->seg_not_present == 1 && \
(info)->useable == 0)
-#ifdef CONFIG_X86_64
-#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-#else
-#define LDT_empty(info) (_LDT_empty(info))
-#endif
+/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
+static inline bool LDT_zero(const struct user_desc *info)
+{
+ return (info->base_addr == 0 &&
+ info->limit == 0 &&
+ info->contents == 0 &&
+ info->read_exec_only == 0 &&
+ info->seg_32bit == 0 &&
+ info->limit_in_pages == 0 &&
+ info->seg_not_present == 0 &&
+ info->useable == 0);
+}
static inline void clear_LDT(void)
{
@@ -368,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected.
*/
-#define set_intr_gate(n, addr) \
+#define set_intr_gate_notrace(n, addr) \
do { \
BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
__KERNEL_CS); \
+ } while (0)
+
+#define set_intr_gate(n, addr) \
+ do { \
+ set_intr_gate_notrace(n, addr); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \
} while (0)
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 97534a7d38e3..f226df064660 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -10,6 +10,12 @@
* cpu_feature_enabled().
*/
+#ifdef CONFIG_X86_INTEL_MPX
+# define DISABLE_MPX 0
+#else
+# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
+#endif
+
#ifdef CONFIG_X86_64
# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
@@ -34,6 +40,6 @@
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
#define DISABLED_MASK8 0
-#define DISABLED_MASK9 0
+#define DISABLED_MASK9 (DISABLE_MPX)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 0bdb0c54d9a1..fe884e18fa6e 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -70,7 +70,7 @@
#define MAX_DMA_CHANNELS 8
/* 16MB ISA DMA zone */
-#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT)
+#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
CFI_ADJUST_CFA_OFFSET 8
.endm
+ .macro pushq_cfi_reg reg
+ pushq %\reg
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET \reg, 0
+ .endm
+
.macro popq_cfi reg
popq \reg
CFI_ADJUST_CFA_OFFSET -8
.endm
+ .macro popq_cfi_reg reg
+ popq %\reg
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE \reg
+ .endm
+
.macro pushfq_cfi
pushfq
CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
CFI_ADJUST_CFA_OFFSET 4
.endm
+ .macro pushl_cfi_reg reg
+ pushl %\reg
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET \reg, 0
+ .endm
+
.macro popl_cfi reg
popl \reg
CFI_ADJUST_CFA_OFFSET -4
.endm
+ .macro popl_cfi_reg reg
+ popl %\reg
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE \reg
+ .endm
+
.macro pushfl_cfi
pushfl
CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 9b11757975d0..3738b138b843 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -2,6 +2,8 @@
#define _ASM_X86_EFI_H
#include <asm/i387.h>
+#include <asm/pgtable.h>
+
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
extern struct efi_scratch efi_scratch;
extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
@@ -158,6 +160,30 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
}
#endif /* CONFIG_EFI_MIXED */
+
+/* arch specific definitions used by the stub code */
+
+struct efi_config {
+ u64 image_handle;
+ u64 table;
+ u64 allocate_pool;
+ u64 allocate_pages;
+ u64 get_memory_map;
+ u64 free_pool;
+ u64 free_pages;
+ u64 locate_handle;
+ u64 handle_protocol;
+ u64 exit_boot_services;
+ u64 text_output;
+ efi_status_t (*call)(unsigned long, ...);
+ bool is64;
+} __packed;
+
+__pure const struct efi_config *__efi_early(void);
+
+#define efi_call_early(f, ...) \
+ __efi_early()->call(__efi_early()->f, __VA_ARGS__);
+
extern bool efi_reboot_required(void);
#else
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..935588d95c82 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do { \
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
- regs->ax = regs->bx = regs->cx = regs->dx = 0;
- regs->si = regs->di = regs->bp = 0;
+ /* Commented-out registers are cleared in stub_execve */
+ /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+ regs->si = regs->di /*= regs->bp*/ = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
- regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+ /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
t->fs = t->gs = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;
@@ -365,6 +366,7 @@ enum align_flags {
struct va_alignment {
int flags;
unsigned long mask;
+ unsigned long bits;
} ____cacheline_aligned;
extern struct va_alignment va_align;
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
index 2519d0679d99..c3dd5e71f439 100644
--- a/arch/x86/include/asm/fb.h
+++ b/arch/x86/include/asm/fb.h
@@ -8,8 +8,12 @@
static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
unsigned long off)
{
+ unsigned long prot;
+
+ prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK;
if (boot_cpu_data.x86 > 3)
- pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
+ pgprot_val(vma->vm_page_prot) =
+ prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
}
extern int fb_is_primary_device(struct fb_info *info);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index ffb1733ac91f..f80d70009ff8 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -69,7 +69,9 @@ enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_HOLE,
#else
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
+#endif
#ifdef CONFIG_PARAVIRT_CLOCK
PVCLOCK_FIXMAP_BEGIN,
PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
@@ -136,9 +138,7 @@ enum fixed_addresses {
extern void reserve_top_address(unsigned long reserve);
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE)
extern int fixmaps_set;
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e97622f57722..da5e96756570 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+ per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+ tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+ return new == this_cpu_read_stable(fpu_owner_task) &&
+ cpu == new->thread.fpu.last_cpu;
+}
+
static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
static inline void fx_finit(struct i387_fxsave_struct *fx)
{
- memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
@@ -207,7 +234,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
if (config_enabled(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
else if (config_enabled(CONFIG_AS_FXSAVEQ))
- asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
else {
/* Using "rex64; fxsave %0" is broken because, if the memory
* operand uses any extended registers for addressing, a second
@@ -290,9 +317,11 @@ static inline int fpu_restore_checking(struct fpu *fpu)
static inline int restore_fpu_checking(struct task_struct *tsk)
{
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. "m" is a random variable that should be in L1 */
+ /*
+ * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+ * pending. Clear the x87 state here by setting it to fixed values.
+ * "m" is a random variable that should be in L1.
+ */
if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
@@ -349,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
__thread_set_has_fpu(tsk);
}
-static inline void __drop_fpu(struct task_struct *tsk)
+static inline void drop_fpu(struct task_struct *tsk)
{
+ /*
+ * Forget coprocessor state..
+ */
+ preempt_disable();
+ tsk->thread.fpu_counter = 0;
+
if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
@@ -358,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk)
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
+
+ clear_stopped_child_used_math(tsk);
+ preempt_enable();
}
-static inline void drop_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
{
- /*
- * Forget coprocessor state..
- */
- preempt_disable();
- tsk->thread.fpu_counter = 0;
- __drop_fpu(tsk);
- clear_used_math();
- preempt_enable();
+ if (use_xsave())
+ xrstor_state(init_xstate_buf, -1);
+ else
+ fxrstor_checking(&init_xstate_buf->i387);
}
-static inline void drop_init_fpu(struct task_struct *tsk)
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
{
if (!use_eager_fpu())
drop_fpu(tsk);
- else {
- if (use_xsave())
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
- }
+ else
+ restore_init_xstate();
}
/*
@@ -398,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
*/
typedef struct { int preload; } fpu_switch_t;
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
- per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == this_cpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
@@ -424,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
- new->thread.fpu_counter > 5);
+ fpu.preload = tsk_used_math(new) &&
+ (use_eager_fpu() || new->thread.fpu_counter > 5);
+
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
- cpu = ~0;
- old->thread.fpu.last_cpu = cpu;
- old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
+ task_disable_lazy_fpu_restore(old);
+ else
+ old->thread.fpu.last_cpu = cpu;
+
+ /* But leave fpu_owner_task! */
+ old->thread.fpu.has_fpu = 0;
/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
@@ -441,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
stts();
} else {
old->thread.fpu_counter = 0;
- old->thread.fpu.last_cpu = ~0;
+ task_disable_lazy_fpu_restore(old);
if (fpu.preload) {
new->thread.fpu_counter++;
- if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+ if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@@ -464,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
- drop_init_fpu(new);
+ fpu_reset_state(new);
}
}
@@ -493,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
}
/*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
*
* NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
*/
static inline void user_fpu_begin(void)
{
@@ -518,24 +540,6 @@ static inline void __save_fpu(struct task_struct *tsk)
}
/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
- if (use_eager_fpu()) {
- __save_fpu(tsk);
- return;
- }
-
- preempt_disable();
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- preempt_enable();
-}
-
-/*
* i387 state interaction
*/
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index e1f7fecaa7d6..f45acad3c4b6 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -1,39 +1,6 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
-#ifdef __ASSEMBLY__
-
- /* skip is set if the stack was already partially adjusted */
- .macro MCOUNT_SAVE_FRAME skip=0
- /*
- * We add enough stack to save all regs.
- */
- subq $(SS+8-\skip), %rsp
- movq %rax, RAX(%rsp)
- movq %rcx, RCX(%rsp)
- movq %rdx, RDX(%rsp)
- movq %rsi, RSI(%rsp)
- movq %rdi, RDI(%rsp)
- movq %r8, R8(%rsp)
- movq %r9, R9(%rsp)
- /* Move RIP to its proper location */
- movq SS+8(%rsp), %rdx
- movq %rdx, RIP(%rsp)
- .endm
-
- .macro MCOUNT_RESTORE_FRAME skip=0
- movq R9(%rsp), %r9
- movq R8(%rsp), %r8
- movq RDI(%rsp), %rdi
- movq RSI(%rsp), %rsi
- movq RDX(%rsp), %rdx
- movq RCX(%rsp), %rcx
- movq RAX(%rsp), %rax
- addq $(SS+8-\skip), %rsp
- .endm
-
-#endif
-
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CC_USING_FENTRY
# define MCOUNT_ADDR ((long)(__fentry__))
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h
deleted file mode 100644
index e8c58f88b1d4..000000000000
--- a/arch/x86/include/asm/hash.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_X86_HASH_H
-#define _ASM_X86_HASH_H
-
-struct fast_hash_ops;
-extern void setup_arch_fast_hash(struct fast_hash_ops *ops);
-
-#endif /* _ASM_X86_HASH_H */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 302a323b3f67..04e9d023168f 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -38,17 +38,20 @@ extern unsigned long highstart_pfn, highend_pfn;
/*
* Ordering is:
*
- * FIXADDR_TOP
- * fixed_addresses
- * FIXADDR_START
- * temp fixed addresses
- * FIXADDR_BOOT_START
- * Persistent kmap area
- * PKMAP_BASE
- * VMALLOC_END
- * Vmalloc area
- * VMALLOC_START
- * high_memory
+ * high memory on: high_memory off:
+ * FIXADDR_TOP FIXADDR_TOP
+ * fixed addresses fixed addresses
+ * FIXADDR_START FIXADDR_START
+ * temp fixed addresses/persistent kmap area VMALLOC_END
+ * PKMAP_BASE temp fixed addresses/vmalloc area
+ * VMALLOC_END VMALLOC_START
+ * vmalloc area high_memory
+ * VMALLOC_START
+ * high_memory
+ *
+ * The temp fixed area is only used during boot for early_ioremap(), and
+ * it is unused when the ioremap() is functional. vmalloc/pkmap area become
+ * available after early boot so the temp fixed area is available for re-use.
*/
#define LAST_PKMAP_MASK (LAST_PKMAP-1)
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index ef1c4d2d41ec..6c98be864a75 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -12,6 +12,7 @@
*/
struct arch_hw_breakpoint {
unsigned long address;
+ unsigned long mask;
u8 len;
u8 type;
};
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906d83df..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void);
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
#endif /* CONFIG_TRACING */
-/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
-extern unsigned long io_apic_irqs;
-
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-
-struct io_apic_irq_attr {
- int ioapic;
- int ioapic_pin;
- int trigger;
- int polarity;
-};
-
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
- int ioapic, int ioapic_pin,
- int trigger, int polarity)
-{
- irq_attr->ioapic = ioapic;
- irq_attr->ioapic_pin = ioapic_pin;
- irq_attr->trigger = trigger;
- irq_attr->polarity = polarity;
-}
-
+#ifdef CONFIG_IRQ_REMAP
/* Intel specific interrupt remapping information */
struct irq_2_iommu {
struct intel_iommu *iommu;
@@ -131,14 +108,12 @@ struct irq_2_irte {
u16 devid; /* Device ID for IRTE table */
u16 index; /* Index into IRTE table*/
};
+#endif /* CONFIG_IRQ_REMAP */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+struct irq_data;
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
u8 vector;
@@ -150,18 +125,39 @@ struct irq_cfg {
struct irq_2_irte irq_2_irte;
};
#endif
+ union {
+#ifdef CONFIG_X86_IO_APIC
+ struct {
+ struct list_head irq_2_pin;
+ };
+#endif
+ };
};
+extern struct irq_cfg *irq_cfg(unsigned int irq);
+extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
+extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
+extern void setup_vector_irq(int cpu);
+#ifdef CONFIG_SMP
extern void send_cleanup_vector(struct irq_cfg *);
+extern void irq_complete_move(struct irq_cfg *cfg);
+#else
+static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void irq_complete_move(struct irq_cfg *c) { }
+#endif
-struct irq_data;
-int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
- unsigned int *dest_id);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
-extern void setup_ioapic_dest(void);
-
-extern void enable_IO_APIC(void);
+extern int apic_retrigger_irq(struct irq_data *data);
+extern void apic_ack_edge(struct irq_data *data);
+extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ unsigned int *dest_id);
+#else /* CONFIG_X86_LOCAL_APIC */
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+#endif /* CONFIG_X86_LOCAL_APIC */
/* Statistics */
extern atomic_t irq_err_count;
@@ -185,9 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
-extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
#ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
#endif
#define VECTOR_UNDEFINED (-1)
@@ -195,17 +191,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void setup_vector_irq(int cpu);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
-extern void __setup_vector_irq(int cpu);
-#else
-static inline void lock_vector_lock(void) {}
-static inline void unlock_vector_lock(void) {}
-static inline void __setup_vector_irq(int cpu) {}
-#endif
#endif /* !ASSEMBLY_ */
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ed8089d69094..6eb6fcb83f63 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -40,8 +40,8 @@ extern void __kernel_fpu_end(void);
static inline void kernel_fpu_begin(void)
{
- WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
+ WARN_ON_ONCE(!irq_fpu_usable());
__kernel_fpu_begin();
}
@@ -51,6 +51,10 @@ static inline void kernel_fpu_end(void)
preempt_enable();
}
+/* Must be called with preempt disabled */
+extern void kernel_fpu_disable(void);
+extern void kernel_fpu_enable(void);
+
/*
* Some instructions like VIA's padlock instructions generate a spurious
* DNA fault but don't modify SSE registers. And these instructions
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH BIT(31)
+#define IMR_CPU_SNOOP BIT(30) /* Applicable only to write */
+#define IMR_RMU BIT(29)
+#define IMR_VC1_SAI_ID3 BIT(15)
+#define IMR_VC1_SAI_ID2 BIT(14)
+#define IMR_VC1_SAI_ID1 BIT(13)
+#define IMR_VC1_SAI_ID0 BIT(12)
+#define IMR_VC0_SAI_ID3 BIT(11)
+#define IMR_VC0_SAI_ID2 BIT(10)
+#define IMR_VC0_SAI_ID1 BIT(9)
+#define IMR_VC0_SAI_ID0 BIT(8)
+#define IMR_CPU_0 BIT(1) /* SMM mode */
+#define IMR_CPU BIT(0) /* Non SMM mode */
+#define IMR_ACCESS_NONE 0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL 0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL 0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX 0x08
+#define QUARK_X1000_IMR_REGBASE 0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN 0x400
+#define IMR_MASK (IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+ unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 48eb30a86062..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -65,10 +65,11 @@ struct insn {
unsigned char x86_64;
const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
const insn_byte_t *next_byte;
};
-#define MAX_INSN_SIZE 16
+#define MAX_INSN_SIZE 15
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
@@ -96,7 +97,7 @@ struct insn {
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
-extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
extern void insn_get_modrm(struct insn *insn);
@@ -115,12 +116,13 @@ static inline void insn_get_attribute(struct insn *insn)
extern int insn_rip_relative(struct insn *insn);
/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
{
#ifdef CONFIG_X86_64
- insn_init(insn, kaddr, 1);
+ insn_init(insn, kaddr, buf_len, 1);
#else /* CONFIG_X86_32 */
- insn_init(insn, kaddr, 0);
+ insn_init(insn, kaddr, buf_len, 0);
#endif
}
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index e34e097b6f9d..705d35708a50 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
#define SFI_MTMR_MAX_NUM 8
#define SFI_MRTC_MAX 8
-extern struct console early_mrst_console;
-extern void mrst_early_console_init(void);
-
extern struct console early_hsu_console;
extern void hsu_early_console_init(const char *);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index b8237d8a1e0c..34a5b93704d3 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -74,6 +74,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define __raw_readw __readw
#define __raw_readl __readl
+#define writeb_relaxed(v, a) __writeb(v, a)
+#define writew_relaxed(v, a) __writew(v, a)
+#define writel_relaxed(v, a) __writel(v, a)
#define __raw_writeb __writeb
#define __raw_writew __writew
#define __raw_writel __writel
@@ -86,6 +89,7 @@ build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
#define readq_relaxed(a) readq(a)
+#define writeq_relaxed(v, a) writeq(v, a)
#define __raw_readq(a) readq(a)
#define __raw_writeq(val, addr) writeq(val, addr)
@@ -310,11 +314,11 @@ BUILDIO(b, b, char)
BUILDIO(w, w, short)
BUILDIO(l, , int)
-extern void *xlate_dev_mem_ptr(unsigned long phys);
-extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
+extern void *xlate_dev_mem_ptr(phys_addr_t phys);
+extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
- unsigned long prot_val);
+ enum page_cache_mode pcm);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
extern bool is_early_ioremap_ptep(pte_t *ptep);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 1733ab49ac5e..2f91685fe1cd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -132,6 +132,10 @@ extern int noioapicquirk;
/* -1 if "noapic" boot option passed */
extern int noioapicreroute;
+extern unsigned long io_apic_irqs;
+
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
+
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
@@ -139,18 +143,15 @@ extern int noioapicreroute;
#define io_apic_assign_pci_irqs \
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-struct io_apic_irq_attr;
struct irq_cfg;
extern void ioapic_insert_resources(void);
+extern int arch_early_ioapic_init(void);
extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
unsigned int, int,
struct io_apic_irq_attr *);
extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
-extern void native_compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id);
extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
extern int save_ioapic_entries(void);
@@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
+struct io_apic_irq_attr {
+ int ioapic;
+ int ioapic_pin;
+ int trigger;
+ int polarity;
+};
+
enum ioapic_domain_type {
IOAPIC_DOMAIN_INVALID,
IOAPIC_DOMAIN_LEGACY,
@@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
extern u32 mp_pin_to_gsi(int ioapic, int pin);
extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
extern void mp_unmap_irq(int irq);
-extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
- struct ioapic_domain_cfg *cfg);
+extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+ struct ioapic_domain_cfg *cfg);
+extern int mp_unregister_ioapic(u32 gsi_base);
+extern int mp_ioapic_registered(u32 gsi_base);
extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq);
extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
@@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
extern void io_apic_eoi(unsigned int apic, unsigned int vector);
-extern bool mp_should_keep_irq(struct device *dev);
-
+extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void setup_ioapic_dest(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
+extern void print_IO_APICs(void);
#else /* !CONFIG_X86_IO_APIC */
+#define IO_APIC_IRQ(x) 0
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
static inline void ioapic_insert_resources(void) { }
+static inline int arch_early_ioapic_init(void) { return 0; }
+static inline void print_IO_APICs(void) {}
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
static inline void mp_unmap_irq(int irq) { }
-static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
static inline int save_ioapic_entries(void)
{
@@ -262,8 +278,12 @@ static inline void disable_ioapic_support(void) { }
#define native_io_apic_print_entries NULL
#define native_ioapic_set_affinity NULL
#define native_setup_ioapic_entry NULL
-#define native_compose_msi_msg NULL
#define native_eoi_ioapic_pin NULL
+
+static inline void setup_IO_APIC(void) { }
+static inline void enable_IO_APIC(void) { }
+static inline void setup_ioapic_dest(void) { }
+
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f42a04735a0a..e37d6b3ad983 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -79,11 +79,12 @@ struct iommu_table_entry {
* d). Similar to the 'init', except that this gets called from pci_iommu_init
* where we do have a memory allocator.
*
- * The standard vs the _FINISH differs in that the _FINISH variant will
- * continue detecting other IOMMUs in the call list after the
- * the detection routine returns a positive number. The _FINISH will
- * stop the execution chain. Both will still call the 'init' and
- * 'late_init' functions if they are set.
+ * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
+ * in that the former will continue detecting other IOMMUs in the call
+ * list after the detection routine returns a positive number, while the
+ * latter will stop the execution chain upon first successful detection.
+ * Both variants will still call the 'init' and 'late_init' functions if
+ * they are set.
*/
#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
__IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index b7747c4c2cf2..6224d316c405 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,8 +33,6 @@ struct irq_cfg;
#ifdef CONFIG_IRQ_REMAP
-extern void setup_irq_remapping_ops(void);
-extern int irq_remapping_supported(void);
extern void set_irq_remapping_broken(void);
extern int irq_remapping_prepare(void);
extern int irq_remapping_enable(void);
@@ -60,8 +58,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip);
#else /* CONFIG_IRQ_REMAP */
-static inline void setup_irq_remapping_ops(void) { }
-static inline int irq_remapping_supported(void) { return 0; }
static inline void set_irq_remapping_broken(void) { }
static inline int irq_remapping_prepare(void) { return -ENODEV; }
static inline int irq_remapping_enable(void) { return -ENODEV; }
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e3111d..666c89ec4bd7 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -126,6 +126,12 @@
#define NR_VECTORS 256
+#ifdef CONFIG_X86_LOCAL_APIC
+#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
+#else
+#define FIRST_SYSTEM_VECTOR NR_VECTORS
+#endif
+
#define FPU_IRQ 13
#define FIRST_VM86_IRQ 3
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define USERGS_SYSRET32 \
swapgs; \
sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32 \
- swapgs; \
- sti; \
- sysexit
#else
#define INTERRUPT_RETURN iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(flags);
}
+#endif /* !__ASSEMBLY__ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
+# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
#else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_X86_64
+# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
+# define LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
- SAVE_REST; \
- LOCKDEP_SYS_EXIT; \
- RESTORE_REST; \
+ call lockdep_sys_exit_thunk; \
cli; \
TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT \
+# else
+# define LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
popl %edx; \
popl %ecx; \
popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
-# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
+# define LOCKDEP_SYS_EXIT_IRQ
+# endif
#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
-# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
# define LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
#endif /* __ASSEMBLY__ */
+
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb4395a..a4c1cf7e93f8 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
@@ -44,4 +42,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
new file mode 100644
index 000000000000..8b22422fbad8
--- /dev/null
+++ b/arch/x86/include/asm/kasan.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_X86_KASAN_H
+#define _ASM_X86_KASAN_H
+
+/*
+ * Compiler uses shadow offset assuming that addresses start
+ * from 0. Kernel addresses don't start from 0, so shadow
+ * for kernel really starts from compiler's shadow offset +
+ * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
+ */
+#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
+ (0xffff800000000000ULL >> 3))
+/* 47 bits for kernel address -> (47 - 3) bits for shadow */
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3)))
+
+#ifndef __ASSEMBLY__
+
+extern pte_t kasan_zero_pte[];
+extern pte_t kasan_zero_pmd[];
+extern pte_t kasan_zero_pud[];
+
+#ifdef CONFIG_KASAN
+void __init kasan_map_early_shadow(pgd_t *pgd);
+void __init kasan_init(void);
+#else
+static inline void kasan_map_early_shadow(pgd_t *pgd) { }
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index eb181178fe0b..57a9d94fe160 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -208,6 +208,7 @@ struct x86_emulate_ops {
void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+ void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30d6a0c..dea2e7e962e3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,13 +33,11 @@
#define KVM_MAX_VCPUS 255
#define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
-#define KVM_MMIO_SIZE 16
-
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
@@ -51,6 +49,7 @@
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -82,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
#define KVM_PERMILLE_MMU_PAGES 20
#define KVM_MIN_ALLOC_MMU_PAGES 64
#define KVM_MMU_HASH_SHIFT 10
@@ -159,6 +153,18 @@ enum {
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
@@ -334,6 +340,7 @@ struct kvm_pmu {
enum {
KVM_DEBUGREG_BP_ENABLED = 1,
KVM_DEBUGREG_WONT_EXIT = 2,
+ KVM_DEBUGREG_RELOAD = 4,
};
struct kvm_vcpu_arch {
@@ -361,6 +368,7 @@ struct kvm_vcpu_arch {
int mp_state;
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
+ u64 ia32_xss;
/*
* Paging state of the vcpu
@@ -419,6 +427,9 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+ int maxphyaddr;
+
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
@@ -538,11 +549,20 @@ struct kvm_arch_memory_slot {
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
+/*
+ * We use as the mode the number of bits allocated in the LDR for the
+ * logical processor ID. It happens that these are all powers of two.
+ * This makes it is very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER 4
+#define KVM_APIC_MODE_XAPIC_FLAT 8
+#define KVM_APIC_MODE_X2APIC 16
+
struct kvm_apic_map {
struct rcu_head rcu;
- u8 ldr_bits;
- /* fields bellow are used to decode ldr values in different modes */
- u32 cid_shift, cid_mask, lid_mask;
+ u8 mode;
struct kvm_lapic *phys_map[256];
/* first index is cluster id second is cpu id in a cluster */
struct kvm_lapic *logical_map[16][16];
@@ -602,6 +622,9 @@ struct kvm_arch {
struct kvm_xen_hvm_config xen_hvm_config;
+ /* reads protected by irq_srcu, writes by irq_lock */
+ struct hlist_head mask_notifier_list;
+
/* fields used by HYPER-V emulation */
u64 hv_guest_os_id;
u64 hv_hypercall;
@@ -610,6 +633,8 @@ struct kvm_arch {
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
+
+ bool boot_vcpu_runs_old_kvmclock;
};
struct kvm_vm_stat {
@@ -638,6 +663,7 @@ struct kvm_vcpu_stat {
u32 irq_window_exits;
u32 nmi_window_exits;
u32 halt_exits;
+ u32 halt_successful_poll;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
@@ -659,6 +685,16 @@ struct msr_data {
u64 data;
};
+struct kvm_lapic_irq {
+ u32 vector;
+ u32 delivery_mode;
+ u32 dest_mode;
+ u32 level;
+ u32 trig_mode;
+ u32 shorthand;
+ u32 dest_id;
+};
+
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -767,10 +803,36 @@ struct kvm_x86_ops {
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
+ bool (*xsaves_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+
+ /*
+ * Arch-specific dirty logging hooks. These hooks are only supposed to
+ * be valid if the specific arch has hardware-accelerated dirty logging
+ * mechanism. Currently only for PML on VMX.
+ *
+ * - slot_enable_log_dirty:
+ * called when enabling log dirty mode for the slot.
+ * - slot_disable_log_dirty:
+ * called when disabling log dirty mode for the slot.
+ * also called when slot is created with log dirty disabled.
+ * - flush_log_dirty:
+ * called before reporting dirty_bitmap to userspace.
+ * - enable_log_dirty_pt_masked:
+ * called when reenabling log dirty for the GFNs in the mask after
+ * corresponding bits are cleared in slot->dirty_bitmap.
+ */
+ void (*slot_enable_log_dirty)(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+ void (*slot_disable_log_dirty)(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+ void (*flush_log_dirty)(struct kvm *kvm);
+ void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t offset, unsigned long mask);
};
struct kvm_arch_async_pf {
@@ -803,10 +865,19 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask);
+void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+void kvm_mmu_slot_set_dirty(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
+void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
@@ -818,6 +889,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
+struct kvm_irq_mask_notifier {
+ void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+ int irq;
+ struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+ bool mask);
+
extern bool tdp_enabled;
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -859,11 +943,12 @@ struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
@@ -895,6 +980,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
static inline int __kvm_irq_line_state(unsigned long *irq_state,
int irq_source_id, int level)
@@ -1053,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
@@ -1066,6 +1151,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e62cf897f781..c1adf33fdd0d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
static inline bool kvm_para_available(void)
{
- return 0;
+ return false;
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 879fd7d33877..ef01fef3eebc 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -16,7 +16,6 @@
#define LHCALL_SET_PTE 14
#define LHCALL_SET_PGD 15
#define LHCALL_LOAD_TLS 16
-#define LHCALL_NOTIFY 17
#define LHCALL_LOAD_GDT_ENTRY 18
#define LHCALL_SEND_INTERRUPTS 19
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
new file mode 100644
index 000000000000..a455a53d789a
--- /dev/null
+++ b/arch/x86/include/asm/livepatch.h
@@ -0,0 +1,46 @@
+/*
+ * livepatch.h - x86-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ * Copyright (C) 2014 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_X86_LIVEPATCH_H
+#define _ASM_X86_LIVEPATCH_H
+
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+#ifndef CC_USING_FENTRY
+ return 1;
+#endif
+ return 0;
+}
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+ unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+ regs->ip = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif /* _ASM_X86_LIVEPATCH_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f761e5..1f5a86d518db 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
+#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
+
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
* bits 15:0. But bit 12 is the 'F' bit, defined for corrected
@@ -78,7 +82,6 @@
/* Software defined banks */
#define MCE_EXTENDED_BANK 128
#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
-#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
#define MCE_LOG_LEN 32
#define MCE_LOG_SIGNATURE "MACHINECHECK"
@@ -113,6 +116,12 @@ struct mca_config {
u32 rip_msr;
};
+struct mce_vendor_flags {
+ __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
+ __reserved_0 : 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
extern struct mca_config mca_cfg;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -125,9 +134,11 @@ extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
#else
static inline int mcheck_init(void) { return 0; }
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@@ -180,14 +191,13 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
- MCP_TIMESTAMP = (1 << 0), /* log time stamp */
- MCP_UC = (1 << 1), /* log uncorrected errors */
- MCP_DONTLOG = (1 << 2), /* only clear, don't log */
+ MCP_TIMESTAMP = BIT(0), /* log time stamp */
+ MCP_UC = BIT(1), /* log uncorrected errors */
+ MCP_DONTLOG = BIT(2), /* only clear, don't log */
};
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
int mce_notify_irq(void);
-void mce_notify_process(void);
DECLARE_PER_CPU(struct mce, injectm);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 64dc362506b7..2fb20d6f7e23 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -75,9 +75,83 @@ static inline void __exit exit_amd_microcode(void) {}
#ifdef CONFIG_MICROCODE_EARLY
#define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx) \
+ (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_vendor() to get vendor id for AP.
+ *
+ * x86_vendor() gets vendor information directly from CPUID.
+ */
+static inline int x86_vendor(void)
+{
+ u32 eax = 0x00000000;
+ u32 ebx, ecx = 0, edx;
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+ return X86_VENDOR_INTEL;
+
+ if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+ return X86_VENDOR_AMD;
+
+ return X86_VENDOR_UNKNOWN;
+}
+
+static inline unsigned int __x86_family(unsigned int sig)
+{
+ unsigned int x86;
+
+ x86 = (sig >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (sig >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_family(void)
+{
+ u32 eax = 0x00000001;
+ u32 ebx, ecx = 0, edx;
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ return __x86_family(eax);
+}
+
+static inline unsigned int x86_model(unsigned int sig)
+{
+ unsigned int x86, model;
+
+ x86 = __x86_family(sig);
+
+ model = (sig >> 4) & 0xf;
+
+ if (x86 == 0x6 || x86 == 0xf)
+ model += ((sig >> 16) & 0xf) << 4;
+
+ return model;
+}
+
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
extern int __init save_microcode_in_initrd(void);
+void reload_early_microcode(void);
#else
static inline void __init load_ucode_bsp(void) {}
static inline void load_ucode_ap(void) {}
@@ -85,6 +159,7 @@ static inline int __init save_microcode_in_initrd(void)
{
return 0;
}
+static inline void reload_early_microcode(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index b7b10b82d3e5..af935397e053 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -59,7 +59,7 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
extern int apply_microcode_amd(int cpu);
-extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
#define PATCH_MAX_SIZE PAGE_SIZE
extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -68,10 +68,12 @@ extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
extern void __init load_ucode_amd_bsp(void);
extern void load_ucode_amd_ap(void);
extern int __init save_microcode_in_initrd_amd(void);
+void reload_ucode_amd(void);
#else
static inline void __init load_ucode_amd_bsp(void) {}
static inline void load_ucode_amd_ap(void) {}
static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
+void reload_ucode_amd(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index bbe296e0bce1..2b9209c46ca9 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -56,23 +56,28 @@ struct extended_sigtable {
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-extern int
-get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc);
extern int microcode_sanity_check(void *mc, int print_err);
-extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
-extern int
-update_match_revision(struct microcode_header_intel *mc_header, int rev);
+extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc);
+
+static inline int
+revision_is_newer(struct microcode_header_intel *mc_header, int rev)
+{
+ return (mc_header->rev <= rev) ? 0 : 1;
+}
#ifdef CONFIG_MICROCODE_INTEL_EARLY
extern void __init load_ucode_intel_bsp(void);
extern void load_ucode_intel_ap(void);
extern void show_ucode_info_early(void);
extern int __init save_microcode_in_initrd_intel(void);
+void reload_ucode_intel(void);
#else
static inline __init void load_ucode_intel_bsp(void) {}
static inline void load_ucode_intel_ap(void) {}
static inline void show_ucode_info_early(void) {}
static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
+static inline void reload_ucode_intel(void) {}
#endif
#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
struct mutex lock;
void __user *vdso;
+
+ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 166af2a8e865..883f6b933fa4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -10,15 +10,29 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
+#include <asm/mpx.h>
#ifndef CONFIG_PARAVIRT
-#include <asm-generic/mm_hooks.h>
-
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
}
#endif /* !CONFIG_PARAVIRT */
+#ifdef CONFIG_PERF_EVENTS
+extern struct static_key rdpmc_always_available;
+
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+ if (static_key_true(&rdpmc_always_available) ||
+ atomic_read(&mm->context.perf_rdpmc_allowed))
+ cr4_set_bits(X86_CR4_PCE);
+ else
+ cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
/*
* Used for LDT copy/destruction.
*/
@@ -53,7 +67,21 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
- /* Load the LDT, if the LDT is different: */
+ /* Load per-mm CR4 state */
+ load_mm_cr4(next);
+
+ /*
+ * Load the LDT, if the LDT is different.
+ *
+ * It's possible that prev->context.ldt doesn't match
+ * the LDT register. This can happen if leave_mm(prev)
+ * was called and then modify_ldt changed
+ * prev->context.ldt but suppressed an IPI to this CPU.
+ * In this case, prev->context.ldt != NULL, because we
+ * never free an LDT while the mm still exists. That
+ * means that next->context.ldt != prev->context.ldt,
+ * because mms never share an LDT.
+ */
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
@@ -77,6 +105,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
load_LDT_nolock(&next->context);
}
}
@@ -102,4 +131,45 @@ do { \
} while (0)
#endif
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ paravirt_arch_dup_mmap(oldmm, mm);
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+ paravirt_arch_exit_mmap(mm);
+}
+
+static inline void arch_bprm_mm_init(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ mpx_mm_init(mm);
+}
+
+static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ /*
+ * mpx_notify_unmap() goes and reads a rarely-hot
+ * cacheline in the mm_struct. That can be expensive
+ * enough to be seen in profiles.
+ *
+ * The mpx_notify_unmap() call and its contents have been
+ * observed to affect munmap() performance on hardware
+ * where MPX is not present.
+ *
+ * The unlikely() optimizes for the fast case: no MPX
+ * in the CPU, or no MPX use in the process. Even if
+ * we get this wrong (in the unlikely event that MPX
+ * is widely enabled on some system) the overhead of
+ * MPX itself (reading bounds tables) is expected to
+ * overwhelm the overhead of getting this unlikely()
+ * consistently wrong.
+ */
+ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+ mpx_notify_unmap(mm, vma, start, end);
+}
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
new file mode 100644
index 000000000000..a952a13d59a7
--- /dev/null
+++ b/arch/x86/include/asm/mpx.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_MPX_H
+#define _ASM_X86_MPX_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/insn.h>
+
+/*
+ * NULL is theoretically a valid place to put the bounds
+ * directory, so point this at an invalid address.
+ */
+#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1)
+#define MPX_BNDCFG_ENABLE_FLAG 0x1
+#define MPX_BD_ENTRY_VALID_FLAG 0x1
+
+#ifdef CONFIG_X86_64
+
+/* upper 28 bits [47:20] of the virtual address in 64-bit used to
+ * index into bounds directory (BD).
+ */
+#define MPX_BD_ENTRY_OFFSET 28
+#define MPX_BD_ENTRY_SHIFT 3
+/* bits [19:3] of the virtual address in 64-bit used to index into
+ * bounds table (BT).
+ */
+#define MPX_BT_ENTRY_OFFSET 17
+#define MPX_BT_ENTRY_SHIFT 5
+#define MPX_IGN_BITS 3
+#define MPX_BD_ENTRY_TAIL 3
+
+#else
+
+#define MPX_BD_ENTRY_OFFSET 20
+#define MPX_BD_ENTRY_SHIFT 2
+#define MPX_BT_ENTRY_OFFSET 10
+#define MPX_BT_ENTRY_SHIFT 4
+#define MPX_IGN_BITS 2
+#define MPX_BD_ENTRY_TAIL 2
+
+#endif
+
+#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
+#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
+
+#define MPX_BNDSTA_TAIL 2
+#define MPX_BNDCFG_TAIL 12
+#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
+#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
+#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1))
+
+#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
+#define MPX_BNDSTA_ERROR_CODE 0x3
+
+#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
+#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
+#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
+ MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
+#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
+ MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
+
+#ifdef CONFIG_X86_INTEL_MPX
+siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
+ struct xsave_struct *xsave_buf);
+int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
+static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
+{
+ return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
+}
+static inline void mpx_mm_init(struct mm_struct *mm)
+{
+ /*
+ * NULL is theoretically a valid place to put the bounds
+ * directory, so point this at an invalid address.
+ */
+ mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
+}
+void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+#else
+static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
+ struct xsave_struct *xsave_buf)
+{
+ return NULL;
+}
+static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
+{
+ return -EINVAL;
+}
+static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
+{
+ return 0;
+}
+static inline void mpx_mm_init(struct mm_struct *mm)
+{
+}
+static inline void mpx_notify_unmap(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_X86_INTEL_MPX */
+
+#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ trace_hardirqs_on();
+ /* "mwait %eax, %ecx;" */
+ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define STACKFAULT_STACK 0
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 0
#define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index f408caf73430..b3bebf9e5746 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,6 +39,8 @@ void copy_page(void *to, void *from);
#endif /* !__ASSEMBLY__ */
-#define __HAVE_ARCH_GATE_AREA 1
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+# define __HAVE_ARCH_GATE_AREA 1
+#endif
#endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 678205195ae1..4edd53b79a81 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,25 +1,30 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#define THREAD_SIZE_ORDER 2
+#ifdef CONFIG_KASAN
+#define KASAN_STACK_ORDER 1
+#else
+#define KASAN_STACK_ORDER 0
+#endif
+
+#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
-#define EXCEPTION_STACK_ORDER 0
+#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 3
+#define MCE_STACK 4
+#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cd6e1610e29e..5f6051d5d139 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
@@ -330,13 +330,13 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}
-static inline void arch_exit_mmap(struct mm_struct *mm)
+static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}
@@ -976,15 +976,20 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
#else /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
+#ifndef __ASSEMBLY__
+static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+}
+
+static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
+{
+}
+#endif /* __ASSEMBLY__ */
#endif /* !CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index e2c1668dde7a..91bc4ba95f91 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -11,16 +11,17 @@ static const int pat_enabled;
#endif
extern void pat_init(void);
+void pat_init_cache_modes(void);
extern int reserve_memtype(u64 start, u64 end,
- unsigned long req_type, unsigned long *ret_type);
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
- unsigned long flag);
+ enum page_cache_mode pcm);
int io_reserve_memtype(resource_size_t start, resource_size_t end,
- unsigned long *type);
+ enum page_cache_mode *pcm);
void io_free_memtype(resource_size_t start, resource_size_t end);
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0e683f..4e370a5d8117 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void);
#ifdef CONFIG_PCI_MSI
/* implemented in arch/x86/kernel/apic/io_apic. */
struct msi_desc;
+void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
+ unsigned int dest, struct msi_msg *msg, u8 hpet_id);
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev);
int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
unsigned int irq_base, unsigned int irq_offset);
#else
+#define native_compose_msi_msg NULL
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+extern bool mp_should_keep_irq(struct device *dev);
+
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index fd472181a1d0..e0ba66ca68c6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -64,7 +64,7 @@
#define __percpu_prefix ""
#endif
-#define __percpu_arg(x) __percpu_prefix "%P" #x
+#define __percpu_arg(x) __percpu_prefix "%" #x
/*
* Initialized pointers to per-cpu variables needed for the boot
@@ -179,29 +179,58 @@ do { \
} \
} while (0)
-#define percpu_from_op(op, var, constraint) \
+#define percpu_from_op(op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
: "=q" (pfo_ret__) \
- : constraint); \
+ : "m" (var)); \
break; \
case 2: \
asm(op "w "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
- : constraint); \
+ : "m" (var)); \
break; \
case 4: \
asm(op "l "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
- : constraint); \
+ : "m" (var)); \
break; \
case 8: \
asm(op "q "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
- : constraint); \
+ : "m" (var)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+ pfo_ret__; \
+})
+
+#define percpu_stable_op(op, var) \
+({ \
+ typeof(var) pfo_ret__; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b "__percpu_arg(P1)",%0" \
+ : "=q" (pfo_ret__) \
+ : "p" (&(var))); \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_arg(P1)",%0" \
+ : "=r" (pfo_ret__) \
+ : "p" (&(var))); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_arg(P1)",%0" \
+ : "=r" (pfo_ret__) \
+ : "p" (&(var))); \
+ break; \
+ case 8: \
+ asm(op "q "__percpu_arg(P1)",%0" \
+ : "=r" (pfo_ret__) \
+ : "p" (&(var))); \
break; \
default: __bad_percpu_size(); \
} \
@@ -359,11 +388,11 @@ do { \
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
-#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
+#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
+#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
+#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -381,9 +410,9 @@ do { \
#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
-#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -435,7 +464,7 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@@ -444,7 +473,7 @@ do { \
#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@@ -522,7 +551,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8dfc9fd094a3..dc0f6ed35b08 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -177,6 +177,9 @@ struct x86_pmu_capability {
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
+#define IBS_CAPS_OPBRNFUSE (1U<<8)
+#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
+#define IBS_CAPS_OPDATA4 (1U<<10)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 206a87fdd22d..fd74a11959de 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,44 +62,8 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-/*
- * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range.
- */
-#define PTE_FILE_MAX_BITS 29
-#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
-#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
-
-#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
-#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
-
-#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
-#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
-
-static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
-{
- return (pgoff_t)
- (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3));
-}
-
-static __always_inline pte_t pgoff_to_pte(pgoff_t off)
-{
- return (pte_t){
- .pte_low =
- pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
- pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
- pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) +
- _PAGE_FILE,
- };
-}
-
/* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#define SWP_TYPE_BITS 5
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 81bb91b49a88..cdaa58c9b39e 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -176,18 +176,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif
-/*
- * Bits 0, 6 and 7 are taken in the low part of the pte,
- * put the 32 bits of offset into the high part.
- *
- * For soft-dirty tracking 11 bit is taken from
- * the low part of pte as well.
- */
-#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) \
- ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
-#define PTE_FILE_MAX_BITS 32
-
/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index aa97a070f09f..a0c35bf6cb92 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -9,9 +9,10 @@
/*
* Macro to mark a page protection value as UC-
*/
-#define pgprot_noncached(prot) \
- ((boot_cpu_data.x86 > 3) \
- ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \
+#define pgprot_noncached(prot) \
+ ((boot_cpu_data.x86 > 3) \
+ ? (__pgprot(pgprot_val(prot) | \
+ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
#ifndef __ASSEMBLY__
@@ -99,6 +100,11 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
+static inline int pmd_dirty(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_DIRTY;
+}
+
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
@@ -109,11 +115,6 @@ static inline int pte_write(pte_t pte)
return pte_flags(pte) & _PAGE_RW;
}
-static inline int pte_file(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_FILE;
-}
-
static inline int pte_huge(pte_t pte)
{
return pte_flags(pte) & _PAGE_PSE;
@@ -131,13 +132,7 @@ static inline int pte_exec(pte_t pte)
static inline int pte_special(pte_t pte)
{
- /*
- * See CONFIG_NUMA_BALANCING pte_numa in include/asm-generic/pgtable.h.
- * On x86 we have _PAGE_BIT_NUMA == _PAGE_BIT_GLOBAL+1 ==
- * __PAGE_BIT_SOFTW1 == _PAGE_BIT_SPECIAL.
- */
- return (pte_flags(pte) & _PAGE_SPECIAL) &&
- (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_PROTNONE));
+ return pte_flags(pte) & _PAGE_SPECIAL;
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -299,7 +294,7 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
- return pmd_clear_flags(pmd, _PAGE_PRESENT);
+ return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
}
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
@@ -323,21 +318,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
-static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
-{
- return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline pte_t pte_file_mksoft_dirty(pte_t pte)
-{
- return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
-}
-
-static inline int pte_file_soft_dirty(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_SOFT_DIRTY;
-}
-
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
/*
@@ -404,8 +384,8 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
- unsigned long flags,
- unsigned long new_flags)
+ enum page_cache_mode pcm,
+ enum page_cache_mode new_pcm)
{
/*
* PAT type is always WB for untracked ranges, so no need to check.
@@ -419,10 +399,10 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
* - request is uncached, return cannot be write-back
* - request is write-combine, return cannot be write-back
*/
- if ((flags == _PAGE_CACHE_UC_MINUS &&
- new_flags == _PAGE_CACHE_WB) ||
- (flags == _PAGE_CACHE_WC &&
- new_flags == _PAGE_CACHE_WB)) {
+ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
+ new_pcm == _PAGE_CACHE_MODE_WB) ||
+ (pcm == _PAGE_CACHE_MODE_WC &&
+ new_pcm == _PAGE_CACHE_MODE_WB)) {
return 0;
}
@@ -457,13 +437,6 @@ static inline int pte_same(pte_t a, pte_t b)
static inline int pte_present(pte_t a)
{
- return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
- _PAGE_NUMA);
-}
-
-#define pte_present_nonuma pte_present_nonuma
-static inline int pte_present_nonuma(pte_t a)
-{
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}
@@ -473,7 +446,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
if (pte_flags(a) & _PAGE_PRESENT)
return true;
- if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
+ if ((pte_flags(a) & _PAGE_PROTNONE) &&
mm_tlb_flush_pending(mm))
return true;
@@ -493,10 +466,27 @@ static inline int pmd_present(pmd_t pmd)
* the _PAGE_PSE flag will remain set at all times while the
* _PAGE_PRESENT bit is clear).
*/
- return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
- _PAGE_NUMA);
+ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+ == _PAGE_PROTNONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT))
+ == _PAGE_PROTNONE;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
static inline int pmd_none(pmd_t pmd)
{
/* Only check low word on 32-bit platforms, since it might be
@@ -553,11 +543,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
static inline int pmd_bad(pmd_t pmd)
{
-#ifdef CONFIG_NUMA_BALANCING
- /* pmd_numa check */
- if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
- return 0;
-#endif
return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
}
@@ -876,19 +861,16 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present_nonuma(pte));
return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
static inline int pte_swp_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present_nonuma(pte));
return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present_nonuma(pte));
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
#endif
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index ed5903be26fe..9fb2f2bc8245 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -37,7 +37,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#define LAST_PKMAP 1024
#endif
-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
+#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
& PMD_MASK)
#ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4572b2f30237..2ee781114d34 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -133,10 +133,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
/* PUD - Level3 access */
/* PMD - Level 2 access */
-#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
- _PAGE_FILE })
-#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
/* PTE - Level 1 access. */
@@ -145,13 +141,8 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
/* Encode and de-code a swap entry */
-#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
-#ifdef CONFIG_NUMA_BALANCING
-/* Automatic NUMA balancing needs to be distinguishable from swap entries */
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
-#else
+#define SWP_TYPE_BITS 5
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#endif
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 7166e25ecb57..602b6028c5b6 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -63,6 +63,8 @@ typedef struct { pteval_t pte; } pte_t;
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
+#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
#define EARLY_DYNAMIC_PAGE_TABLES 64
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 07789647bf33..8c7c10802e9c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -4,7 +4,7 @@
#include <linux/const.h>
#include <asm/page_types.h>
-#define FIRST_USER_ADDRESS 0
+#define FIRST_USER_ADDRESS 0UL
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
@@ -27,19 +27,9 @@
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
-/*
- * Swap offsets on configurations that allow automatic NUMA balancing use the
- * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
- * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
- * maximum possible swap space from 16TB to 8TB.
- */
-#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
-
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
-/* - set: nonlinear file mapping, saved PTE; unset:swap */
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
@@ -78,21 +68,6 @@
#endif
/*
- * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
- * that is not present. The hinting fault gathers numa placement statistics
- * (see pte_numa()). The bit is always zero when the PTE is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- */
-#ifdef CONFIG_NUMA_BALANCING
-#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
-#else
-#define _PAGE_NUMA (_AT(pteval_t, 0))
-#endif
-
-/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
* with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -114,7 +89,6 @@
#define _PAGE_NX (_AT(pteval_t, 0))
#endif
-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -125,14 +99,31 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY | _PAGE_NUMA)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
+ _PAGE_SOFT_DIRTY)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+
+/*
+ * The cache modes defined here are used to translate between pure SW usage
+ * and the HW defined cache mode bits and/or PAT entries.
+ *
+ * The resulting bits for PWT, PCD and PAT should be chosen in a way
+ * to have the WB mode at index 0 (all bits clear). This is the default
+ * right now and likely would break too much if changed.
+ */
+#ifndef __ASSEMBLY__
+enum page_cache_mode {
+ _PAGE_CACHE_MODE_WB = 0,
+ _PAGE_CACHE_MODE_WC = 1,
+ _PAGE_CACHE_MODE_UC_MINUS = 2,
+ _PAGE_CACHE_MODE_UC = 3,
+ _PAGE_CACHE_MODE_WT = 4,
+ _PAGE_CACHE_MODE_WP = 5,
+ _PAGE_CACHE_MODE_NUM = 8
+};
+#endif
-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
-#define _PAGE_CACHE_WB (0)
-#define _PAGE_CACHE_WC (_PAGE_PWT)
-#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
-#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
+#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
+#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -156,41 +147,27 @@
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
-#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
-#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
-#define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
-#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS)
-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC)
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
-#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR)
-#define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
-#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
-#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
/* xwr */
#define __P000 PAGE_NONE
@@ -324,22 +301,61 @@ static inline pteval_t pte_flags(pte_t pte)
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
-#ifdef CONFIG_NUMA_BALANCING
-/* Set of bits that distinguishes present, prot_none and numa ptes */
-#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)
-static inline pteval_t ptenuma_flags(pte_t pte)
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM];
+extern uint8_t __pte2cachemode_tbl[8];
+
+#define __pte2cm_idx(cb) \
+ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \
+ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \
+ (((cb) >> _PAGE_BIT_PWT) & 1))
+#define __cm_idx2pte(i) \
+ ((((i) & 4) << (_PAGE_BIT_PAT - 2)) | \
+ (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \
+ (((i) & 1) << _PAGE_BIT_PWT))
+
+static inline unsigned long cachemode2protval(enum page_cache_mode pcm)
{
- return pte_flags(pte) & _PAGE_NUMA_MASK;
+ if (likely(pcm == 0))
+ return 0;
+ return __cachemode2pte_tbl[pcm];
}
-
-static inline pmdval_t pmdnuma_flags(pmd_t pmd)
+static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{
- return pmd_flags(pmd) & _PAGE_NUMA_MASK;
+ return __pgprot(cachemode2protval(pcm));
}
-#endif /* CONFIG_NUMA_BALANCING */
+static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
+{
+ unsigned long masked;
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) } )
+ masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK;
+ if (likely(masked == 0))
+ return 0;
+ return __pte2cachemode_tbl[__pte2cm_idx(masked)];
+}
+static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
+{
+ pgprot_t new;
+ unsigned long val;
+
+ val = pgprot_val(pgprot);
+ pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
+ return new;
+}
+static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
+{
+ pgprot_t new;
+ unsigned long val;
+
+ val = pgprot_val(pgprot);
+ pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
+ ((val & _PAGE_PAT_LARGE) >>
+ (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
+ return new;
+}
typedef struct page *pgtable_t;
@@ -396,6 +412,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags);
diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h
index 0a4e140315b6..7249e6d0902d 100644
--- a/arch/x86/include/asm/platform_sst_audio.h
+++ b/arch/x86/include/asm/platform_sst_audio.h
@@ -16,6 +16,9 @@
#include <linux/sfi.h>
+#define MAX_NUM_STREAMS_MRFLD 25
+#define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD
+
enum sst_audio_task_id_mrfld {
SST_TASK_ID_NONE = 0,
SST_TASK_ID_SBA = 1,
@@ -73,6 +76,65 @@ struct sst_platform_data {
unsigned int strm_map_size;
};
+struct sst_info {
+ u32 iram_start;
+ u32 iram_end;
+ bool iram_use;
+ u32 dram_start;
+ u32 dram_end;
+ bool dram_use;
+ u32 imr_start;
+ u32 imr_end;
+ bool imr_use;
+ u32 mailbox_start;
+ bool use_elf;
+ bool lpe_viewpt_rqd;
+ unsigned int max_streams;
+ u32 dma_max_len;
+ u8 num_probes;
+};
+
+struct sst_lib_dnld_info {
+ unsigned int mod_base;
+ unsigned int mod_end;
+ unsigned int mod_table_offset;
+ unsigned int mod_table_size;
+ bool mod_ddr_dnld;
+};
+
+struct sst_res_info {
+ unsigned int shim_offset;
+ unsigned int shim_size;
+ unsigned int shim_phy_addr;
+ unsigned int ssp0_offset;
+ unsigned int ssp0_size;
+ unsigned int dma0_offset;
+ unsigned int dma0_size;
+ unsigned int dma1_offset;
+ unsigned int dma1_size;
+ unsigned int iram_offset;
+ unsigned int iram_size;
+ unsigned int dram_offset;
+ unsigned int dram_size;
+ unsigned int mbox_offset;
+ unsigned int mbox_size;
+ unsigned int acpi_lpe_res_index;
+ unsigned int acpi_ddr_index;
+ unsigned int acpi_ipc_irq_index;
+};
+
+struct sst_ipc_info {
+ int ipc_offset;
+ unsigned int mbox_recv_off;
+};
+
+struct sst_platform_info {
+ const struct sst_info *probe_data;
+ const struct sst_ipc_info *ipc_info;
+ const struct sst_res_info *res_info;
+ const struct sst_lib_dnld_info *lib_info;
+ const char *platform;
+};
int add_sst_platform_device(void);
#endif
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
index fc7a17c05d35..bc0fc0866553 100644
--- a/arch/x86/include/asm/pmc_atom.h
+++ b/arch/x86/include/asm/pmc_atom.h
@@ -53,6 +53,28 @@
/* Sleep state counter is in units of of 32us */
#define PMC_TMR_SHIFT 5
+/* Power status of power islands */
+#define PMC_PSS 0x98
+
+#define PMC_PSS_BIT_GBE BIT(0)
+#define PMC_PSS_BIT_SATA BIT(1)
+#define PMC_PSS_BIT_HDA BIT(2)
+#define PMC_PSS_BIT_SEC BIT(3)
+#define PMC_PSS_BIT_PCIE BIT(4)
+#define PMC_PSS_BIT_LPSS BIT(5)
+#define PMC_PSS_BIT_LPE BIT(6)
+#define PMC_PSS_BIT_DFX BIT(7)
+#define PMC_PSS_BIT_USH_CTRL BIT(8)
+#define PMC_PSS_BIT_USH_SUS BIT(9)
+#define PMC_PSS_BIT_USH_VCCS BIT(10)
+#define PMC_PSS_BIT_USH_VCCA BIT(11)
+#define PMC_PSS_BIT_OTG_CTRL BIT(12)
+#define PMC_PSS_BIT_OTG_VCCS BIT(13)
+#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14)
+#define PMC_PSS_BIT_OTG_VCCA BIT(15)
+#define PMC_PSS_BIT_USB BIT(16)
+#define PMC_PSS_BIT_USB_SUS BIT(17)
+
/* These registers reflect D3 status of functions */
#define PMC_D3_STS_0 0xA0
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 400873450e33..8f3271842533 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -30,9 +30,6 @@ static __always_inline void preempt_count_set(int pc)
/*
* must be macros to avoid header recursion hell
*/
-#define task_preempt_count(p) \
- (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
-
#define init_task_preempt_count(p) do { \
task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
} while (0)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index eb71ec794732..d2203b5d9538 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -127,7 +127,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+};
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
@@ -151,7 +151,7 @@ extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
-DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
#else
#define cpu_info boot_cpu_data
@@ -210,8 +210,23 @@ struct x86_hw_tss {
unsigned long sp0;
unsigned short ss0, __ss0h;
unsigned long sp1;
- /* ss1 caches MSR_IA32_SYSENTER_CS: */
- unsigned short ss1, __ss1h;
+
+ /*
+ * We don't use ring 1, so ss1 is a convenient scratch space in
+ * the same cacheline as sp0. We use ss1 to cache the value in
+ * MSR_IA32_SYSENTER_CS. When we context switch
+ * MSR_IA32_SYSENTER_CS, we first check if the new value being
+ * written matches ss1, and, if it's not, then we wrmsr the new
+ * value and update ss1.
+ *
+ * The only reason we context switch MSR_IA32_SYSENTER_CS is
+ * that we set it to zero in vm86 tasks to avoid corrupting the
+ * stack if we were to go through the sysenter path from vm86
+ * mode.
+ */
+ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
+
+ unsigned short __ss1h;
unsigned long sp2;
unsigned short ss2, __ss2h;
unsigned long __cr3;
@@ -276,13 +291,17 @@ struct tss_struct {
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
- * .. and then another 0x100 bytes for the emergency kernel stack:
+ * Space for the temporary SYSENTER stack:
*/
- unsigned long stack[64];
+ unsigned long SYSENTER_stack[64];
} ____cacheline_aligned;
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
/*
* Save the original ist values for checking stack pointers during debugging
@@ -374,13 +393,14 @@ struct lwp_struct {
u8 reserved[128];
};
-struct bndregs_struct {
- u64 bndregs[8];
+struct bndreg {
+ u64 lower_bound;
+ u64 upper_bound;
} __packed;
-struct bndcsr_struct {
- u64 cfg_reg_u;
- u64 status_reg;
+struct bndcsr {
+ u64 bndcfgu;
+ u64 bndstatus;
} __packed;
struct xsave_hdr_struct {
@@ -394,8 +414,8 @@ struct xsave_struct {
struct xsave_hdr_struct xsave_hdr;
struct ymmh_struct ymmh;
struct lwp_struct lwp;
- struct bndregs_struct bndregs;
- struct bndcsr_struct bndcsr;
+ struct bndreg bndreg[4];
+ struct bndcsr bndcsr;
/* new processor state extensions will go here */
} __attribute__ ((packed, aligned (64)));
@@ -473,7 +493,6 @@ struct thread_struct {
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
#else
- unsigned long usersp; /* Copy from PDA */
unsigned short es;
unsigned short ds;
unsigned short fsindex;
@@ -563,6 +582,16 @@ static inline void native_swapgs(void)
#endif
}
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+ return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+ /* sp0 on x86_32 is special in and around vm86 mode. */
+ return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -578,39 +607,6 @@ static inline void load_sp0(struct tss_struct *tss,
#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT */
-/*
- * Save the cr4 feature set we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-extern u32 *trampoline_cr4_features;
-
-static inline void set_in_cr4(unsigned long mask)
-{
- unsigned long cr4;
-
- mmu_cr4_features |= mask;
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- cr4 = read_cr4();
- cr4 |= mask;
- write_cr4(cr4);
-}
-
-static inline void clear_in_cr4(unsigned long mask)
-{
- unsigned long cr4;
-
- mmu_cr4_features &= ~mask;
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- cr4 = read_cr4();
- cr4 &= ~mask;
- write_cr4(cr4);
-}
-
typedef struct {
unsigned long seg;
} mm_segment_t;
@@ -793,10 +789,10 @@ extern char ignore_fpu_irq;
#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
-# define BASE_PREFETCH ASM_NOP4
+# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 (%1)"
+# define BASE_PREFETCH "prefetcht0 %P1"
#endif
/*
@@ -807,10 +803,9 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH,
- "prefetchnta (%1)",
+ alternative_input(BASE_PREFETCH, "prefetchnta %P1",
X86_FEATURE_XMM,
- "r" (x));
+ "m" (*(const char *)x));
}
/*
@@ -820,10 +815,9 @@ static inline void prefetch(const void *x)
*/
static inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH,
- "prefetchw (%1)",
- X86_FEATURE_3DNOW,
- "r" (x));
+ alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ X86_FEATURE_3DNOWPREFETCH,
+ "m" (*(const char *)x));
}
static inline void spin_lock_prefetch(const void *x)
@@ -831,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
prefetchw(x);
}
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+ TOP_OF_KERNEL_STACK_PADDING)
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@@ -841,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \
- .sp0 = sizeof(init_stack) + (long)&init_stack, \
+ .sp0 = TOP_OF_INIT_STACK, \
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
}
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS { \
- .x86_tss = { \
- .sp0 = sizeof(init_stack) + (long)&init_stack, \
- .ss0 = __KERNEL_DS, \
- .ss1 = __KERNEL_CS, \
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
- }, \
- .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
-}
-
extern unsigned long thread_saved_pc(struct task_struct *tsk);
-#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info) \
-({ \
- unsigned long *__ptr = (unsigned long *)(info); \
- (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
-})
-
/*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs"
* is accessible even if the CPU haven't stored the SS/ESP registers
* on the stack (interrupt gate does not save these registers
@@ -882,18 +856,24 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
* "struct pt_regs" is possible, but they may contain the
* completely wrong values.
*/
-#define task_pt_regs(task) \
-({ \
- struct pt_regs *__regs__; \
- __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
- __regs__ - 1; \
+#define task_pt_regs(task) \
+({ \
+ unsigned long __ptr = (unsigned long)task_stack_page(task); \
+ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
+ ((struct pt_regs *)__ptr) - 1; \
})
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
/*
- * User space process size. 47bits minus one guard page.
+ * User space process size. 47bits minus one guard page. The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously. We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
*/
#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
@@ -912,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
- .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS { \
- .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+ .sp0 = TOP_OF_INIT_STACK \
}
/*
@@ -928,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -953,6 +924,24 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+/* Register/unregister a process' MPX related resource */
+#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk))
+#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk))
+
+#ifdef CONFIG_X86_INTEL_MPX
+extern int mpx_enable_management(struct task_struct *tsk);
+extern int mpx_disable_management(struct task_struct *tsk);
+#else
+static inline int mpx_enable_management(struct task_struct *tsk)
+{
+ return -EINVAL;
+}
+static inline int mpx_disable_management(struct task_struct *tsk)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_X86_INTEL_MPX */
+
extern u16 amd_get_nb_id(int cpu);
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
unsigned long ip;
unsigned long cs;
unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
}
/*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value. This tricky test checks that with
- * one comparison. Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode. On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value. This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
*/
static inline int user_mode(struct pt_regs *regs)
{
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
#endif
}
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
- return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
- USER_RPL;
-#else
- return user_mode(regs);
-#endif
-}
-
static inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
-#define current_user_stack_pointer() this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer() \
- (test_thread_flag(TIF_IA32) \
- ? current_pt_regs()->sp \
- : this_cpu_read(old_rsp))
+#define current_user_stack_pointer() current_pt_regs()->sp
+#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
#ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
*/
#define arch_ptrace_stop_needed(code, info) \
({ \
- set_thread_flag(TIF_NOTIFY_RESUME); \
+ force_iret(); \
false; \
})
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 6f1c3a8a33ab..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
#include <linux/const.h>
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
#define GDT_ENTRY(flags, base, limit) \
((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
(((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@@ -12,210 +14,228 @@
(((base) & _AC(0x00ffffff,ULL)) << 16) | \
(((limit) & _AC(0x0000ffff,ULL))))
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
#define GDT_ENTRY_BOOT_CS 2
-#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS 3
+#define GDT_ENTRY_BOOT_TSS 4
+#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
-#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK 0x3
-#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL 0x3
+
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK 0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT 0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT 0x0
+
+#define GDT_ENTRY_INVALID_SEG 0
#ifdef CONFIG_X86_32
/*
* The layout of the per-CPU GDT under Linux:
*
- * 0 - null
+ * 0 - null <=== cacheline #1
* 1 - reserved
* 2 - reserved
* 3 - reserved
*
- * 4 - unused <==== new cacheline
+ * 4 - unused <=== cacheline #2
* 5 - unused
*
* ------- start of TLS (Thread-Local Storage) segments:
*
* 6 - TLS segment #1 [ glibc's TLS segment ]
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
- * 8 - TLS segment #3
+ * 8 - TLS segment #3 <=== cacheline #3
* 9 - reserved
* 10 - reserved
* 11 - reserved
*
* ------- start of kernel segments:
*
- * 12 - kernel code segment <==== new cacheline
+ * 12 - kernel code segment <=== cacheline #4
* 13 - kernel data segment
* 14 - default user CS
* 15 - default user DS
- * 16 - TSS
+ * 16 - TSS <=== cacheline #5
* 17 - LDT
* 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support
- * 20 - PNPBIOS support
+ * 20 - PNPBIOS support <=== cacheline #6
* 21 - PNPBIOS support
* 22 - PNPBIOS support
* 23 - APM BIOS support
- * 24 - APM BIOS support
+ * 24 - APM BIOS support <=== cacheline #7
* 25 - APM BIOS support
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - stack_canary-20 [ for stack protector ]
+ * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
*/
-#define GDT_ENTRY_TLS_MIN 6
-#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_KERNEL_CS 12
+#define GDT_ENTRY_KERNEL_DS 13
#define GDT_ENTRY_DEFAULT_USER_CS 14
-
#define GDT_ENTRY_DEFAULT_USER_DS 15
+#define GDT_ENTRY_TSS 16
+#define GDT_ENTRY_LDT 17
+#define GDT_ENTRY_PNPBIOS_CS32 18
+#define GDT_ENTRY_PNPBIOS_CS16 19
+#define GDT_ENTRY_PNPBIOS_DS 20
+#define GDT_ENTRY_PNPBIOS_TS1 21
+#define GDT_ENTRY_PNPBIOS_TS2 22
+#define GDT_ENTRY_APMBIOS_BASE 23
+
+#define GDT_ENTRY_ESPFIX_SS 26
+#define GDT_ENTRY_PERCPU 27
+#define GDT_ENTRY_STACK_CANARY 28
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS 31
-#define GDT_ENTRY_KERNEL_BASE (12)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES 32
-#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0)
+/*
+ * Segment selector values corresponding to the above entries:
+ */
-#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
-#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5)
+/* segment for calling fn: */
+#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
-#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11)
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
-#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
+/* data segment for BIOS: */
+#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
-#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
#ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
+# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
#else
-#define __KERNEL_PERCPU 0
+# define __KERNEL_PERCPU 0
#endif
-#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
#ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
+# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
-#define __KERNEL_STACK_CANARY 0
+# define __KERNEL_STACK_CANARY 0
#endif
-#define GDT_ENTRY_DOUBLEFAULT_TSS 31
+#else /* 64-bit: */
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
-
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
-#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
-#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
-#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
-#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
-
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK 0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK 0x4
+#include <asm/cache.h>
-/* User mode is privilege level 3 */
-#define USER_RPL 0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT 0x4
-#define SEGMENT_GDT 0x0
+#define GDT_ENTRY_KERNEL32_CS 1
+#define GDT_ENTRY_KERNEL_CS 2
+#define GDT_ENTRY_KERNEL_DS 3
/*
- * Matching rules for certain types of segments.
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ * if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
*/
+#define GDT_ENTRY_DEFAULT_USER32_CS 4
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define GDT_ENTRY_DEFAULT_USER_CS 6
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+/* Needs two entries */
+#define GDT_ENTRY_TSS 8
+/* Needs two entries */
+#define GDT_ENTRY_LDT 10
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
-#else
-#include <asm/cache.h>
-
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
-
-#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU 15
/*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * Number of entries in the GDT table:
*/
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS __USER_DS
-
-#define GDT_ENTRY_TSS 8 /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRIES 16
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS __USER_DS
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS 0
+#define GS_TLS 1
+
+#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
#endif
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
#ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl() 0
+# define get_kernel_rpl() 0
#endif
-/* User mode is privilege level 3 */
-#define USER_RPL 0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT 0x4
-#define SEGMENT_GDT 0x0
+#define IDT_ENTRIES 256
+#define NUM_EXCEPTION_VECTORS 32
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK 0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK 0x4
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK 0x00027d00
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK 0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define GDT_SIZE (GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
#ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
#endif
/*
@@ -240,37 +260,30 @@ do { \
} while (0)
/*
- * Save a segment register away
+ * Save a segment register away:
*/
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
*/
#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk) ((tsk)->thread.gs)
-#define lazy_save_gs(v) savesegment(gs, (v))
-#define lazy_load_gs(v) loadsegment(gs, (v))
-#else /* X86_32_LAZY_GS */
-#define get_user_gs(regs) (u16)((regs)->gs)
-#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v) do { } while (0)
-#define lazy_load_gs(v) do { } while (0)
-#endif /* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
+# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
+# define task_user_gs(tsk) ((tsk)->thread.gs)
+# define lazy_save_gs(v) savesegment(gs, (v))
+# define lazy_load_gs(v) loadsegment(gs, (v))
+# else /* X86_32_LAZY_GS */
+# define get_user_gs(regs) (u16)((regs)->gs)
+# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
+# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
+# define lazy_save_gs(v) do { } while (0)
+# define lazy_load_gs(v) do { } while (0)
+# endif /* X86_32_LAZY_GS */
#endif /* X86_32 */
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
- return __limit + 1;
-}
-
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b236e21..f69e06b283fb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
*/
extern struct boot_params boot_params;
+static inline bool kaslr_enabled(void)
+{
+ return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
/*
* Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..6fe6b182c998 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
unsigned long ip;
unsigned long flags;
unsigned short cs;
- unsigned short gs;
- unsigned short fs;
- unsigned short __pad0;
+ unsigned short __pad2; /* Was called gs, but was always zero. */
+ unsigned short __pad1; /* Was called fs, but was always zero. */
+ unsigned short ss;
unsigned long err;
unsigned long trapno;
unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a958164088c..89db46752a8f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
- unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
#ifdef CONFIG_X86_SMAP
-#define ASM_CLAC \
- 661: ASM_NOP3 ; \
- .pushsection .altinstr_replacement, "ax" ; \
- 662: __ASM_CLAC ; \
- .popsection ; \
- .pushsection .altinstructions, "a" ; \
- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
- .popsection
-
-#define ASM_STAC \
- 661: ASM_NOP3 ; \
- .pushsection .altinstr_replacement, "ax" ; \
- 662: __ASM_STAC ; \
- .popsection ; \
- .pushsection .altinstructions, "a" ; \
- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
- .popsection
+#define ASM_CLAC \
+ ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+
+#define ASM_STAC \
+ ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@@ -61,20 +49,20 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..81d02fc7dafa 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
deleted file mode 100644
index 0da7409f0bec..000000000000
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(0xa, 0xf);
- spin_unlock_irqrestore(&rtc_lock, flags);
- local_flush_tlb();
- pr_debug("1.\n");
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
- start_eip >> 4;
- pr_debug("2.\n");
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
- start_eip & 0xf;
- pr_debug("3.\n");
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
- unsigned long flags;
-
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(0, 0xf);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
-}
-
-static inline void __init smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else {
- nr_ioapics = 0;
- }
-#endif
-}
-
-static inline void smpboot_clear_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
- nr_ioapics = 0;
-#endif
-}
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index e820c080a4e9..aeb4666e0c0a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
#ifdef __KERNEL__
+#include <asm/nops.h>
+
static inline void native_clts(void)
{
asm volatile("clts");
@@ -137,17 +139,17 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}
-static inline unsigned long read_cr4(void)
+static inline unsigned long __read_cr4(void)
{
return native_read_cr4();
}
-static inline unsigned long read_cr4_safe(void)
+static inline unsigned long __read_cr4_safe(void)
{
return native_read_cr4_safe();
}
-static inline void write_cr4(unsigned long x)
+static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
"+m" (*(volatile char __force *)__p));
}
+static inline void clwb(volatile void *__p)
+{
+ volatile struct { char x[64]; } *p = __p;
+
+ asm volatile(ALTERNATIVE_2(
+ ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+ ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+ X86_FEATURE_CLFLUSHOPT,
+ ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
+ X86_FEATURE_CLWB)
+ : [p] "+m" (*p)
+ : [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+ alternative(ASM_NOP7,
+ ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+ "sfence",
+ X86_FEATURE_PCOMMIT);
+}
+
#define nop() asm volatile ("nop")
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 9295016485c9..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
{
- set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+ set_bit(0, (volatile unsigned long *)&lock->tickets.head);
}
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+ return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+ __ticket_t head)
+{
+ if (head & TICKET_SLOWPATH_FLAG) {
+ arch_spinlock_t old, new;
+
+ old.tickets.head = head;
+ new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+ old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+ new.tickets.tail = old.tickets.tail;
+
+ /* try to clear slowpath flag when there are no contenders */
+ cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+ }
+}
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
- return lock.tickets.head == lock.tickets.tail;
+ return __tickets_equal(lock.tickets.head, lock.tickets.tail);
}
/*
@@ -87,91 +107,69 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
if (likely(inc.head == inc.tail))
goto out;
- inc.tail &= ~TICKET_SLOWPATH_FLAG;
for (;;) {
unsigned count = SPIN_THRESHOLD;
do {
- if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
- goto out;
+ inc.head = READ_ONCE(lock->tickets.head);
+ if (__tickets_equal(inc.head, inc.tail))
+ goto clear_slowpath;
cpu_relax();
} while (--count);
__ticket_lock_spinning(lock, inc.tail);
}
-out: barrier(); /* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+ __ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+ barrier(); /* make sure nothing creeps before the lock is taken */
}
static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
arch_spinlock_t old, new;
- old.tickets = ACCESS_ONCE(lock->tickets);
- if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+ old.tickets = READ_ONCE(lock->tickets);
+ if (!__tickets_equal(old.tickets.head, old.tickets.tail))
return 0;
new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+ new.head_tail &= ~TICKET_SLOWPATH_FLAG;
/* cmpxchg is a full barrier, so nothing can move before it */
return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
- arch_spinlock_t old)
-{
- arch_spinlock_t new;
-
- BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
- /* Perform the unlock on the "before" copy */
- old.tickets.head += TICKET_LOCK_INC;
-
- /* Clear the slowpath flag */
- new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
- /*
- * If the lock is uncontended, clear the flag - use cmpxchg in
- * case it changes behind our back though.
- */
- if (new.tickets.head != new.tickets.tail ||
- cmpxchg(&lock->head_tail, old.head_tail,
- new.head_tail) != old.head_tail) {
- /*
- * Lock still has someone queued for it, so wake up an
- * appropriate waiter.
- */
- __ticket_unlock_kick(lock, old.tickets.head);
- }
-}
-
static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
if (TICKET_SLOWPATH_FLAG &&
- static_key_false(&paravirt_ticketlocks_enabled)) {
- arch_spinlock_t prev;
+ static_key_false(&paravirt_ticketlocks_enabled)) {
+ __ticket_t head;
- prev = *lock;
- add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+ BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
- /* add_smp() is a full mb() */
+ head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
- if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
- __ticket_unlock_slowpath(lock, prev);
+ if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+ head &= ~TICKET_SLOWPATH_FLAG;
+ __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+ }
} else
__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
}
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+ struct __raw_tickets tmp = READ_ONCE(lock->tickets);
- return tmp.tail != tmp.head;
+ return !__tickets_equal(tmp.tail, tmp.head);
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+ struct __raw_tickets tmp = READ_ONCE(lock->tickets);
- return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+ tmp.head &= ~TICKET_SLOWPATH_FLAG;
+ return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
}
#define arch_spin_is_contended arch_spin_is_contended
@@ -183,8 +181,20 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
- while (arch_spin_is_locked(lock))
+ __ticket_t head = READ_ONCE(lock->tickets.head);
+
+ for (;;) {
+ struct __raw_tickets tmp = READ_ONCE(lock->tickets);
+ /*
+ * We need to check "unlocked" in a loop, tmp.head == head
+ * can be false positive because of overflow.
+ */
+ if (__tickets_equal(tmp.head, tmp.tail) ||
+ !__tickets_equal(tmp.head, head))
+ break;
+
cpu_relax();
+ }
}
/*
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 19e2c468fc2c..e4661196994e 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,11 +27,12 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
function. */
#define __HAVE_ARCH_MEMCPY 1
+extern void *__memcpy(void *to, const void *from, size_t len);
+
#ifndef CONFIG_KMEMCHECK
#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
extern void *memcpy(void *to, const void *from, size_t len);
#else
-extern void *__memcpy(void *to, const void *from, size_t len);
#define memcpy(dst, src, len) \
({ \
size_t __len = (len); \
@@ -53,9 +54,11 @@ extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
+void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t count);
+void *__memmove(void *dest, const void *src, size_t count);
int memcmp(const void *cs, const void *ct, size_t count);
size_t strlen(const char *s);
@@ -63,6 +66,19 @@ char *strcpy(char *dest, const char *src);
char *strcat(char *dest, const char *src);
int strcmp(const char *cs, const char *ct);
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#undef memcpy
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index d7f3b3b78ac3..751bf4b7bf11 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,12 +79,12 @@ do { \
#else /* CONFIG_X86_32 */
/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
#define __EXTRA_CLOBBER \
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
- "r12", "r13", "r14", "r15"
+ "r12", "r13", "r14", "r15", "flags"
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
@@ -100,7 +100,11 @@ do { \
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
-/* Save restore flags to clear handle leaking NT */
+/*
+ * There is no need to save or restore flags, because flags are always
+ * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
+ * has no effect.
+ */
#define switch_to(prev, next, last) \
asm volatile(SAVE_CONTEXT \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..ea2dbe82cba3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,6 +13,33 @@
#include <asm/types.h>
/*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack. We do it because of a nasty
+ * 32-bit corner case. On x86_32, the hardware stack frame is
+ * variable-length. Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame. If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist. Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0. On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault. (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
+/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
* - this struct shares the supervisor stack pages
@@ -31,7 +58,6 @@ struct thread_info {
__u32 cpu; /* current CPU */
int saved_preempt_count;
mm_segment_t addr_limit;
- struct restart_block restart_block;
void __user *sysenter_return;
unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */
@@ -45,9 +71,6 @@ struct thread_info {
.cpu = 0, \
.saved_preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
@@ -75,7 +98,6 @@ struct thread_info {
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
-#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
@@ -100,7 +122,6 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
-#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
@@ -140,8 +161,8 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
- _TIF_USER_RETURN_NOTIFY)
+ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
+ _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
@@ -151,7 +172,6 @@ struct thread_info {
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
/*
* macros/functions for gaining access to the thread information structure
@@ -164,24 +184,53 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
- struct thread_info *ti;
- ti = (void *)(this_cpu_read_stable(kernel_stack) +
- KERNEL_STACK_OFFSET - THREAD_SIZE);
- return ti;
+ return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
+}
+
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+#ifdef CONFIG_X86_64
+ asm("mov %%rsp,%0" : "=g" (sp));
+#else
+ asm("mov %%esp,%0" : "=g" (sp));
+#endif
+ return sp;
}
#else /* !__ASSEMBLY__ */
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
- _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+ _ASM_SUB $(THREAD_SIZE),reg ;
/*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ * 2 or 4 pages, and the bottom of the kernel stack contains
+ * the thread_info structure. So to access the thread_info very
+ * quickly from assembly code we can calculate down from the
+ * top of the kernel stack to the bottom, using constant,
+ * build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
*/
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
#endif
@@ -231,6 +280,16 @@ static inline bool is_ia32_task(void)
#endif
return false;
}
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
+
#endif /* !__ASSEMBLY__ */
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 04905bfc508b..cd791948b286 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -15,6 +15,75 @@
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif
+struct tlb_state {
+#ifdef CONFIG_SMP
+ struct mm_struct *active_mm;
+ int state;
+#endif
+
+ /*
+ * Access to this CR4 shadow and to H/W CR4 is protected by
+ * disabling interrupts when modifying either one.
+ */
+ unsigned long cr4;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+/* Initialize cr4 shadow for this CPU. */
+static inline void cr4_init_shadow(void)
+{
+ this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ if ((cr4 | mask) != cr4) {
+ cr4 |= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+ }
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ if ((cr4 & ~mask) != cr4) {
+ cr4 &= ~mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+ }
+}
+
+/* Read the CR4 shadow. */
+static inline unsigned long cr4_read_shadow(void)
+{
+ return this_cpu_read(cpu_tlbstate.cr4);
+}
+
+/*
+ * Save some of cr4 feature set we're using (e.g. Pentium 4MB
+ * enable and PPro Global page enable), so that any CPU's that boot
+ * up after us can get the correct flags. This should only be used
+ * during boot on the boot cpu.
+ */
+extern unsigned long mmu_cr4_features;
+extern u32 *trampoline_cr4_features;
+
+static inline void cr4_set_bits_and_update_boot(unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
+ cr4_set_bits(mask);
+}
+
static inline void __native_flush_tlb(void)
{
native_write_cr3(native_read_cr3());
@@ -24,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
{
unsigned long cr4;
- cr4 = native_read_cr4();
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
/* clear PGE */
native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */
@@ -184,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
-struct tlb_state {
- struct mm_struct *active_mm;
- int state;
-};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
-
static inline void reset_lazy_tlbstate(void)
{
this_cpu_write(cpu_tlbstate.state, 0);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e7010a..4e49d7dff78e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_TRAPS_H
#define _ASM_X86_TRAPS_H
+#include <linux/context_tracking_state.h>
#include <linux/kprobes.h>
#include <asm/debugreg.h>
@@ -39,6 +40,7 @@ asmlinkage void simd_coprocessor_error(void);
#ifdef CONFIG_TRACING
asmlinkage void trace_page_fault(void);
+#define trace_stack_segment stack_segment
#define trace_divide_error divide_error
#define trace_bounds bounds
#define trace_invalid_op invalid_op
@@ -109,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void);
asmlinkage void mce_threshold_interrupt(void);
#endif
+extern enum ctx_state ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_begin_non_atomic(struct pt_regs *regs);
+extern void ist_end_non_atomic(void);
+
/* Interrupts/Exceptions */
enum {
X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0d592e0a5b84..ace9dec050b1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -179,7 +179,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
asm volatile("call __get_user_%P3" \
: "=a" (__ret_gu), "=r" (__val_gu) \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
- (x) = (__typeof__(*(ptr))) __val_gu; \
+ (x) = (__force __typeof__(*(ptr))) __val_gu; \
__ret_gu; \
})
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 12a26b979bf1..f2f9b39b274a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
}
unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 2d60a7813dfe..fc808b83fccb 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -33,8 +33,8 @@
* Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
*/
-#define MAX_CPUS_PER_UVHUB 64
-#define MAX_CPUS_PER_SOCKET 32
+#define MAX_CPUS_PER_UVHUB 128
+#define MAX_CPUS_PER_SOCKET 64
#define ADP_SZ 64 /* hardware-provided max. */
#define UV_CPUS_PER_AS 32 /* hardware-provided max. */
#define ITEMS_PER_DESC 8
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3c3366c2e37f..f556c4843aa1 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -70,4 +70,25 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
++s->seq;
}
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+ unsigned int p;
+
+ /*
+ * Load per CPU data from GDT. LSL is faster than RDTSCP and
+ * works on all CPUs. This is volatile so that it orders
+ * correctly wrt barrier() and to keep gcc from cleverly
+ * hoisting it out of the calling function.
+ */
+ asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+
+ return p;
+}
+
+#endif /* CONFIG_X86_64 */
+
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 5da71c27cc59..cce9ee68e335 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@
#include <asm/vmx.h>
#include <asm/svm.h>
+#include <asm/tlbflush.h>
/*
* VMX functions:
@@ -40,12 +41,12 @@ static inline int cpu_has_vmx(void)
static inline void cpu_vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
- write_cr4(read_cr4() & ~X86_CR4_VMXE);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static inline int cpu_vmx_enabled(void)
{
- return read_cr4() & X86_CR4_VMXE;
+ return __read_cr4() & X86_CR4_VMXE;
}
/** Disable VMX if it is enabled on the current CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade26d8d..da772edd19ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,8 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EXEC_XSAVES 0x00100000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -120,6 +122,7 @@ enum vmcs_field {
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
@@ -139,6 +142,8 @@ enum vmcs_field {
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
TSC_OFFSET = 0x00002010,
TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
@@ -159,6 +164,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 2a46ca720afc..6ba66ee79710 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,15 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
-#define VGETCPU_RDTSCP 1
-#define VGETCPU_LSL 2
-
-/* kernel space (writeable) */
-extern int vgetcpu_mode;
-extern struct timezone sys_tz;
-
-#include <asm/vvar.h>
-
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
/*
@@ -20,25 +12,12 @@ extern void map_vsyscall(void);
* Returns true if handled.
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
-
-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
+#else
+static inline void map_vsyscall(void) {}
+static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
- unsigned int p;
-
- if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
- /* Load per CPU data from RDTSCP */
- native_read_tscp(&p);
- } else {
- /* Load per CPU data from GDT */
- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
- }
-
- return p;
+ return false;
}
-#endif /* CONFIG_X86_64 */
+#endif
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 5d2b9ad2c6d2..3f32dfc2ab73 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -44,8 +44,6 @@ extern char __vvar_page;
/* DECLARE_VVAR(offset, type, name) */
-DECLARE_VVAR(0, volatile unsigned long, jiffies)
-DECLARE_VVAR(16, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
#undef DECLARE_VVAR
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index e45e4da96bf1..f58a9c7a3c86 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -172,7 +172,6 @@ struct x86_platform_ops {
struct pci_dev;
struct msi_msg;
-struct msi_desc;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
@@ -183,8 +182,6 @@ struct x86_msi_ops {
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
- u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag);
- u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag);
};
struct IO_APIC_route_entry;
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
new file mode 100644
index 000000000000..0d809e9fc975
--- /dev/null
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <keir@xen.org>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/*
+ * For compatibility with other hypervisor interfaces, the Xen cpuid leaves
+ * can be found at the first otherwise unused 0x100 aligned boundary starting
+ * from 0x40000000.
+ *
+ * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid
+ * leaves will start at 0x40000100
+ */
+
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000x00)
+ * EAX: Largest Xen-information leaf. All leaves up to an including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000x01)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000x02)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+/*
+ * Leaf 5 (0x40000x04)
+ * HVM-specific features
+ */
+
+/* EAX Features */
+/* Virtualized APIC registers */
+#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0)
+/* Virtualized x2APIC accesses */
+#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1)
+/* Memory mapped from other domains has valid IOMMU entries */
+#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
+
+#define XEN_CPUID_MAX_NUM_LEAVES 4
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index 7f02fe4e2c7b..acd844c017d3 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -22,8 +22,8 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs) { }
+ dma_addr_t dev_addr, unsigned long offset, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs) { }
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c949923a5668..358dcd338915 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -41,10 +41,12 @@ typedef struct xpaddr {
extern unsigned long *machine_to_phys_mapping;
extern unsigned long machine_to_phys_nr;
+extern unsigned long *xen_p2m_addr;
+extern unsigned long xen_p2m_size;
+extern unsigned long xen_max_p2m_pfn;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
@@ -52,16 +54,50 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count);
-extern int m2p_add_override(unsigned long mfn, struct page *page,
- struct gnttab_map_grant_ref *kmap_op);
extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
- struct gnttab_map_grant_ref *kmap_ops,
+ struct gnttab_unmap_grant_ref *kunmap_ops,
struct page **pages, unsigned int count);
-extern int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op,
- unsigned long mfn);
-extern struct page *m2p_find_override(unsigned long mfn);
-extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+
+/*
+ * Helper functions to write or read unsigned long values to/from
+ * memory, when the access may fault.
+ */
+static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
+{
+ return __put_user(val, (unsigned long __user *)addr);
+}
+
+static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+{
+ return __get_user(*val, (unsigned long __user *)addr);
+}
+
+/*
+ * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine():
+ * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator
+ * bits (identity or foreign) are set.
+ * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set
+ * identity or foreign indicator will be still set. __pfn_to_mfn() is
+ * encapsulating get_phys_to_machine() which is called in special cases only.
+ * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
+ * cases needing an extended handling.
+ */
+static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+{
+ unsigned long mfn;
+
+ if (pfn < xen_p2m_size)
+ mfn = xen_p2m_addr[pfn];
+ else if (unlikely(pfn < xen_max_p2m_pfn))
+ return get_phys_to_machine(pfn);
+ else
+ return IDENTITY_FRAME(pfn);
+
+ if (unlikely(mfn == INVALID_P2M_ENTRY))
+ return get_phys_to_machine(pfn);
+
+ return mfn;
+}
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
@@ -70,7 +106,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
- mfn = get_phys_to_machine(pfn);
+ mfn = __pfn_to_mfn(pfn);
if (mfn != INVALID_P2M_ENTRY)
mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
@@ -83,7 +119,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
if (xen_feature(XENFEAT_auto_translated_physmap))
return 1;
- return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
+ return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY;
}
static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
@@ -102,7 +138,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+ ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn);
if (ret < 0)
return ~0;
@@ -117,24 +153,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
return mfn;
pfn = mfn_to_pfn_no_overrides(mfn);
- if (get_phys_to_machine(pfn) != mfn) {
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
- *
- * m2p_find_override_pfn returns ~0 if it doesn't find anything.
- */
- pfn = m2p_find_override_pfn(mfn, ~0);
- }
+ if (__pfn_to_mfn(pfn) != mfn)
+ pfn = ~0;
/*
- * pfn is ~0 if there are no entries in the m2p for mfn or if the
- * entry doesn't map back to the mfn and m2p_override doesn't have a
- * valid entry for it.
+ * pfn is ~0 if there are no entries in the m2p for mfn or the
+ * entry doesn't map back to the mfn.
*/
- if (pfn == ~0 &&
- get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
pfn = mfn;
return pfn;
@@ -180,7 +206,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
return mfn;
pfn = mfn_to_pfn(mfn);
- if (get_phys_to_machine(pfn) != mfn)
+ if (__pfn_to_mfn(pfn) != mfn)
return -1; /* force !pfn_valid() */
return pfn;
}
@@ -236,4 +262,11 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_remap(cookie, size) ioremap((cookie), (size));
#define xen_unmap(cookie) iounmap((cookie))
+static inline bool xen_arch_need_swiotlb(struct device *dev,
+ unsigned long pfn,
+ unsigned long mfn)
+{
+ return false;
+}
+
#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79ada658..c9a6d68b8d62 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
#define XSTATE_Hi16_ZMM 0x80
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
/* Bit 63 of XCR0 is reserved for future expansion */
#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
@@ -81,18 +82,15 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XSAVES"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XSAVE"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
-
- asm volatile(xstate_fault
- : "0" (0)
- : "memory");
-
return err;
}
@@ -111,18 +109,15 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XRSTORS"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XRSTOR"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
-
- asm volatile(xstate_fault
- : "0" (0)
- : "memory");
-
return err;
}
@@ -148,9 +143,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
*/
alternative_input_2(
"1:"XSAVE,
- "1:"XSAVEOPT,
+ XSAVEOPT,
X86_FEATURE_XSAVEOPT,
- "1:"XSAVES,
+ XSAVES,
X86_FEATURE_XSAVES,
[fx] "D" (fx), "a" (lmask), "d" (hmask) :
"memory");
@@ -177,7 +172,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
*/
alternative_input(
"1: " XRSTOR,
- "1: " XRSTORS,
+ XRSTORS,
X86_FEATURE_XSAVES,
"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");