diff options
Diffstat (limited to 'arch/x86/include')
79 files changed, 1079 insertions, 668 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3bf000fab0ae..d55a210a49bf 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -6,6 +6,7 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h +generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 6dd1c7dd0473..5e5cd123fdfb 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return (*(volatile int *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } /** @@ -219,21 +219,6 @@ static inline short int atomic_inc_short(short int *v) return *v; } -#ifdef CONFIG_X86_64 -/** - * atomic_or_long - OR of two long integers - * @v1: pointer to type unsigned long - * @v2: pointer to type unsigned long - * - * Atomically ORs @v1 and @v2 - * Returns the result of the OR - */ -static inline void atomic_or_long(unsigned long *v1, unsigned long v2) -{ - asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2)); -} -#endif - /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ asm volatile(LOCK_PREFIX "andl %0,%1" \ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 46e9052bbd28..f8d273e18516 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,7 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return (*(volatile long *)&(v)->counter); + return ACCESS_ONCE((v)->counter); } /** diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 0f4460b5636d..2ab1eb33106e 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -24,78 +24,28 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() +#define dma_rmb() rmb() #else -# define smp_rmb() barrier() +#define dma_rmb() barrier() #endif +#define dma_wmb() barrier() + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + #if defined(CONFIG_X86_PPRO_FENCE) /* diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 9863ee3747da..47c8e32f621a 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -5,65 +5,6 @@ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> -#ifdef CONFIG_X86_PAT -/* - * X86 PAT uses page flags WC and Uncached together to keep track of - * memory type of pages that have backing page struct. X86 PAT supports 3 - * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and - * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not - * been changed from its default (value of -1 used to denote this). - * Note we do not support _PAGE_CACHE_UC here. - */ - -#define _PGMT_DEFAULT 0 -#define _PGMT_WC (1UL << PG_arch_1) -#define _PGMT_UC_MINUS (1UL << PG_uncached) -#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) -#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) -#define _PGMT_CLEAR_MASK (~_PGMT_MASK) - -static inline unsigned long get_page_memtype(struct page *pg) -{ - unsigned long pg_flags = pg->flags & _PGMT_MASK; - - if (pg_flags == _PGMT_DEFAULT) - return -1; - else if (pg_flags == _PGMT_WC) - return _PAGE_CACHE_WC; - else if (pg_flags == _PGMT_UC_MINUS) - return _PAGE_CACHE_UC_MINUS; - else - return _PAGE_CACHE_WB; -} - -static inline void set_page_memtype(struct page *pg, unsigned long memtype) -{ - unsigned long memtype_flags = _PGMT_DEFAULT; - unsigned long old_flags; - unsigned long new_flags; - - switch (memtype) { - case _PAGE_CACHE_WC: - memtype_flags = _PGMT_WC; - break; - case _PAGE_CACHE_UC_MINUS: - memtype_flags = _PGMT_UC_MINUS; - break; - case _PAGE_CACHE_WB: - memtype_flags = _PGMT_WB; - break; - } - - do { - old_flags = pg->flags; - new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; - } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); -} -#else -static inline unsigned long get_page_memtype(struct page *pg) { return -1; } -static inline void set_page_memtype(struct page *pg, unsigned long memtype) { } -#endif - /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index cb4c73bfeb48..76659b67fd11 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -85,7 +85,7 @@ For 32-bit we have the following conventions - kernel is built with #define ARGOFFSET R11 #define SWFRAME ORIG_RAX - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 + .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip movq_cfi rdi, 8*8 @@ -96,7 +96,11 @@ For 32-bit we have the following conventions - kernel is built with movq_cfi rcx, 5*8 .endif + .if \rax_enosys + movq $-ENOSYS, 4*8(%rsp) + .else movq_cfi rax, 4*8 + .endif .if \save_r891011 movq_cfi r8, 3*8 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb9b258d60e7..aede2c347bde 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -8,6 +8,10 @@ #include <asm/required-features.h> #endif +#ifndef _ASM_X86_DISABLED_FEATURES_H +#include <asm/disabled-features.h> +#endif + #define NCAPINTS 11 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ @@ -185,6 +189,11 @@ #define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ +#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ +#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ +#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ +#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -202,6 +211,7 @@ #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ @@ -250,8 +260,15 @@ #include <asm/asm.h> #include <linux/bitops.h> +#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; +#define X86_CAP_FMT "%s" +#define x86_cap_flag(flag) x86_cap_flags[flag] +#else +#define X86_CAP_FMT "%d:%d" +#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31) +#endif /* * In order to save room, we index into this array by doing @@ -274,6 +291,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \ (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) +#define DISABLED_MASK_BIT_SET(bit) \ + ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) || \ + (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) || \ + (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) || \ + (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) || \ + (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4)) || \ + (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) || \ + (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) || \ + (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) || \ + (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) || \ + (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) ) + #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ test_cpu_cap(c, bit)) @@ -282,6 +311,18 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \ + cpu_has(&boot_cpu_data, bit)) + #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) @@ -296,11 +337,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) -#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) #define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP) @@ -316,9 +355,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) -#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) -#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) #define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE) #define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN) #define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT) @@ -353,25 +389,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) -#ifdef CONFIG_X86_64 - -#undef cpu_has_vme -#define cpu_has_vme 0 - -#undef cpu_has_pae -#define cpu_has_pae ___BUG___ - -#undef cpu_has_k6_mtrr -#define cpu_has_k6_mtrr 0 - -#undef cpu_has_cyrix_arr -#define cpu_has_cyrix_arr 0 - -#undef cpu_has_centaur_mcr -#define cpu_has_centaur_mcr 0 - -#endif /* CONFIG_X86_64 */ - #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); extern bool __static_cpu_has_safe(u16 bit); diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4b528a970bd4..61fd18b83b6c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -97,11 +97,11 @@ extern void hw_breakpoint_restore(void); DECLARE_PER_CPU(int, debug_stack_usage); static inline void debug_stack_usage_inc(void) { - __get_cpu_var(debug_stack_usage)++; + __this_cpu_inc(debug_stack_usage); } static inline void debug_stack_usage_dec(void) { - __get_cpu_var(debug_stack_usage)--; + __this_cpu_dec(debug_stack_usage); } int is_debug_stack(unsigned long addr); void debug_stack_set_zero(void); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h new file mode 100644 index 000000000000..f226df064660 --- /dev/null +++ b/arch/x86/include/asm/disabled-features.h @@ -0,0 +1,45 @@ +#ifndef _ASM_X86_DISABLED_FEATURES_H +#define _ASM_X86_DISABLED_FEATURES_H + +/* These features, although they might be available in a CPU + * will not be used because the compile options to support + * them are not present. + * + * This code allows them to be checked and disabled at + * compile time without an explicit #ifdef. Use + * cpu_feature_enabled(). + */ + +#ifdef CONFIG_X86_INTEL_MPX +# define DISABLE_MPX 0 +#else +# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) +#endif + +#ifdef CONFIG_X86_64 +# define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) +# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) +# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) +# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +#else +# define DISABLE_VME 0 +# define DISABLE_K6_MTRR 0 +# define DISABLE_CYRIX_ARR 0 +# define DISABLE_CENTAUR_MCR 0 +#endif /* CONFIG_X86_64 */ + +/* + * Make sure to add features to the correct mask + */ +#define DISABLED_MASK0 (DISABLE_VME) +#define DISABLED_MASK1 0 +#define DISABLED_MASK2 0 +#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) +#define DISABLED_MASK4 0 +#define DISABLED_MASK5 0 +#define DISABLED_MASK6 0 +#define DISABLED_MASK7 0 +#define DISABLED_MASK8 0 +#define DISABLED_MASK9 (DISABLE_MPX) + +#endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h deleted file mode 100644 index b4b38bacb404..000000000000 --- a/arch/x86/include/asm/dma-contiguous.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef ASMX86_DMA_CONTIGUOUS_H -#define ASMX86_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ - -#include <linux/types.h> - -static inline void -dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } - -#endif -#endif diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 0bdb0c54d9a1..fe884e18fa6e 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -70,7 +70,7 @@ #define MAX_DMA_CHANNELS 8 /* 16MB ISA DMA zone */ -#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0ec241ede5a2..25bce45c6fc4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...); */ #define __efi_call_virt(f, args...) efi_call_virt(f, args) -extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, - u32 type, u64 attribute); +extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, + u32 type, u64 attribute); #endif /* CONFIG_X86_32 */ -extern int add_efi_memmap; extern struct efi_scratch efi_scratch; -extern void efi_set_executable(efi_memory_desc_t *md, bool executable); -extern int efi_memblock_x86_reserve_range(void); -extern void efi_call_phys_prelog(void); -extern void efi_call_phys_epilog(void); -extern void efi_unmap_memmap(void); -extern void efi_memory_uc(u64 addr, unsigned long size); +extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); +extern int __init efi_memblock_x86_reserve_range(void); +extern void __init efi_call_phys_prolog(void); +extern void __init efi_call_phys_epilog(void); +extern void __init efi_unmap_memmap(void); +extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); -extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); @@ -159,19 +158,33 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( } #endif /* CONFIG_EFI_MIXED */ + +/* arch specific definitions used by the stub code */ + +struct efi_config { + u64 image_handle; + u64 table; + u64 allocate_pool; + u64 allocate_pages; + u64 get_memory_map; + u64 free_pool; + u64 free_pages; + u64 locate_handle; + u64 handle_protocol; + u64 exit_boot_services; + u64 text_output; + efi_status_t (*call)(unsigned long, ...); + bool is64; +} __packed; + +__pure const struct efi_config *__efi_early(void); + +#define efi_call_early(f, ...) \ + __efi_early()->call(__efi_early()->f, __VA_ARGS__); + extern bool efi_reboot_required(void); #else -/* - * IF EFI is not configured, have the EFI calls return -ENOSYS. - */ -#define efi_call0(_f) (-ENOSYS) -#define efi_call1(_f, _a1) (-ENOSYS) -#define efi_call2(_f, _a1, _a2) (-ENOSYS) -#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS) -#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) -#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) -#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} static inline bool efi_reboot_required(void) { diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1a055c81d864..ca3347a9dab5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -160,8 +160,9 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS # error "The following code assumes __USER32_DS == __USER_DS" diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h index 2519d0679d99..c3dd5e71f439 100644 --- a/arch/x86/include/asm/fb.h +++ b/arch/x86/include/asm/fb.h @@ -8,8 +8,12 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, unsigned long off) { + unsigned long prot; + + prot = pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK; if (boot_cpu_data.x86 > 3) - pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; + pgprot_val(vma->vm_page_prot) = + prot | cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS); } extern int fb_is_primary_device(struct fb_info *info); diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index ffb1733ac91f..f80d70009ff8 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -69,7 +69,9 @@ enum fixed_addresses { #ifdef CONFIG_X86_32 FIX_HOLE, #else +#ifdef CONFIG_X86_VSYSCALL_EMULATION VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, +#endif #ifdef CONFIG_PARAVIRT_CLOCK PVCLOCK_FIXMAP_BEGIN, PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, @@ -136,9 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 412ececa00b9..e97622f57722 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -344,7 +344,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk) static inline void __thread_fpu_begin(struct task_struct *tsk) { - if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU)) + if (!use_eager_fpu()) clts(); __thread_set_has_fpu(tsk); } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index e1f7fecaa7d6..f45acad3c4b6 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -1,39 +1,6 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H -#ifdef __ASSEMBLY__ - - /* skip is set if the stack was already partially adjusted */ - .macro MCOUNT_SAVE_FRAME skip=0 - /* - * We add enough stack to save all regs. - */ - subq $(SS+8-\skip), %rsp - movq %rax, RAX(%rsp) - movq %rcx, RCX(%rsp) - movq %rdx, RDX(%rsp) - movq %rsi, RSI(%rsp) - movq %rdi, RDI(%rsp) - movq %r8, R8(%rsp) - movq %r9, R9(%rsp) - /* Move RIP to its proper location */ - movq SS+8(%rsp), %rdx - movq %rdx, RIP(%rsp) - .endm - - .macro MCOUNT_RESTORE_FRAME skip=0 - movq R9(%rsp), %r9 - movq R8(%rsp), %r8 - movq RDI(%rsp), %rdi - movq RSI(%rsp), %rsi - movq RDX(%rsp), %rdx - movq RCX(%rsp), %rcx - movq RAX(%rsp), %rax - addq $(SS+8-\skip), %rsp - .endm - -#endif - #ifdef CONFIG_FUNCTION_TRACER #ifdef CC_USING_FENTRY # define MCOUNT_ADDR ((long)(__fentry__)) diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h deleted file mode 100644 index e8c58f88b1d4..000000000000 --- a/arch/x86/include/asm/hash.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_X86_HASH_H -#define _ASM_X86_HASH_H - -struct fast_hash_ops; -extern void setup_arch_fast_hash(struct fast_hash_ops *ops); - -#endif /* _ASM_X86_HASH_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 302a323b3f67..04e9d023168f 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -38,17 +38,20 @@ extern unsigned long highstart_pfn, highend_pfn; /* * Ordering is: * - * FIXADDR_TOP - * fixed_addresses - * FIXADDR_START - * temp fixed addresses - * FIXADDR_BOOT_START - * Persistent kmap area - * PKMAP_BASE - * VMALLOC_END - * Vmalloc area - * VMALLOC_START - * high_memory + * high memory on: high_memory off: + * FIXADDR_TOP FIXADDR_TOP + * fixed addresses fixed addresses + * FIXADDR_START FIXADDR_START + * temp fixed addresses/persistent kmap area VMALLOC_END + * PKMAP_BASE temp fixed addresses/vmalloc area + * VMALLOC_END VMALLOC_START + * vmalloc area high_memory + * VMALLOC_START + * high_memory + * + * The temp fixed area is only used during boot for early_ioremap(), and + * it is unused when the ioremap() is functional. vmalloc/pkmap area become + * available after early boot so the temp fixed area is available for re-use. */ #define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4615906d83df..9662290e0b20 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void); #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi #endif /* CONFIG_TRACING */ -/* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) -extern unsigned long io_apic_irqs; - -extern void setup_IO_APIC(void); -extern void disable_IO_APIC(void); - -struct io_apic_irq_attr { - int ioapic; - int ioapic_pin; - int trigger; - int polarity; -}; - -static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, - int ioapic, int ioapic_pin, - int trigger, int polarity) -{ - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; -} - +#ifdef CONFIG_IRQ_REMAP /* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; @@ -131,14 +108,12 @@ struct irq_2_irte { u16 devid; /* Device ID for IRTE table */ u16 index; /* Index into IRTE table*/ }; +#endif /* CONFIG_IRQ_REMAP */ + +#ifdef CONFIG_X86_LOCAL_APIC +struct irq_data; -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ struct irq_cfg { - struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; u8 vector; @@ -150,18 +125,39 @@ struct irq_cfg { struct irq_2_irte irq_2_irte; }; #endif + union { +#ifdef CONFIG_X86_IO_APIC + struct { + struct list_head irq_2_pin; + }; +#endif + }; }; +extern struct irq_cfg *irq_cfg(unsigned int irq); +extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); +extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node); +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void clear_irq_vector(int irq, struct irq_cfg *cfg); +extern void setup_vector_irq(int cpu); +#ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); +extern void irq_complete_move(struct irq_cfg *cfg); +#else +static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void irq_complete_move(struct irq_cfg *c) { } +#endif -struct irq_data; -int __ioapic_set_affinity(struct irq_data *, const struct cpumask *, - unsigned int *dest_id); -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); -extern void setup_ioapic_dest(void); - -extern void enable_IO_APIC(void); +extern int apic_retrigger_irq(struct irq_data *data); +extern void apic_ack_edge(struct irq_data *data); +extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id); +#else /* CONFIG_X86_LOCAL_APIC */ +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +#endif /* CONFIG_X86_LOCAL_APIC */ /* Statistics */ extern atomic_t irq_err_count; @@ -185,7 +181,8 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif -extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR + - FIRST_EXTERNAL_VECTOR])(void); #ifdef CONFIG_TRACING #define trace_interrupt interrupt #endif @@ -195,17 +192,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void setup_vector_irq(int cpu); - -#ifdef CONFIG_X86_IO_APIC -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); -extern void __setup_vector_irq(int cpu); -#else -static inline void lock_vector_lock(void) {} -static inline void unlock_vector_lock(void) {} -static inline void __setup_vector_irq(int cpu) {} -#endif #endif /* !ASSEMBLY_ */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 48eb30a86062..47f29b1d1846 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -65,6 +65,7 @@ struct insn { unsigned char x86_64; const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ const insn_byte_t *next_byte; }; @@ -96,7 +97,7 @@ struct insn { #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ -extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); extern void insn_get_prefixes(struct insn *insn); extern void insn_get_opcode(struct insn *insn); extern void insn_get_modrm(struct insn *insn); @@ -115,12 +116,13 @@ static inline void insn_get_attribute(struct insn *insn) extern int insn_rip_relative(struct insn *insn); /* Init insn for kernel text */ -static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +static inline void kernel_insn_init(struct insn *insn, + const void *kaddr, int buf_len) { #ifdef CONFIG_X86_64 - insn_init(insn, kaddr, 1); + insn_init(insn, kaddr, buf_len, 1); #else /* CONFIG_X86_32 */ - insn_init(insn, kaddr, 0); + insn_init(insn, kaddr, buf_len, 0); #endif } diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index b8237d8a1e0c..34a5b93704d3 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -74,6 +74,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define __raw_readw __readw #define __raw_readl __readl +#define writeb_relaxed(v, a) __writeb(v, a) +#define writew_relaxed(v, a) __writew(v, a) +#define writel_relaxed(v, a) __writel(v, a) #define __raw_writeb __writeb #define __raw_writew __writew #define __raw_writel __writel @@ -86,6 +89,7 @@ build_mmio_read(readq, "q", unsigned long, "=r", :"memory") build_mmio_write(writeq, "q", unsigned long, "r", :"memory") #define readq_relaxed(a) readq(a) +#define writeq_relaxed(v, a) writeq(v, a) #define __raw_readq(a) readq(a) #define __raw_writeq(val, addr) writeq(val, addr) @@ -310,11 +314,11 @@ BUILDIO(b, b, char) BUILDIO(w, w, short) BUILDIO(l, , int) -extern void *xlate_dev_mem_ptr(unsigned long phys); -extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); +extern void *xlate_dev_mem_ptr(phys_addr_t phys); +extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, - unsigned long prot_val); + enum page_cache_mode pcm); extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1733ab49ac5e..bf006cce9418 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -132,6 +132,10 @@ extern int noioapicquirk; /* -1 if "noapic" boot option passed */ extern int noioapicreroute; +extern unsigned long io_apic_irqs; + +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs)) + /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. @@ -139,18 +143,15 @@ extern int noioapicreroute; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -struct io_apic_irq_attr; struct irq_cfg; extern void ioapic_insert_resources(void); +extern int arch_early_ioapic_init(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); -extern void native_compose_msi_msg(struct pci_dev *pdev, - unsigned int irq, unsigned int dest, - struct msi_msg *msg, u8 hpet_id); extern void native_eoi_ioapic_pin(int apic, int pin, int vector); extern int save_ioapic_entries(void); @@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); extern void setup_ioapic_ids_from_mpc_nocheck(void); +struct io_apic_irq_attr { + int ioapic; + int ioapic_pin; + int trigger; + int polarity; +}; + enum ioapic_domain_type { IOAPIC_DOMAIN_INVALID, IOAPIC_DOMAIN_LEGACY, @@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi); extern u32 mp_pin_to_gsi(int ioapic, int pin); extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags); extern void mp_unmap_irq(int irq); -extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base, - struct ioapic_domain_cfg *cfg); +extern int mp_register_ioapic(int id, u32 address, u32 gsi_base, + struct ioapic_domain_cfg *cfg); +extern int mp_unregister_ioapic(u32 gsi_base); +extern int mp_ioapic_registered(u32 gsi_base); extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq); extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq); @@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void io_apic_eoi(unsigned int apic, unsigned int vector); -extern bool mp_should_keep_irq(struct device *dev); - +extern void setup_IO_APIC(void); +extern void enable_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void setup_ioapic_dest(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); +extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ +#define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop static inline void ioapic_insert_resources(void) { } +static inline int arch_early_ioapic_init(void) { return 0; } +static inline void print_IO_APICs(void) {} #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; } static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; } static inline void mp_unmap_irq(int irq) { } -static inline bool mp_should_keep_irq(struct device *dev) { return 1; } static inline int save_ioapic_entries(void) { @@ -262,7 +278,6 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL -#define native_compose_msi_msg NULL #define native_eoi_ioapic_pin NULL #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5702d7e3111d..666c89ec4bd7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,6 +126,12 @@ #define NR_VECTORS 256 +#ifdef CONFIG_X86_LOCAL_APIC +#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#else +#define FIRST_SYSTEM_VECTOR NR_VECTORS +#endif + #define FPU_IRQ 13 #define FIRST_VM86_IRQ 3 diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h new file mode 100644 index 000000000000..78162f8e248b --- /dev/null +++ b/arch/x86/include/asm/irq_work.h @@ -0,0 +1,11 @@ +#ifndef _ASM_IRQ_WORK_H +#define _ASM_IRQ_WORK_H + +#include <asm/processor.h> + +static inline bool arch_irq_work_has_interrupt(void) +{ + return cpu_has_apic; +} + +#endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 53cdfb2857ab..4421b5da409d 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,7 +27,6 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT -#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..d89c6b828c96 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,7 +33,7 @@ #define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_USER_MEM_SLOTS 125 +#define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) @@ -51,6 +51,7 @@ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL +#define CR3_PCID_INVD (1UL << 63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -99,10 +100,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, @@ -266,7 +263,8 @@ struct kvm_mmu { struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); @@ -364,6 +362,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; bool tpr_access_reporting; + u64 ia32_xss; /* * Paging state of the vcpu @@ -481,6 +480,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; @@ -544,7 +544,7 @@ struct kvm_apic_map { struct rcu_head rcu; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask; + u32 cid_shift, cid_mask, lid_mask, broadcast; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; @@ -576,11 +576,10 @@ struct kvm_arch { struct kvm_apic_map *apic_map; unsigned int tss_addr; - struct page *apic_access_page; + bool apic_access_page_done; gpa_t wall_clock; - struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; @@ -605,6 +604,9 @@ struct kvm_arch { struct kvm_xen_hvm_config xen_hvm_config; + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; @@ -662,11 +664,21 @@ struct msr_data { u64 data; }; +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ @@ -710,7 +722,6 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); @@ -740,6 +751,7 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -770,8 +782,11 @@ struct kvm_x86_ops { enum x86_intercept_stage stage); void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); + bool (*xsaves_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); }; struct kvm_arch_async_pf { @@ -819,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); @@ -864,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -895,8 +923,8 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) @@ -917,7 +945,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); int fx_init(struct kvm_vcpu *vcpu); -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); @@ -926,7 +953,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, @@ -946,7 +974,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { return gpa; } @@ -990,6 +1019,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } +static inline u64 get_canonical(u64 la) +{ + return ((int64_t)la << 16) >> 16; +} + +static inline bool is_noncanonical_address(u64 la) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la) != la; +#else + return false; +#endif +} + #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) @@ -1037,7 +1080,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); @@ -1046,10 +1089,14 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include <asm/processor.h> +#include <asm/alternative.h> #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 958b90f761e5..51b26e895933 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -34,6 +34,10 @@ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ +/* AMD-specific bits */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ +#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ + /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is * bits 15:0. But bit 12 is the 'F' bit, defined for corrected @@ -78,7 +82,6 @@ /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 64dc362506b7..201b520521ed 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -78,6 +78,7 @@ static inline void __exit exit_amd_microcode(void) {} extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); extern int __init save_microcode_in_initrd(void); +void reload_early_microcode(void); #else static inline void __init load_ucode_bsp(void) {} static inline void load_ucode_ap(void) {} @@ -85,6 +86,7 @@ static inline int __init save_microcode_in_initrd(void) { return 0; } +static inline void reload_early_microcode(void) {} #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index b7b10b82d3e5..af935397e053 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -59,7 +59,7 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, extern int __apply_microcode_amd(struct microcode_amd *mc_amd); extern int apply_microcode_amd(int cpu); -extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); +extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); #define PATCH_MAX_SIZE PAGE_SIZE extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -68,10 +68,12 @@ extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; extern void __init load_ucode_amd_bsp(void); extern void load_ucode_amd_ap(void); extern int __init save_microcode_in_initrd_amd(void); +void reload_ucode_amd(void); #else static inline void __init load_ucode_amd_bsp(void) {} static inline void load_ucode_amd_ap(void) {} static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; } +void reload_ucode_amd(void) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 9067166409bf..dd4c20043ce7 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -43,7 +43,7 @@ struct extended_sigtable { #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ + (((struct microcode_intel *)mc)->hdr.datasize ? \ ((struct microcode_intel *)mc)->hdr.totalsize : \ DEFAULT_UCODE_TOTALSIZE) @@ -68,11 +68,13 @@ extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); extern void show_ucode_info_early(void); extern int __init save_microcode_in_initrd_intel(void); +void reload_ucode_intel(void); #else static inline __init void load_ucode_intel_bsp(void) {} static inline void load_ucode_intel_ap(void) {} static inline void show_ucode_info_early(void) {} static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } +static inline void reload_ucode_intel(void) {} #endif #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 166af2a8e865..40269a2bf6f9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -10,9 +10,8 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> +#include <asm/mpx.h> #ifndef CONFIG_PARAVIRT -#include <asm-generic/mm_hooks.h> - static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { @@ -53,7 +52,16 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); - /* Load the LDT, if the LDT is different: */ + /* + * Load the LDT, if the LDT is different. + * + * It's possible leave_mm(prev) has been called. If so, + * then prev->context.ldt could be out of sync with the + * LDT descriptor or the LDT register. This can only happen + * if prev->context.ldt is non-null, since we never free + * an LDT. But LDTs can't be shared across mms, so + * prev->context.ldt won't be equal to next->context.ldt. + */ if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT_nolock(&next->context); } @@ -102,4 +110,27 @@ do { \ } while (0) #endif +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ + paravirt_arch_dup_mmap(oldmm, mm); +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ + paravirt_arch_exit_mmap(mm); +} + +static inline void arch_bprm_mm_init(struct mm_struct *mm, + struct vm_area_struct *vma) +{ + mpx_mm_init(mm); +} + +static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + mpx_notify_unmap(mm, vma, start, end); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h new file mode 100644 index 000000000000..a952a13d59a7 --- /dev/null +++ b/arch/x86/include/asm/mpx.h @@ -0,0 +1,103 @@ +#ifndef _ASM_X86_MPX_H +#define _ASM_X86_MPX_H + +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/insn.h> + +/* + * NULL is theoretically a valid place to put the bounds + * directory, so point this at an invalid address. + */ +#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) +#define MPX_BNDCFG_ENABLE_FLAG 0x1 +#define MPX_BD_ENTRY_VALID_FLAG 0x1 + +#ifdef CONFIG_X86_64 + +/* upper 28 bits [47:20] of the virtual address in 64-bit used to + * index into bounds directory (BD). + */ +#define MPX_BD_ENTRY_OFFSET 28 +#define MPX_BD_ENTRY_SHIFT 3 +/* bits [19:3] of the virtual address in 64-bit used to index into + * bounds table (BT). + */ +#define MPX_BT_ENTRY_OFFSET 17 +#define MPX_BT_ENTRY_SHIFT 5 +#define MPX_IGN_BITS 3 +#define MPX_BD_ENTRY_TAIL 3 + +#else + +#define MPX_BD_ENTRY_OFFSET 20 +#define MPX_BD_ENTRY_SHIFT 2 +#define MPX_BT_ENTRY_OFFSET 10 +#define MPX_BT_ENTRY_SHIFT 4 +#define MPX_IGN_BITS 2 +#define MPX_BD_ENTRY_TAIL 2 + +#endif + +#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) +#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) + +#define MPX_BNDSTA_TAIL 2 +#define MPX_BNDCFG_TAIL 12 +#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) +#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) + +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) +#define MPX_BNDSTA_ERROR_CODE 0x3 + +#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) +#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) +#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ + MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) +#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ + MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) + +#ifdef CONFIG_X86_INTEL_MPX +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, + struct xsave_struct *xsave_buf); +int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) +{ + return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); +} +static inline void mpx_mm_init(struct mm_struct *mm) +{ + /* + * NULL is theoretically a valid place to put the bounds + * directory, so point this at an invalid address. + */ + mm->bd_addr = MPX_INVALID_BOUNDS_DIR; +} +void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#else +static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, + struct xsave_struct *xsave_buf) +{ + return NULL; +} +static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +{ + return -EINVAL; +} +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) +{ + return 0; +} +static inline void mpx_mm_init(struct mm_struct *mm) +{ +} +static inline void mpx_notify_unmap(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} +#endif /* CONFIG_X86_INTEL_MPX */ + +#endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 4064acae625d..01b493e5a99b 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -9,7 +9,6 @@ #ifdef CONFIG_NUMA #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) /* * Too small node sizes may confuse the VM badly. Usually they diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17df4224..3a52ee0e726d 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -20,7 +20,6 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 #define NMI_STACK 0 #define DEBUG_STACK 0 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index f408caf73430..b3bebf9e5746 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -39,6 +39,8 @@ void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ -#define __HAVE_ARCH_GATE_AREA 1 +#ifdef CONFIG_X86_VSYSCALL_EMULATION +# define __HAVE_ARCH_GATE_AREA 1 +#endif #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 678205195ae1..75450b2c7be4 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,11 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 2 +#define DEBUG_STACK 3 +#define MCE_STACK 4 +#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cd6e1610e29e..32444ae939ca 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -330,13 +330,13 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); } -static inline void arch_exit_mmap(struct mm_struct *mm) +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); } @@ -986,5 +986,15 @@ extern void default_banner(void); #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop +#ifndef __ASSEMBLY__ +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} + +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ +} +#endif /* __ASSEMBLY__ */ #endif /* !CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index e2c1668dde7a..91bc4ba95f91 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -11,16 +11,17 @@ static const int pat_enabled; #endif extern void pat_init(void); +void pat_init_cache_modes(void); extern int reserve_memtype(u64 start, u64 end, - unsigned long req_type, unsigned long *ret_type); + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); extern int free_memtype(u64 start, u64 end); extern int kernel_map_sync_memtype(u64 base, unsigned long size, - unsigned long flag); + enum page_cache_mode pcm); int io_reserve_memtype(resource_size_t start, resource_size_t end, - unsigned long *type); + enum page_cache_mode *pcm); void io_free_memtype(resource_size_t start, resource_size_t end); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 0892ea0e683f..4e370a5d8117 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void); #ifdef CONFIG_PCI_MSI /* implemented in arch/x86/kernel/apic/io_apic. */ struct msi_desc; +void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, u8 hpet_id); int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev); int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, unsigned int irq_base, unsigned int irq_offset); #else +#define native_compose_msi_msg NULL #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index fa1195dae425..164e3f8d3c3d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index fd472181a1d0..e0ba66ca68c6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -64,7 +64,7 @@ #define __percpu_prefix "" #endif -#define __percpu_arg(x) __percpu_prefix "%P" #x +#define __percpu_arg(x) __percpu_prefix "%" #x /* * Initialized pointers to per-cpu variables needed for the boot @@ -179,29 +179,58 @@ do { \ } \ } while (0) -#define percpu_from_op(op, var, constraint) \ +#define percpu_from_op(op, var) \ ({ \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ : "=q" (pfo_ret__) \ - : constraint); \ + : "m" (var)); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ - : constraint); \ + : "m" (var)); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ - : constraint); \ + : "m" (var)); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ - : constraint); \ + : "m" (var)); \ + break; \ + default: __bad_percpu_size(); \ + } \ + pfo_ret__; \ +}) + +#define percpu_stable_op(op, var) \ +({ \ + typeof(var) pfo_ret__; \ + switch (sizeof(var)) { \ + case 1: \ + asm(op "b "__percpu_arg(P1)",%0" \ + : "=q" (pfo_ret__) \ + : "p" (&(var))); \ + break; \ + case 2: \ + asm(op "w "__percpu_arg(P1)",%0" \ + : "=r" (pfo_ret__) \ + : "p" (&(var))); \ + break; \ + case 4: \ + asm(op "l "__percpu_arg(P1)",%0" \ + : "=r" (pfo_ret__) \ + : "p" (&(var))); \ + break; \ + case 8: \ + asm(op "q "__percpu_arg(P1)",%0" \ + : "=r" (pfo_ret__) \ + : "p" (&(var))); \ break; \ default: __bad_percpu_size(); \ } \ @@ -359,11 +388,11 @@ do { \ * per-thread variables implemented as per-cpu variables and thus * stable for the duration of the respective task. */ -#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) +#define this_cpu_read_stable(var) percpu_stable_op("mov", var) -#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) +#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) +#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) #define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) @@ -381,9 +410,9 @@ do { \ #define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) #define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) -#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) +#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) +#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) @@ -435,7 +464,7 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) #define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) #define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) @@ -444,7 +473,7 @@ do { \ #define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) @@ -522,7 +551,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr, #include <asm-generic/percpu.h> /* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df45d2f2..dc0f6ed35b08 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS \ + (ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK | \ + ARCH_PERFMON_EVENTSEL_ANY | \ + ARCH_PERFMON_EVENTSEL_PIN_CONTROL | \ + HSW_IN_TX | \ + HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) @@ -169,6 +177,9 @@ struct x86_pmu_capability { #define IBS_CAPS_BRNTRGT (1U<<5) #define IBS_CAPS_OPCNTEXT (1U<<6) #define IBS_CAPS_RIPINVALIDCHK (1U<<7) +#define IBS_CAPS_OPBRNFUSE (1U<<8) +#define IBS_CAPS_FETCHCTLEXTD (1U<<9) +#define IBS_CAPS_OPDATA4 (1U<<10) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index aa97a070f09f..e8a5454acc99 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -9,9 +9,10 @@ /* * Macro to mark a page protection value as UC- */ -#define pgprot_noncached(prot) \ - ((boot_cpu_data.x86 > 3) \ - ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ +#define pgprot_noncached(prot) \ + ((boot_cpu_data.x86 > 3) \ + ? (__pgprot(pgprot_val(prot) | \ + cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) #ifndef __ASSEMBLY__ @@ -99,6 +100,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline int pmd_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_DIRTY; +} + static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -404,8 +410,8 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, - unsigned long flags, - unsigned long new_flags) + enum page_cache_mode pcm, + enum page_cache_mode new_pcm) { /* * PAT type is always WB for untracked ranges, so no need to check. @@ -419,10 +425,10 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back */ - if ((flags == _PAGE_CACHE_UC_MINUS && - new_flags == _PAGE_CACHE_WB) || - (flags == _PAGE_CACHE_WC && - new_flags == _PAGE_CACHE_WB)) { + if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && + new_pcm == _PAGE_CACHE_MODE_WB) || + (pcm == _PAGE_CACHE_MODE_WC && + new_pcm == _PAGE_CACHE_MODE_WB)) { return 0; } diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 9ee322103c6d..b6c0b404898a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,9 +32,6 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); -extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); - - /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index ed5903be26fe..9fb2f2bc8245 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -37,7 +37,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ +#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ & PMD_MASK) #ifdef CONFIG_HIGHMEM diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3874693c0e53..4572b2f30237 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -116,7 +116,8 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -extern void sync_global_pgds(unsigned long start, unsigned long end); +extern void sync_global_pgds(unsigned long start, unsigned long end, + int removed); /* * Conversion functions: convert a page and protection to a page entry, diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 7166e25ecb57..602b6028c5b6 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -63,6 +63,8 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f216963760e5..25bcd4a89517 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -23,7 +23,6 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */ -#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */ #define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ @@ -52,7 +51,7 @@ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1) -#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) @@ -129,11 +128,28 @@ _PAGE_SOFT_DIRTY | _PAGE_NUMA) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA) -#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) -#define _PAGE_CACHE_WB (0) -#define _PAGE_CACHE_WC (_PAGE_PWT) -#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) -#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) +/* + * The cache modes defined here are used to translate between pure SW usage + * and the HW defined cache mode bits and/or PAT entries. + * + * The resulting bits for PWT, PCD and PAT should be chosen in a way + * to have the WB mode at index 0 (all bits clear). This is the default + * right now and likely would break too much if changed. + */ +#ifndef __ASSEMBLY__ +enum page_cache_mode { + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_UC_MINUS = 2, + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + _PAGE_CACHE_MODE_NUM = 8 +}; +#endif + +#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ @@ -157,41 +173,27 @@ #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) -#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) -#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO (__PAGE_KERNEL) +#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) -#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) -#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) -#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) -#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR) -#define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) -#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) -#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) /* xwr */ #define __P000 PAGE_NONE @@ -325,9 +327,76 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } +#ifdef CONFIG_NUMA_BALANCING +/* Set of bits that distinguishes present, prot_none and numa ptes */ +#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT) +static inline pteval_t ptenuma_flags(pte_t pte) +{ + return pte_flags(pte) & _PAGE_NUMA_MASK; +} + +static inline pmdval_t pmdnuma_flags(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_NUMA_MASK; +} +#endif /* CONFIG_NUMA_BALANCING */ + #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) +extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; +extern uint8_t __pte2cachemode_tbl[8]; + +#define __pte2cm_idx(cb) \ + ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ + (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ + (((cb) >> _PAGE_BIT_PWT) & 1)) +#define __cm_idx2pte(i) \ + ((((i) & 4) << (_PAGE_BIT_PAT - 2)) | \ + (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ + (((i) & 1) << _PAGE_BIT_PWT)) + +static inline unsigned long cachemode2protval(enum page_cache_mode pcm) +{ + if (likely(pcm == 0)) + return 0; + return __cachemode2pte_tbl[pcm]; +} +static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) +{ + return __pgprot(cachemode2protval(pcm)); +} +static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) +{ + unsigned long masked; + + masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; + if (likely(masked == 0)) + return 0; + return __pte2cachemode_tbl[__pte2cm_idx(masked)]; +} +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) +{ + pgprot_t new; + unsigned long val; + + val = pgprot_val(pgprot); + pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); + return new; +} +static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) +{ + pgprot_t new; + unsigned long val; + + val = pgprot_val(pgprot); + pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); + return new; +} + typedef struct page *pgtable_t; @@ -383,6 +452,7 @@ static inline void update_page_count(int level, unsigned long pages) { } extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, unsigned int *level); +extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 0a4e140315b6..7249e6d0902d 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -16,6 +16,9 @@ #include <linux/sfi.h> +#define MAX_NUM_STREAMS_MRFLD 25 +#define MAX_NUM_STREAMS MAX_NUM_STREAMS_MRFLD + enum sst_audio_task_id_mrfld { SST_TASK_ID_NONE = 0, SST_TASK_ID_SBA = 1, @@ -73,6 +76,65 @@ struct sst_platform_data { unsigned int strm_map_size; }; +struct sst_info { + u32 iram_start; + u32 iram_end; + bool iram_use; + u32 dram_start; + u32 dram_end; + bool dram_use; + u32 imr_start; + u32 imr_end; + bool imr_use; + u32 mailbox_start; + bool use_elf; + bool lpe_viewpt_rqd; + unsigned int max_streams; + u32 dma_max_len; + u8 num_probes; +}; + +struct sst_lib_dnld_info { + unsigned int mod_base; + unsigned int mod_end; + unsigned int mod_table_offset; + unsigned int mod_table_size; + bool mod_ddr_dnld; +}; + +struct sst_res_info { + unsigned int shim_offset; + unsigned int shim_size; + unsigned int shim_phy_addr; + unsigned int ssp0_offset; + unsigned int ssp0_size; + unsigned int dma0_offset; + unsigned int dma0_size; + unsigned int dma1_offset; + unsigned int dma1_size; + unsigned int iram_offset; + unsigned int iram_size; + unsigned int dram_offset; + unsigned int dram_size; + unsigned int mbox_offset; + unsigned int mbox_size; + unsigned int acpi_lpe_res_index; + unsigned int acpi_ddr_index; + unsigned int acpi_ipc_irq_index; +}; + +struct sst_ipc_info { + int ipc_offset; + unsigned int mbox_recv_off; +}; + +struct sst_platform_info { + const struct sst_info *probe_data; + const struct sst_ipc_info *ipc_info; + const struct sst_res_info *res_info; + const struct sst_lib_dnld_info *lib_info; + const char *platform; +}; int add_sst_platform_device(void); #endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 7024c12f7bfe..8f3271842533 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -30,9 +30,6 @@ static __always_inline void preempt_count_set(int pc) /* * must be macros to avoid header recursion hell */ -#define task_preempt_count(p) \ - (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) - #define init_task_preempt_count(p) do { \ task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ } while (0) @@ -105,6 +102,7 @@ static __always_inline bool should_resched(void) # ifdef CONFIG_CONTEXT_TRACKING extern asmlinkage void ___preempt_schedule_context(void); # define __preempt_schedule_context() asm ("call ___preempt_schedule_context") + extern asmlinkage void preempt_schedule_context(void); # endif #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index eb71ec794732..a092a0cce0b7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -127,7 +127,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); +}; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 @@ -151,7 +151,7 @@ extern __u32 cpu_caps_cleared[NCAPINTS]; extern __u32 cpu_caps_set[NCAPINTS]; #ifdef CONFIG_SMP -DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) #else #define cpu_info boot_cpu_data @@ -374,13 +374,14 @@ struct lwp_struct { u8 reserved[128]; }; -struct bndregs_struct { - u64 bndregs[8]; +struct bndreg { + u64 lower_bound; + u64 upper_bound; } __packed; -struct bndcsr_struct { - u64 cfg_reg_u; - u64 status_reg; +struct bndcsr { + u64 bndcfgu; + u64 bndstatus; } __packed; struct xsave_hdr_struct { @@ -394,8 +395,8 @@ struct xsave_struct { struct xsave_hdr_struct xsave_hdr; struct ymmh_struct ymmh; struct lwp_struct lwp; - struct bndregs_struct bndregs; - struct bndcsr_struct bndcsr; + struct bndreg bndreg[4]; + struct bndcsr bndcsr; /* new processor state extensions will go here */ } __attribute__ ((packed, aligned (64))); @@ -893,7 +894,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #else /* - * User space process size. 47bits minus one guard page. + * User space process size. 47bits minus one guard page. The guard + * page is necessary on Intel CPUs: if a SYSCALL instruction is at + * the highest possible canonical userspace address, then that + * syscall will enter the kernel with a non-canonical return + * address, and SYSRET will explode dangerously. We avoid this + * particular problem by preventing anything from being mapped + * at the maximum canonical address. */ #define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) @@ -953,6 +960,24 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +/* Register/unregister a process' MPX related resource */ +#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) +#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) + +#ifdef CONFIG_X86_INTEL_MPX +extern int mpx_enable_management(struct task_struct *tsk); +extern int mpx_disable_management(struct task_struct *tsk); +#else +static inline int mpx_enable_management(struct task_struct *tsk) +{ + return -EINVAL; +} +static inline int mpx_disable_management(struct task_struct *tsk) +{ + return -EINVAL; +} +#endif /* CONFIG_X86_INTEL_MPX */ + extern u16 amd_get_nb_id(int cpu); static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6205f0c434db..86fc2bb82287 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -75,6 +75,11 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); + +extern unsigned long syscall_trace_enter_phase1(struct pt_regs *, u32 arch); +extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch, + unsigned long phase1_result); + extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h deleted file mode 100644 index a5370a03d90c..000000000000 --- a/arch/x86/include/asm/rwlock.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _ASM_X86_RWLOCK_H -#define _ASM_X86_RWLOCK_H - -#include <asm/asm.h> - -#if CONFIG_NR_CPUS <= 2048 - -#ifndef __ASSEMBLY__ -typedef union { - s32 lock; - s32 write; -} arch_rwlock_t; -#endif - -#define RW_LOCK_BIAS 0x00100000 -#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##l) -#define READ_LOCK_ATOMIC(n) atomic_##n -#define WRITE_LOCK_ADD(n) __ASM_FORM_COMMA(addl n) -#define WRITE_LOCK_SUB(n) __ASM_FORM_COMMA(subl n) -#define WRITE_LOCK_CMP RW_LOCK_BIAS - -#else /* CONFIG_NR_CPUS > 2048 */ - -#include <linux/const.h> - -#ifndef __ASSEMBLY__ -typedef union { - s64 lock; - struct { - u32 read; - s32 write; - }; -} arch_rwlock_t; -#endif - -#define RW_LOCK_BIAS (_AC(1,L) << 32) -#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q) -#define READ_LOCK_ATOMIC(n) atomic64_##n -#define WRITE_LOCK_ADD(n) __ASM_FORM(incl) -#define WRITE_LOCK_SUB(n) __ASM_FORM(decl) -#define WRITE_LOCK_CMP 1 - -#endif /* CONFIG_NR_CPUS */ - -#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } - -/* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ - -#endif /* _ASM_X86_RWLOCK_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 6f1c3a8a33ab..db257a58571f 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -23,6 +23,15 @@ #define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) +#define SEGMENT_RPL_MASK 0x3 /* + * Bottom two bits of selector give the ring + * privilege level + */ +#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */ +#define USER_RPL 0x3 /* User mode is privilege level 3 */ +#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */ +#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */ + #ifdef CONFIG_X86_32 /* * The layout of the per-CPU GDT under Linux: @@ -125,16 +134,6 @@ #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ -/* Bottom two bits of selector give the ring privilege level */ -#define SEGMENT_RPL_MASK 0x3 -/* Bit 2 is table indicator (LDT/GDT) */ -#define SEGMENT_TI_MASK 0x4 - -/* User mode is privilege level 3 */ -#define USER_RPL 0x3 -/* LDT segment has TI set, GDT has it cleared */ -#define SEGMENT_LDT 0x4 -#define SEGMENT_GDT 0x0 /* * Matching rules for certain types of segments. @@ -192,17 +191,6 @@ #define get_kernel_rpl() 0 #endif -/* User mode is privilege level 3 */ -#define USER_RPL 0x3 -/* LDT segment has TI set, GDT has it cleared */ -#define SEGMENT_LDT 0x4 -#define SEGMENT_GDT 0x0 - -/* Bottom two bits of selector give the ring privilege level */ -#define SEGMENT_RPL_MASK 0x3 -/* Bit 2 is table indicator (LDT/GDT) */ -#define SEGMENT_TI_MASK 0x4 - #define IDT_ENTRIES 256 #define NUM_EXCEPTION_VECTORS 32 /* Bitmask of exception vectors which push an error code on the stack */ diff --git a/arch/x86/include/asm/serial.h b/arch/x86/include/asm/serial.h index 628c801535ea..460b84f64556 100644 --- a/arch/x86/include/asm/serial.h +++ b/arch/x86/include/asm/serial.h @@ -6,24 +6,24 @@ * * It'd be nice if someone built a serial card with a 24.576 MHz * clock, since the 16550A is capable of handling a top speed of 1.5 - * megabits/second; but this requires the faster clock. + * megabits/second; but this requires a faster clock. */ -#define BASE_BAUD ( 1843200 / 16 ) +#define BASE_BAUD (1843200/16) /* Standard COM flags (except for COM4, because of the 8514 problem) */ #ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) -#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | ASYNC_AUTO_IRQ) #else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) -#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +# define STD_COMX_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | 0 ) +# define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | 0 | 0 ) #endif -#define SERIAL_PORT_DFNS \ - /* UART CLK PORT IRQ FLAGS */ \ - { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ - { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ - { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ - { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ +#define SERIAL_PORT_DFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { .uart = 0, BASE_BAUD, 0x3F8, 4, STD_COMX_FLAGS }, /* ttyS0 */ \ + { .uart = 0, BASE_BAUD, 0x2F8, 3, STD_COMX_FLAGS }, /* ttyS1 */ \ + { .uart = 0, BASE_BAUD, 0x3E8, 4, STD_COMX_FLAGS }, /* ttyS2 */ \ + { .uart = 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ #endif /* _ASM_X86_SERIAL_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 8cd27e08e23c..8cd1cc3bc835 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -150,6 +150,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) } void cpu_disable_common(void); +void cpu_die_common(unsigned int cpu); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus); diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 54f1c8068c02..625660f8a2fc 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) unsigned count = SPIN_THRESHOLD; do { - if (ACCESS_ONCE(lock->tickets.head) == inc.tail) + if (READ_ONCE(lock->tickets.head) == inc.tail) goto out; cpu_relax(); } while (--count); @@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { arch_spinlock_t old, new; - old.tickets = ACCESS_ONCE(lock->tickets); + old.tickets = READ_ONCE(lock->tickets); if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) return 0; @@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return tmp.tail != tmp.head; } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } @@ -183,11 +183,22 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) + __ticket_t head = ACCESS_ONCE(lock->tickets.head); + + for (;;) { + struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + /* + * We need to check "unlocked" in a loop, tmp.head == head + * can be false positive because of overflow. + */ + if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) || + tmp.head != head) + break; + cpu_relax(); + } } -#ifndef CONFIG_QUEUE_RWLOCK /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -198,91 +209,15 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) * irq-safe write-lock, but readers can get non-irqsafe * read-locks. * - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - */ - -/** - * read_can_lock - would read_trylock() succeed? - * @lock: the rwlock in question. - */ -static inline int arch_read_can_lock(arch_rwlock_t *lock) -{ - return lock->lock > 0; -} - -/** - * write_can_lock - would write_trylock() succeed? - * @lock: the rwlock in question. + * On x86, we implement read-write locks using the generic qrwlock with + * x86 specific optimization. */ -static inline int arch_write_can_lock(arch_rwlock_t *lock) -{ - return lock->write == WRITE_LOCK_CMP; -} -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" - "jns 1f\n" - "call __read_lock_failed\n\t" - "1:\n" - ::LOCK_PTR_REG (rw) : "memory"); -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" - "jz 1f\n" - "call __write_lock_failed\n\t" - "1:\n" - ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS) - : "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *lock) -{ - READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock; - - if (READ_LOCK_ATOMIC(dec_return)(count) >= 0) - return 1; - READ_LOCK_ATOMIC(inc)(count); - return 0; -} - -static inline int arch_write_trylock(arch_rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)&lock->write; - - if (atomic_sub_and_test(WRITE_LOCK_CMP, count)) - return 1; - atomic_add(WRITE_LOCK_CMP, count); - return 0; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" - :"+m" (rw->lock) : : "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" - : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); -} -#else #include <asm/qrwlock.h> -#endif /* CONFIG_QUEUE_RWLOCK */ #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#undef READ_LOCK_SIZE -#undef READ_LOCK_ATOMIC -#undef WRITE_LOCK_ADD -#undef WRITE_LOCK_SUB -#undef WRITE_LOCK_CMP - #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 73c4c007200f..5f9d7572d82b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,10 +34,6 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } -#ifdef CONFIG_QUEUE_RWLOCK #include <asm-generic/qrwlock_types.h> -#else -#include <asm/rwlock.h> -#endif #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index d7f3b3b78ac3..751bf4b7bf11 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,12 +79,12 @@ do { \ #else /* CONFIG_X86_32 */ /* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" +#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" #define __EXTRA_CLOBBER \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15" + "r12", "r13", "r14", "r15", "flags" #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ @@ -100,7 +100,11 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ -/* Save restore flags to clear handle leaking NT */ +/* + * There is no need to save or restore flags, because flags are always + * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL + * has no effect. + */ #define switch_to(prev, next, last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 854053889d4d..547e344a6dc6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -141,7 +141,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY) + _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bc8352e7010a..707adc6549d8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void); #ifdef CONFIG_TRACING asmlinkage void trace_page_fault(void); +#define trace_stack_segment stack_segment #define trace_divide_error divide_error #define trace_bounds bounds #define trace_invalid_op invalid_op diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 2d60a7813dfe..fc808b83fccb 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -33,8 +33,8 @@ * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). */ -#define MAX_CPUS_PER_UVHUB 64 -#define MAX_CPUS_PER_SOCKET 32 +#define MAX_CPUS_PER_UVHUB 128 +#define MAX_CPUS_PER_SOCKET 64 #define ADP_SZ 64 /* hardware-provided max. */ #define UV_CPUS_PER_AS 32 /* hardware-provided max. */ #define ITEMS_PER_DESC 8 diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index c63e925fd6b7..a00ad8f2a657 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -164,7 +164,7 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* @@ -601,16 +601,16 @@ struct uv_hub_nmi_s { struct uv_cpu_nmi_s { struct uv_hub_nmi_s *hub; - atomic_t state; - atomic_t pinging; + int state; + int pinging; int queries; int pings; }; -DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); -#define uv_cpu_nmi (__get_cpu_var(__uv_cpu_nmi)) +DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); + #define uv_hub_nmi (uv_cpu_nmi.hub) -#define uv_cpu_nmi_per(cpu) (per_cpu(__uv_cpu_nmi, cpu)) +#define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu)) #define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) /* uv_cpu_nmi_states */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3c3366c2e37f..f556c4843aa1 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -70,4 +70,25 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s) ++s->seq; } +#ifdef CONFIG_X86_64 + +#define VGETCPU_CPU_MASK 0xfff + +static inline unsigned int __getcpu(void) +{ + unsigned int p; + + /* + * Load per CPU data from GDT. LSL is faster than RDTSCP and + * works on all CPUs. This is volatile so that it orders + * correctly wrt barrier() and to keep gcc from cleverly + * hoisting it out of the calling function. + */ + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + + return p; +} + +#endif /* CONFIG_X86_64 */ + #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index bcbfade26d8d..45afaee9555c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_XSAVES 0x00100000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -159,6 +160,8 @@ enum vmcs_field { EOI_EXIT_BITMAP3_HIGH = 0x00002023, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 2a46ca720afc..6ba66ee79710 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,15 +4,7 @@ #include <linux/seqlock.h> #include <uapi/asm/vsyscall.h> -#define VGETCPU_RDTSCP 1 -#define VGETCPU_LSL 2 - -/* kernel space (writeable) */ -extern int vgetcpu_mode; -extern struct timezone sys_tz; - -#include <asm/vvar.h> - +#ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); /* @@ -20,25 +12,12 @@ extern void map_vsyscall(void); * Returns true if handled. */ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); - -#ifdef CONFIG_X86_64 - -#define VGETCPU_CPU_MASK 0xfff - -static inline unsigned int __getcpu(void) +#else +static inline void map_vsyscall(void) {} +static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { - unsigned int p; - - if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { - /* Load per CPU data from RDTSCP */ - native_read_tscp(&p); - } else { - /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); - } - - return p; + return false; } -#endif /* CONFIG_X86_64 */ +#endif #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 5d2b9ad2c6d2..3f32dfc2ab73 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -44,8 +44,6 @@ extern char __vvar_page; /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(0, volatile unsigned long, jiffies) -DECLARE_VVAR(16, int, vgetcpu_mode) DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) #undef DECLARE_VVAR diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index e45e4da96bf1..f58a9c7a3c86 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -172,7 +172,6 @@ struct x86_platform_ops { struct pci_dev; struct msi_msg; -struct msi_desc; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); @@ -183,8 +182,6 @@ struct x86_msi_ops { void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev); int (*setup_hpet_msi)(unsigned int irq, unsigned int id); - u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); - u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); }; struct IO_APIC_route_entry; diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h new file mode 100644 index 000000000000..0d809e9fc975 --- /dev/null +++ b/arch/x86/include/asm/xen/cpuid.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir@xen.org> + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* + * For compatibility with other hypervisor interfaces, the Xen cpuid leaves + * can be found at the first otherwise unused 0x100 aligned boundary starting + * from 0x40000000. + * + * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid + * leaves will start at 0x40000100 + */ + +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000x00) + * EAX: Largest Xen-information leaf. All leaves up to an including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000x01) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000x02) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +/* + * Leaf 5 (0x40000x04) + * HVM-specific features + */ + +/* EAX Features */ +/* Virtualized APIC registers */ +#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) +/* Virtualized x2APIC accesses */ +#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) +/* Memory mapped from other domains has valid IOMMU entries */ +#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) + +#define XEN_CPUID_MAX_NUM_LEAVES 4 + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 7f02fe4e2c7b..acd844c017d3 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -22,8 +22,8 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, } static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) { } + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c949923a5668..5eea09915a15 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -41,10 +41,12 @@ typedef struct xpaddr { extern unsigned long *machine_to_phys_mapping; extern unsigned long machine_to_phys_nr; +extern unsigned long *xen_p2m_addr; +extern unsigned long xen_p2m_size; +extern unsigned long xen_max_p2m_pfn; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); -extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); @@ -52,17 +54,52 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -extern int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op); extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -extern int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn); -extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); +/* + * Helper functions to write or read unsigned long values to/from + * memory, when the access may fault. + */ +static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val) +{ + return __put_user(val, (unsigned long __user *)addr); +} + +static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val) +{ + return __get_user(*val, (unsigned long __user *)addr); +} + +/* + * When to use pfn_to_mfn(), __pfn_to_mfn() or get_phys_to_machine(): + * - pfn_to_mfn() returns either INVALID_P2M_ENTRY or the mfn. No indicator + * bits (identity or foreign) are set. + * - __pfn_to_mfn() returns the found entry of the p2m table. A possibly set + * identity or foreign indicator will be still set. __pfn_to_mfn() is + * encapsulating get_phys_to_machine() which is called in special cases only. + * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special + * cases needing an extended handling. + */ +static inline unsigned long __pfn_to_mfn(unsigned long pfn) +{ + unsigned long mfn; + + if (pfn < xen_p2m_size) + mfn = xen_p2m_addr[pfn]; + else if (unlikely(pfn < xen_max_p2m_pfn)) + return get_phys_to_machine(pfn); + else + return IDENTITY_FRAME(pfn); + + if (unlikely(mfn == INVALID_P2M_ENTRY)) + return get_phys_to_machine(pfn); + + return mfn; +} + static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -70,7 +107,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - mfn = get_phys_to_machine(pfn); + mfn = __pfn_to_mfn(pfn); if (mfn != INVALID_P2M_ENTRY) mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); @@ -83,7 +120,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; - return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; + return __pfn_to_mfn(pfn) != INVALID_P2M_ENTRY; } static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) @@ -102,7 +139,7 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + ret = xen_safe_read_ulong(&machine_to_phys_mapping[mfn], &pfn); if (ret < 0) return ~0; @@ -117,7 +154,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) != mfn) { + if (__pfn_to_mfn(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -133,8 +170,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) * entry doesn't map back to the mfn and m2p_override doesn't have a * valid entry for it. */ - if (pfn == ~0 && - get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) + if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) pfn = mfn; return pfn; @@ -180,7 +216,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) return mfn; pfn = mfn_to_pfn(mfn); - if (get_phys_to_machine(pfn) != mfn) + if (__pfn_to_mfn(pfn) != mfn) return -1; /* force !pfn_valid() */ return pfn; } @@ -236,4 +272,11 @@ void make_lowmem_page_readwrite(void *vaddr); #define xen_remap(cookie, size) ioremap((cookie), (size)); #define xen_unmap(cookie) iounmap((cookie)) +static inline bool xen_arch_need_swiotlb(struct device *dev, + unsigned long pfn, + unsigned long mfn) +{ + return false; +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 7e7a79ada658..5fa9770035dc 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -16,6 +16,7 @@ #define XSTATE_Hi16_ZMM 0x80 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index bbae02470701..d993e33f5236 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -21,11 +21,6 @@ * this size. */ -/* - * Odd: 'make headers_check' complains about numa.h if I try - * to collapse the next two #ifdef lines to a single line: - * #if defined(__KERNEL__) && defined(CONFIG_EFI) - */ #ifndef __KERNEL__ #define E820_X_MAX E820MAX #endif diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb37bfe..6e1aaf73852a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. + */ unsigned int lm:1; #endif }; diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index e21331ce368f..c8aa65d56027 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -152,6 +152,45 @@ #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (x & 0xff) +#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) +#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) +#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) +#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + #define MSR_AMD64_MC0_MASK 0xc0010044 #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) @@ -206,6 +245,7 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ /* Fam 16h MSRs */ @@ -345,6 +385,8 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_MISC_PWR_MGMT 0x000001aa + #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 #define ENERGY_PERF_BIAS_PERFORMANCE 0 #define ENERGY_PERF_BIAS_NORMAL 6 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 0e79420376eb..b813bf9da1e2 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,10 +67,13 @@ #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -114,6 +117,9 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } + { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_XSAVES, "XSAVES" }, \ + { EXIT_REASON_XRSTORS, "XRSTORS" } #endif /* _UAPIVMX_H */ |