Diffstat (limited to 'arch/x86/include')
33 files changed, 710 insertions, 292 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 5af926c050f0..21bc53f5ed0c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -78,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void) #define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address +union acpi_subtable_headers; + +int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, + const unsigned long end); + +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa); + /* * Check if the CPU can handle C2 and deeper */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ba99ef75f56c..ca9ae606aab9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -156,102 +156,50 @@ static inline int alternatives_text_reserved(void *start, void *end) #define ALT_CALL_INSTR "call BUG_func" -#define b_replacement(num) "664"#num -#define e_replacement(num) "665"#num +#define alt_slen "772b-771b" +#define alt_total_slen "773b-771b" +#define alt_rlen "775f-774f" -#define alt_end_marker "663" -#define alt_slen "662b-661b" -#define alt_total_slen alt_end_marker"b-661b" -#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" - -#define OLDINSTR(oldinstr, num) \ - "# ALT: oldnstr\n" \ - "661:\n\t" oldinstr "\n662:\n" \ +#define OLDINSTR(oldinstr) \ + "# ALT: oldinstr\n" \ + "771:\n\t" oldinstr "\n772:\n" \ "# ALT: padding\n" \ - ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \ - alt_end_marker ":\n" - -/* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" - -/* - * Pad the second replacement alternative with additional NOPs if it is - * additionally longer than the first replacement alternative. 
- */ -#define OLDINSTR_2(oldinstr, num1, num2) \ - "# ALT: oldinstr2\n" \ - "661:\n\t" oldinstr "\n662:\n" \ - "# ALT: padding2\n" \ - ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define OLDINSTR_3(oldinsn, n1, n2, n3) \ - "# ALT: oldinstr3\n" \ - "661:\n\t" oldinsn "\n662:\n" \ - "# ALT: padding3\n" \ - ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ - " - (" alt_slen ")) > 0) * " \ - "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ - " - (" alt_slen ")), 0x90\n" \ - alt_end_marker ":\n" - -#define ALTINSTR_ENTRY(ft_flags, num) \ - " .long 661b - .\n" /* label */ \ - " .long " b_replacement(num)"f - .\n" /* new instruction */ \ + ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen ")-(" alt_slen ")),0x90\n" \ + "773:\n" + +#define ALTINSTR_ENTRY(ft_flags) \ + ".pushsection .altinstructions,\"a\"\n" \ + " .long 771b - .\n" /* label */ \ + " .long 774f - .\n" /* new instruction */ \ " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ + " .byte " alt_rlen "\n" /* replacement len */ \ + ".popsection\n" -#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \ - "# ALT: replacement " #num "\n" \ - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" +#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + "# ALT: replacement\n" \ + "774:\n\t" newinstr "\n775:\n" \ + ".popsection\n" /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, ft_flags) \ - OLDINSTR(oldinstr, 1) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags, 1) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr, 1) \ - ".popsection\n" + OLDINSTR(oldinstr) \ + ALTINSTR_ENTRY(ft_flags) \ + ALTINSTR_REPLACEMENT(newinstr) #define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ - OLDINSTR_2(oldinstr, 1, 2) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags1, 1) \ - ALTINSTR_ENTRY(ft_flags2, 2) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinstr1, 1) \ - ALTINSTR_REPLACEMENT(newinstr2, 2) \ - ".popsection\n" + ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2) /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. 
*/ #define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ - ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ - newinstr_yes, ft_flags) - -#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \ - newinsn3, ft_flags3) \ - OLDINSTR_3(oldinsn, 1, 2, 3) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(ft_flags1, 1) \ - ALTINSTR_ENTRY(ft_flags2, 2) \ - ALTINSTR_ENTRY(ft_flags3, 3) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinsn1, 1) \ - ALTINSTR_REPLACEMENT(newinsn2, 2) \ - ALTINSTR_REPLACEMENT(newinsn3, 3) \ - ".popsection\n" + ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags) + +#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \ + newinstr3, ft_flags3) \ + ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \ + newinstr3, ft_flags3) /* * Alternative instructions for different CPU types or capabilities. @@ -266,14 +214,11 @@ static inline int alternatives_text_reserved(void *start, void *end) * without volatile and memory clobber. */ #define alternative(oldinstr, newinstr, ft_flags) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") #define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory") -#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ - asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory") - /* * Alternative inline assembly with input. * @@ -283,18 +228,28 @@ static inline int alternatives_text_reserved(void *start, void *end) * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, ft_flags, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ + asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : output : "i" (0), ## input) -/* Like alternative_io, but for replacing a direct call with another one. */ -#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ - : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) +/* + * Like alternative_io, but for replacing a direct call with another one. + * + * Use the %c operand modifier which is the generic way to print a bare + * constant expression with all syntax-specific punctuation omitted. %P + * is the x86-specific variant which can handle constants too, for + * historical reasons, but it should be used primarily for PIC + * references: i.e., if used for a function, it would add the PLT + * suffix. + */ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ + asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* * Like alternative_call, but there are two features and respective functions. 
@@ -302,12 +257,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ - asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ - "call %c[new2]", ft_flags2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input...) \ + asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : ALT_OUTPUT_SP(output) \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input) /* @@ -322,6 +277,8 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr +#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__ + /* Macro for creating assembler functions avoiding any C magic. */ #define DEFINE_ASM_FUNC(func, instr, sec) \ asm (".pushsection " #sec ", \"ax\"\n" \ @@ -388,22 +345,23 @@ void nop_func(void); * @newinstr. ".skip" directive takes care of proper instruction padding * in case @newinstr is longer than @oldinstr. */ -.macro ALTERNATIVE oldinstr, newinstr, ft_flags -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f - .popsection +#define __ALTERNATIVE(oldinst, newinst, flag) \ +740: \ + oldinst ; \ +741: \ + .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\ +742: \ + .pushsection .altinstructions,"a" ; \ + altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \ + .popsection ; \ + .pushsection .altinstr_replacement,"ax" ; \ +743: \ + newinst ; \ +744: \ + .popsection ; - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: - .popsection +.macro ALTERNATIVE oldinstr, newinstr, ft_flags + __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags) .endm #define old_len 141b-140b @@ -412,65 +370,18 @@ void nop_func(void); #define new_len3 146f-145f /* - * gas compatible max based on the idea from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax - * - * The additional "-" is needed because gas uses a "true" value of -1. - */ -#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) -#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c)) - - -/* * Same as ALTERNATIVE macro above but for two alternatives. If CPU * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has * @feature2, it replaces @oldinstr with @feature2. 
*/ .macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 -140: - \oldinstr -141: - .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_2(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f - altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - .popsection + __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1), + \newinstr2, \ft_flags2) .endm .macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3 -140: - \oldinstr -141: - .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \ - (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f - altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f - altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: - \newinstr3 -146: - .popsection + __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2), + \newinstr3, \ft_flags3) .endm /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 5c37944c8a5e..6f3b6aef47ba 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -21,8 +21,8 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, unsigned long); -extern int amd_smn_read(u16 node, u32 address, u32 *value); -extern int amd_smn_write(u16 node, u32 address, u32 value); +int __must_check amd_smn_read(u16 node, u32 address, u32 *value); +int __must_check amd_smn_write(u16 node, u32 address, u32 value); struct amd_l3_cache { unsigned indices; diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 7cd752557905..31d19c815f99 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -93,7 +93,7 @@ * */ enum cfi_mode { - CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */ + CFI_AUTO, /* FineIBT if hardware has IBT, otherwise kCFI */ CFI_OFF, /* Taditional / IBT depending on .config */ CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */ CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ed2797f132ce..62cef2113ca7 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -93,10 +93,9 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ - : [ptr] "+m" (*(_ptr)), \ - "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), "c" (n.high), "S" (_ptr) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ o.full; \ @@ -122,12 +121,11 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 \ asm volatile(ALTERNATIVE(_lock_loc \ "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %[ptr]", X86_FEATURE_CX8) \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ CC_SET(e) \ : CC_OUT(e) (ret), \ - [ptr] "+m" (*(_ptr)), \ "+a" (o.low), "+d" (o.high) \ - : "b" (n.low), 
"c" (n.high), "S" (_ptr) \ + : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ : "memory"); \ \ if (unlikely(!ret)) \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a1dd81027c2d..dd4682857c12 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -361,6 +361,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ #define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ @@ -446,6 +447,7 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -470,6 +472,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1dc600fa3ba5..521aad70e41b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -229,7 +229,8 @@ static inline bool efi_is_native(void) static inline void *efi64_zero_upper(void *p) { - ((u32 *)p)[1] = 0; + if (p) + ((u32 *)p)[1] = 0; return p; } @@ -315,6 +316,10 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) +/* EFI SMBIOS protocol */ +#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \ + ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \ + efi64_zero_upper(phandle)) /* * The macros below handle the plumbing for the argument mapping. 
To add a * mapping for a specific EFI method, simply define a macro @@ -384,24 +389,8 @@ static inline void efi_reserve_boot_services(void) } #endif /* CONFIG_EFI */ -#ifdef CONFIG_EFI_FAKE_MEMMAP -extern void __init efi_fake_memmap_early(void); -extern void __init efi_fake_memmap(void); -#else -static inline void efi_fake_memmap_early(void) -{ -} - -static inline void efi_fake_memmap(void) -{ -} -#endif - extern int __init efi_memmap_alloc(unsigned int num_entries, struct efi_memory_map_data *data); -extern void __efi_memmap_free(u64 phys, unsigned long size, - unsigned long flags); -#define __efi_memmap_free __efi_memmap_free extern int __init efi_memmap_install(struct efi_memory_map_data *data); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 7e523bb3d2d3..fb2809b20b0a 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -73,19 +73,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #endif /* - * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), - * but not enough for x86 stack utilization comfort. To keep - * reasonable stack head room, reduce the maximum offset to 8 bits. - * - * The actual entropy will be further reduced by the compiler when - * applying stack alignment constraints (see cc_stack_align4/8 in + * This value will get limited by KSTACK_OFFSET_MAX(), which is 10 + * bits. The actual entropy will be further reduced by the compiler + * when applying stack alignment constraints (see cc_stack_align4/8 in * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32) * low bits from any entropy chosen here. * - * Therefore, final stack offset entropy will be 5 (x86_64) or - * 6 (ia32) bits. + * Therefore, final stack offset entropy will be 7 (x86_64) or + * 8 (ia32) bits. 
*/ - choose_random_kstack_offset(rdtsc() & 0xFF); + choose_random_kstack_offset(rdtsc()); } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index cc9ccf61b6bd..14d72727d7ee 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -6,6 +6,7 @@ struct x86_mapping_info { void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void (*free_pgt_page)(void *, void *); /* free buf for page table */ void *context; /* context for alloc_pgt_page */ unsigned long page_flag; /* page flag for PMD or PUD entry */ unsigned long offset; /* ident mapping offset */ @@ -16,4 +17,6 @@ struct x86_mapping_info { int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, unsigned long pstart, unsigned long pend); +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd); + #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c5ae649d2df..cf7fc2b8e3ce 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -54,6 +54,26 @@ static __always_inline void native_halt(void) asm volatile("hlt": : :"memory"); } +static __always_inline int native_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static __always_inline unsigned long native_local_irq_save(void) +{ + unsigned long flags = native_save_fl(); + + native_irq_disable(); + + return flags; +} + +static __always_inline void native_local_irq_restore(unsigned long flags) +{ + if (!native_irqs_disabled_flags(flags)) + native_irq_enable(); +} + #endif #ifdef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ece45b3f6f20..f8ca74e7678f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); +void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dfd2e9699bd7..3ad29b128943 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -261,7 +261,8 @@ enum mcp_flags { MCP_DONTLOG = BIT(2), /* only clear, don't log */ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ }; -bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); + +void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 048081b226d7..82c6a4d350e0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -666,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -787,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 
0xc0000082 @@ -1170,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index cc6b8e087192..af4302d79b59 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -54,7 +54,7 @@ static inline void clear_page(void *page) clear_page_rep, X86_FEATURE_REP_GOOD, clear_page_erms, X86_FEATURE_ERMS, "=D" (page), - "0" (page) + "D" (page) : "cc", "memory", "rax", "rcx"); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 65b8e5bb902c..e39311a89bf4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline bool pte_decrypted(pte_t pte) +{ + return cc_mkdec(pte_val(pte)) == pte_val(pte); +} + #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b78644962626..2f321137736c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -549,6 +549,7 @@ enum pg_level { PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, + PG_LEVEL_256T, PG_LEVEL_NUM }; diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 000000000000..24e3a53ca255 --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \ + ".long 1b - %c2 - .\n\t" \ + ".popsection" \ + :"=r" (__ret) \ + :"i" ((unsigned long)0x0123456789abcdefull), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \ + ".long 1b - 1 - .\n\t" \ + ".popsection" \ + :"+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. 
+ */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 9aee31862b4a..4b2abce2e3e7 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool set_memory_enc_stop_conversion(void); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); + int set_memory_np_noalias(unsigned long addr, int numpages); int set_memory_nonglobal(unsigned long addr, int numpages); int set_memory_global(unsigned long addr, int numpages); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index e61e68d71cba..0667b2a88614 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -28,6 +28,8 @@ #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ #ifndef __ASSEMBLY__ +#include <linux/cache.h> + #include <asm/bootparam.h> #include <asm/x86_init.h> @@ -133,6 +135,12 @@ asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data #endif /* __i386__ */ #endif /* _SETUP */ +#ifdef CONFIG_CMDLINE_BOOL +extern bool builtin_cmdline_added __ro_after_init; +#else +#define builtin_cmdline_added 0 +#endif + #else /* __ASSEMBLY */ .macro __RESERVE_BRK name, size diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 5a8246dd532f..e90d403f2068 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -98,6 +98,19 @@ enum psc_op { /* GHCBData[63:32] */ \ (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) +/* GHCB Run at VMPL Request/Response */ +#define GHCB_MSR_VMPL_REQ 0x016 +#define GHCB_MSR_VMPL_REQ_LEVEL(v) \ + /* GHCBData[39:32] */ \ + (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \ + /* GHCBDdata[11:0] */ \ + GHCB_MSR_VMPL_REQ) + +#define GHCB_MSR_VMPL_RESP 0x017 +#define GHCB_MSR_VMPL_RESP_VAL(v) \ + /* GHCBData[63:32] */ \ + (((u64)(v) & GENMASK_ULL(63, 32)) >> 32) + /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 @@ -109,6 +122,7 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) +#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5) /* * SNP Page State Change NAE event @@ -163,6 +177,10 @@ struct snp_psc_desc { #define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ #define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ #define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ +#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */ +#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ +#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ +#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev.h 
b/arch/x86/include/asm/sev.h index ca20cc4e5826..ac5886ce252e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -152,10 +152,119 @@ struct snp_secrets_page { u8 vmpck2[VMPCK_KEY_LEN]; u8 vmpck3[VMPCK_KEY_LEN]; struct secrets_os_area os_area; - u8 rsvd3[3840]; + + u8 vmsa_tweak_bitmap[64]; + + /* SVSM fields */ + u64 svsm_base; + u64 svsm_size; + u64 svsm_caa; + u32 svsm_max_version; + u8 svsm_guest_vmpl; + u8 rsvd3[3]; + + /* Remainder of page */ + u8 rsvd4[3744]; } __packed; +/* + * The SVSM Calling Area (CA) related structures. + */ +struct svsm_ca { + u8 call_pending; + u8 mem_available; + u8 rsvd1[6]; + + u8 svsm_buffer[PAGE_SIZE - 8]; +}; + +#define SVSM_SUCCESS 0 +#define SVSM_ERR_INCOMPLETE 0x80000000 +#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001 +#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002 +#define SVSM_ERR_INVALID_ADDRESS 0x80000003 +#define SVSM_ERR_INVALID_FORMAT 0x80000004 +#define SVSM_ERR_INVALID_PARAMETER 0x80000005 +#define SVSM_ERR_INVALID_REQUEST 0x80000006 +#define SVSM_ERR_BUSY 0x80000007 +#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006 + +/* + * The SVSM PVALIDATE related structures + */ +struct svsm_pvalidate_entry { + u64 page_size : 2, + action : 1, + ignore_cf : 1, + rsvd : 8, + pfn : 52; +}; + +struct svsm_pvalidate_call { + u16 num_entries; + u16 cur_index; + + u8 rsvd1[4]; + + struct svsm_pvalidate_entry entry[]; +}; + +#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \ + offsetof(struct svsm_pvalidate_call, entry)) / \ + sizeof(struct svsm_pvalidate_entry)) + +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + +/* + * SVSM protocol structure + */ +struct svsm_call { + struct svsm_ca *caa; + u64 rax; + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 rax_out; + u64 rcx_out; + u64 rdx_out; + u64 r8_out; + u64 r9_out; +}; + +#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x)) +#define SVSM_CORE_REMAP_CA 0 +#define SVSM_CORE_PVALIDATE 1 +#define SVSM_CORE_CREATE_VCPU 2 +#define SVSM_CORE_DELETE_VCPU 3 + +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + #ifdef CONFIG_AMD_MEM_ENCRYPT + +extern u8 snp_vmpl; + extern void __sev_es_ist_enter(struct pt_regs *regs); extern void __sev_es_ist_exit(void); static __always_inline void sev_es_ist_enter(struct pt_regs *regs) @@ -181,6 +290,14 @@ static __always_inline void sev_es_nmi_complete(void) extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); extern void sev_enable(struct boot_params *bp); +/* + * RMPADJUST modifies the RMP permissions of a page of a lesser- + * privileged (numerically higher) VMPL. + * + * If the guest is running at a higher-privilege than the privilege + * level the instruction is targeting, the instruction will succeed, + * otherwise, it will fail. 
+ */ static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { int rc; @@ -225,11 +342,16 @@ bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); void sev_show_status(void); -#else +void snp_update_svsm_ca(void); + +#else /* !CONFIG_AMD_MEM_ENCRYPT */ + +#define snp_vmpl 0 static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } @@ -253,12 +375,17 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } - +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } static inline void sev_show_status(void) { } -#endif +static inline void snp_update_svsm_ca(void) { } + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a35936b512fe..ca073f40698f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -35,6 +35,7 @@ struct smp_ops { int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); + void (*stop_this_cpu)(void); void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 405efb3e4996..94408a784c8e 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -28,9 +28,6 @@ static inline cycles_t get_cycles(void) } #define get_cycles get_cycles -extern struct system_counterval_t convert_art_to_tsc(u64 art); -extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); - extern void tsc_early_init(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0f9bab92a43d..3a7755c1a441 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -78,10 +78,10 @@ extern int __get_user_bad(void); int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - asm volatile("call __" #fn "_%c4" \ + asm volatile("call __" #fn "_%c[size]" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), "i" (sizeof(*(ptr)))); \ + : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 761173ccc33c..6c9e5bdd3916 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -56,6 +56,5 @@ # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE -# define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_X86_UNISTD_H */ diff --git 
a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 0ef36190abe6..b2d2df026f6e 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -328,9 +328,8 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, * due to unsigned comparison. * * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, - * but that case is also unlikely and will also take the unlikely path - * here. + * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that + * case is also unlikely and will also take the unlikely path here. */ if (unlikely(delta > vd->max_cycles)) { /* diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index be199a9b2676..93226281b450 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <linux/hrtimer.h> #include <linux/timekeeper_internal.h> #include <vdso/datapage.h> #include <asm/vgtod.h> diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 7aa38b2ad8a9..a0ce291abcae 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -14,11 +14,6 @@ #include <uapi/linux/time.h> -#ifdef BUILD_VDSO32_64 -typedef u64 gtod_long_t; -#else -typedef unsigned long gtod_long_t; -#endif #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index ac9fc51e2b18..c9cf43d5ef23 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -7,51 +7,321 @@ #include <linux/stringify.h> /* - * The hypercall definitions differ in the low word of the %edx argument - * in the following way: the old port base interface uses the port - * number to distinguish between high- and low bandwidth versions. + * VMware hypercall ABI. + * + * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall) + * have up to 6 input and 6 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg2 - Hypercall command + * arg3 bits [15:0] - Port number, LB and direction flags + * + * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB + * hypercalls. They also have up to 6 input and 6 output on registers + * arguments, with different argument to register mapping: + * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3), + * %rsi (arg4), %rdi (arg5). + * + * - High bandwidth (HB) hypercalls are I/O port based only. They have + * up to 7 input and 7 output arguments passed and returned using + * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3), + * %esi (arg4), %edi (arg5), %ebp (arg6). + * The following input arguments must be initialized by the caller: + * arg0 - VMWARE_HYPERVISOR_MAGIC + * arg1 - Hypercall command + * arg3 bits [15:0] - Port number, HB and direction flags + * + * For compatibility purposes, x86_64 systems use only lower 32 bits + * for input and output arguments. 
+ * + * The hypercall definitions differ in the low word of the %edx (arg3) + * in the following way: the old I/O port based interface uses the port + * number to distinguish between high- and low bandwidth versions, and + * uses IN/OUT instructions to define transfer direction. * * The new vmcall interface instead uses a set of flags to select * bandwidth mode and transfer direction. The flags should be loaded - * into %dx by any user and are automatically replaced by the port - * number if the VMWARE_HYPERVISOR_PORT method is used. - * - * In short, new driver code should strictly use the new definition of - * %dx content. + * into arg3 by any user and are automatically replaced by the port + * number if the I/O port method is used. */ -/* Old port-based version */ -#define VMWARE_HYPERVISOR_PORT 0x5658 -#define VMWARE_HYPERVISOR_PORT_HB 0x5659 +#define VMWARE_HYPERVISOR_HB BIT(0) +#define VMWARE_HYPERVISOR_OUT BIT(1) -/* Current vmcall / vmmcall version */ -#define VMWARE_HYPERVISOR_HB BIT(0) -#define VMWARE_HYPERVISOR_OUT BIT(1) +#define VMWARE_HYPERVISOR_PORT 0x5658 +#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \ + VMWARE_HYPERVISOR_HB) -/* The low bandwidth call. The low word of edx is presumed clear. */ -#define VMWARE_HYPERCALL \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \ - "inl (%%dx), %%eax", \ - "vmcall", X86_FEATURE_VMCALL, \ - "vmmcall", X86_FEATURE_VMW_VMMCALL) +#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U +#define VMWARE_CMD_GETVERSION 10 +#define VMWARE_CMD_GETHZ 45 +#define VMWARE_CMD_GETVCPU_INFO 68 +#define VMWARE_CMD_STEALCLOCK 91 /* - * The high bandwidth out call. The low word of edx is presumed to have the - * HB and OUT bits set. + * Hypercall command mask: + * bits [6:0] command, range [0, 127] + * bits [19:16] sub-command, range [0, 15] */ -#define VMWARE_HYPERCALL_HB_OUT \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ - "rep outsb", \ - "vmcall", X86_FEATURE_VMCALL, \ - "vmmcall", X86_FEATURE_VMW_VMMCALL) +#define VMWARE_CMD_MASK 0xf007fU + +#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) +#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) + +extern unsigned long vmware_hypercall_slow(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); + +#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL +#define VMWARE_TDX_HCALL_FUNC 1 + +extern unsigned long vmware_tdx_hypercall(unsigned long cmd, + unsigned long in1, unsigned long in3, + unsigned long in4, unsigned long in5, + u32 *out1, u32 *out2, u32 *out3, + u32 *out4, u32 *out5); /* - * The high bandwidth in call. The low word of edx is presumed to have the - * HB bit set. + * The low bandwidth call. The low word of %edx is presumed to have OUT bit + * set. The high word of %edx may contain input data from the caller. 
*/ -#define VMWARE_HYPERCALL_HB_IN \ - ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \ - "rep insb", \ - "vmcall", X86_FEATURE_VMCALL, \ +#define VMWARE_HYPERCALL \ + ALTERNATIVE_2("movw %[port], %%dx\n\t" \ + "inl (%%dx), %%eax", \ + "vmcall", X86_FEATURE_VMCALL, \ "vmmcall", X86_FEATURE_VMW_VMMCALL) + +static inline +unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + NULL, NULL, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1, + u32 *out1, u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, 0, 0, 0, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (0) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out2) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + NULL, out2, NULL, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1, + unsigned long in3, u32 *out2, + u32 *out3, u32 *out4, u32 *out5) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, 0, 0, + NULL, out2, out3, out4, out5); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=c" (*out2), "=d" (*out3), 
"=S" (*out4), + "=D" (*out5) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, u32 *out1, + u32 *out2, u32 *out3) +{ + unsigned long out0; + + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + if (unlikely(!alternatives_patched) && !__is_defined(MODULE)) + return vmware_hypercall_slow(cmd, in1, in3, in4, in5, + out1, out2, out3, NULL, NULL); + + asm_inline volatile (VMWARE_HYPERCALL + : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) + : [port] "i" (VMWARE_HYPERVISOR_PORT), + "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (in1), + "c" (cmd), + "d" (in3), + "S" (in4), + "D" (in5) + : "cc", "memory"); + return out0; +} + +#ifdef CONFIG_X86_64 +#define VMW_BP_CONSTRAINT "r" +#else +#define VMW_BP_CONSTRAINT "m" +#endif + +/* + * High bandwidth calls are not supported on encrypted memory guests. + * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use + * low bandwidth hypercall if memory encryption is set. + * This assumption simplifies HB hypercall implementation to just I/O port + * based approach without alternative patching. + */ +static inline +unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep outsb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} + +static inline +unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + u32 *out1) +{ + unsigned long out0; + + asm_inline volatile ( + UNWIND_HINT_SAVE + "push %%" _ASM_BP "\n\t" + UNWIND_HINT_UNDEFINED + "mov %[in6], %%" _ASM_BP "\n\t" + "rep insb\n\t" + "pop %%" _ASM_BP "\n\t" + UNWIND_HINT_RESTORE + : "=a" (out0), "=b" (*out1) + : "a" (VMWARE_HYPERVISOR_MAGIC), + "b" (cmd), + "c" (in2), + "d" (in3 | VMWARE_HYPERVISOR_PORT_HB), + "S" (in4), + "D" (in5), + [in6] VMW_BP_CONSTRAINT (in6) + : "cc", "memory"); + return out0; +} +#undef VMW_BP_CONSTRAINT +#undef VMWARE_HYPERCALL + #endif diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index fe42067cd6d8..09b1d7e607c1 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -77,7 +77,7 @@ #define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */ #define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */ #define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */ -#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */ +#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */ #define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */ #define 
VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */ #define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index e8d7d4941c4c..422a47746657 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -5,45 +5,12 @@ #include <linux/bitops.h> #include <linux/wordpart.h> -/* - * This is largely generic for little-endian machines, but the - * optimal byte mask counting is probably going to be something - * that is architecture-specific. If you have a reliably fast - * bit count instruction, that might be better than the multiply - * and shift, for example. - */ struct word_at_a_time { const unsigned long one_bits, high_bits; }; #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } -#ifdef CONFIG_64BIT - -/* - * Jan Achrenius on G+: microoptimized version of - * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" - * that works for the bytemasks without having to - * mask them first. - */ -static inline long count_masked_bytes(unsigned long mask) -{ - return mask*0x0001020304050608ul >> 56; -} - -#else /* 32-bit case */ - -/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ -static inline long count_masked_bytes(long mask) -{ - /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ - long a = (0x0ff0001+mask) >> 23; - /* Fix the 1 for 00 case */ - return a & mask; -} - -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, return bits; } +#ifdef CONFIG_64BIT + +/* Keep the initial has_zero() value for both bitmask and size calc */ +#define create_zero_mask(bits) (bits) + +static inline unsigned long zero_bytemask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +#define find_zero(bits) (__ffs(bits) >> 3) + +#else + +/* Create the final mask for both bytemask and size */ static inline unsigned long create_zero_mask(unsigned long bits) { bits = (bits - 1) & ~bits; @@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits) /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ static inline unsigned long find_zero(unsigned long mask) { - return count_masked_bytes(mask); + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; } +#endif + /* * Load an unaligned word from kernel space. * diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 6149eabe200f..213cf5379a5a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -149,12 +149,22 @@ struct x86_init_acpi { * @enc_status_change_finish Notify HV after the encryption status of a range is changed * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + * @enc_kexec_begin Begin the two-step process of converting shared memory back + * to private. It stops the new conversions from being started + * and waits in-flight conversions to finish, if possible. 
+ * @enc_kexec_finish Finish the two-step process of converting shared memory to + * private. All memory is private after the call when + * the function returns. + * It is called on only one CPU while the others are shut down + * and with interrupts disabled. */ struct x86_guest { - bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); - bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); bool (*enc_tlb_flush_required)(bool enc); bool (*enc_cache_flush_required)(void); + void (*enc_kexec_begin)(void); + void (*enc_kexec_finish)(void); }; /** diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ |
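
A minimal usage sketch of the C-level alternative() wrapper whose expansion the alternative.h hunks above restructure (ALTINSTR_ENTRY() and ALTINSTR_REPLACEMENT() now emit their own .pushsection/.popsection, and ALTERNATIVE_2/_3 are built by nesting ALTERNATIVE). The helper below is modelled on the kernel's clac() pattern and is illustrative only; it assumes <asm/alternative.h> and <asm/cpufeatures.h> are included:

/*
 * Illustrative only: emit nothing by default and let the alternatives
 * framework patch in CLAC once the CPU advertises SMAP.  Call sites are
 * unchanged by the series above; only the macro's internal expansion
 * (nested ALTERNATIVE, self-contained section directives) differs.
 */
static __always_inline void example_clac(void)
{
	alternative("", "clac", X86_FEATURE_SMAP);
}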
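A sketch of how the new <asm/runtime-const.h> helpers are meant to be consumed, patterned after the hash-table lookup case the interface targets. All names below (struct bucket, example_table, example_shift, example_hash(), example_init()) are invented for illustration, and the sketch assumes the corresponding runtime_ptr_*/runtime_shift_* sections are collected by the linker script so the __start_/__stop_ symbols referenced by runtime_const_init() exist:

/* Hypothetical consumers of the runtime-constant interface. */
static struct bucket *example_table;	/* invented for illustration */
static unsigned int example_shift;	/* invented for illustration */

static inline struct bucket *example_hash(unsigned long hash)
{
	/*
	 * Both uses are emitted with placeholder immediates (the
	 * 0x0123456789abcdef constant and a shift of 12) plus an entry in
	 * a per-symbol section recording where to patch.
	 */
	return runtime_const_ptr(example_table) +
	       runtime_const_shift_right_32(hash, example_shift);
}

static void __init example_init(struct bucket *table, unsigned int shift)
{
	example_table = table;
	example_shift = shift;

	/*
	 * Patch every recorded site with the real values.  Per the comment
	 * in runtime-const.h above, this must happen before the text is
	 * executed or marked read-only.
	 */
	runtime_const_init(ptr, example_table);
	runtime_const_init(shift, example_shift);
}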
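The SVSM structures added to sev.h above are plain request descriptors; below is a sketch of filling one in for the Core protocol's REMAP_CA operation. How the request is actually delivered to the SVSM (MSR-based versus GHCB-based protocol) lives outside this header and is not shown; the helper name is invented:

/*
 * Sketch only: prepare a struct svsm_call asking the SVSM to remap the
 * Calling Area to a new physical address.  rax selects protocol 0 (Core),
 * call 0 (REMAP_CA); rcx carries the new CA's physical address.
 */
static void example_prepare_remap_ca(struct svsm_call *call,
				     struct svsm_ca *caa, u64 new_ca_pa)
{
	memset(call, 0, sizeof(*call));

	call->caa = caa;
	call->rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
	call->rcx = new_ca_pa;
}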
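A usage sketch for the new vmware_hypercall<N>() wrappers defined above, modelled on the classic GETVERSION presence check; the wrappers internally pick the TDX path, the pre-alternatives slow path, or the patched VMCALL/VMMCALL/port-I/O sequence, so callers no longer open-code VMWARE_HYPERCALL. The function name is invented and error handling is elided:

static bool example_vmware_present(void)
{
	u32 eax, ebx, ecx;

	/* arg1 is unused by GETVERSION; ebx echoes the magic on success. */
	eax = vmware_hypercall3(VMWARE_CMD_GETVERSION, 0, &ebx, &ecx);

	return eax != (u32)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}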
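Finally, a small illustration of the word-at-a-time helpers reshuffled above, following the usual strlen()-style pattern: on 64-bit, create_zero_mask() is now a no-op and find_zero()/zero_bytemask() work directly on the has_zero() output, while 32-bit keeps the Chatfield/Achrenius sequence inside find_zero(). The helper name is invented:

/*
 * Return the byte offset of the first NUL in an 8-byte (4-byte on 32-bit)
 * little-endian word, or sizeof(word) if it contains none.
 */
static inline unsigned long example_nul_offset(unsigned long word)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long bits;

	if (!has_zero(word, &bits, &constants))
		return sizeof(word);

	bits = prep_zero_mask(word, bits, &constants);
	bits = create_zero_mask(bits);
	return find_zero(bits);
}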