summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-23 10:13:36 +0100
committerIngo Molnar <mingo@kernel.org>2015-03-23 10:13:36 +0100
commiteda2360ad18b7cde87728fad85c6735a52c2576e (patch)
tree2bc28704b40686c0d3d0fd5aa7d000f9d9dd153f /arch/x86/kernel
parent1d23c4518b1f3a03c278f23333149245c178d2a6 (diff)
parentbc465aa9d045feb0e13b4a8f32cc33c1943f62d6 (diff)
Merge tag 'v4.0-rc5' into x86/fpu, to prevent conflicts
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c46
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/apic/apic.c3
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c22
-rw-r--r--arch/x86/kernel/cpu/common.c23
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_early.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event.c76
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/dumpstack.c5
-rw-r--r--arch/x86/kernel/early_printk.c187
-rw-r--r--arch/x86/kernel/entry_32.S3
-rw-r--r--arch/x86/kernel/entry_64.S16
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/head64.c11
-rw-r--r--arch/x86/kernel/head_64.S30
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i387.c3
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/kprobes/core.c56
-rw-r--r--arch/x86/kernel/kprobes/opt.c5
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/kernel/livepatch.c90
-rw-r--r--arch/x86/kernel/module.c14
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/rtc.c4
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--arch/x86/kernel/uprobes.c153
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c10
-rw-r--r--arch/x86/kernel/xsave.c10
42 files changed, 694 insertions, 188 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5d4502c8b983..cdb1b70ddad0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,10 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
+KASAN_SANITIZE_head$(BITS).o := n
+KASAN_SANITIZE_dumpstack.o := n
+KASAN_SANITIZE_dumpstack_$(BITS).o := n
+
CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
@@ -63,6 +67,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_X86_TSC) += trace_clock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a18fff361c7f..803b684676ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
int rc, irq, trigger, polarity;
+ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+ *irqp = gsi;
+ return 0;
+ }
+
rc = acpi_get_override_irq(gsi, &trigger, &polarity);
if (rc == 0) {
trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
@@ -845,13 +850,7 @@ int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
- struct acpi_table_boot *sb;
-
- sb = (struct acpi_table_boot *)table;
- if (!sb) {
- printk(KERN_WARNING PREFIX "Unable to map SBF\n");
- return -ENODEV;
- }
+ struct acpi_table_boot *sb = (struct acpi_table_boot *)table;
sbf_port = sb->cmos_index; /* Save CMOS port */
@@ -865,13 +864,7 @@ static struct resource *hpet_res __initdata;
static int __init acpi_parse_hpet(struct acpi_table_header *table)
{
- struct acpi_table_hpet *hpet_tbl;
-
- hpet_tbl = (struct acpi_table_hpet *)table;
- if (!hpet_tbl) {
- printk(KERN_WARNING PREFIX "Unable to map HPET\n");
- return -ENODEV;
- }
+ struct acpi_table_hpet *hpet_tbl = (struct acpi_table_hpet *)table;
if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) {
printk(KERN_WARNING PREFIX "HPET timers must be located in "
@@ -1345,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
}
/*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+ if (acpi_gbl_reduced_hardware) {
+ /*
+ * Override x86_init functions and bypass legacy pic
+ * in Hardware-reduced ACPI mode
+ */
+ x86_init.timers.timer_init = x86_init_noop;
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ legacy_pic = &null_legacy_pic;
+ }
+}
+
+/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
*/
@@ -1543,6 +1556,11 @@ int __init early_acpi_boot_init(void)
*/
early_acpi_process_madt();
+ /*
+ * Hardware-reduced ACPI mode initialization:
+ */
+ acpi_reduced_hw_init();
+
return 0;
}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 31368207837c..d1daead5fcdd 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)
header->pmode_cr0 = read_cr0();
if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
- header->pmode_cr4 = read_cr4();
+ header->pmode_cr4 = __read_cr4();
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b665d241efad..ad3639ae1b9b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1580,8 +1580,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
* under KVM
*/
if (max_physical_apicid > 255 ||
- (IS_ENABLED(CONFIG_HYPERVISOR_GUEST) &&
- !hypervisor_x2apic_available())) {
+ !hypervisor_x2apic_available()) {
pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
x2apic_disable();
return;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c2fd21fed002..017149cded07 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
static unsigned int get_apic_id(unsigned long x)
{
unsigned long value;
- unsigned int id;
+ unsigned int id = (x >> 24) & 0xff;
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ id |= (value << 2) & 0xff00;
+ }
return id;
}
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
- if (c->phys_proc_id != node) {
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
+ u64 val;
+ u32 nodes = 1;
+
+ this_cpu_write(cpu_llc_id, node);
+
+ /* Account for nodes per socket in multi-core-module processors */
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, val);
+ nodes = ((val >> 3) & 7) + 1;
}
+
+ c->phys_proc_id = node / nodes;
}
static int __init numachip_system_init(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b15bffcaba6d..2346c95c6ab1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
+#include <asm/tlbflush.h>
#include <asm/debugreg.h>
#include <asm/sections.h>
#include <asm/vsyscall.h>
@@ -278,7 +279,7 @@ __setup("nosmep", setup_disable_smep);
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP))
- set_in_cr4(X86_CR4_SMEP);
+ cr4_set_bits(X86_CR4_SMEP);
}
static __init int setup_disable_smap(char *arg)
@@ -298,9 +299,9 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_SMAP)) {
#ifdef CONFIG_X86_SMAP
- set_in_cr4(X86_CR4_SMAP);
+ cr4_set_bits(X86_CR4_SMAP);
#else
- clear_in_cr4(X86_CR4_SMAP);
+ cr4_clear_bits(X86_CR4_SMAP);
#endif
}
}
@@ -1295,6 +1296,12 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
/*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
+
+ /*
* Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure.
*/
@@ -1313,7 +1320,7 @@ void cpu_init(void)
pr_debug("Initializing CPU#%d\n", cpu);
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
@@ -1389,12 +1396,18 @@ void cpu_init(void)
wait_for_master_cpu(cpu);
+ /*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
+
show_ucode_info_early();
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_current_idt();
switch_to_new_gdt(cpu);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 94d7dcb12145..50163fa9034f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -565,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
{ 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
{ 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
- { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" },
- { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" },
+ { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
+ { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c7035073dfc1..659643376dbf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -952,20 +952,18 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
int type, char *buf)
{
- ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
- int n = 0;
-
- if (len > 1) {
- const struct cpumask *mask;
-
- mask = to_cpumask(this_leaf->shared_cpu_map);
- n = type ?
- cpulist_scnprintf(buf, len-2, mask) :
- cpumask_scnprintf(buf, len-2, mask);
- buf[n++] = '\n';
- buf[n] = '\0';
- }
- return n;
+ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+ int ret;
+
+ if (type)
+ ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
+ cpumask_pr_args(mask));
+ else
+ ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb",
+ cpumask_pr_args(mask));
+ buf[ret++] = '\n';
+ buf[ret] = '\0';
+ return ret;
}
static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cdfed7953963..3c036cb4a370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,6 +44,7 @@
#include <asm/processor.h>
#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -151,14 +152,11 @@ static struct mce_log mcelog = {
void mce_log(struct mce *mce)
{
unsigned next, entry;
- int ret = 0;
/* Emit the trace record: */
trace_mce_record(mce);
- ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
- if (ret == NOTIFY_STOP)
- return;
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
mce->finished = 0;
wmb();
@@ -1452,7 +1450,7 @@ static void __mcheck_cpu_init_generic(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | m_fl, &all_banks);
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index ec2663a708e4..737b0ad4e61a 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,6 +9,7 @@
#include <asm/processor.h>
#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -65,7 +66,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
"Intel old style machine check architecture supported.\n");
/* Enable MCE: */
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
printk(KERN_INFO
"Intel old style machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index bd5d46a32210..44f138296fbe 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,6 +8,7 @@
#include <asm/processor.h>
#include <asm/traps.h>
+#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
@@ -36,7 +37,7 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
- set_in_cr4(X86_CR4_MCE);
+ cr4_set_bits(X86_CR4_MCE);
printk(KERN_INFO
"Winchip machine check reporting enabled on CPU#0.\n");
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index c6826d1e8082..746e7fd08aad 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
struct microcode_header_intel mc_header;
unsigned int mc_size;
+ if (leftover < sizeof(mc_header)) {
+ pr_err("error! Truncated header in microcode data file\n");
+ break;
+ }
+
if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index ec9df6f9cd47..420eb933189c 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start,
unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
int i;
- while (leftover) {
+ while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+
+ if (leftover < sizeof(mc_header))
+ break;
+
mc_header = (struct microcode_header_intel *)ucode_ptr;
mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 9e451b0876b5..f8c81ba0b465 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -138,8 +138,8 @@ static void prepare_set(void)
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
+ cr4 = __read_cr4();
+ __write_cr4(cr4 & ~X86_CR4_PGE);
}
/*
@@ -171,7 +171,7 @@ static void post_set(void)
/* Restore value of CR4 */
if (cpu_has_pge)
- write_cr4(cr4);
+ __write_cr4(cr4);
}
static void cyrix_set_arr(unsigned int reg, unsigned long base,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0e25a1bc5ab5..7d74f7b3c6ba 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
+ cr4 = __read_cr4();
+ __write_cr4(cr4 & ~X86_CR4_PGE);
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)
/* Restore value of CR4 */
if (cpu_has_pge)
- write_cr4(cr4);
+ __write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 143e5f5dc855..b71a7f86d68a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,8 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
@@ -43,6 +45,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1327,8 +1331,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
- if (x86_pmu.attr_rdpmc)
- set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1804,14 +1806,44 @@ static int x86_pmu_event_init(struct perf_event *event)
event->destroy(event);
}
+ if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
return err;
}
+static void refresh_pce(void *ignored)
+{
+ if (current->mm)
+ load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+ if (!current->mm)
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
- if (!x86_pmu.attr_rdpmc)
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1829,16 +1861,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
}
-static void change_rdpmc(void *info)
-{
- bool enable = !!(unsigned long)info;
-
- if (enable)
- set_in_cr4(X86_CR4_PCE);
- else
- clear_in_cr4(X86_CR4_PCE);
-}
-
static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1850,14 +1872,27 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (ret)
return ret;
+ if (val > 2)
+ return -EINVAL;
+
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
- if (!!val != !!x86_pmu.attr_rdpmc) {
- x86_pmu.attr_rdpmc = !!val;
- on_each_cpu(change_rdpmc, (void *)val, 1);
+ if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+ /*
+ * Changing into or out of always available, aka
+ * perf-event-bypassing mode. This path is extremely slow,
+ * but only root can trigger it, so it's okay.
+ */
+ if (val == 2)
+ static_key_slow_inc(&rdpmc_always_available);
+ else
+ static_key_slow_dec(&rdpmc_always_available);
+ on_each_cpu(refresh_pce, NULL, 1);
}
+ x86_pmu.attr_rdpmc = val;
+
return count;
}
@@ -1900,6 +1935,9 @@ static struct pmu pmu = {
.event_init = x86_pmu_event_init,
+ .event_mapped = x86_pmu_event_mapped,
+ .event_unmapped = x86_pmu_event_unmapped,
+
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
@@ -1914,13 +1952,15 @@ static struct pmu pmu = {
.flush_branch_stack = x86_pmu_flush_branch_stack,
};
-void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
{
struct cyc2ns_data *data;
userpg->cap_user_time = 0;
userpg->cap_user_time_zero = 0;
- userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
userpg->pmc_width = x86_pmu.cntval_bits;
if (!sched_clock_stable())
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4e6cdb0ddc70..df525d2be1e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+
struct amd_nb {
int nb_id; /* NorthBridge id */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b74ebc7c4402..cf3df1d8d039 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -265,7 +265,10 @@ int __die(const char *str, struct pt_regs *regs, long err)
printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
+ printk("DEBUG_PAGEALLOC ");
+#endif
+#ifdef CONFIG_KASAN
+ printk("KASAN");
#endif
printk("\n");
if (notify_die(DIE_OOPS, str, regs, err,
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 01d1c187c9f9..a62536a1be88 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -19,6 +19,7 @@
#include <linux/usb/ehci_def.h>
#include <linux/efi.h>
#include <asm/efi.h>
+#include <asm/pci_x86.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -76,7 +77,7 @@ static struct console early_vga_console = {
/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
-static int early_serial_base = 0x3f8; /* ttyS0 */
+static unsigned long early_serial_base = 0x3f8; /* ttyS0 */
#define XMTRDY 0x20
@@ -94,13 +95,40 @@ static int early_serial_base = 0x3f8; /* ttyS0 */
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
+static void mem32_serial_out(unsigned long addr, int offset, int value)
+{
+ uint32_t *vaddr = (uint32_t *)addr;
+ /* shift implied by pointer type */
+ writel(value, vaddr + offset);
+}
+
+static unsigned int mem32_serial_in(unsigned long addr, int offset)
+{
+ uint32_t *vaddr = (uint32_t *)addr;
+ /* shift implied by pointer type */
+ return readl(vaddr + offset);
+}
+
+static unsigned int io_serial_in(unsigned long addr, int offset)
+{
+ return inb(addr + offset);
+}
+
+static void io_serial_out(unsigned long addr, int offset, int value)
+{
+ outb(value, addr + offset);
+}
+
+static unsigned int (*serial_in)(unsigned long addr, int offset) = io_serial_in;
+static void (*serial_out)(unsigned long addr, int offset, int value) = io_serial_out;
+
static int early_serial_putc(unsigned char ch)
{
unsigned timeout = 0xffff;
- while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
+ while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
- outb(ch, early_serial_base + TXR);
+ serial_out(early_serial_base, TXR, ch);
return timeout ? 0 : -1;
}
@@ -114,13 +142,28 @@ static void early_serial_write(struct console *con, const char *s, unsigned n)
}
}
+static __init void early_serial_hw_init(unsigned divisor)
+{
+ unsigned char c;
+
+ serial_out(early_serial_base, LCR, 0x3); /* 8n1 */
+ serial_out(early_serial_base, IER, 0); /* no interrupt */
+ serial_out(early_serial_base, FCR, 0); /* no fifo */
+ serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */
+
+ c = serial_in(early_serial_base, LCR);
+ serial_out(early_serial_base, LCR, c | DLAB);
+ serial_out(early_serial_base, DLL, divisor & 0xff);
+ serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff);
+ serial_out(early_serial_base, LCR, c & ~DLAB);
+}
+
#define DEFAULT_BAUD 9600
static __init void early_serial_init(char *s)
{
- unsigned char c;
unsigned divisor;
- unsigned baud = DEFAULT_BAUD;
+ unsigned long baud = DEFAULT_BAUD;
char *e;
if (*s == ',')
@@ -145,24 +188,124 @@ static __init void early_serial_init(char *s)
s++;
}
- outb(0x3, early_serial_base + LCR); /* 8n1 */
- outb(0, early_serial_base + IER); /* no interrupt */
- outb(0, early_serial_base + FCR); /* no fifo */
- outb(0x3, early_serial_base + MCR); /* DTR + RTS */
+ if (*s) {
+ if (kstrtoul(s, 0, &baud) < 0 || baud == 0)
+ baud = DEFAULT_BAUD;
+ }
+
+ /* Convert from baud to divisor value */
+ divisor = 115200 / baud;
+
+ /* These will always be IO based ports */
+ serial_in = io_serial_in;
+ serial_out = io_serial_out;
+
+ /* Set up the HW */
+ early_serial_hw_init(divisor);
+}
+
+#ifdef CONFIG_PCI
+/*
+ * early_pci_serial_init()
+ *
+ * This function is invoked when the early_printk param starts with "pciserial"
+ * The rest of the param should be ",B:D.F,baud" where B, D & F describe the
+ * location of a PCI device that must be a UART device.
+ */
+static __init void early_pci_serial_init(char *s)
+{
+ unsigned divisor;
+ unsigned long baud = DEFAULT_BAUD;
+ u8 bus, slot, func;
+ uint32_t classcode, bar0;
+ uint16_t cmdreg;
+ char *e;
+
+
+ /*
+ * First, part the param to get the BDF values
+ */
+ if (*s == ',')
+ ++s;
+
+ if (*s == 0)
+ return;
+
+ bus = (u8)simple_strtoul(s, &e, 16);
+ s = e;
+ if (*s != ':')
+ return;
+ ++s;
+ slot = (u8)simple_strtoul(s, &e, 16);
+ s = e;
+ if (*s != '.')
+ return;
+ ++s;
+ func = (u8)simple_strtoul(s, &e, 16);
+ s = e;
+
+ /* A baud might be following */
+ if (*s == ',')
+ s++;
+
+ /*
+ * Second, find the device from the BDF
+ */
+ cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND);
+ classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
+ bar0 = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+
+ /*
+ * Verify it is a UART type device
+ */
+ if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) &&
+ (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) ||
+ (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */
+ return;
+
+ /*
+ * Determine if it is IO or memory mapped
+ */
+ if (bar0 & 0x01) {
+ /* it is IO mapped */
+ serial_in = io_serial_in;
+ serial_out = io_serial_out;
+ early_serial_base = bar0&0xfffffffc;
+ write_pci_config(bus, slot, func, PCI_COMMAND,
+ cmdreg|PCI_COMMAND_IO);
+ } else {
+ /* It is memory mapped - assume 32-bit alignment */
+ serial_in = mem32_serial_in;
+ serial_out = mem32_serial_out;
+ /* WARNING! assuming the address is always in the first 4G */
+ early_serial_base =
+ (unsigned long)early_ioremap(bar0 & 0xfffffff0, 0x10);
+ write_pci_config(bus, slot, func, PCI_COMMAND,
+ cmdreg|PCI_COMMAND_MEMORY);
+ }
+ /*
+ * Lastly, initalize the hardware
+ */
if (*s) {
- baud = simple_strtoul(s, &e, 0);
- if (baud == 0 || s == e)
+ if (strcmp(s, "nocfg") == 0)
+ /* Sometimes, we want to leave the UART alone
+ * and assume the BIOS has set it up correctly.
+ * "nocfg" tells us this is the case, and we
+ * should do no more setup.
+ */
+ return;
+ if (kstrtoul(s, 0, &baud) < 0 || baud == 0)
baud = DEFAULT_BAUD;
}
+ /* Convert from baud to divisor value */
divisor = 115200 / baud;
- c = inb(early_serial_base + LCR);
- outb(c | DLAB, early_serial_base + LCR);
- outb(divisor & 0xff, early_serial_base + DLL);
- outb((divisor >> 8) & 0xff, early_serial_base + DLH);
- outb(c & ~DLAB, early_serial_base + LCR);
+
+ /* Set up the HW */
+ early_serial_hw_init(divisor);
}
+#endif
static struct console early_serial_console = {
.name = "earlyser",
@@ -210,6 +353,13 @@ static int __init setup_early_printk(char *buf)
early_serial_init(buf + 4);
early_console_register(&early_serial_console, keep);
}
+#ifdef CONFIG_PCI
+ if (!strncmp(buf, "pciserial", 9)) {
+ early_pci_serial_init(buf + 9);
+ early_console_register(&early_serial_console, keep);
+ buf += 9; /* Keep from match the above "serial" */
+ }
+#endif
if (!strncmp(buf, "vga", 3) &&
boot_params.screen_info.orig_video_isVGA == 1) {
max_xpos = boot_params.screen_info.orig_video_cols;
@@ -226,11 +376,6 @@ static int __init setup_early_printk(char *buf)
early_console_register(&xenboot_console, keep);
#endif
#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
- if (!strncmp(buf, "mrst", 4)) {
- mrst_early_console_init();
- early_console_register(&early_mrst_console, keep);
- }
-
if (!strncmp(buf, "hsu", 3)) {
hsu_early_console_init(buf + 3);
early_console_register(&early_hsu_console, keep);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d4199b03e..31e2d5bf3e38 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -982,6 +982,9 @@ ENTRY(xen_hypervisor_callback)
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
+#ifndef CONFIG_PREEMPT
+ call xen_maybe_preempt_hcall
+#endif
jmp ret_from_intr
CFI_ENDPROC
ENDPROC(xen_hypervisor_callback)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db13655c3a2a..1d74d161687c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -269,11 +269,14 @@ ENTRY(ret_from_fork)
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
jz 1f
- testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
- jnz int_ret_from_sys_call
-
- RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
- jmp ret_from_sys_call # go to the SYSRET fastpath
+ /*
+ * By the time we get here, we have no idea whether our pt_regs,
+ * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+ * the slow path, or one of the ia32entry paths.
+ * Use int_ret_from_sys_call to return, since it can safely handle
+ * all of the above.
+ */
+ jmp int_ret_from_sys_call
1:
subq $REST_SKIP, %rsp # leave space for volatiles
@@ -1208,6 +1211,9 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl PER_CPU_VAR(irq_count)
+#ifndef CONFIG_PREEMPT
+ call xen_maybe_preempt_hcall
+#endif
jmp error_exit
CFI_ENDPROC
END(xen_do_hypervisor_callback)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index d6c1b9836995..2911ef3a9f1c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void)
asmlinkage __visible void __init i386_start_kernel(void)
{
+ cr4_init_shadow();
sanitize_boot_params(&boot_params);
/* Call the subarch specific early setup function */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index eda1a865641e..c4f8d4659070 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,6 +27,7 @@
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
+#include <asm/kasan.h>
/*
* Manage page tables very early on.
@@ -46,7 +47,7 @@ static void __init reset_early_page_tables(void)
next_early_pgt = 0;
- write_cr3(__pa(early_level4_pgt));
+ write_cr3(__pa_nodebug(early_level4_pgt));
}
/* Create a new PMD entry */
@@ -59,7 +60,7 @@ int __init early_make_pgtable(unsigned long address)
pmdval_t pmd, *pmd_p;
/* Invalid address or early pgt is done ? */
- if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
+ if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
return -1;
again:
@@ -155,9 +156,13 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
+ cr4_init_shadow();
+
/* Kill off the identity-map trampoline */
reset_early_page_tables();
+ kasan_map_early_shadow(early_level4_pgt);
+
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
@@ -179,6 +184,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
/* set init_level4_pgt kernel high mapping*/
init_level4_pgt[511] = early_level4_pgt[511];
+ kasan_map_early_shadow(init_level4_pgt);
+
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a468c0a65c42..6fd514d9f69a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -514,8 +514,38 @@ ENTRY(phys_base)
/* This must match the first entry in level2_kernel_pgt */
.quad 0x0000000000000000
+#ifdef CONFIG_KASAN
+#define FILL(VAL, COUNT) \
+ .rept (COUNT) ; \
+ .quad (VAL) ; \
+ .endr
+
+NEXT_PAGE(kasan_zero_pte)
+ FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512)
+NEXT_PAGE(kasan_zero_pmd)
+ FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512)
+NEXT_PAGE(kasan_zero_pud)
+ FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512)
+
+#undef FILL
+#endif
+
+
#include "../../x86/xen/xen-head.S"
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
+
+#ifdef CONFIG_KASAN
+/*
+ * This page used as early shadow. We don't use empty_zero_page
+ * at early stages, stack instrumentation could write some garbage
+ * to this page.
+ * Latter we reuse it as zero shadow for large ranges of memory
+ * that allowed to access, but not instrumented by kasan
+ * (vmalloc/vmemmap ...).
+ */
+NEXT_PAGE(kasan_zero_page)
+ .skip PAGE_SIZE
+#endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 319bcb9372fe..3acbff4716b0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -168,7 +168,7 @@ static void _hpet_print_config(const char *function, int line)
#define hpet_print_config() \
do { \
if (hpet_verbose) \
- _hpet_print_config(__FUNCTION__, __LINE__); \
+ _hpet_print_config(__func__, __LINE__); \
} while (0)
/*
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 0f3de6674ae3..29e982ada854 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -13,6 +13,7 @@
#include <asm/sigcontext.h>
#include <asm/processor.h>
#include <asm/math_emu.h>
+#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
@@ -197,7 +198,7 @@ void fpu_init(void)
if (cpu_has_xmm)
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
- set_in_cr4(cr4_mask);
+ cr4_set_bits(cr4_mask);
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 705ef8d48e2d..67b1cbe0093a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void)
irq = __this_cpu_read(vector_irq[vector]);
if (irq >= 0) {
desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new, data->affinity);
cpu_clear(this_cpu, affinity_new);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 98f654d466e5..4e3d5a9621fe 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
- W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+ W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
@@ -223,27 +223,48 @@ static unsigned long
__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
+ unsigned long faddr;
kp = get_kprobe((void *)addr);
- /* There is no probe, return original address */
- if (!kp)
+ faddr = ftrace_location(addr);
+ /*
+ * Addresses inside the ftrace location are refused by
+ * arch_check_ftrace_location(). Something went terribly wrong
+ * if such an address is checked here.
+ */
+ if (WARN_ON(faddr && faddr != addr))
+ return 0UL;
+ /*
+ * Use the current code if it is not modified by Kprobe
+ * and it cannot be modified by ftrace.
+ */
+ if (!kp && !faddr)
return addr;
/*
- * Basically, kp->ainsn.insn has an original instruction.
- * However, RIP-relative instruction can not do single-stepping
- * at different place, __copy_instruction() tweaks the displacement of
- * that instruction. In that case, we can't recover the instruction
- * from the kp->ainsn.insn.
+ * Basically, kp->ainsn.insn has an original instruction.
+ * However, RIP-relative instruction can not do single-stepping
+ * at different place, __copy_instruction() tweaks the displacement of
+ * that instruction. In that case, we can't recover the instruction
+ * from the kp->ainsn.insn.
*
- * On the other hand, kp->opcode has a copy of the first byte of
- * the probed instruction, which is overwritten by int3. And
- * the instruction at kp->addr is not modified by kprobes except
- * for the first byte, we can recover the original instruction
- * from it and kp->opcode.
+ * On the other hand, in case on normal Kprobe, kp->opcode has a copy
+ * of the first byte of the probed instruction, which is overwritten
+ * by int3. And the instruction at kp->addr is not modified by kprobes
+ * except for the first byte, we can recover the original instruction
+ * from it and kp->opcode.
+ *
+ * In case of Kprobes using ftrace, we do not have a copy of
+ * the original instruction. In fact, the ftrace location might
+ * be modified at anytime and even could be in an inconsistent state.
+ * Fortunately, we know that the original code is the ideal 5-byte
+ * long NOP.
*/
- memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- buf[0] = kp->opcode;
+ memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ if (faddr)
+ memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
+ else
+ buf[0] = kp->opcode;
return (unsigned long)buf;
}
@@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
* Recover the probed instruction at addr for further analysis.
* Caller must lock kprobes by kprobe_mutex, or disable preemption
* for preventing to release referencing kprobes.
+ * Returns zero if the instruction can not get recovered.
*/
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
@@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr)
* normally used, we just go through if there is no kprobe.
*/
__addr = recover_probed_instruction(buf, addr);
+ if (!__addr)
+ return 0;
kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
insn_get_length(&insn);
@@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src)
unsigned long recovered_insn =
recover_probed_instruction(buf, (unsigned long)src);
+ if (!recovered_insn)
+ return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint, failed to recover */
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7c523bbf3dc8..7b3b9d15c47a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr)
*/
return 0;
recovered_insn = recover_probed_instruction(buf, addr);
+ if (!recovered_insn)
+ return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
/* Another subsystem puts a breakpoint */
@@ -322,7 +324,8 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
* Target instructions MUST be relocatable (checked inside)
* This is called when new aggr(opt)probe is allocated or reused.
*/
-int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
+ struct kprobe *__unused)
{
u8 *buf;
int ret;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
u8 ret;
u8 old;
- old = ACCESS_ONCE(zero_stats);
+ old = READ_ONCE(zero_stats);
if (unlikely(old)) {
ret = cmpxchg(&zero_stats, old, 0);
/* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
int cpu;
u64 start;
unsigned long flags;
+ __ticket_t head;
if (in_nmi())
return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
*/
__ticket_enter_slowpath(lock);
+ /* make sure enter_slowpath, which is atomic does not cross the read */
+ smp_mb__after_atomic();
+
/*
* check again make sure it didn't become free while
* we weren't looking.
*/
- if (ACCESS_ONCE(lock->tickets.head) == want) {
+ head = READ_ONCE(lock->tickets.head);
+ if (__tickets_equal(head, want)) {
add_stats(TAKEN_SLOW_PICKUP, 1);
goto out;
}
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
add_stats(RELEASED_SLOW, 1);
for_each_cpu(cpu, &waiting_cpus) {
const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
- if (ACCESS_ONCE(w->lock) == lock &&
- ACCESS_ONCE(w->want) == ticket) {
+ if (READ_ONCE(w->lock) == lock &&
+ READ_ONCE(w->want) == ticket) {
add_stats(RELEASED_SLOW_KICKED, 1);
kvm_kick_cpu(cpu);
break;
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c
new file mode 100644
index 000000000000..ff3c3101d003
--- /dev/null
+++ b/arch/x86/kernel/livepatch.c
@@ -0,0 +1,90 @@
+/*
+ * livepatch.c - x86-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ * Copyright (C) 2014 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/page_types.h>
+#include <asm/elf.h>
+#include <asm/livepatch.h>
+
+/**
+ * klp_write_module_reloc() - write a relocation in a module
+ * @mod: module in which the section to be modified is found
+ * @type: ELF relocation type (see asm/elf.h)
+ * @loc: address that the relocation should be written to
+ * @value: relocation value (sym address + addend)
+ *
+ * This function writes a relocation to the specified location for
+ * a particular module.
+ */
+int klp_write_module_reloc(struct module *mod, unsigned long type,
+ unsigned long loc, unsigned long value)
+{
+ int ret, numpages, size = 4;
+ bool readonly;
+ unsigned long val;
+ unsigned long core = (unsigned long)mod->module_core;
+ unsigned long core_ro_size = mod->core_ro_size;
+ unsigned long core_size = mod->core_size;
+
+ switch (type) {
+ case R_X86_64_NONE:
+ return 0;
+ case R_X86_64_64:
+ val = value;
+ size = 8;
+ break;
+ case R_X86_64_32:
+ val = (u32)value;
+ break;
+ case R_X86_64_32S:
+ val = (s32)value;
+ break;
+ case R_X86_64_PC32:
+ val = (u32)(value - loc);
+ break;
+ default:
+ /* unsupported relocation type */
+ return -EINVAL;
+ }
+
+ if (loc < core || loc >= core + core_size)
+ /* loc does not point to any symbol inside the module */
+ return -EINVAL;
+
+ if (loc < core + core_ro_size)
+ readonly = true;
+ else
+ readonly = false;
+
+ /* determine if the relocation spans a page boundary */
+ numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2;
+
+ if (readonly)
+ set_memory_rw(loc & PAGE_MASK, numpages);
+
+ ret = probe_kernel_write((void *)loc, &val, size);
+
+ if (readonly)
+ set_memory_ro(loc & PAGE_MASK, numpages);
+
+ return ret;
+}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e69f9882bf95..d1ac80b72c72 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/kasan.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/gfp.h>
@@ -83,13 +84,22 @@ static unsigned long int get_module_load_offset(void)
void *module_alloc(unsigned long size)
{
+ void *p;
+
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
- return __vmalloc_node_range(size, 1,
+
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
- PAGE_KERNEL_EXEC, NUMA_NO_NODE,
+ PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
+ if (p && (kasan_module_alloc(p, size) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+
+ return p;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 83480373a642..dcaf4b00d0b4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
+#include <asm/tlbflush.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -148,7 +149,7 @@ void flush_thread(void)
static void hard_disable_TSC(void)
{
- write_cr4(read_cr4() | X86_CR4_TSD);
+ cr4_set_bits(X86_CR4_TSD);
}
void disable_TSC(void)
@@ -165,7 +166,7 @@ void disable_TSC(void)
static void hard_enable_TSC(void)
{
- write_cr4(read_cr4() & ~X86_CR4_TSD);
+ cr4_clear_bits(X86_CR4_TSD);
}
static void enable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8f3ebfe710d0..603c4f99cb5a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
- cr4 = read_cr4_safe();
+ cr4 = __read_cr4_safe();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5a2c02913af3..67fcc43577d2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
- cr4 = read_cr4();
+ cr4 = __read_cr4();
printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
fs, fsindex, gs, gsindex, shadowgs);
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index fe3dbfe0c4a5..cd9685235df9 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -49,11 +49,11 @@ int mach_set_rtc_mmss(const struct timespec *now)
retval = set_rtc_time(&tm);
if (retval)
printk(KERN_ERR "%s: RTC write failed with error %d\n",
- __FUNCTION__, retval);
+ __func__, retval);
} else {
printk(KERN_ERR
"%s: Invalid RTC value: write of %lx to RTC failed\n",
- __FUNCTION__, nowtime);
+ __func__, nowtime);
retval = -EINVAL;
}
return retval;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c4648adadd7d..0a2421cca01f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,6 +89,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/bugs.h>
+#include <asm/kasan.h>
#include <asm/vsyscall.h>
#include <asm/cpu.h>
@@ -1174,9 +1175,11 @@ void __init setup_arch(char **cmdline_p)
x86_init.paging.pagetable_init();
+ kasan_init();
+
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
- mmu_cr4_features = read_cr4();
+ mmu_cr4_features = __read_cr4();
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 2a33c8f68319..e5042463c1bc 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -69,7 +69,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
get_user_try {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 92b83e299ed3..7ee7369d5aec 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
goto exit;
conditional_sti(regs);
- if (!user_mode(regs))
+ if (!user_mode_vm(regs))
die("bounds", regs, error_code);
if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -637,7 +637,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
*/
- if (!dr6 && user_mode(regs))
+ if (!dr6 && user_mode_vm(regs))
user_icebp = 1;
/* Catch kmemcheck conditions first of all! */
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a947021f..81f8adb0679e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -66,27 +66,54 @@
* Good-instruction tables for 32-bit apps. This is non-const and volatile
* to keep gcc from statically optimizing it out, as variable_test_bit makes
* some versions of gcc to think only *(unsigned long*) is used.
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
+ * (why we support bound (62) then? it's similar, and similarly unused...)
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * 07,17,1f - pop es/ss/ds
+ * Normally not used in userspace, but would execute if used.
+ * Can cause GP or stack exception if tries to load wrong segment descriptor.
+ * We hesitate to run them under single step since kernel's handling
+ * of userspace single-stepping (TF flag) is fragile.
+ * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e)
+ * on the same grounds that they are never used.
+ * cd - int N.
+ * Used by userspace for "int 80" syscall entry. (Other "int N"
+ * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ * Not supported since kernel's handling of userspace single-stepping
+ * (TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
*/
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static volatile u32 good_insns_32[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
+ W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */
W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
- W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
- W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
- W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = {
#define good_insns_32 NULL
#endif
-/* Good-instruction tables for 64-bit apps */
+/* Good-instruction tables for 64-bit apps.
+ *
+ * Genuinely invalid opcodes:
+ * 06,07 - formerly push/pop es
+ * 0e - formerly push cs
+ * 16,17 - formerly push/pop ss
+ * 1e,1f - formerly push/pop ds
+ * 27,2f,37,3f - formerly daa/das/aaa/aas
+ * 60,61 - formerly pusha/popa
+ * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
+ * 82 - formerly redundant encoding of Group1
+ * 9a - formerly call seg:ofs
+ * ce - formerly into
+ * d4,d5 - formerly aam/aad
+ * d6 - formerly undocumented salc
+ * ea - formerly jmp seg:ofs
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * cd - int N.
+ * Used by userspace for "int 80" syscall entry. (Other "int N"
+ * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ * Not supported since kernel's handling of userspace single-stepping
+ * (TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
+ */
#if defined(CONFIG_X86_64)
static volatile u32 good_insns_64[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+ W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */
W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
- W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
- W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
- W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
- W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */
W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
- W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+ W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
- W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */
+ W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = {
#define good_insns_64 NULL
#endif
-/* Using this for both 64-bit and 32-bit apps */
+/* Using this for both 64-bit and 32-bit apps.
+ * Opcodes we don't support:
+ * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
+ * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
+ * Also encodes tons of other system insns if mod=11.
+ * Some are in fact non-system: xend, xtest, rdtscp, maybe more
+ * 0f 05 - syscall
+ * 0f 06 - clts (CPL0 insn)
+ * 0f 07 - sysret
+ * 0f 08 - invd (CPL0 insn)
+ * 0f 09 - wbinvd (CPL0 insn)
+ * 0f 0b - ud2
+ * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
+ * 0f 34 - sysenter
+ * 0f 35 - sysexit
+ * 0f 37 - getsec
+ * 0f 78 - vmread (Intel VMX. CPL0 insn)
+ * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
+ * Note: with prefixes, these two opcodes are
+ * extrq/insertq/AVX512 convert vector ops.
+ * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
+ * {rd,wr}{fs,gs}base,{s,l,m}fence.
+ * Why? They are all user-executable.
+ */
static volatile u32 good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
- W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
- W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
- W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+ W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+ W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
- W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
- W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
- W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
- W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
- W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
+ W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
#undef W
/*
- * opcodes we'll probably never support:
- *
- * 6c-6d, e4-e5, ec-ed - in
- * 6e-6f, e6-e7, ee-ef - out
- * cc, cd - int3, int
- * cf - iret
- * d6 - illegal instruction
- * f1 - int1/icebp
- * f4 - hlt
- * fa, fb - cli, sti
- * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
- *
- * invalid opcodes in 64-bit mode:
- *
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
- * 63 - we support this opcode in x86_64 but not in i386.
- *
* opcodes we may need to refine support for:
*
* 0f - 2-byte instructions: For many of these instructions, the validity
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 040681928e9d..37d8fa4438f0 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -50,13 +50,19 @@ EXPORT_SYMBOL(csum_partial);
#undef memset
#undef memmove
+extern void *__memset(void *, int, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
extern void *memset(void *, int, __kernel_size_t);
extern void *memcpy(void *, const void *, __kernel_size_t);
-extern void *__memcpy(void *, const void *, __kernel_size_t);
+extern void *memmove(void *, const void *, __kernel_size_t);
+
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(memmove);
#ifndef CONFIG_DEBUG_VIRTUAL
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index de9dcf89a302..0bf82c5ac529 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -12,6 +12,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/sigframe.h>
+#include <asm/tlbflush.h>
#include <asm/xcr.h>
/*
@@ -378,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
- struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
@@ -392,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
*/
drop_fpu(tsk);
- if (__copy_from_user(xsave, buf_fx, state_size) ||
+ if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
+ fpu_finit(fpu);
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
- set_used_math();
}
+ set_used_math();
if (use_eager_fpu()) {
preempt_disable();
math_state_restore();
@@ -453,7 +455,7 @@ static void prepare_fx_sw_frame(void)
*/
static inline void xstate_enable(void)
{
- set_in_cr4(X86_CR4_OSXSAVE);
+ cr4_set_bits(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
}