summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/powerflags.c3
-rw-r--r--arch/x86/kernel/entry_32.S4
-rw-r--r--arch/x86/kernel/entry_64.S31
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kernel/vsyscall_64.c77
6 files changed, 94 insertions, 30 deletions
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0c..7b3fe56b1c21 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
"100mhzsteps",
"hwpstate",
"", /* tsc invariant mapped to constant_tsc */
- /* nothing */
+ "cpb", /* core performance boost */
+ "eff_freq_ro", /* Readonly aperf/mperf */
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f5344001..22d0e21b4dd7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
#else
movl %esp, %eax
#endif
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..a20e1cb9dc87 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+ pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- je int_ret_from_sys_call
+ jz retint_restore_args
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
* after the swapgs, so that it can do the swapgs
* for the guest and jump here on syscall.
*/
-ENTRY(system_call_after_swapgs)
+GLOBAL(system_call_after_swapgs)
movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
- GET_THREAD_INFO(%rcx)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz tracesys
system_call_fastpath:
cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
/* edi: flagmask */
sysret_check:
LOCKDEP_SYS_EXIT
- GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- movl TI_flags(%rcx),%edx
+ movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
andl %edi,%edx
jnz sysret_careful
CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz auditsys
#endif
SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $3,CS-ARGOFFSET(%rsp)
- je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
ENTRY(\sym)
INTR_FRAME
pushq_cfi $~(\num)
+.Lcommon_\sym:
interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ ALIGN
+ INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
.if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
- invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+ pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+ jmp .Lcommon_invalidate_interrupt0
+ CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
.endif
.endr
+ CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+ invalidate_interrupt0, smp_invalidate_interrupt
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fbd53b4..15763af7bfe3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.orig_ax = -1;
regs.ip = (unsigned long) kernel_thread_helper;
regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | 0x2;
+ regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
/* Ok, create the new process.. */
return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466c..fa1191fb679d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
== NOTIFY_STOP)
return;
#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
-#else
- if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
- == NOTIFY_STOP)
- return;
-#endif
preempt_conditional_sti(regs);
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..b07ba9393564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
static int __init vsyscall_setup(char *str)
{
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+ /*
+ * XXX: if access_ok, get_user, and put_user handled
+ * sig_on_uaccess_error, this could go away.
+ */
+
+ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+ siginfo_t info;
+ struct thread_struct *thread = &current->thread;
+
+ thread->error_code = 6; /* user fault, no page, write */
+ thread->cr2 = ptr;
+ thread->trap_no = 14;
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = (void __user *)ptr;
+
+ force_sig_info(SIGSEGV, &info, current);
+ return false;
+ } else {
+ return true;
+ }
+}
+
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
+ int prev_sig_on_uaccess_error;
long ret;
/*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
if (seccomp_mode(&tsk->seccomp))
do_exit(SIGKILL);
+ /*
+ * With a real vsyscall, page faults cause SIGSEGV. We want to
+ * preserve that behavior to make writing exploits harder.
+ */
+ prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+ current_thread_info()->sig_on_uaccess_error = 1;
+
+ /*
+ * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+ * 64-bit, so we don't need to special-case it here. For all the
+ * vsyscalls, 0 means "don't write anything" not "write it at
+ * address 0".
+ */
+ ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
+ if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+ !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+ break;
+
ret = sys_gettimeofday(
(struct timeval __user *)regs->di,
(struct timezone __user *)regs->si);
break;
case 1:
+ if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+ break;
+
ret = sys_time((time_t __user *)regs->di);
break;
case 2:
+ if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+ !write_ok_or_segv(regs->si, sizeof(unsigned)))
+ break;
+
ret = sys_getcpu((unsigned __user *)regs->di,
(unsigned __user *)regs->si,
0);
break;
}
+ current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
if (ret == -EFAULT) {
- /*
- * Bad news -- userspace fed a bad pointer to a vsyscall.
- *
- * With a real vsyscall, that would have caused SIGSEGV.
- * To make writing reliable exploits using the emulated
- * vsyscalls harder, generate SIGSEGV here as well.
- */
+ /* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
- goto sigsegv;
+
+ /*
+ * If we failed to generate a signal for any reason,
+ * generate one here. (This should be impossible.)
+ */
+ if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+ !sigismember(&tsk->pending.signal, SIGSEGV)))
+ goto sigsegv;
+
+ return true; /* Don't emulate the ret. */
}
regs->ax = ret;