From 3d214faea6e4f9b6018bf8589f4b245126349c0a Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:36 +0100 Subject: [S390] kdump: Add KEXEC_CRASH_CONTROL_MEMORY_LIMIT On s390 there is a different KEXEC_CONTROL_MEMORY_LIMIT for the normal and the kdump kexec case. Therefore this patch introduces a new macro KEXEC_CRASH_CONTROL_MEMORY_LIMIT. This is set to KEXEC_CONTROL_MEMORY_LIMIT for all architectures that do not define KEXEC_CRASH_CONTROL_MEMORY_LIMIT. Acked-by: Vivek Goyal Acked-by: Andrew Morton Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/kexec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c2478a342cd7..07d9aba75562 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -33,6 +33,10 @@ #error KEXEC_ARCH not defined #endif +#ifndef KEXEC_CRASH_CONTROL_MEMORY_LIMIT +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT KEXEC_CONTROL_MEMORY_LIMIT +#endif + #define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) #define KEXEC_CORE_NOTE_NAME "CORE" #define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) -- cgit v1.2.3-58-ga151 From d3bf37955d46718ee1a7f1fc69f953d2328ba7c2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:37 +0100 Subject: [S390] kdump: Add size to elfcorehdr kernel parameter Currently only the address of the pre-allocated ELF header is passed with the elfcorehdr= kernel parameter. In order to reserve memory for the header in the 2nd kernel also the size is required. Current kdump architecture backends use different methods to do that, e.g. x86 uses the memmap= kernel parameter. On s390 there is no easy way to transfer this information. Therefore the elfcorehdr kernel parameter is extended to also pass the size. This now can also be used as standard mechanism by all future kdump architecture backends. The syntax of the kernel parameter is extended as follows: elfcorehdr=[size[KMG]@]offset[KMG] This change is backward compatible because elfcorehdr=size is still allowed. Acked-by: Vivek Goyal Acked-by: Andrew Morton Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 6 +++--- include/linux/crash_dump.h | 1 + kernel/crash_dump.c | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 93413ce96883..1fbe3625b2da 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -741,10 +741,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See Documentation/block/cfq-iosched.txt and Documentation/block/deadline-iosched.txt for details. - elfcorehdr= [IA-64,PPC,SH,X86] + elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390] Specifies physical address of start of kernel core - image elf header. Generally kexec loader will - pass this option to capture kernel. + image elf header and optionally the size. Generally + kexec loader will pass this option to capture kernel. See Documentation/kdump/kdump.txt for details. enable_mtrr_cleanup [X86] diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 74054074e876..5c4abce94ad1 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -10,6 +10,7 @@ #define ELFCORE_ADDR_ERR (-2ULL) extern unsigned long long elfcorehdr_addr; +extern unsigned long long elfcorehdr_size; extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 5f85690285d4..69ebf3380bac 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c @@ -19,9 +19,16 @@ unsigned long saved_max_pfn; */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +/* + * stores the size of elf header of crash image + */ +unsigned long long elfcorehdr_size; + /* * elfcorehdr= specifies the location of elf core header stored by the crashed * kernel. This option will be passed by kexec loader to the capture kernel. + * + * Syntax: elfcorehdr=[size[KMG]@]offset[KMG] */ static int __init setup_elfcorehdr(char *arg) { @@ -29,6 +36,10 @@ static int __init setup_elfcorehdr(char *arg) if (!arg) return -EINVAL; elfcorehdr_addr = memparse(arg, &end); + if (*end == '@') { + elfcorehdr_size = elfcorehdr_addr; + elfcorehdr_addr = memparse(end + 1, &end); + } return end > arg ? 0 : -EINVAL; } early_param("elfcorehdr", setup_elfcorehdr); -- cgit v1.2.3-58-ga151 From 558df7209e7997275f6b8ad37737494cf2da1512 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:43 +0100 Subject: [S390] kdump: Add infrastructure for unmapping crashkernel memory This patch introduces a mechanism that allows architecture backends to remove page tables for the crashkernel memory. This can protect the loaded kdump kernel from being overwritten by broken kernel code. Two new functions crash_map_reserved_pages() and crash_unmap_reserved_pages() are added that can be implemented by architecture code. The crash_map_reserved_pages() function is called before and crash_unmap_reserved_pages() after the crashkernel segments are loaded. The functions are also called in crash_shrink_memory() to create/remove page tables when the crashkernel memory size is reduced. To support architectures that have large pages this patch also introduces a new define KEXEC_CRASH_MEM_ALIGN. The crashkernel start and size must always be aligned with KEXEC_CRASH_MEM_ALIGN. Cc: Andrew Morton Acked-by: Vivek Goyal Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/kexec.h | 6 ++++++ kernel/kexec.c | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 07d9aba75562..fe45136b32cc 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -37,6 +37,10 @@ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT KEXEC_CONTROL_MEMORY_LIMIT #endif +#ifndef KEXEC_CRASH_MEM_ALIGN +#define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE +#endif + #define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) #define KEXEC_CORE_NOTE_NAME "CORE" #define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) @@ -133,6 +137,8 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); +void crash_map_reserved_pages(void); +void crash_unmap_reserved_pages(void); void arch_crash_save_vmcoreinfo(void); void vmcoreinfo_append_str(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/kernel/kexec.c b/kernel/kexec.c index d3b8a4ceb90b..dc7bc0829286 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -999,6 +999,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, kimage_free(xchg(&kexec_crash_image, NULL)); result = kimage_crash_alloc(&image, entry, nr_segments, segments); + crash_map_reserved_pages(); } if (result) goto out; @@ -1015,6 +1016,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, goto out; } kimage_terminate(image); + if (flags & KEXEC_ON_CRASH) + crash_unmap_reserved_pages(); } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); @@ -1026,6 +1029,18 @@ out: return result; } +/* + * Add and remove page tables for crashkernel memory + * + * Provide an empty default implementation here -- architecture + * code may override this + */ +void __weak crash_map_reserved_pages(void) +{} + +void __weak crash_unmap_reserved_pages(void) +{} + #ifdef CONFIG_COMPAT asmlinkage long compat_sys_kexec_load(unsigned long entry, unsigned long nr_segments, @@ -1134,14 +1149,16 @@ int crash_shrink_memory(unsigned long new_size) goto unlock; } - start = roundup(start, PAGE_SIZE); - end = roundup(start + new_size, PAGE_SIZE); + start = roundup(start, KEXEC_CRASH_MEM_ALIGN); + end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); + crash_map_reserved_pages(); crash_free_reserved_phys_range(end, crashk_res.end); if ((start == end) && (crashk_res.parent != NULL)) release_resource(&crashk_res); crashk_res.end = end - 1; + crash_unmap_reserved_pages(); unlock: mutex_unlock(&kexec_mutex); -- cgit v1.2.3-58-ga151 From 20b40a794baf3b4b0320c0a77ce944d5d1a01f25 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:47 +0100 Subject: [S390] signal race with restarting system calls For a ERESTARTNOHAND/ERESTARTSYS/ERESTARTNOINTR restarting system call do_signal will prepare the restart of the system call with a rewind of the PSW before calling get_signal_to_deliver (where the debugger might take control). For A ERESTART_RESTARTBLOCK restarting system call do_signal will set -EINTR as return code. There are two issues with this approach: 1) strace never sees ERESTARTNOHAND, ERESTARTSYS, ERESTARTNOINTR or ERESTART_RESTARTBLOCK as the rewinding already took place or the return code has been changed to -EINTR 2) if get_signal_to_deliver does not return with a signal to deliver the restart via the repeat of the svc instruction is left in place. This opens a race if another signal is made pending before the system call instruction can be reexecuted. The original system call will be restarted even if the second signal would have ended the system call with -EINTR. These two issues can be solved by dropping the early rewind of the system call before get_signal_to_deliver has been called and by using the TIF_RESTART_SVC magic to do the restart if no signal has to be delivered. The only situation where the system call restart via the repeat of the svc instruction is appropriate is when a SA_RESTART signal is delivered to user space. Unfortunately this breaks inferior calls by the debugger again. The system call number and the length of the system call instruction is lost over the inferior call and user space will see ERESTARTNOHAND/ ERESTARTSYS/ERESTARTNOINTR/ERESTART_RESTARTBLOCK. To correct this a new ptrace interface is added to save/restore the system call number and system call instruction length. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 5 +- arch/s390/include/asm/syscall.h | 5 +- arch/s390/include/asm/thread_info.h | 1 + arch/s390/kernel/asm-offsets.c | 3 +- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/entry.S | 28 +++++----- arch/s390/kernel/entry64.S | 30 +++++----- arch/s390/kernel/ptrace.c | 51 ++++++++++++++++- arch/s390/kernel/signal.c | 107 ++++++++++++++++++------------------ include/linux/elf.h | 1 + 10 files changed, 142 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c9e98c..93c907b4776f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -328,8 +328,7 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int svc_code; }; /* @@ -487,6 +486,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d8..614267f60713 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include +#include #include /* @@ -25,7 +26,7 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return regs->svc_code ? (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +38,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f9a9a10979c9..0c4788eb5a65 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1140035b1cd8..751318765e2e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,8 +45,7 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b8c4ad..d7c8e54c32e7 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -342,7 +342,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 195387ab7c98..afe3685d30e7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ @@ -229,7 +228,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -239,12 +238,12 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: sll %r7,2 # svc number *4 @@ -335,10 +334,11 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number b BASED(sysc_nr_ok) # restart svc # @@ -346,7 +346,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -361,7 +361,7 @@ sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) + icm %r0,3,SP_SVC_CODE(%r15) st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) @@ -454,7 +454,7 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -531,7 +531,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -550,7 +550,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -966,7 +966,7 @@ cleanup_system_call: st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 0(4,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 3257f7f55551..7ff07d3a29c1 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER @@ -250,7 +249,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -260,14 +259,14 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) + llgh %r7,SP_SVC_CODE+2(%r15) slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: larl %r10,sys_call_table @@ -358,11 +357,12 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 j sysc_nr_ok # restart svc # @@ -370,7 +370,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -382,7 +382,7 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) + llgh %r0,SP_SVC_CODE+2(%r15) stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls @@ -470,7 +470,7 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -551,7 +551,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -571,7 +571,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -973,7 +973,7 @@ cleanup_system_call: stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ae0e14b8880c..bae1cc49fe96 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,6 +42,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; @@ -303,6 +304,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) high order bit but older gdb's rely on it */ data |= PSW_ADDR_AMODE; #endif + if (addr == (addr_t) &dummy->regs.psw.addr) + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -610,6 +618,11 @@ static int __poke_user_compat(struct task_struct *child, /* Build a 64 bit psw address from 31 bit address. */ task_pt_regs(child)->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) &task_pt_regs(child)->psw @@ -737,7 +750,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + regs->svc_code = 0; ret = -1; } @@ -899,6 +912,26 @@ static int s390_last_break_get(struct task_struct *target, #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -925,6 +958,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1104,6 +1145,14 @@ static const struct user_regset s390_compat_regsets[] = { .align = sizeof(long), .get = s390_compat_last_break_get, }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1cc5ec3..e751cab80e04 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -156,7 +157,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } @@ -401,7 +402,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +421,43 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = regs->svc_code; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } + regs->svc_code = current_thread_info()->system_call; if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (regs->svc_code > 0) { + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = regs->psw.addr - + (regs->svc_code >> 16); + break; + } + /* No longer in a system call */ + regs->svc_code = 0; + } + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +471,28 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + if (regs->svc_code > 0) { + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->svc_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_RESTART_SVC); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +500,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) diff --git a/include/linux/elf.h b/include/linux/elf.h index 110821cb6ea5..31f0508d7da7 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -395,6 +395,7 @@ typedef struct elf64_shdr { #define NT_S390_CTRS 0x304 /* s390 control registers */ #define NT_S390_PREFIX 0x305 /* s390 prefix register */ #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ -- cgit v1.2.3-58-ga151