diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 15:44:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 15:44:27 -0700 |
commit | ac4de9543aca59f2b763746647577302fbedd57e (patch) | |
tree | 40407750569ee030de56233c41c9a97f7e89cf67 /arch | |
parent | 26935fb06ee88f1188789807687c03041f3c70d9 (diff) | |
parent | de32a8177f64bc62e1b19c685dd391af664ab13f (diff) |
Merge branch 'akpm' (patches from Andrew Morton)
Merge more patches from Andrew Morton:
"The rest of MM. Plus one misc cleanup"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
mm/Kconfig: add MMU dependency for MIGRATION.
kernel: replace strict_strto*() with kstrto*()
mm, thp: count thp_fault_fallback anytime thp fault fails
thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()
thp: do_huge_pmd_anonymous_page() cleanup
thp: move maybe_pmd_mkwrite() out of mk_huge_pmd()
mm: cleanup add_to_page_cache_locked()
thp: account anon transparent huge pages into NR_ANON_PAGES
truncate: drop 'oldsize' truncate_pagecache() parameter
mm: make lru_add_drain_all() selective
memcg: document cgroup dirty/writeback memory statistics
memcg: add per cgroup writeback pages accounting
memcg: check for proper lock held in mem_cgroup_update_page_stat
memcg: remove MEMCG_NR_FILE_MAPPED
memcg: reduce function dereference
memcg: avoid overflow caused by PAGE_ALIGN
memcg: rename RESOURCE_MAX to RES_COUNTER_MAX
memcg: correct RESOURCE_MAX to ULLONG_MAX
mm: memcg: do not trap chargers with full callstack on OOM
mm: memcg: rework and document OOM waiting and wakeup
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/alpha/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/arc/mm/fault.c | 11 | ||||
-rw-r--r-- | arch/arm/mm/fault.c | 23 | ||||
-rw-r--r-- | arch/arm64/mm/fault.c | 31 | ||||
-rw-r--r-- | arch/avr32/mm/fault.c | 4 | ||||
-rw-r--r-- | arch/cris/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/frv/mm/fault.c | 10 | ||||
-rw-r--r-- | arch/hexagon/mm/vm_fault.c | 6 | ||||
-rw-r--r-- | arch/ia64/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/m32r/mm/fault.c | 10 | ||||
-rw-r--r-- | arch/m68k/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/metag/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/microblaze/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/mips/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/mn10300/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/openrisc/mm/fault.c | 1 | ||||
-rw-r--r-- | arch/parisc/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/score/mm/fault.c | 13 | ||||
-rw-r--r-- | arch/sh/mm/fault.c | 9 | ||||
-rw-r--r-- | arch/sparc/mm/fault_32.c | 12 | ||||
-rw-r--r-- | arch/sparc/mm/fault_64.c | 6 | ||||
-rw-r--r-- | arch/tile/mm/fault.c | 13 | ||||
-rw-r--r-- | arch/um/kernel/trap.c | 22 | ||||
-rw-r--r-- | arch/unicore32/mm/fault.c | 22 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 43 | ||||
-rw-r--r-- | arch/xtensa/mm/fault.c | 2 |
28 files changed, 177 insertions, 121 deletions
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 0c4132dd3507..98838a05ba6d 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, const struct exception_table_entry *fixup; int fault, si_code = SEGV_MAPERR; siginfo_t info; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (cause > 0 ? FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* As of EV6, a load into $31/$f31 is a prefetch, and never faults (or is suppressed by the PALcode). Support that for older CPUs @@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (address >= TASK_SIZE) goto vmalloc_fault; #endif - + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -142,6 +142,7 @@ retry: } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } /* If for any reason at all we couldn't handle the fault, diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 0fd1f0d515ff..d63f3de0cd5b 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) siginfo_t info; int fault, ret; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * We fault-in kernel-space virtual memory on-demand. The @@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -117,12 +118,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -201,10 +202,6 @@ no_context: die("Oops", regs, address); out_of_memory: - if (is_global_init(tsk)) { - yield(); - goto survive; - } up_read(&mm->mmap_sem); if (user_mode(regs)) { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c97f7940cb95..eb8830a4c5ed 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - int write = fsr & FSR_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, fsr)) return 0; @@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (fsr & FSR_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -349,6 +352,13 @@ retry: if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -359,13 +369,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we - * have no context to handle this fault with. - */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6c8ba25bf6bb..6d6acf153bff 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (esr & ESR_LNX_EXEC) { - vm_flags = VM_EXEC; - } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { - vm_flags = VM_WRITE; - mm_flags |= FAULT_FLAG_WRITE; - } - tsk = current; mm = tsk->mm; @@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + mm_flags |= FAULT_FLAG_USER; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -288,6 +291,13 @@ retry: VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we have no context to + * handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -298,13 +308,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we have no context to - * handle this fault with. - */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to successfully fix up diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index b2f2d2d66849..0eca93327195 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) local_irq_enable(); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); @@ -228,9 +230,9 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - pagefault_out_of_memory(); if (!user_mode(regs)) goto no_context; + pagefault_out_of_memory(); return; do_sigbus: diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 73312ab6c696..1790f22e71a2 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, struct vm_area_struct * vma; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; D(printk(KERN_DEBUG "Page fault for %lX on %X at %lX, prot %d write %d\n", @@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -155,6 +156,7 @@ retry: } else if (writeaccess == 1) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 331c1e2cfb67..9a66372fc7c7 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear struct vm_area_struct *vma; struct mm_struct *mm; unsigned long _pme, lrai, lrad, fixup; + unsigned long flags = 0; siginfo_t info; pgd_t *pge; pud_t *pue; pte_t *pte; - int write; int fault; #if 0 @@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear if (in_atomic() || !mm) goto no_context; + if (user_mode(__frame)) + flags |= FAULT_FLAG_USER; + down_read(&mm->mmap_sem); vma = find_vma(mm, ear0); @@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear */ good_area: info.si_code = SEGV_ACCERR; - write = 0; switch (esr0 & ESR0_ATXC) { default: /* handle write to write protected page */ @@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; - write = 1; + flags |= FAULT_FLAG_WRITE; break; /* handle read from protected page */ @@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, ear0, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 1bd276dbec7d..8704c9320032 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) int si_code = SEGV_MAPERR; int fault; const struct exception_table_entry *fixup; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (cause > 0 ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * If we're in an interrupt or have no user context, @@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) local_irq_enable(); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -96,6 +97,7 @@ good_area: case FLT_STORE: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; break; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 6cf0341f978e..7225dad87094 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0); - /* mmap_sem is performance critical.... */ prefetchw(&mm->mmap_sem); @@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re if (notify_page_fault(regs, TRAP_BRKPT)) return; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (mask & VM_WRITE) + flags |= FAULT_FLAG_WRITE; retry: down_read(&mm->mmap_sem); diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index 3cdfa9c1d091..e9c6a8014bd6 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, struct mm_struct *mm; struct vm_area_struct * vma; unsigned long page, addr; - int write; + unsigned long flags = 0; int fault; siginfo_t info; @@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (error_code & ACE_USERMODE) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, */ good_area: info.si_code = SEGV_ACCERR; - write = 0; switch (error_code & (ACE_WRITE|ACE_PROTECTION)) { default: /* 3: write, present */ /* fall through */ case ACE_WRITE: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; - write++; + flags |= FAULT_FLAG_WRITE; break; case ACE_PROTECTION: /* read, present */ case 0: /* read, not present */ @@ -194,7 +196,7 @@ good_area: */ addr = (address & PAGE_MASK); set_thread_fault_code(error_code); - fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, addr, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index a563727806bf..eb1d61f68725 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 8fddf46e6c62..332680e5ebf2 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma, *prev_vma; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write_access ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; @@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); @@ -121,6 +122,7 @@ good_area: if (write_access) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 731f739d17a1..fa4cf52aa7a6 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, int code = SEGV_MAPERR; int is_write = error_code & ESR_S; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (is_write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; regs->ear = address; regs->esr = error_code; @@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -199,6 +201,7 @@ good_area: if (unlikely(is_write)) { if (unlikely(!(vma->vm_flags & VM_WRITE))) goto bad_area; + flags |= FAULT_FLAG_WRITE; /* a read */ } else { /* protection fault */ diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 85df1cd8d446..becc42bb1849 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, const int field = sizeof(unsigned long) * 2; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; #if 0 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), @@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -114,6 +115,7 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (cpu_has_rixi) { if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { @@ -241,6 +243,8 @@ out_of_memory: * (which will retry the fault, or kill us if we got oom-killed). */ up_read(&mm->mmap_sem); + if (!user_mode(regs)) + goto no_context; pagefault_out_of_memory(); return; diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 8a2e6ded9a44..3516cbdf1ee9 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, if (in_atomic() || !mm) goto no_context; + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 4a41f8493ab0..0703acf7d327 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) { /* Exception was in userspace: reenable interrupts */ local_irq_enable(); + flags |= FAULT_FLAG_USER; } else { /* If exception was in a syscall, then IRQ's may have * been enabled or disabled. If they were enabled, diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f247a3480e8e..d10d27a720c0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (acc_type & VM_WRITE) + flags |= FAULT_FLAG_WRITE; retry: down_read(&mm->mmap_sem); vma = find_vma_prev(mm, address, &prev_vma); @@ -203,8 +207,7 @@ good_area: * fault. */ - fault = handle_mm_fault(mm, vma, address, - flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0)); + fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2dd69bf4af46..51ab9e7e6c39 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ - if (is_write) - flags |= FAULT_FLAG_WRITE; - #ifdef CONFIG_PPC_ICSWX /* * we need to do this early because this "data storage @@ -288,6 +285,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) store_update_sp = store_updates_sp(regs); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -415,6 +415,7 @@ good_area: } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; /* a read */ } else { /* protection fault */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7de4469915f0..fc6679210d83 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access) address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6b18fb0189ae..52238983527d 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; + unsigned long flags = 0; siginfo_t info; int fault; @@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -95,18 +99,18 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -167,11 +171,6 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 1f49c28affa9..541dc6101508 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, struct mm_struct *mm; struct vm_area_struct * vma; int fault; - int write = error_code & FAULT_CODE_WRITE; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -476,6 +474,11 @@ good_area: set_thread_fault_code(error_code); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (error_code & FAULT_CODE_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index e98bfda205a2..59dbd4645725 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; int from_user = !(regs->psr & PSR_PS); int fault, code; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (text_fault) address = regs->pc; @@ -235,6 +234,11 @@ good_area: goto bad_area; } + if (from_user) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write) struct vm_area_struct *vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned int flags = FAULT_FLAG_USER; int code; code = SEGV_MAPERR; @@ -402,11 +407,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) { + switch (handle_mm_fault(mm, vma, address, flags)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 5062ff389e83..2ebec263d685 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -315,7 +315,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) bad_kernel_pc(regs, address); return; } - } + } else + flags |= FAULT_FLAG_USER; /* * If we're in an interrupt or have no user @@ -418,13 +419,14 @@ good_area: vma->vm_file != NULL) set_thread_fault_code(fault_code | FAULT_CODE_BLKCOMMIT); + + flags |= FAULT_FLAG_WRITE; } else { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 111d5a9b76f1..4c288f199453 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_regs *regs, if (!is_page_fault) write = 1; - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; is_kernel_mode = !user_mode(regs); @@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (!is_kernel_mode) + flags |= FAULT_FLAG_USER; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -425,12 +427,12 @@ good_area: #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!is_page_fault || !(vma->vm_flags & VM_READ)) goto bad_area; } - survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -555,11 +557,6 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } if (is_kernel_mode) goto no_context; pagefault_out_of_memory(); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 089f3987e273..5c3aef74237f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, pmd_t *pmd; pte_t *pte; int err = -EFAULT; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (is_write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; *code_out = SEGV_MAPERR; @@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (in_atomic()) goto out_nosemaphore; + if (is_user) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -58,12 +59,15 @@ retry: good_area: *code_out = SEGV_ACCERR; - if (is_write && !(vma->vm_flags & VM_WRITE)) - goto out; - - /* Don't require VM_READ|VM_EXEC for write faults! */ - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) - goto out; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + flags |= FAULT_FLAG_WRITE; + } else { + /* Don't require VM_READ|VM_EXEC for write faults! */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + } do { int fault; @@ -124,6 +128,8 @@ out_of_memory: * (which will retry the fault, or kill us if we got oom-killed). */ up_read(&mm->mmap_sem); + if (!is_user) + goto out_nosemaphore; pagefault_out_of_memory(); return 0; } diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index f9b5c10bccee..0dc922dba915 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (!(fsr ^ 0x12)) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -278,6 +282,13 @@ retry: (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -288,13 +299,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we - * have no context to handle this fault with. - */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 654be4ae3047..3aaeffcfd67a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, force_sig_info_fault(SIGBUS, code, address, tsk, fault); } -static noinline int +static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue pagefault. - */ - if (fatal_signal_pending(current)) { - if (!(fault & VM_FAULT_RETRY)) - up_read(¤t->mm->mmap_sem); - if (!(error_code & PF_USER)) - no_context(regs, error_code, address, 0, 0); - return 1; + if (fatal_signal_pending(current) && !(error_code & PF_USER)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address, 0, 0); + return; } - if (!(fault & VM_FAULT_ERROR)) - return 0; if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ @@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, up_read(¤t->mm->mmap_sem); no_context(regs, error_code, address, SIGSEGV, SEGV_MAPERR); - return 1; + return; } up_read(¤t->mm->mmap_sem); @@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, else BUG(); } - return 1; } static int spurious_fault_check(unsigned long error_code, pte_t *pte) @@ -1011,9 +1002,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; struct mm_struct *mm; int fault; - int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -1083,6 +1072,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) if (user_mode_vm(regs)) { local_irq_enable(); error_code |= PF_USER; + flags |= FAULT_FLAG_USER; } else { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); @@ -1109,6 +1099,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } + if (error_code & PF_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in @@ -1187,9 +1180,17 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); - if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - if (mm_fault_error(regs, error_code, address, fault)) - return; + /* + * If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because it + * would already be released in __lock_page_or_retry in mm/filemap.c. + */ + if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + mm_fault_error(regs, error_code, address, fault); + return; } /* diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 4b7bc8db170f..70fa7bc42b4a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs) address, exccause, regs->pc, is_write? "w":"", is_exec? "x":""); #endif + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); |