diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 92 |
1 files changed, 64 insertions, 28 deletions
diff --git a/mm/memory.c b/mm/memory.c index 33f45edf8272..32e9b7aec366 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -300,15 +300,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ struct mmu_gather_batch *batch; VM_BUG_ON(!tlb->end); - - if (!tlb->page_size) - tlb->page_size = page_size; - else { - if (page_size != tlb->page_size) - return true; - } + VM_WARN_ON(tlb->page_size != page_size); batch = tlb->active; + /* + * Add the page and check if we are full. If so + * force a flush. + */ + batch->pages[batch->nr++] = page; if (batch->nr == batch->max) { if (!tlb_next_batch(tlb)) return true; @@ -316,7 +315,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ } VM_BUG_ON_PAGE(batch->nr > batch->max, page); - batch->pages[batch->nr++] = page; return false; } @@ -528,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb, end -= PMD_SIZE; if (addr > end - 1) return; - + /* + * We add page table cache pages with PAGE_SIZE, + * (see pte_free_tlb()), flush the tlb if we need + */ + tlb_remove_check_page_size_change(tlb, PAGE_SIZE); pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); @@ -1118,8 +1120,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pte_t *start_pte; pte_t *pte; swp_entry_t entry; - struct page *pending_page = NULL; + tlb_remove_check_page_size_change(tlb, PAGE_SIZE); again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); @@ -1172,7 +1174,6 @@ again: print_bad_pte(vma, addr, ptent, page); if (unlikely(__tlb_remove_page(tlb, page))) { force_flush = 1; - pending_page = page; addr += PAGE_SIZE; break; } @@ -1213,11 +1214,6 @@ again: if (force_flush) { force_flush = 0; tlb_flush_mmu_free(tlb); - if (pending_page) { - /* remove the page with new size */ - __tlb_remove_pte_page(tlb, pending_page); - pending_page = NULL; - } if (addr != end) goto again; } @@ -1240,7 +1236,7 @@ static inline unsigned long zap_pmd_range(struct 
mmu_gather *tlb, if (next - addr != HPAGE_PMD_SIZE) { VM_BUG_ON_VMA(vma_is_anonymous(vma) && !rwsem_is_locked(&tlb->mm->mmap_sem), vma); - split_huge_pmd(vma, pmd, addr); + __split_huge_pmd(vma, pmd, addr, false, NULL); } else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ @@ -2939,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, return true; } +static void deposit_prealloc_pte(struct fault_env *fe) +{ + struct vm_area_struct *vma = fe->vma; + + pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte); + /* + * We are going to consume the prealloc table, + * count that as nr_ptes. + */ + atomic_long_inc(&vma->vm_mm->nr_ptes); + fe->prealloc_pte = 0; +} + static int do_set_pmd(struct fault_env *fe, struct page *page) { struct vm_area_struct *vma = fe->vma; @@ -2953,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ret = VM_FAULT_FALLBACK; page = compound_head(page); + /* + * Archs like ppc64 need additional space to store information + * related to pte entry. Use the preallocated table for that. + */ + if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) { + fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address); + if (!fe->prealloc_pte) + return VM_FAULT_OOM; + smp_wmb(); /* See comment in __pte_alloc() */ + } + fe->ptl = pmd_lock(vma->vm_mm, fe->pmd); if (unlikely(!pmd_none(*fe->pmd))) goto out; @@ -2966,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); page_add_file_rmap(page, true); + /* + * deposit and withdraw with pmd lock held + */ + if (arch_needs_pgtable_deposit()) + deposit_prealloc_pte(fe); set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry); @@ -2975,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page) ret = 0; count_vm_event(THP_FILE_MAPPED); out: + /* + * If we are going to fallback to pte mapping, do a + * withdraw with pmd lock held. 
+ */ + if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) + fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, + fe->pmd); spin_unlock(fe->ptl); return ret; } @@ -3014,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, ret = do_set_pmd(fe, page); if (ret != VM_FAULT_FALLBACK) - return ret; + goto fault_handled; } if (!fe->pte) { ret = pte_alloc_one_map(fe); if (ret) - return ret; + goto fault_handled; } /* Re-check under ptl */ - if (unlikely(!pte_none(*fe->pte))) - return VM_FAULT_NOPAGE; + if (unlikely(!pte_none(*fe->pte))) { + ret = VM_FAULT_NOPAGE; + goto fault_handled; + } flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); @@ -3045,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg, /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, fe->address, fe->pte); + ret = 0; - return 0; +fault_handled: + /* preallocated pagetable is unused: free it */ + if (fe->prealloc_pte) { + pte_free(fe->vma->vm_mm, fe->prealloc_pte); + fe->prealloc_pte = 0; + } + return ret; } static unsigned long fault_around_bytes __read_mostly = @@ -3145,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff) fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff); - /* preallocated pagetable is unused: free it */ - if (fe->prealloc_pte) { - pte_free(fe->vma->vm_mm, fe->prealloc_pte); - fe->prealloc_pte = 0; - } /* Huge page is mapped? Page fault is solved */ if (pmd_trans_huge(*fe->pmd)) { ret = VM_FAULT_NOPAGE; @@ -3454,7 +3490,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd) /* COW handled on pte level: split pmd */ VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma); - split_huge_pmd(fe->vma, fe->pmd, fe->address); + __split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL); return VM_FAULT_FALLBACK; } |