diff options
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r-- | include/linux/pgtable.h | 176 |
1 files changed, 39 insertions, 137 deletions
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c5a51481bbb9..5063b482e34f 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -94,14 +94,22 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) #define pte_offset_kernel pte_offset_kernel #endif -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ - pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte)) +#ifdef CONFIG_HIGHPTE +#define __pte_map(pmd, address) \ + ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address))) +#define pte_unmap(pte) do { \ + kunmap_local((pte)); \ + /* rcu_read_unlock() to be added later */ \ +} while (0) #else -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) ((void)(pte)) /* NOP */ +static inline pte_t *__pte_map(pmd_t *pmd, unsigned long address) +{ + return pte_offset_kernel(pmd, address); +} +static inline void pte_unmap(pte_t *pte) +{ + /* rcu_read_unlock() to be added later */ +} #endif /* Find an entry in the second-level page table.. */ @@ -204,12 +212,26 @@ static inline int pudp_set_access_flags(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + +#ifndef pmdp_get +static inline pmd_t pmdp_get(pmd_t *pmdp) +{ + return READ_ONCE(*pmdp); +} +#endif + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); int r = 1; if (!pte_young(pte)) r = 0; @@ -296,7 +318,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_clear(mm, address, ptep); page_table_check_pte_clear(mm, address, pte); return pte; @@ -309,20 +331,6 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, ptep_get_and_clear(mm, addr, ptep); } -#ifndef ptep_get -static inline pte_t ptep_get(pte_t *ptep) -{ - return READ_ONCE(*ptep); -} -#endif - -#ifndef pmdp_get -static inline pmd_t pmdp_get(pmd_t *pmdp) -{ - return READ_ONCE(*pmdp); -} -#endif - #ifdef CONFIG_GUP_GET_PXX_LOW_HIGH /* * For walking the pagetables without holding any locks. Some architectures @@ -511,7 +519,7 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pte_t old_pte = *ptep; + pte_t old_pte = ptep_get(ptep); set_pte_at(mm, address, ptep, pte_wrprotect(old_pte)); } #endif @@ -591,6 +599,10 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif +#ifndef arch_needs_pgtable_deposit +#define arch_needs_pgtable_deposit() (false) +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is an implementation of pmdp_establish() that is only suitable for an @@ -1292,9 +1304,10 @@ static inline int pud_trans_huge(pud_t pud) } #endif -/* See pmd_none_or_trans_huge_or_clear_bad for discussion. */ -static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud) +static inline int pud_trans_unstable(pud_t *pud) { +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pudval = READ_ONCE(*pud); if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval)) @@ -1303,121 +1316,10 @@ static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud) pud_clear_bad(pud); return 1; } - return 0; -} - -/* See pmd_trans_unstable for discussion. */ -static inline int pud_trans_unstable(pud_t *pud) -{ -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ - defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) - return pud_none_or_trans_huge_or_dev_or_clear_bad(pud); -#else - return 0; -#endif -} - -#ifndef arch_needs_pgtable_deposit -#define arch_needs_pgtable_deposit() (false) -#endif -/* - * This function is meant to be used by sites walking pagetables with - * the mmap_lock held in read mode to protect against MADV_DONTNEED and - * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd - * into a null pmd and the transhuge page fault can convert a null pmd - * into an hugepmd or into a regular pmd (if the hugepage allocation - * fails). While holding the mmap_lock in read mode the pmd becomes - * stable and stops changing under us only if it's not null and not a - * transhuge pmd. When those races occurs and this function makes a - * difference vs the standard pmd_none_or_clear_bad, the result is - * undefined so behaving like if the pmd was none is safe (because it - * can return none anyway). The compiler level barrier() is critically - * important to compute the two checks atomically on the same pmdval. - * - * For 32bit kernels with a 64bit large pmd_t this automatically takes - * care of reading the pmd atomically to avoid SMP race conditions - * against pmd_populate() when the mmap_lock is hold for reading by the - * caller (a special atomic read not done by "gcc" as in the generic - * version above, is also needed when THP is disabled because the page - * fault can populate the pmd from under us). - */ -static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) -{ - pmd_t pmdval = pmdp_get_lockless(pmd); - /* - * The barrier will stabilize the pmdval in a register or on - * the stack so that it will stop changing under the code. - * - * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, - * pmdp_get_lockless is allowed to return a not atomic pmdval - * (for example pointing to an hugepage that has never been - * mapped in the pmd). The below checks will only care about - * the low part of the pmd with 32bit PAE x86 anyway, with the - * exception of pmd_none(). So the important thing is that if - * the low part of the pmd is found null, the high part will - * be also null or the pmd_none() check below would be - * confused. - */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - barrier(); #endif - /* - * !pmd_present() checks for pmd migration entries - * - * The complete check uses is_pmd_migration_entry() in linux/swapops.h - * But using that requires moving current function and pmd_trans_unstable() - * to linux/swapops.h to resolve dependency, which is too much code move. - * - * !pmd_present() is equivalent to is_pmd_migration_entry() currently, - * because !pmd_present() pages can only be under migration not swapped - * out. - * - * pmd_none() is preserved for future condition checks on pmd migration - * entries and not confusing with this function name, although it is - * redundant with !pmd_present(). - */ - if (pmd_none(pmdval) || pmd_trans_huge(pmdval) || - (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval))) - return 1; - if (unlikely(pmd_bad(pmdval))) { - pmd_clear_bad(pmd); - return 1; - } return 0; } -/* - * This is a noop if Transparent Hugepage Support is not built into - * the kernel. Otherwise it is equivalent to - * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in - * places that already verified the pmd is not none and they want to - * walk ptes while holding the mmap sem in read mode (write mode don't - * need this). If THP is not enabled, the pmd can't go away under the - * code even if MADV_DONTNEED runs, but if THP is enabled we need to - * run a pmd_trans_unstable before walking the ptes after - * split_huge_pmd returns (because it may have run when the pmd become - * null, but then a page fault can map in a THP and not a regular page). - */ -static inline int pmd_trans_unstable(pmd_t *pmd) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - return pmd_none_or_trans_huge_or_clear_bad(pmd); -#else - return 0; -#endif -} - -/* - * the ordering of these checks is important for pmds with _page_devmap set. - * if we check pmd_trans_unstable() first we will trip the bad_pmd() check - * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly - * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. - */ -static inline int pmd_devmap_trans_unstable(pmd_t *pmd) -{ - return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); -} - #ifndef CONFIG_NUMA_BALANCING /* * Technically a PTE can be PROTNONE even when not doing NUMA balancing but |