| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-17 14:06:53 -0600 |
| --- | --- | --- |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-17 14:06:53 -0600 |
| commit | 4f292c4de4f6fb83776c0ff22674121eb6ddfa2f (patch) | |
| tree | 7625005ed153dbc8341867bfc0076aae5adf93f9 /include/linux/pgtable.h | |
| parent | 03d84bd6d43269df2dc63b2945dfed6610fac526 (diff) | |
| parent | 3e844d842d49cdbe61a4b338bdd512654179488a (diff) | |
Merge tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Dave Hansen:
"New Feature:
- Randomize the per-cpu entry areas
Cleanups:
- Have CR3_ADDR_MASK use PHYSICAL_PAGE_MASK instead of open coding it
- Move to "native" set_memory_rox() helper (a short sketch follows this list)
- Clean up pmd_get_atomic() and i386-PAE
- Remove some unused page table size macros"
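One of the cleanups above converts call sites that paired set_memory_ro() with set_memory_x() over to the combined set_memory_rox() helper. As a rough illustration only (not code from the series; the buffer and function names are made up, and the helper is assumed to take an address and a page count, returning 0 on success):

```c
#include <linux/set_memory.h>

/* Hypothetical executable buffer; the name is illustrative only. */
static void *trampoline_buf;

static int seal_trampoline(int numpages)
{
	unsigned long addr = (unsigned long)trampoline_buf;

	/*
	 * Before the cleanup a caller issued two separate calls:
	 *
	 *	set_memory_ro(addr, numpages);
	 *	set_memory_x(addr, numpages);
	 *
	 * The combined helper marks the region read-only and executable
	 * in one place, so callers cannot get the pairing wrong.
	 */
	return set_memory_rox(addr, numpages);
}
```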
* tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits)
x86/mm: Ensure forced page table splitting
x86/kasan: Populate shadow for shared chunk of the CPU entry area
x86/kasan: Add helpers to align shadow addresses up and down
x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names
x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area
x86/mm: Recompute physical address for every page of per-CPU CEA mapping
x86/mm: Rename __change_page_attr_set_clr(.checkalias)
x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()
x86/mm: Untangle __change_page_attr_set_clr(.checkalias)
x86/mm: Add a few comments
x86/mm: Fix CR3_ADDR_MASK
x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros
mm: Convert __HAVE_ARCH_P..P_GET to the new style
mm: Remove pointless barrier() after pmdp_get_lockless()
x86/mm/pae: Get rid of set_64bit()
x86_64: Remove pointless set_64bit() usage
x86/mm/pae: Be consistent with pXXp_get_and_clear()
x86/mm/pae: Use WRITE_ONCE()
x86/mm/pae: Don't (ab)use atomic64
mm/gup: Fix the lockless PMD access
...
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r-- | include/linux/pgtable.h | 73 |
1 file changed, 46 insertions(+), 27 deletions(-)
```diff
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index dfabd549d2e7..1159b25b0542 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -309,24 +309,28 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
 	ptep_get_and_clear(mm, addr, ptep);
 }
 
-#ifndef __HAVE_ARCH_PTEP_GET
+#ifndef ptep_get
 static inline pte_t ptep_get(pte_t *ptep)
 {
 	return READ_ONCE(*ptep);
 }
 #endif
 
-#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+#ifndef pmdp_get
+static inline pmd_t pmdp_get(pmd_t *pmdp)
+{
+	return READ_ONCE(*pmdp);
+}
+#endif
+
+#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
 /*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking any
- * locks. For this we would like to load the pointers atomically, but sometimes
- * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
- * we do have is the guarantee that a PTE will only either go from not present
- * to present, or present to not present or both -- it will not switch to a
- * completely different present page without a TLB flush in between; something
- * that we are blocking by holding interrupts off.
+ * For walking the pagetables without holding any locks. Some architectures
+ * (eg x86-32 PAE) cannot load the entries atomically without using expensive
+ * instructions. We are guaranteed that a PTE will only either go from not
+ * present to present, or present to not present -- it will not switch to a
+ * completely different present page without a TLB flush inbetween; which we
+ * are blocking by holding interrupts off.
  *
  * Setting ptes from not present to present goes:
  *
@@ -361,15 +365,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
 
 	return pte;
 }
-#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#define ptep_get_lockless ptep_get_lockless
+
+#if CONFIG_PGTABLE_LEVELS > 2
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	do {
+		pmd.pmd_low = pmdp->pmd_low;
+		smp_rmb();
+		pmd.pmd_high = pmdp->pmd_high;
+		smp_rmb();
+	} while (unlikely(pmd.pmd_low != pmdp->pmd_low));
+
+	return pmd;
+}
+#define pmdp_get_lockless pmdp_get_lockless
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
+
 /*
  * We require that the PTE can be read atomically.
  */
+#ifndef ptep_get_lockless
 static inline pte_t ptep_get_lockless(pte_t *ptep)
 {
 	return ptep_get(ptep);
 }
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#endif
+
+#ifndef pmdp_get_lockless
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+	return pmdp_get(pmdp);
+}
+#endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -1313,18 +1344,6 @@ static inline int pud_trans_unstable(pud_t *pud)
 #endif
 }
 
-#ifndef pmd_read_atomic
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
-	/*
-	 * Depend on compiler for an atomic pmd read. NOTE: this is
-	 * only going to work, if the pmdval_t isn't larger than
-	 * an unsigned long.
-	 */
-	return *pmdp;
-}
-#endif
-
 #ifndef arch_needs_pgtable_deposit
 #define arch_needs_pgtable_deposit() (false)
 #endif
@@ -1351,13 +1370,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
  */
 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 {
-	pmd_t pmdval = pmd_read_atomic(pmd);
+	pmd_t pmdval = pmdp_get_lockless(pmd);
 	/*
 	 * The barrier will stabilize the pmdval in a register or on
 	 * the stack so that it will stop changing under the code.
 	 *
 	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
-	 * pmd_read_atomic is allowed to return a not atomic pmdval
+	 * pmdp_get_lockless is allowed to return a not atomic pmdval
	 * (for example pointing to an hugepage that has never been
	 * mapped in the pmd). The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
```
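For a sense of how the reworked lockless accessors are meant to be consumed, here is a small hypothetical sketch (not part of the patch; the function is made up) of a GUP-fast style lookup built on pmdp_get_lockless() and ptep_get_lockless(). It assumes the caller runs with interrupts disabled, which is what the comment in the diff relies on to keep the page tables from being freed and TLB flushes from completing underneath the walker.

```c
#include <linux/mm.h>
#include <linux/pgtable.h>

/*
 * Hypothetical lockless check: is the PTE mapping @addr present?
 * The caller must run with interrupts disabled (as GUP-fast does).
 */
static bool pte_present_lockless(pmd_t *pmdp, unsigned long addr)
{
	pmd_t pmd = pmdp_get_lockless(pmdp);	/* tear-free snapshot, even on 32-bit PAE */
	pte_t *ptep, pte;
	bool ret;

	if (pmd_none(pmd) || !pmd_present(pmd) || pmd_trans_huge(pmd))
		return false;

	ptep = pte_offset_map(&pmd, addr);	/* walk against the local snapshot */
	pte = ptep_get_lockless(ptep);		/* same low/high retry trick for the PTE */
	ret = pte_present(pte);
	pte_unmap(ptep);

	return ret;
}
```

On configurations that can load a pmd in a single access, pmdp_get_lockless() falls back to pmdp_get(), i.e. a plain READ_ONCE(), so the PAE retry loop costs nothing elsewhere.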