diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:34:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:34:12 -0800 |
commit | 8a8ca83ec3cf7ffc69020c189e3d368b1d4ba98a (patch) | |
tree | 62b54c44b652cb894f0e9d4847b753259c5de4a8 /include | |
parent | e857b6fcc5af0fbe042bec7e56a1533fe78ef594 (diff) | |
parent | c2208046bba6842dc232a600dc5cafc2fca41078 (diff) |
Merge tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner:
"Core:
- Better handling of page table leaves on archictectures which have
architectures have non-pagetable aligned huge/large pages. For such
architectures a leaf can actually be part of a larger entry.
- Prevent a deadlock vs exec_update_mutex
Architectures:
- The related updates for page size calculation of leaf entries
- The usual churn to support new CPUs
- Small fixes and improvements all over the place"
* tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
perf/x86/intel: Add Tremont Topdown support
uprobes/x86: Fix fall-through warnings for Clang
perf/x86: Fix fall-through warnings for Clang
kprobes/x86: Fix fall-through warnings for Clang
perf/x86/intel/lbr: Fix the return type of get_lbr_cycles()
perf/x86/intel: Fix rtm_abort_event encoding on Ice Lake
x86/kprobes: Restore BTF if the single-stepping is cancelled
perf: Break deadlock involving exec_update_mutex
sparc64/mm: Implement pXX_leaf_size() support
powerpc/8xx: Implement pXX_leaf_size() support
arm64/mm: Implement pXX_leaf_size() support
perf/core: Fix arch_perf_get_page_size()
mm: Introduce pXX_leaf_size()
mm/gup: Provide gup_get_pte() more generic
perf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY
perf/x86/intel/uncore: Add Rocket Lake support
perf/x86/msr: Add Rocket Lake CPU support
perf/x86/cstate: Add Rocket Lake CPU support
perf/x86/intel: Add Rocket Lake CPU support
perf,mm: Handle non-page-table-aligned hugetlbfs
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/perf_event.h | 6 | ||||
-rw-r--r-- | include/linux/pgtable.h | 71 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 6 |
3 files changed, 82 insertions, 1 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 96450f6fb1de..9a38f579bc76 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1028,6 +1028,8 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; + u64 data_page_size; + u64 code_page_size; } ____cacheline_aligned; /* default value for data source */ @@ -1585,4 +1587,8 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now); +#ifdef CONFIG_MMU +extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); +#endif + #endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e237004d498d..8fcdfa52eb4b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep) } #endif +#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking any + * locks. For this we would like to load the pointers atomically, but sometimes + * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What + * we do have is the guarantee that a PTE will only either go from not present + * to present, or present to not present or both -- it will not switch to a + * completely different present page without a TLB flush in between; something + * that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. + * We load pte_high *after* loading pte_low, which ensures we don't see an older + * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't + * picked up a changed pte high. We might have gotten rubbish values from + * pte_low and pte_high, but we are guaranteed that pte_low will not have the + * present bit set *unless* it is 'l'. Because get_user_pages_fast() only + * operates on present ptes we're safe. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + pte_t pte; + + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} +#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +/* + * We require that the PTE can be read atomically. + */ +static inline pte_t ptep_get_lockless(pte_t *ptep) +{ + return ptep_get(ptep); +} +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, @@ -1494,4 +1549,20 @@ typedef unsigned int pgtbl_mod_mask; #define pmd_leaf(x) 0 #endif +#ifndef pgd_leaf_size +#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT) +#endif +#ifndef p4d_leaf_size +#define p4d_leaf_size(x) P4D_SIZE +#endif +#ifndef pud_leaf_size +#define pud_leaf_size(x) PUD_SIZE +#endif +#ifndef pmd_leaf_size +#define pmd_leaf_size(x) PMD_SIZE +#endif +#ifndef pte_leaf_size +#define pte_leaf_size(x) PAGE_SIZE +#endif + #endif /* _LINUX_PGTABLE_H */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b95d3c485d27..b15e3447cd9f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -143,8 +143,10 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -896,6 +898,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, |