author		Linus Torvalds <torvalds@linux-foundation.org>	2020-12-14 17:34:12 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-12-14 17:34:12 -0800
commit		8a8ca83ec3cf7ffc69020c189e3d368b1d4ba98a (patch)
tree		62b54c44b652cb894f0e9d4847b753259c5de4a8 /kernel/events
parent		e857b6fcc5af0fbe042bec7e56a1533fe78ef594 (diff)
parent		c2208046bba6842dc232a600dc5cafc2fca41078 (diff)
Merge tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner:

 "Core:

   - Better handling of page table leaves on architectures which have
     non-pagetable-aligned huge/large pages. For such architectures a
     leaf can actually be part of a larger entry.

   - Prevent a deadlock vs exec_update_mutex

  Architectures:

   - The related updates for page size calculation of leaf entries

   - The usual churn to support new CPUs

   - Small fixes and improvements all over the place"

* tag 'perf-core-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  perf/x86/intel: Add Tremont Topdown support
  uprobes/x86: Fix fall-through warnings for Clang
  perf/x86: Fix fall-through warnings for Clang
  kprobes/x86: Fix fall-through warnings for Clang
  perf/x86/intel/lbr: Fix the return type of get_lbr_cycles()
  perf/x86/intel: Fix rtm_abort_event encoding on Ice Lake
  x86/kprobes: Restore BTF if the single-stepping is cancelled
  perf: Break deadlock involving exec_update_mutex
  sparc64/mm: Implement pXX_leaf_size() support
  powerpc/8xx: Implement pXX_leaf_size() support
  arm64/mm: Implement pXX_leaf_size() support
  perf/core: Fix arch_perf_get_page_size()
  mm: Introduce pXX_leaf_size()
  mm/gup: Provide gup_get_pte() more generic
  perf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY
  perf/x86/intel/uncore: Add Rocket Lake support
  perf/x86/msr: Add Rocket Lake CPU support
  perf/x86/cstate: Add Rocket Lake CPU support
  perf/x86/intel: Add Rocket Lake CPU support
  perf,mm: Handle non-page-table-aligned hugetlbfs
  ...
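For context before the diff, here is a minimal userspace sketch of how a profiler could request the two new sample bits this series introduces, PERF_SAMPLE_DATA_PAGE_SIZE and PERF_SAMPLE_CODE_PAGE_SIZE. The sketch is not part of the merge: the perf_event_open() syscall wrapper and the open_page_size_sampling() helper are illustrative names, and it assumes uapi headers that already contain this series.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Thin wrapper; glibc does not provide one for perf_event_open(). */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

/* Open a cycles event on the calling thread that samples page sizes. */
int open_page_size_sampling(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        /*
         * data_page_size is derived from data->addr and code_page_size
         * from data->ip, so IP and ADDR are requested as well to make
         * the raw addresses visible next to the page sizes.
         */
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
                           PERF_SAMPLE_DATA_PAGE_SIZE |
                           PERF_SAMPLE_CODE_PAGE_SIZE;
        attr.disabled = 1;

        return perf_event_open(&attr, 0, -1, -1, 0);    /* this thread, any CPU */
}

In the resulting PERF_RECORD_SAMPLE records, the two u64 page-size fields are emitted after the cgroup field and before any AUX data, matching the ordering added to perf_output_sample() in the diff below.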
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	160
1 file changed, 136 insertions(+), 24 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dc568ca295bd..19ae6c931c52 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -51,6 +51,8 @@
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include <linux/min_heap.h>
+#include <linux/highmem.h>
+#include <linux/pgtable.h>
#include "internal.h"
@@ -1895,6 +1897,12 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
if (sample_type & PERF_SAMPLE_CGROUP)
size += sizeof(data->cgroup);
+ if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ size += sizeof(data->data_page_size);
+
+ if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ size += sizeof(data->code_page_size);
+
event->header_size = size;
}
@@ -6931,6 +6939,12 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_CGROUP)
perf_output_put(handle, data->cgroup);
+ if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ perf_output_put(handle, data->data_page_size);
+
+ if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ perf_output_put(handle, data->code_page_size);
+
if (sample_type & PERF_SAMPLE_AUX) {
perf_output_put(handle, data->aux_size);
@@ -6988,6 +7002,93 @@ static u64 perf_virt_to_phys(u64 virt)
return phys_addr;
}
+/*
+ * Return the pagetable size of a given virtual address.
+ */
+static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
+{
+ u64 size = 0;
+
+#ifdef CONFIG_HAVE_FAST_GUP
+ pgd_t *pgdp, pgd;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
+
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
+ return 0;
+
+ if (pgd_leaf(pgd))
+ return pgd_leaf_size(pgd);
+
+ p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (!p4d_present(p4d))
+ return 0;
+
+ if (p4d_leaf(p4d))
+ return p4d_leaf_size(p4d);
+
+ pudp = pud_offset_lockless(p4dp, p4d, addr);
+ pud = READ_ONCE(*pudp);
+ if (!pud_present(pud))
+ return 0;
+
+ if (pud_leaf(pud))
+ return pud_leaf_size(pud);
+
+ pmdp = pmd_offset_lockless(pudp, pud, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (!pmd_present(pmd))
+ return 0;
+
+ if (pmd_leaf(pmd))
+ return pmd_leaf_size(pmd);
+
+ ptep = pte_offset_map(&pmd, addr);
+ pte = ptep_get_lockless(ptep);
+ if (pte_present(pte))
+ size = pte_leaf_size(pte);
+ pte_unmap(ptep);
+#endif /* CONFIG_HAVE_FAST_GUP */
+
+ return size;
+}
+
+static u64 perf_get_page_size(unsigned long addr)
+{
+ struct mm_struct *mm;
+ unsigned long flags;
+ u64 size;
+
+ if (!addr)
+ return 0;
+
+ /*
+ * Software page-table walkers must disable IRQs,
+ * which prevents any tear down of the page tables.
+ */
+ local_irq_save(flags);
+
+ mm = current->mm;
+ if (!mm) {
+ /*
+ * For kernel threads and the like, use init_mm so that
+ * we can find kernel memory.
+ */
+ mm = &init_mm;
+ }
+
+ size = perf_get_pgtable_size(mm, addr);
+
+ local_irq_restore(flags);
+
+ return size;
+}
+
static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
struct perf_callchain_entry *
@@ -7023,7 +7124,7 @@ void perf_prepare_sample(struct perf_event_header *header,
__perf_event_header__init_id(header, data, event);
- if (sample_type & PERF_SAMPLE_IP)
+ if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
data->ip = perf_instruction_pointer(regs);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
@@ -7142,6 +7243,17 @@ void perf_prepare_sample(struct perf_event_header *header,
}
#endif
+ /*
+ * PERF_SAMPLE_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user
+ * doesn't request PERF_SAMPLE_ADDR, the kernel implicitly retrieves
+ * data->addr, but the value is not dumped to userspace.
+ */
+ if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+ data->data_page_size = perf_get_page_size(data->addr);
+
+ if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+ data->code_page_size = perf_get_page_size(data->ip);
+
if (sample_type & PERF_SAMPLE_AUX) {
u64 size;
@@ -11720,24 +11832,6 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
- if (task) {
- err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
- if (err)
- goto err_task;
-
- /*
- * Preserve ptrace permission check for backwards compatibility.
- *
- * We must hold exec_update_mutex across this and any potential
- * perf_install_in_context() call for this new event to
- * serialize against exec() altering our credentials (and the
- * perf_event_exit_task() that could imply).
- */
- err = -EACCES;
- if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
- goto err_cred;
- }
-
if (flags & PERF_FLAG_PID_CGROUP)
cgroup_fd = pid;
@@ -11745,7 +11839,7 @@ SYSCALL_DEFINE5(perf_event_open,
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
- goto err_cred;
+ goto err_task;
}
if (is_sampling_event(event)) {
@@ -11864,6 +11958,24 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
}
+ if (task) {
+ err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
+ if (err)
+ goto err_file;
+
+ /*
+ * Preserve ptrace permission check for backwards compatibility.
+ *
+ * We must hold exec_update_mutex across this and any potential
+ * perf_install_in_context() call for this new event to
+ * serialize against exec() altering our credentials (and the
+ * perf_event_exit_task() that could imply).
+ */
+ err = -EACCES;
+ if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+ goto err_cred;
+ }
+
if (move_group) {
gctx = __perf_event_ctx_lock_double(group_leader, ctx);
@@ -12039,7 +12151,10 @@ err_locked:
if (move_group)
perf_event_ctx_unlock(group_leader, gctx);
mutex_unlock(&ctx->mutex);
-/* err_file: */
+err_cred:
+ if (task)
+ mutex_unlock(&task->signal->exec_update_mutex);
+err_file:
fput(event_file);
err_context:
perf_unpin_context(ctx);
@@ -12051,9 +12166,6 @@ err_alloc:
*/
if (!event_file)
free_event(event);
-err_cred:
- if (task)
- mutex_unlock(&task->signal->exec_update_mutex);
err_task:
if (task)
put_task_struct(task);