diff options
-rw-r--r-- | arch/arm/kernel/hw_breakpoint.c | 8 | ||||
-rw-r--r-- | arch/arm64/kernel/hw_breakpoint.c | 4 | ||||
-rw-r--r-- | arch/x86/events/amd/core.c | 37 | ||||
-rw-r--r-- | arch/x86/events/amd/lbr.c | 13 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 13 | ||||
-rw-r--r-- | arch/x86/events/rapl.c | 7 | ||||
-rw-r--r-- | include/linux/perf_event.h | 37 | ||||
-rw-r--r-- | kernel/events/core.c | 273 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 4 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/perf_skip.c | 137 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_perf_skip.c | 15 | ||||
-rw-r--r-- | tools/testing/selftests/perf_events/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/perf_events/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/perf_events/watermark_signal.c | 146 |
14 files changed, 525 insertions, 172 deletions
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index dc0fb7a81371..054e9199f30d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address &= ~alignment_mask; hw->ctrl.len <<= offset; - if (uses_default_overflow_handler(bp)) { + if (is_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * Otherwise, insert a temporary mismatch breakpoint so that * we can single-step over the watchpoint trigger. */ - if (!uses_default_overflow_handler(wp)) + if (!is_default_overflow_handler(wp)) continue; step: enable_single_step(wp, instruction_pointer(regs)); @@ -811,7 +811,7 @@ step: info->trigger = addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); - if (uses_default_overflow_handler(wp)) + if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); } @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (uses_default_overflow_handler(bp)) + if (is_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 2f5755192c2b..722ac45f9f7b 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -655,7 +655,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (uses_default_overflow_handler(bp)) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -734,7 +734,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = uses_default_overflow_handler(wp); + int step = is_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 985ef3b47919..1fc4ce44e743 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -647,7 +647,7 @@ static void amd_pmu_cpu_dead(int cpu) } } -static inline void amd_pmu_set_global_ctl(u64 ctl) +static __always_inline void amd_pmu_set_global_ctl(u64 ctl) { wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); } @@ -907,6 +907,37 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) return amd_pmu_adjust_nmi_window(handled); } +/* + * AMD-specific callback invoked through perf_snapshot_branch_stack static + * call, defined in include/linux/perf_event.h. See its definition for API + * details. It's up to caller to provide enough space in *entries* to fit all + * LBR records, otherwise returned result will be truncated to *cnt* entries. + */ +static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + struct cpu_hw_events *cpuc; + unsigned long flags; + + /* + * The sequence of steps to freeze LBR should be completely inlined + * and contain no branches to minimize contamination of LBR snapshot + */ + local_irq_save(flags); + amd_pmu_core_disable_all(); + __amd_pmu_lbr_disable(); + + cpuc = this_cpu_ptr(&cpu_hw_events); + + amd_pmu_lbr_read(); + cnt = min(cnt, x86_pmu.lbr_nr); + memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); + + amd_pmu_v2_enable_all(0); + local_irq_restore(flags); + + return cnt; +} + static int amd_pmu_v2_handle_irq(struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1443,6 +1474,10 @@ static int __init amd_core_pmu_init(void) static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + + /* Only support branch_stack snapshot on perfmon v2 */ + if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq) + static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack); } else if (!amd_brs_init()) { /* * BRS requires special event constraints and flushing on ctxsw. diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 5149830c7c4f..19c7b76e21bc 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -310,10 +310,6 @@ int amd_pmu_lbr_hw_config(struct perf_event *event) { int ret = 0; - /* LBR is not recommended in counting mode */ - if (!is_sampling_event(event)) - return -EINVAL; - ret = amd_pmu_lbr_setup_filter(event); if (!ret) event->attach_state |= PERF_ATTACH_SCHED_CB; @@ -414,18 +410,11 @@ void amd_pmu_lbr_enable_all(void) void amd_pmu_lbr_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - u64 dbg_ctl, dbg_extn_cfg; if (!cpuc->lbr_users || !x86_pmu.lbr_nr) return; - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - - if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - } + __amd_pmu_lbr_disable(); } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index fb56518356ec..72b022a1e16c 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1329,6 +1329,19 @@ void amd_pmu_lbr_enable_all(void); void amd_pmu_lbr_disable_all(void); int amd_pmu_lbr_hw_config(struct perf_event *event); +static __always_inline void __amd_pmu_lbr_disable(void) +{ + u64 dbg_ctl, dbg_extn_cfg; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } +} + #ifdef CONFIG_PERF_EVENTS_AMD_BRS #define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index fb2b1961e5a3..ca5f687fa420 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -675,10 +675,8 @@ static const struct attribute_group *rapl_attr_update[] = { static int __init init_rapl_pmus(void) { int maxdie = topology_max_packages() * topology_max_dies_per_package(); - size_t size; - size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); - rapl_pmus = kzalloc(size, GFP_KERNEL); + rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, maxdie), GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; @@ -808,6 +806,9 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d2a15c0c6f8a..a5304ae8c654 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -809,11 +809,8 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; -#ifdef CONFIG_BPF_SYSCALL - perf_overflow_handler_t orig_overflow_handler; struct bpf_prog *prog; u64 bpf_cookie; -#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; @@ -883,6 +880,7 @@ struct perf_event_pmu_context { unsigned int nr_events; unsigned int nr_cgroups; + unsigned int nr_freq; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; @@ -897,6 +895,11 @@ struct perf_event_pmu_context { int rotate_necessary; }; +static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) +{ + return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active); +} + struct perf_event_groups { struct rb_root tree; u64 index; @@ -1342,8 +1345,10 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) +is_default_overflow_handler(struct perf_event *event) { + perf_overflow_handler_t overflow_handler = event->overflow_handler; + if (likely(overflow_handler == perf_event_output_forward)) return true; if (unlikely(overflow_handler == perf_event_output_backward)) @@ -1351,22 +1356,6 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler) return false; } -#define is_default_overflow_handler(event) \ - __is_default_overflow_handler((event)->overflow_handler) - -#ifdef CONFIG_BPF_SYSCALL -static inline bool uses_default_overflow_handler(struct perf_event *event) -{ - if (likely(is_default_overflow_handler(event))) - return true; - - return __is_default_overflow_handler(event->orig_overflow_handler); -} -#else -#define uses_default_overflow_handler(event) \ - is_default_overflow_handler(event) -#endif - extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, @@ -1697,6 +1686,14 @@ perf_event_addr_filters(struct perf_event *event) return ifh; } +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* Only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + extern void perf_event_addr_filters_sync(struct perf_event *event); extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); diff --git a/kernel/events/core.c b/kernel/events/core.c index 724e6d7e128f..6b0a66ed2ae3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2302,8 +2302,10 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx) if (!is_software_event(event)) cpc->active_oncpu--; - if (event->attr.freq && event->attr.sample_freq) + if (event->attr.freq && event->attr.sample_freq) { ctx->nr_freq--; + epc->nr_freq--; + } if (event->attr.exclusive || !cpc->active_oncpu) cpc->exclusive = 0; @@ -2558,9 +2560,10 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx) if (!is_software_event(event)) cpc->active_oncpu++; - if (event->attr.freq && event->attr.sample_freq) + if (event->attr.freq && event->attr.sample_freq) { ctx->nr_freq++; - + epc->nr_freq++; + } if (event->attr.exclusive) cpc->exclusive = 1; @@ -4123,30 +4126,14 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo } } -/* - * combine freq adjustment with unthrottling to avoid two passes over the - * events. At the same time, make sure, having freq events does not change - * the rate of unthrottling as that would introduce bias. - */ -static void -perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle) +static void perf_adjust_freq_unthr_events(struct list_head *event_list) { struct perf_event *event; struct hw_perf_event *hwc; u64 now, period = TICK_NSEC; s64 delta; - /* - * only need to iterate over all events iff: - * - context have events in frequency mode (needs freq adjust) - * - there are events to unthrottle on this cpu - */ - if (!(ctx->nr_freq || unthrottle)) - return; - - raw_spin_lock(&ctx->lock); - - list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + list_for_each_entry(event, event_list, active_list) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -4154,18 +4141,17 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle) if (!event_filter_match(event)) continue; - perf_pmu_disable(event->pmu); - hwc = &event->hw; if (hwc->interrupts == MAX_INTERRUPTS) { hwc->interrupts = 0; perf_log_throttle(event, 1); - event->pmu->start(event, 0); + if (!event->attr.freq || !event->attr.sample_freq) + event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) - goto next; + continue; /* * stop the event and update event->count @@ -4187,8 +4173,41 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle) perf_adjust_period(event, period, delta, false); event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); - next: - perf_pmu_enable(event->pmu); + } +} + +/* + * combine freq adjustment with unthrottling to avoid two passes over the + * events. At the same time, make sure, having freq events does not change + * the rate of unthrottling as that would introduce bias. + */ +static void +perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle) +{ + struct perf_event_pmu_context *pmu_ctx; + + /* + * only need to iterate over all events iff: + * - context have events in frequency mode (needs freq adjust) + * - there are events to unthrottle on this cpu + */ + if (!(ctx->nr_freq || unthrottle)) + return; + + raw_spin_lock(&ctx->lock); + + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { + if (!(pmu_ctx->nr_freq || unthrottle)) + continue; + if (!perf_pmu_ctx_is_active(pmu_ctx)) + continue; + if (pmu_ctx->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) + continue; + + perf_pmu_disable(pmu_ctx->pmu); + perf_adjust_freq_unthr_events(&pmu_ctx->pinned_active); + perf_adjust_freq_unthr_events(&pmu_ctx->flexible_active); + perf_pmu_enable(pmu_ctx->pmu); } raw_spin_unlock(&ctx->lock); @@ -6684,14 +6703,6 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ -static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) -{ - /* only the parent has fasync state */ - if (event->parent) - event = event->parent; - return &event->fasync; -} - void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); @@ -9544,6 +9555,100 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r return true; } +#ifdef CONFIG_BPF_SYSCALL +static int bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct bpf_perf_event_data_kern ctx = { + .data = data, + .event = event, + }; + struct bpf_prog *prog; + int ret = 0; + + ctx.regs = perf_arch_bpf_user_pt_regs(regs); + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) + goto out; + rcu_read_lock(); + prog = READ_ONCE(event->prog); + if (prog) { + perf_prepare_sample(data, event, regs); + ret = bpf_prog_run(prog, &ctx); + } + rcu_read_unlock(); +out: + __this_cpu_dec(bpf_prog_active); + + return ret; +} + +static inline int perf_event_set_bpf_handler(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + if (event->overflow_handler_context) + /* hw breakpoint or kernel counter */ + return -EINVAL; + + if (event->prog) + return -EEXIST; + + if (prog->type != BPF_PROG_TYPE_PERF_EVENT) + return -EINVAL; + + if (event->attr.precise_ip && + prog->call_get_stack && + (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) || + event->attr.exclude_callchain_kernel || + event->attr.exclude_callchain_user)) { + /* + * On perf_event with precise_ip, calling bpf_get_stack() + * may trigger unwinder warnings and occasional crashes. + * bpf_get_[stack|stackid] works around this issue by using + * callchain attached to perf_sample_data. If the + * perf_event does not full (kernel and user) callchain + * attached to perf_sample_data, do not allow attaching BPF + * program that calls bpf_get_[stack|stackid]. + */ + return -EPROTO; + } + + event->prog = prog; + event->bpf_cookie = bpf_cookie; + return 0; +} + +static inline void perf_event_free_bpf_handler(struct perf_event *event) +{ + struct bpf_prog *prog = event->prog; + + if (!prog) + return; + + event->prog = NULL; + bpf_prog_put(prog); +} +#else +static inline int bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + return 1; +} + +static inline int perf_event_set_bpf_handler(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_free_bpf_handler(struct perf_event *event) +{ +} +#endif + /* * Generic event overflow handling, sampling. */ @@ -9564,6 +9669,9 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); + if (event->prog && !bpf_overflow_handler(event, data, regs)) + return ret; + /* * XXX event_limit might not quite work as expected on inherited * events @@ -10422,97 +10530,6 @@ static void perf_event_free_filter(struct perf_event *event) ftrace_profile_free_filter(event); } -#ifdef CONFIG_BPF_SYSCALL -static void bpf_overflow_handler(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - struct bpf_perf_event_data_kern ctx = { - .data = data, - .event = event, - }; - struct bpf_prog *prog; - int ret = 0; - - ctx.regs = perf_arch_bpf_user_pt_regs(regs); - if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) - goto out; - rcu_read_lock(); - prog = READ_ONCE(event->prog); - if (prog) { - perf_prepare_sample(data, event, regs); - ret = bpf_prog_run(prog, &ctx); - } - rcu_read_unlock(); -out: - __this_cpu_dec(bpf_prog_active); - if (!ret) - return; - - event->orig_overflow_handler(event, data, regs); -} - -static int perf_event_set_bpf_handler(struct perf_event *event, - struct bpf_prog *prog, - u64 bpf_cookie) -{ - if (event->overflow_handler_context) - /* hw breakpoint or kernel counter */ - return -EINVAL; - - if (event->prog) - return -EEXIST; - - if (prog->type != BPF_PROG_TYPE_PERF_EVENT) - return -EINVAL; - - if (event->attr.precise_ip && - prog->call_get_stack && - (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) || - event->attr.exclude_callchain_kernel || - event->attr.exclude_callchain_user)) { - /* - * On perf_event with precise_ip, calling bpf_get_stack() - * may trigger unwinder warnings and occasional crashes. - * bpf_get_[stack|stackid] works around this issue by using - * callchain attached to perf_sample_data. If the - * perf_event does not full (kernel and user) callchain - * attached to perf_sample_data, do not allow attaching BPF - * program that calls bpf_get_[stack|stackid]. - */ - return -EPROTO; - } - - event->prog = prog; - event->bpf_cookie = bpf_cookie; - event->orig_overflow_handler = READ_ONCE(event->overflow_handler); - WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); - return 0; -} - -static void perf_event_free_bpf_handler(struct perf_event *event) -{ - struct bpf_prog *prog = event->prog; - - if (!prog) - return; - - WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); - event->prog = NULL; - bpf_prog_put(prog); -} -#else -static int perf_event_set_bpf_handler(struct perf_event *event, - struct bpf_prog *prog, - u64 bpf_cookie) -{ - return -EOPNOTSUPP; -} -static void perf_event_free_bpf_handler(struct perf_event *event) -{ -} -#endif - /* * returns true if the event is a tracepoint, or a kprobe/upprobe created * with perf_event_open() @@ -11971,13 +11988,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING) - if (overflow_handler == bpf_overflow_handler) { + if (parent_event->prog) { struct bpf_prog *prog = parent_event->prog; bpf_prog_inc(prog); event->prog = prog; - event->orig_overflow_handler = - parent_event->orig_overflow_handler; } #endif } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 60ed43d1c29e..4013408ce012 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -22,6 +22,10 @@ static void perf_output_wakeup(struct perf_output_handle *handle) atomic_set(&handle->rb->poll, EPOLLIN); handle->event->pending_wakeup = 1; + + if (*perf_event_fasync(handle->event) && !handle->event->pending_kill) + handle->event->pending_kill = POLL_IN; + irq_work_queue(&handle->event->pending_irq); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_skip.c b/tools/testing/selftests/bpf/prog_tests/perf_skip.c new file mode 100644 index 000000000000..37d8618800e4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_skip.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <test_progs.h> +#include "test_perf_skip.skel.h" +#include <linux/compiler.h> +#include <linux/hw_breakpoint.h> +#include <sys/mman.h> + +#ifndef TRAP_PERF +#define TRAP_PERF 6 +#endif + +int sigio_count, sigtrap_count; + +static void handle_sigio(int sig __always_unused) +{ + ++sigio_count; +} + +static void handle_sigtrap(int signum __always_unused, + siginfo_t *info, + void *ucontext __always_unused) +{ + ASSERT_EQ(info->si_code, TRAP_PERF, "si_code"); + ++sigtrap_count; +} + +static noinline int test_function(void) +{ + asm volatile (""); + return 0; +} + +void serial_test_perf_skip(void) +{ + struct sigaction action = {}; + struct sigaction previous_sigtrap; + sighandler_t previous_sigio = SIG_ERR; + struct test_perf_skip *skel = NULL; + struct perf_event_attr attr = {}; + int perf_fd = -1; + int err; + struct f_owner_ex owner; + struct bpf_link *prog_link = NULL; + + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = handle_sigtrap; + sigemptyset(&action.sa_mask); + if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction")) + return; + + previous_sigio = signal(SIGIO, handle_sigio); + if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal")) + goto cleanup; + + skel = test_perf_skip__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + attr.type = PERF_TYPE_BREAKPOINT; + attr.size = sizeof(attr); + attr.bp_type = HW_BREAKPOINT_X; + attr.bp_addr = (uintptr_t)test_function; + attr.bp_len = sizeof(long); + attr.sample_period = 1; + attr.sample_type = PERF_SAMPLE_IP; + attr.pinned = 1; + attr.exclude_kernel = 1; + attr.exclude_hv = 1; + attr.precise_ip = 3; + attr.sigtrap = 1; + attr.remove_on_exec = 1; + + perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); + if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n"); + test__skip(); + goto cleanup; + } + if (!ASSERT_OK(perf_fd < 0, "perf_event_open")) + goto cleanup; + + /* Configure the perf event to signal on sample. */ + err = fcntl(perf_fd, F_SETFL, O_ASYNC); + if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)")) + goto cleanup; + + owner.type = F_OWNER_TID; + owner.pid = syscall(__NR_gettid); + err = fcntl(perf_fd, F_SETOWN_EX, &owner); + if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)")) + goto cleanup; + + /* Allow at most one sample. A sample rejected by bpf should + * not count against this. + */ + err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1); + if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)")) + goto cleanup; + + prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd); + if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event")) + goto cleanup; + + /* Configure the bpf program to suppress the sample. */ + skel->bss->ip = (uintptr_t)test_function; + test_function(); + + ASSERT_EQ(sigio_count, 0, "sigio_count"); + ASSERT_EQ(sigtrap_count, 0, "sigtrap_count"); + + /* Configure the bpf program to allow the sample. */ + skel->bss->ip = 0; + test_function(); + + ASSERT_EQ(sigio_count, 1, "sigio_count"); + ASSERT_EQ(sigtrap_count, 1, "sigtrap_count"); + + /* Test that the sample above is the only one allowed (by perf, not + * by bpf) + */ + test_function(); + + ASSERT_EQ(sigio_count, 1, "sigio_count"); + ASSERT_EQ(sigtrap_count, 1, "sigtrap_count"); + +cleanup: + bpf_link__destroy(prog_link); + if (perf_fd >= 0) + close(perf_fd); + test_perf_skip__destroy(skel); + + if (previous_sigio != SIG_ERR) + signal(SIGIO, previous_sigio); + sigaction(SIGTRAP, &previous_sigtrap, NULL); +} diff --git a/tools/testing/selftests/bpf/progs/test_perf_skip.c b/tools/testing/selftests/bpf/progs/test_perf_skip.c new file mode 100644 index 000000000000..7eb8b6de7a57 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_perf_skip.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +uintptr_t ip; + +SEC("perf_event") +int handler(struct bpf_perf_event_data *data) +{ + /* Skip events that have the correct ip. */ + return ip != PT_REGS_IP(&data->regs); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index 790c47001e77..ee93dc4969b8 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only sigtrap_threads remove_on_exec +watermark_signal diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index db93c4ff081a..70e3ff211278 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal include ../lib.mk diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c new file mode 100644 index 000000000000..49dc1e831174 --- /dev/null +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <linux/perf_event.h> +#include <stddef.h> +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define __maybe_unused __attribute__((__unused__)) + +static int sigio_count; + +static void handle_sigio(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + ++sigio_count; +} + +static void do_child(void) +{ + raise(SIGSTOP); + + for (int i = 0; i < 20; ++i) + sleep(1); + + raise(SIGSTOP); + + exit(0); +} + +TEST(watermark_signal) +{ + struct perf_event_attr attr; + struct perf_event_mmap_page *p = NULL; + struct sigaction previous_sigio, sigio = { 0 }; + pid_t child = -1; + int child_status; + int fd = -1; + long page_size = sysconf(_SC_PAGE_SIZE); + + sigio.sa_sigaction = handle_sigio; + EXPECT_EQ(sigaction(SIGIO, &sigio, &previous_sigio), 0); + + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_DUMMY; + attr.sample_period = 1; + attr.disabled = 1; + attr.watermark = 1; + attr.context_switch = 1; + attr.wakeup_watermark = 1; + + child = fork(); + EXPECT_GE(child, 0); + if (child == 0) + do_child(); + else if (child < 0) { + perror("fork()"); + goto cleanup; + } + + if (waitpid(child, &child_status, WSTOPPED) != child || + !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) { + fprintf(stderr, + "failed to sycnhronize with child errno=%d status=%x\n", + errno, + child_status); + goto cleanup; + } + + fd = syscall(__NR_perf_event_open, &attr, child, -1, -1, + PERF_FLAG_FD_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "failed opening event %llx\n", attr.config); + goto cleanup; + } + + if (fcntl(fd, F_SETFL, FASYNC)) { + perror("F_SETFL FASYNC"); + goto cleanup; + } + + if (fcntl(fd, F_SETOWN, getpid())) { + perror("F_SETOWN getpid()"); + goto cleanup; + } + + if (fcntl(fd, F_SETSIG, SIGIO)) { + perror("F_SETSIG SIGIO"); + goto cleanup; + } + + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (p == NULL) { + perror("mmap"); + goto cleanup; + } + + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) { + perror("PERF_EVENT_IOC_ENABLE"); + goto cleanup; + } + + if (kill(child, SIGCONT) < 0) { + perror("SIGCONT"); + goto cleanup; + } + + if (waitpid(child, &child_status, WSTOPPED) != -1 || errno != EINTR) + fprintf(stderr, + "expected SIGIO to terminate wait errno=%d status=%x\n%d", + errno, + child_status, + sigio_count); + + EXPECT_GE(sigio_count, 1); + +cleanup: + if (p != NULL) + munmap(p, 2 * page_size); + + if (fd >= 0) + close(fd); + + if (child > 0) { + kill(child, SIGKILL); + waitpid(child, NULL, 0); + } + + sigaction(SIGIO, &previous_sigio, NULL); +} + +TEST_HARNESS_MAIN |