diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-10 08:18:46 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-03-10 08:18:46 -0800 |
commit | 49be4fb28109b86a8ffe117415c306389a394cb2 (patch) | |
tree | d34fac155c6290d0096b5c9758250859b221973a | |
parent | 44889ba56cbb3d51154660ccd15818bc77276696 (diff) | |
parent | 5b201a82cd9d0945d70562974ea6ad8e3b1861b5 (diff) |
Merge tag 'perf-tools-fixes-for-v6.3-1-2023-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo:
- Add Adrian Hunter to MAINTAINERS as a perf tools reviewer
- Sync various tools/ copies of kernel headers with the kernel sources,
this time trying to avoid first merging with upstream to then update
but instead copy from upstream so that a merge is avoided and the end
result after merging this pull request is the one expected,
tools/perf/check-headers.sh (mostly) happy, less warnings while
building tools/perf/
- Fix counting when initial delay configured by setting
perf_attr.enable_on_exec when starting workloads from the perf
command line
- Don't avoid emitting a PERF_RECORD_MMAP2 in 'perf inject
--buildid-all' when that record comes with a build-id, otherwise we
end up not being able to resolve symbols
- Don't use comma as the CSV output separator the "stat+csv_output"
test, as comma can appear on some tests as a modifier for an event,
use @ instead, ditto for the JSON linter test
- The offcpu test was looking for some bits being set on
task_struct->prev_state without masking other bits not important for
this specific 'perf test', fix it
* tag 'perf-tools-fixes-for-v6.3-1-2023-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf tools: Add Adrian Hunter to MAINTAINERS as a reviewer
tools headers UAPI: Sync linux/perf_event.h with the kernel sources
tools headers x86 cpufeatures: Sync with the kernel sources
tools include UAPI: Sync linux/vhost.h with the kernel sources
tools arch x86: Sync the msr-index.h copy with the kernel sources
tools headers kvm: Sync uapi/{asm/linux} kvm.h headers with the kernel sources
tools include UAPI: Synchronize linux/fcntl.h with the kernel sources
tools headers: Synchronize {linux,vdso}/bits.h with the kernel sources
tools headers UAPI: Sync linux/prctl.h with the kernel sources
tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
perf stat: Fix counting when initial delay configured
tools headers svm: Sync svm headers with the kernel sources
perf test: Avoid counting commas in json linter
perf tests stat+csv_output: Switch CSV separator to @
perf inject: Fix --buildid-all not to eat up MMAP2
tools arch x86: Sync the msr-index.h copy with the kernel sources
perf test: Fix offcpu test prev_state check
25 files changed, 154 insertions, 54 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..150ff03b42eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16391,6 +16391,7 @@ R: Alexander Shishkin <alexander.shishkin@linux.intel.com> R: Jiri Olsa <jolsa@kernel.org> R: Namhyung Kim <namhyung@kernel.org> R: Ian Rogers <irogers@google.com> +R: Adrian Hunter <adrian.hunter@intel.com> L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index a7a857f1784d..f8129c624b07 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -109,6 +109,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ +#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ struct kvm_vcpu_init { __u32 target; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b70111a75688..b89005819cd5 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 20 /* N 32-bit words worth of info */ +#define NCAPINTS 21 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index c44b56f7ffba..5dfa4fb76f4b 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -124,6 +124,7 @@ #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define DISABLED_MASK20 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 37ff47552bcb..ad35355ee43e 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -25,6 +25,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -33,6 +34,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. Some also available on other CPUs */ @@ -49,6 +51,10 @@ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +/* A mask for bits which the kernel toggles when controlling mitigations */ +#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ + | SPEC_CTRL_RRSBA_DIS_S) + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -189,6 +195,9 @@ #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af +#define MSR_SNOOP_RSP_0 0x00001328 +#define MSR_SNOOP_RSP_1 0x00001329 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 @@ -566,6 +575,26 @@ #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +/* SNP feature bits enabled by the hypervisor */ +#define MSR_AMD64_SNP_VTOM BIT_ULL(3) +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4) +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5) +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6) +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8) +#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9) +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10) +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11) +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12) +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14) +#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16) +#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17) + +/* SNP feature bits reserved for future use. */ +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18) + #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f /* AMD Collaborative Processor Performance Control MSRs */ @@ -1061,6 +1090,8 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 +#define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index aff774775c67..7ba1726b71c7 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -98,6 +98,7 @@ #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define REQUIRED_MASK20 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index e48deab8901d..7f467fe05d42 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/ioctl.h> +#include <linux/stddef.h> #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 @@ -507,8 +508,8 @@ struct kvm_nested_state { * KVM_{GET,PUT}_NESTED_STATE ioctl values. */ union { - struct kvm_vmx_nested_state_data vmx[0]; - struct kvm_svm_nested_state_data svm[0]; + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); } data; }; @@ -525,6 +526,35 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + +/* + * Masked event layout. + * Bits Description + * ---- ----------- + * 7:0 event select (low bits) + * 15:8 umask match + * 31:16 unused + * 35:32 event select (high bits) + * 36:54 unused + * 55 exclude bit + * 63:56 umask mask + */ + +#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \ + (((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \ + (((mask) & 0xFFULL) << 56) | \ + (((match) & 0xFFULL) << 8) | \ + ((__u64)(!!(exclude)) << 55)) + +#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ + (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index f69c168391aa..80e1df482337 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,12 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ + /* SW_EXITINFO1[3:0] */ \ + (((((u64)reason_set) & 0xf)) | \ + /* SW_EXITINFO1[11:4] */ \ + ((((u64)reason_code) & 0xff) << 4)) #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 5418e2f99834..a91ac666f758 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -7,7 +7,7 @@ #include <asm/alternative.h> #include <asm/export.h> -.pushsection .noinstr.text, "ax" +.section .noinstr.text, "ax" /* * We build a jump to memcpy_orig by default which gets NOPped out on @@ -42,7 +42,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) /* @@ -183,4 +183,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) RET SYM_FUNC_END(memcpy_orig) -.popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index fc9ffd3ff3b2..6143b1a6fa2c 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -6,6 +6,8 @@ #include <asm/alternative.h> #include <asm/export.h> +.section .noinstr.text, "ax" + /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is @@ -43,7 +45,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS_WEAK(memset, __memset) +SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) /* diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 87d112650dfb..7c0cf5031abe 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -6,7 +6,6 @@ #include <vdso/bits.h> #include <asm/bitsperlong.h> -#define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 2f86b2ad6d7e..e8c07da58c9f 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -43,6 +43,7 @@ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ #define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ +#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */ /* (1U << 31) is reserved for signed error codes */ /* diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 55155e262646..d77aef872a0a 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -583,6 +583,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -595,11 +597,17 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_SIDA_WRITE 3 #define KVM_S390_MEMOP_ABSOLUTE_READ 4 #define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -1175,6 +1183,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 +#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index ccb7f5dad59b..37675437b768 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -374,6 +374,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ #define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -515,6 +516,8 @@ struct perf_event_attr { * truncated accordingly on 32 bit architectures. */ __u64 sig_data; + + __u64 config3; /* extension of config2 */ }; /* diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index a5e06dcbba13..1312a137f7fb 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -281,6 +281,12 @@ struct prctl_mm_map { # define PR_SME_VL_LEN_MASK 0xffff # define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ +/* Memory deny write / execute */ +#define PR_SET_MDWE 65 +# define PR_MDWE_REFUSE_EXEC_GAIN 1 + +#define PR_GET_MDWE 66 + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index f9f115a7c75b..92e1b700b51c 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -180,4 +180,12 @@ */ #define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) +/* Resume a device so it can resume processing virtqueue requests + * + * After the return of this ioctl the device will have restored all the + * necessary states and it is fully operational to continue processing the + * virtqueue descriptors. + */ +#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + #endif diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h index 6d005a1f5d94..388b212088ea 100644 --- a/tools/include/vdso/bits.h +++ b/tools/include/vdso/bits.h @@ -5,5 +5,6 @@ #include <vdso/const.h> #define BIT(nr) (UL(1) << (nr)) +#define BIT_ULL(nr) (ULL(1) << (nr)) #endif /* __VDSO_BITS_H */ diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f8182417b734..10bb1d494258 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -538,6 +538,7 @@ static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, dso->hit = 1; } dso__put(dso); + perf_event__repipe(tool, event, sample, machine); return 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5d18a5a6f662..fa7c40956d0f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -539,12 +539,7 @@ static int enable_counters(void) return err; } - /* - * We need to enable counters only if: - * - we don't have tracee (attaching to task or cpu) - * - we have initial delay configured - */ - if (!target__none(&target)) { + if (!target__enable_on_exec(&target)) { if (!all_counters_use_bpf) evlist__enable(evsel_list); } @@ -914,7 +909,7 @@ try_again_reset: return err; } - if (stat_config.initial_delay) { + if (target.initial_delay) { pr_info(EVLIST_DISABLED_MSG); } else { err = enable_counters(); @@ -926,8 +921,8 @@ try_again_reset: if (forks) evlist__start_workload(evsel_list); - if (stat_config.initial_delay > 0) { - usleep(stat_config.initial_delay * USEC_PER_MSEC); + if (target.initial_delay > 0) { + usleep(target.initial_delay * USEC_PER_MSEC); err = enable_counters(); if (err) return -1; @@ -1248,7 +1243,7 @@ static struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode, "aggregate counts per numa node", AGGR_NODE), - OPT_INTEGER('D', "delay", &stat_config.initial_delay, + OPT_INTEGER('D', "delay", &target.initial_delay, "ms to wait before starting measurement after program start (-1: start with events disabled)"), OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py index d90f8d102eb9..97598d14e532 100644 --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py @@ -40,19 +40,6 @@ def is_counter_value(num): return isfloat(num) or num == '<not counted>' or num == '<not supported>' def check_json_output(expected_items): - if expected_items != -1: - for line in Lines: - if 'failed' not in line: - count = 0 - count = line.count(',') - if count != expected_items and count >= 1 and count <= 3 and 'metric-value' in line: - # Events that generate >1 metric may have isolated metric - # values and possibly other prefixes like interval, core and - # aggregate-number. - continue - if count != expected_items: - raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}' - f' in \'{line}\'') checks = { 'aggregate-number': lambda x: isfloat(x), 'core': lambda x: True, @@ -73,6 +60,16 @@ def check_json_output(expected_items): } input = '[\n' + ','.join(Lines) + '\n]' for item in json.loads(input): + if expected_items != -1: + count = len(item) + if count != expected_items and count >= 1 and count <= 4 and 'metric-value' in item: + # Events that generate >1 metric may have isolated metric + # values and possibly other prefixes like interval, core and + # aggregate-number. + pass + elif count != expected_items: + raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}' + f' in \'{item}\'') for key, value in item.items(): if key not in checks: raise RuntimeError(f'Unexpected key: key={key} value={value}') @@ -82,11 +79,11 @@ def check_json_output(expected_items): try: if args.no_args or args.system_wide or args.event: - expected_items = 6 - elif args.interval or args.per_thread or args.system_wide_no_aggr: expected_items = 7 - elif args.per_core or args.per_socket or args.per_node or args.per_die: + elif args.interval or args.per_thread or args.system_wide_no_aggr: expected_items = 8 + elif args.per_core or args.per_socket or args.per_node or args.per_die: + expected_items = 9 else: # If no option is specified, don't check the number of items. expected_items = -1 diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh index b7f050aa6210..324fc9e6edd7 100755 --- a/tools/perf/tests/shell/stat+csv_output.sh +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -7,6 +7,7 @@ set -e skip_test=0 +csv_sep=@ function commachecker() { @@ -34,7 +35,7 @@ function commachecker() [ "$x" = "Failed" ] && continue # Count the number of commas - x=$(echo $line | tr -d -c ',') + x=$(echo $line | tr -d -c $csv_sep) cnt="${#x}" # echo $line $cnt [[ ! "$cnt" =~ $exp ]] && { @@ -54,7 +55,7 @@ function ParanoidAndNotRoot() check_no_args() { echo -n "Checking CSV output: no args " - perf stat -x, true 2>&1 | commachecker --no-args + perf stat -x$csv_sep true 2>&1 | commachecker --no-args echo "[Success]" } @@ -66,7 +67,7 @@ check_system_wide() echo "[Skip] paranoid and not root" return fi - perf stat -x, -a true 2>&1 | commachecker --system-wide + perf stat -x$csv_sep -a true 2>&1 | commachecker --system-wide echo "[Success]" } @@ -79,14 +80,14 @@ check_system_wide_no_aggr() return fi echo -n "Checking CSV output: system wide no aggregation " - perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr + perf stat -x$csv_sep -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr echo "[Success]" } check_interval() { echo -n "Checking CSV output: interval " - perf stat -x, -I 1000 true 2>&1 | commachecker --interval + perf stat -x$csv_sep -I 1000 true 2>&1 | commachecker --interval echo "[Success]" } @@ -94,7 +95,7 @@ check_interval() check_event() { echo -n "Checking CSV output: event " - perf stat -x, -e cpu-clock true 2>&1 | commachecker --event + perf stat -x$csv_sep -e cpu-clock true 2>&1 | commachecker --event echo "[Success]" } @@ -106,7 +107,7 @@ check_per_core() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-core -a true 2>&1 | commachecker --per-core + perf stat -x$csv_sep --per-core -a true 2>&1 | commachecker --per-core echo "[Success]" } @@ -118,7 +119,7 @@ check_per_thread() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread + perf stat -x$csv_sep --per-thread -a true 2>&1 | commachecker --per-thread echo "[Success]" } @@ -130,7 +131,7 @@ check_per_die() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-die -a true 2>&1 | commachecker --per-die + perf stat -x$csv_sep --per-die -a true 2>&1 | commachecker --per-die echo "[Success]" } @@ -142,7 +143,7 @@ check_per_node() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-node -a true 2>&1 | commachecker --per-node + perf stat -x$csv_sep --per-node -a true 2>&1 | commachecker --per-node echo "[Success]" } @@ -154,7 +155,7 @@ check_per_socket() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket + perf stat -x$csv_sep --per-socket -a true 2>&1 | commachecker --per-socket echo "[Success]" } diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index 38e3b287dbb2..d877a0a9731f 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -277,7 +277,7 @@ int on_switch(u64 *ctx) else prev_state = get_task_state(prev); - return off_cpu_stat(ctx, prev, next, prev_state); + return off_cpu_stat(ctx, prev, next, prev_state & 0xff); } char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 534d36d26fc3..a07473703c6d 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -842,11 +842,7 @@ int create_perf_stat_counter(struct evsel *evsel, if (evsel__is_group_leader(evsel)) { attr->disabled = 1; - /* - * In case of initial_delay we enable tracee - * events manually. - */ - if (target__none(target) && !config->initial_delay) + if (target__enable_on_exec(target)) attr->enable_on_exec = 1; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b1c29156c560..bf1794ebc916 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -166,7 +166,6 @@ struct perf_stat_config { FILE *output; unsigned int interval; unsigned int timeout; - int initial_delay; unsigned int unit_width; unsigned int metric_only_len; int times; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index daec6cba500d..880f1af7f6ad 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -18,6 +18,7 @@ struct target { bool per_thread; bool use_bpf; bool hybrid; + int initial_delay; const char *attr_map; }; @@ -72,6 +73,17 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__enable_on_exec(struct target *target) +{ + /* + * Normally enable_on_exec should be set if: + * 1) The tracee process is forked (not attaching to existed task or cpu). + * 2) And initial_delay is not configured. + * Otherwise, we enable tracee events manually. + */ + return target__none(target) && !target->initial_delay; +} + static inline bool target__has_per_thread(struct target *target) { return target->system_wide && target->per_thread; |