diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-16 10:15:32 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-16 10:15:32 +0200 |
commit | 4d66020dcef83314092f2c8c89152a8d122627e2 (patch) | |
tree | edc887c616a323b07386963651eb2f89ed0ade1f | |
parent | 77dbd72b982ca648b42b4feac5f8b2ea55e4ed09 (diff) | |
parent | f37c3bbc635994eda203a6da4ba0f9d05165a8d6 (diff) |
Merge tag 'trace-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:
"New:
- The Real Time Linux Analysis (RTLA) tool is added to the tools
directory.
- Can safely filter on user space pointers with: field.ustring ~
"match-string"
- eprobes can now be filtered like any other event.
- trace_marker(_raw) now uses stream_open() to allow multiple threads
to safely write to it. Note, this could possibly break existing
user space, but we will not know until we hear about it, and then
can revert the change if need be.
- New field in events to display when bottom halfs are disabled.
- Sorting of the ftrace functions are now done at compile time
instead of at bootup.
Infrastructure changes to support future efforts:
- Added __rel_loc type for trace events. Similar to __data_loc but
the offset to the dynamic data is based off of the location of the
descriptor and not the beginning of the event. Needed for user
defined events.
- Some simplification of event trigger code.
- Make synthetic events process its callback better to not hinder
other event callbacks that are registered. Needed for user defined
events.
And other small fixes and cleanups"
* tag 'trace-v5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (50 commits)
tracing: Add ustring operation to filtering string pointers
rtla: Add rtla timerlat hist documentation
rtla: Add rtla timerlat top documentation
rtla: Add rtla timerlat documentation
rtla: Add rtla osnoise hist documentation
rtla: Add rtla osnoise top documentation
rtla: Add rtla osnoise man page
rtla: Add Documentation
rtla/timerlat: Add timerlat hist mode
rtla: Add timerlat tool and timelart top mode
rtla/osnoise: Add the hist mode
rtla/osnoise: Add osnoise top mode
rtla: Add osnoise tool
rtla: Helper functions for rtla
rtla: Real-Time Linux Analysis tool
tracing/osnoise: Properly unhook events if start_per_cpu_kthreads() fails
tracing: Remove duplicate warnings when calling trace_create_file()
tracing/kprobes: 'nmissed' not showed correctly for kretprobe
tracing: Add test for user space strings when filtering on string pointers
tracing: Have syscall trace events use trace_event_buffer_lock_reserve()
...
70 files changed, 6706 insertions, 292 deletions
diff --git a/Documentation/tools/rtla/Makefile b/Documentation/tools/rtla/Makefile new file mode 100644 index 000000000000..9f2b84af1a6c --- /dev/null +++ b/Documentation/tools/rtla/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Based on bpftool's Documentation Makefile + +INSTALL ?= install +RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty + +PREFIX ?= /usr/share +MANDIR ?= $(PREFIX)/man +MAN1DIR = $(MANDIR)/man1 + +MAN1_RST = $(wildcard rtla*.rst) + +_DOC_MAN1 = $(patsubst %.rst,%.1,$(MAN1_RST)) +DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1)) + +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) +RST2MAN_OPTS += --verbose + +$(OUTPUT)%.1: %.rst +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif + rst2man $(RST2MAN_OPTS) $< > $@ + +man1: $(DOC_MAN1) +man: man1 + +clean: + $(RM) $(DOC_MAN1) + +install: man + $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR) + $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(MAN1DIR) + +uninstall: + $(RM) $(addprefix $(DESTDIR)$(MAN1DIR)/,$(_DOC_MAN1)) + $(RMDIR) $(DESTDIR)$(MAN1DIR) + +.PHONY: man man1 clean install uninstall +.DEFAULT_GOAL := man diff --git a/Documentation/tools/rtla/common_appendix.rst b/Documentation/tools/rtla/common_appendix.rst new file mode 100644 index 000000000000..b494084acccd --- /dev/null +++ b/Documentation/tools/rtla/common_appendix.rst @@ -0,0 +1,12 @@ +REPORTING BUGS +============== +Report bugs to <lkml@vger.kernel.org> + +LICENSE +======= +**rtla** is Free Software licensed under the GNU GPLv2 + +COPYING +======= +Copyright \(C) 2021 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/tools/rtla/common_hist_options.rst b/Documentation/tools/rtla/common_hist_options.rst new file mode 100644 index 000000000000..0266cd08a6c9 --- /dev/null +++ b/Documentation/tools/rtla/common_hist_options.rst @@ -0,0 +1,23 @@ +**-b**, **--bucket-size** *N* + + Set the histogram bucket size (default *1*). + +**-e**, **--entries** *N* + + Set the number of entries of the histogram (default 256). + +**--no-header** + + Do not print header. + +**--no-summary** + + Do not print summary. + +**--no-index** + + Do not print index. + +**--with-zeros** + + Print zero only entries. diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst new file mode 100644 index 000000000000..721790ad984e --- /dev/null +++ b/Documentation/tools/rtla/common_options.rst @@ -0,0 +1,28 @@ +**-c**, **--cpus** *cpu-list* + + Set the osnoise tracer to run the sample threads in the cpu-list. + +**-d**, **--duration** *time[s|m|h|d]* + + Set the duration of the session. + +**-D**, **--debug** + + Print debug info. + +**-t**, **--trace**\[*=file*] + + Save the stopped trace to [*file|osnoise_trace.txt*]. + +**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period* + + Set scheduling parameters to the osnoise tracer threads, the format to set the priority are: + + - *o:prio* - use SCHED_OTHER with *prio*; + - *r:prio* - use SCHED_RR with *prio*; + - *f:prio* - use SCHED_FIFO with *prio*; + - *d:runtime[us|ms|s]:period[us|ms|s]* - use SCHED_DEADLINE with *runtime* and *period* in nanoseconds. + +**-h**, **--help** + + Print help menu. diff --git a/Documentation/tools/rtla/common_osnoise_description.rst b/Documentation/tools/rtla/common_osnoise_description.rst new file mode 100644 index 000000000000..8973c5df888f --- /dev/null +++ b/Documentation/tools/rtla/common_osnoise_description.rst @@ -0,0 +1,8 @@ +The **rtla osnoise** tool is an interface for the *osnoise* tracer. The +*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the +time in a loop while with preemption, softirq and IRQs enabled, thus +allowing all the sources of operating systme noise during its execution. +The *osnoise*'s tracer threads take note of the delta between each time +read, along with an interference counter of all sources of interference. +At the end of each period, the *osnoise* tracer displays a summary of +the results. diff --git a/Documentation/tools/rtla/common_osnoise_options.rst b/Documentation/tools/rtla/common_osnoise_options.rst new file mode 100644 index 000000000000..d556883e4e26 --- /dev/null +++ b/Documentation/tools/rtla/common_osnoise_options.rst @@ -0,0 +1,17 @@ +**-p**, **--period** *us* + + Set the *osnoise* tracer period in microseconds. + +**-r**, **--runtime** *us* + + Set the *osnoise* tracer runtime in microseconds. + +**-s**, **--stop** *us* + + Stop the trace if a single sample is higher than the argument in microseconds. + If **-T** is set, it will also save the trace to the output. + +**-S**, **--stop-total** *us* + + Stop the trace if the total sample is higher than the argument in microseconds. + If **-T** is set, it will also save the trace to the output. diff --git a/Documentation/tools/rtla/common_timerlat_description.rst b/Documentation/tools/rtla/common_timerlat_description.rst new file mode 100644 index 000000000000..321201cb8597 --- /dev/null +++ b/Documentation/tools/rtla/common_timerlat_description.rst @@ -0,0 +1,10 @@ +The **rtla timerlat** tool is an interface for the *timerlat* tracer. The +*timerlat* tracer dispatches a kernel thread per-cpu. These threads +set a periodic timer to wake themselves up and go back to sleep. After +the wakeup, they collect and generate useful information for the +debugging of operating system timer latency. + +The *timerlat* tracer outputs information in two ways. It periodically +prints the timer latency at the timer *IRQ* handler and the *Thread* +handler. It also enable the trace of the most relevant information via +**osnoise:** tracepoints. diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst new file mode 100644 index 000000000000..e9c1bfd55d48 --- /dev/null +++ b/Documentation/tools/rtla/common_timerlat_options.rst @@ -0,0 +1,16 @@ +**-p**, **--period** *us* + + Set the *timerlat* tracer period in microseconds. + +**-i**, **--irq** *us* + + Stop trace if the *IRQ* latency is higher than the argument in us. + +**-T**, **--thread** *us* + + Stop trace if the *Thread* latency is higher than the argument in us. + +**-s**, **--stack** *us* + + Save the stack trace at the *IRQ* if a *Thread* latency is higher than the + argument in us. diff --git a/Documentation/tools/rtla/common_top_options.rst b/Documentation/tools/rtla/common_top_options.rst new file mode 100644 index 000000000000..f48878938f84 --- /dev/null +++ b/Documentation/tools/rtla/common_top_options.rst @@ -0,0 +1,3 @@ +**-q**, **--quiet** + + Print only a summary at the end of the session. diff --git a/Documentation/tools/rtla/rtla-osnoise-hist.rst b/Documentation/tools/rtla/rtla-osnoise-hist.rst new file mode 100644 index 000000000000..52298ddd8701 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise-hist.rst @@ -0,0 +1,66 @@ +=================== +rtla-osnoise-hist +=================== +------------------------------------------------------ +Display a histogram of the osnoise tracer samples +------------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise hist** [*OPTIONS*] + +DESCRIPTION +=========== +.. include:: common_osnoise_description.rst + +The **rtla osnoise hist** tool collects all **osnoise:sample_threshold** +occurrence in a histogram, displaying the results in a user-friendly way. +The tool also allows many configurations of the *osnoise* tracer and the +collection of the tracer output. + +OPTIONS +======= +.. include:: common_osnoise_options.rst + +.. include:: common_hist_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, *osnoise* tracer threads are set to run with real-time +priority *FIFO:1*, on CPUs *0-11*, for *900ms* at each period (*1s* by +default). The reason for reducing the runtime is to avoid starving the +**rtla** tool. The tool is also set to run for *one minute*. The output +histogram is set to group outputs in buckets of *10us* and *25* entries:: + + [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25 + # RTLA osnoise histogram + # Time unit is microseconds (us) + # Duration: 0 00:01:00 + Index CPU-000 CPU-001 CPU-002 CPU-003 CPU-004 CPU-005 CPU-006 CPU-007 CPU-008 CPU-009 CPU-010 CPU-011 + 0 42982 46287 51779 53740 52024 44817 49898 36500 50408 50128 49523 52377 + 10 12224 8356 2912 878 2667 10155 4573 18894 4214 4836 5708 2413 + 20 8 5 12 2 13 24 20 41 29 53 39 39 + 30 1 1 0 0 10 3 6 19 15 31 30 38 + 40 0 0 0 0 0 4 2 7 2 3 8 11 + 50 0 0 0 0 0 0 0 0 0 1 1 2 + over: 0 0 0 0 0 0 0 0 0 0 0 0 + count: 55215 54649 54703 54620 54714 55003 54499 55461 54668 55052 55309 54880 + min: 0 0 0 0 0 0 0 0 0 0 0 0 + avg: 0 0 0 0 0 0 0 0 0 0 0 0 + max: 30 30 20 20 30 40 40 40 40 50 50 50 + +SEE ALSO +======== +**rtla-osnoise**\(1), **rtla-osnoise-top**\(1) + +*osnoise* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-osnoise-top.rst b/Documentation/tools/rtla/rtla-osnoise-top.rst new file mode 100644 index 000000000000..5d75d1394516 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise-top.rst @@ -0,0 +1,61 @@ +=================== +rtla-osnoise-top +=================== +----------------------------------------------- +Display a summary of the operating system noise +----------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise top** [*OPTIONS*] + +DESCRIPTION +=========== +.. include:: common_osnoise_description.rst + +**rtla osnoise top** collects the periodic summary from the *osnoise* tracer, +including the counters of the occurrence of the interference source, +displaying the results in a user-friendly format. + +The tool also allows many configurations of the *osnoise* tracer and the +collection of the tracer output. + +OPTIONS +======= +.. include:: common_osnoise_options.rst + +.. include:: common_top_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, the **rtla osnoise top** tool is set to run with a +real-time priority *FIFO:1*, on CPUs *0-3*, for *900ms* at each period +(*1s* by default). The reason for reducing the runtime is to avoid starving +the rtla tool. The tool is also set to run for *one minute* and to display +a summary of the report at the end of the session:: + + [root@f34 ~]# rtla osnoise top -P F:1 -c 0-3 -r 900000 -d 1M -q + Operating System Noise + duration: 0 00:01:00 | time is in us + CPU Period Runtime Noise % CPU Aval Max Noise Max Single HW NMI IRQ Softirq Thread + 0 #59 53100000 304896 99.42580 6978 56 549 0 53111 1590 13 + 1 #59 53100000 338339 99.36282 8092 24 399 0 53130 1448 31 + 2 #59 53100000 290842 99.45227 6582 39 855 0 53110 1406 12 + 3 #59 53100000 204935 99.61405 6251 33 290 0 53156 1460 12 + +SEE ALSO +======== + +**rtla-osnoise**\(1), **rtla-osnoise-hist**\(1) + +Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-osnoise.rst b/Documentation/tools/rtla/rtla-osnoise.rst new file mode 100644 index 000000000000..c129b206ce34 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise.rst @@ -0,0 +1,59 @@ +=============== +rtla-osnoise +=============== +------------------------------------------------------------------ +Measure the operating system noise +------------------------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise** [*MODE*] ... + +DESCRIPTION +=========== + +.. include:: common_osnoise_description.rst + +The *osnoise* tracer outputs information in two ways. It periodically prints +a summary of the noise of the operating system, including the counters of +the occurrence of the source of interference. It also provides information +for each noise via the **osnoise:** tracepoints. The **rtla osnoise top** +mode displays information about the periodic summary from the *osnoise* tracer. +The **rtla osnoise hist** mode displays information about the noise using +the **osnoise:** tracepoints. For further details, please refer to the +respective man page. + +MODES +===== +**top** + + Prints the summary from osnoise tracer. + +**hist** + + Prints a histogram of osnoise samples. + +If no MODE is given, the top mode is called, passing the arguments. + +OPTIONS +======= + +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding mode. + +SEE ALSO +======== +**rtla-osnoise-top**\(1), **rtla-osnoise-hist**\(1) + +Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-timerlat-hist.rst b/Documentation/tools/rtla/rtla-timerlat-hist.rst new file mode 100644 index 000000000000..e12eae1f3301 --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat-hist.rst @@ -0,0 +1,106 @@ +===================== +rtla-timerlat-hist +===================== +------------------------------------------------ +Histograms of the operating system timer latency +------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat hist** [*OPTIONS*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The **rtla timerlat hist** displays a histogram of each tracer event +occurrence. This tool uses the periodic information, and the +**osnoise:** tracepoints are enabled when using the **-T** option. + +OPTIONS +======= + +.. include:: common_timerlat_options.rst + +.. include:: common_hist_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, **rtla timerlat hist** is set to run for *10* minutes, +in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat +hist** will change the priority of the *timelat* threads to run under +*SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The +*1ms* period is also passed to the *timerlat* tracer:: + + [root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms + # RTLA timerlat histogram + # Time unit is microseconds (us) + # Duration: 0 00:10:00 + Index IRQ-000 Thr-000 IRQ-001 Thr-001 IRQ-002 Thr-002 IRQ-003 Thr-003 IRQ-004 Thr-004 + 0 276489 0 206089 0 466018 0 481102 0 205546 0 + 1 318327 35487 388149 30024 94531 48382 83082 71078 388026 55730 + 2 3282 122584 4019 126527 28231 109012 23311 89309 4568 98739 + 3 940 11815 837 9863 6209 16227 6895 17196 910 9780 + 4 444 17287 424 11574 2097 38443 2169 36736 462 13476 + 5 206 43291 255 25581 1223 101908 1304 101137 236 28913 + 6 132 101501 96 64584 635 213774 757 215471 99 73453 + 7 74 169347 65 124758 350 57466 441 53639 69 148573 + 8 53 85183 31 156751 229 9052 306 9026 39 139907 + 9 22 10387 12 42762 161 2554 225 2689 19 26192 + 10 13 1898 8 5770 114 1247 128 1405 13 3772 + 11 9 560 9 924 71 686 76 765 8 713 + 12 4 256 2 360 50 411 64 474 3 278 + 13 2 167 2 172 43 256 53 350 4 180 + 14 1 88 1 116 15 198 42 223 0 115 + 15 2 63 3 94 11 139 20 150 0 58 + 16 2 37 0 56 5 78 10 102 0 39 + 17 0 18 0 28 4 57 8 80 0 15 + 18 0 8 0 17 2 50 6 56 0 12 + 19 0 9 0 5 0 19 0 48 0 18 + 20 0 4 0 8 0 11 2 27 0 4 + 21 0 2 0 3 1 9 1 18 0 6 + 22 0 1 0 3 1 7 0 3 0 5 + 23 0 2 0 4 0 2 0 7 0 2 + 24 0 2 0 2 1 3 0 3 0 5 + 25 0 0 0 1 0 1 0 1 0 3 + 26 0 1 0 0 0 2 0 2 0 0 + 27 0 0 0 3 0 1 0 0 0 1 + 28 0 0 0 3 0 0 0 1 0 0 + 29 0 0 0 2 0 2 0 1 0 3 + 30 0 1 0 0 0 0 0 0 0 0 + 31 0 1 0 0 0 0 0 2 0 2 + 32 0 0 0 1 0 2 0 0 0 0 + 33 0 0 0 2 0 0 0 0 0 1 + 34 0 0 0 0 0 0 0 0 0 2 + 35 0 1 0 1 0 0 0 0 0 1 + 36 0 1 0 0 0 1 0 1 0 0 + 37 0 0 0 1 0 0 0 0 0 0 + 40 0 0 0 0 0 1 0 1 0 0 + 41 0 0 0 0 0 0 0 0 0 1 + 42 0 0 0 0 0 0 0 0 0 1 + 44 0 0 0 0 0 1 0 0 0 0 + 46 0 0 0 0 0 0 0 1 0 0 + 47 0 0 0 0 0 0 0 0 0 1 + 50 0 0 0 0 0 0 0 0 0 1 + 54 0 0 0 1 0 0 0 0 0 0 + 58 0 0 0 1 0 0 0 0 0 0 + over: 0 0 0 0 0 0 0 0 0 0 + count: 600002 600002 600002 600002 600002 600002 600002 600002 600002 600002 + min: 0 1 0 1 0 1 0 1 0 1 + avg: 0 5 0 5 0 4 0 4 0 5 + max: 16 36 15 58 24 44 21 46 13 50 + +SEE ALSO +======== +**rtla-timerlat**\(1), **rtla-timerlat-top**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> diff --git a/Documentation/tools/rtla/rtla-timerlat-top.rst b/Documentation/tools/rtla/rtla-timerlat-top.rst new file mode 100644 index 000000000000..1c321de1c171 --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat-top.rst @@ -0,0 +1,145 @@ +==================== +rtla-timerlat-top +==================== +------------------------------------------- +Measures the operating system timer latency +------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat top** [*OPTIONS*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The **rtla timerlat top** displays a summary of the periodic output +from the *timerlat* tracer. It also provides information for each +operating system noise via the **osnoise:** tracepoints that can be +seem with the option **-T**. + +OPTIONS +======= + +.. include:: common_timerlat_options.rst + +.. include:: common_top_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= + +In the example below, the *timerlat* tracer is set to capture the stack trace at +the IRQ handler, printing it to the buffer if the *Thread* timer latency is +higher than *30 us*. It is also set to stop the session if a *Thread* timer +latency higher than *30 us* is hit. Finally, it is set to save the trace +buffer if the stop condition is hit:: + + [root@alien ~]# rtla timerlat top -s 30 -t 30 -T + Timer Latency + 0 00:00:59 | IRQ Timer Latency (us) | Thread Timer Latency (us) + CPU COUNT | cur min avg max | cur min avg max + 0 #58634 | 1 0 1 10 | 11 2 10 23 + 1 #58634 | 1 0 1 9 | 12 2 9 23 + 2 #58634 | 0 0 1 11 | 10 2 9 23 + 3 #58634 | 1 0 1 11 | 11 2 9 24 + 4 #58634 | 1 0 1 10 | 11 2 9 26 + 5 #58634 | 1 0 1 8 | 10 2 9 25 + 6 #58634 | 12 0 1 12 | 30 2 10 30 <--- CPU with spike + 7 #58634 | 1 0 1 9 | 11 2 9 23 + 8 #58633 | 1 0 1 9 | 11 2 9 26 + 9 #58633 | 1 0 1 9 | 10 2 9 26 + 10 #58633 | 1 0 1 13 | 11 2 9 28 + 11 #58633 | 1 0 1 13 | 12 2 9 24 + 12 #58633 | 1 0 1 8 | 10 2 9 23 + 13 #58633 | 1 0 1 10 | 10 2 9 22 + 14 #58633 | 1 0 1 18 | 12 2 9 27 + 15 #58633 | 1 0 1 10 | 11 2 9 28 + 16 #58633 | 0 0 1 11 | 7 2 9 26 + 17 #58633 | 1 0 1 13 | 10 2 9 24 + 18 #58633 | 1 0 1 9 | 13 2 9 22 + 19 #58633 | 1 0 1 10 | 11 2 9 23 + 20 #58633 | 1 0 1 12 | 11 2 9 28 + 21 #58633 | 1 0 1 14 | 11 2 9 24 + 22 #58633 | 1 0 1 8 | 11 2 9 22 + 23 #58633 | 1 0 1 10 | 11 2 9 27 + timerlat hit stop tracing + saving trace to timerlat_trace.txt + [root@alien bristot]# tail -60 timerlat_trace.txt + [...] + timerlat/5-79755 [005] ....... 426.271226: #58634 context thread timer_latency 10823 ns + sh-109404 [006] dnLh213 426.271247: #58634 context irq timer_latency 12505 ns + sh-109404 [006] dNLh313 426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns + sh-109404 [006] d...313 426.271263: thread_noise: sh:109404 start 426.271245853 duration 4769 ns + timerlat/6-79756 [006] ....... 426.271264: #58634 context thread timer_latency 30328 ns + timerlat/6-79756 [006] ....1.. 426.271265: <stack trace> + => timerlat_irq + => __hrtimer_run_queues + => hrtimer_interrupt + => __sysvec_apic_timer_interrupt + => sysvec_apic_timer_interrupt + => asm_sysvec_apic_timer_interrupt + => _raw_spin_unlock_irqrestore <---- spinlock that disabled interrupt. + => try_to_wake_up + => autoremove_wake_function + => __wake_up_common + => __wake_up_common_lock + => ep_poll_callback + => __wake_up_common + => __wake_up_common_lock + => fsnotify_add_event + => inotify_handle_inode_event + => fsnotify + => __fsnotify_parent + => __fput + => task_work_run + => exit_to_user_mode_prepare + => syscall_exit_to_user_mode + => do_syscall_64 + => entry_SYSCALL_64_after_hwframe + => 0x7265000001378c + => 0x10000cea7 + => 0x25a00000204a + => 0x12e302d00000000 + => 0x19b51010901b6 + => 0x283ce00726500 + => 0x61ea308872 + => 0x00000fe3 + bash-109109 [007] d..h... 426.271265: #58634 context irq timer_latency 1211 ns + timerlat/6-79756 [006] ....... 426.271267: timerlat_main: stop tracing hit on cpu 6 + +In the trace, it is possible the notice that the *IRQ* timer latency was +already high, accounting *12505 ns*. The IRQ delay was caused by the +*bash-109109* process that disabled IRQs in the wake-up path +(*_try_to_wake_up()* function). The duration of the IRQ handler that woke +up the timerlat thread, informed with the **osnoise:irq_noise** event, was +also high and added more *12553 ns* to the Thread latency. Finally, the +**osnoise:thread_noise** added by the currently running thread (including +the scheduling overhead) added more *4769 ns*. Summing up these values, +the *Thread* timer latency accounted for *30328 ns*. + +The primary reason for this high value is the wake-up path that was hit +twice during this case: when the *bash-109109* was waking up a thread +and then when the *timerlat* thread was awakened. This information can +then be used as the starting point of a more fine-grained analysis. + +Note that **rtla timerlat** was dispatched without changing *timerlat* tracer +threads' priority. That is generally not needed because these threads hava +priority *FIFO:95* by default, which is a common priority used by real-time +kernel developers to analyze scheduling delays. + +SEE ALSO +-------- +**rtla-timerlat**\(1), **rtla-timerlat-hist**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +------ +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-timerlat.rst b/Documentation/tools/rtla/rtla-timerlat.rst new file mode 100644 index 000000000000..44a49e6f302b --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat.rst @@ -0,0 +1,57 @@ +================ +rtla-timerlat +================ +------------------------------------------- +Measures the operating system timer latency +------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat** [*MODE*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The *timerlat* tracer outputs information in two ways. It periodically +prints the timer latency at the timer *IRQ* handler and the *Thread* handler. +It also provides information for each noise via the **osnoise:** tracepoints. +The **rtla timerlat top** mode displays a summary of the periodic output +from the *timerlat* tracer. The **rtla hist hist** mode displays a histogram +of each tracer event occurrence. For further details, please refer to the +respective man page. + +MODES +===== +**top** + + Prints the summary from *timerlat* tracer. + +**hist** + + Prints a histogram of timerlat samples. + +If no *MODE* is given, the top mode is called, passing the arguments. + +OPTIONS +======= +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding mode. + +SEE ALSO +======== +**rtla-timerlat-top**\(1), **rtla-timerlat-hist**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla.rst b/Documentation/tools/rtla/rtla.rst new file mode 100644 index 000000000000..fc0d233efcd5 --- /dev/null +++ b/Documentation/tools/rtla/rtla.rst @@ -0,0 +1,48 @@ +========= +rtla +========= +-------------------------------- +Real-time Linux Analysis tool +-------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla** *COMMAND* [*OPTIONS*] + +DESCRIPTION +=========== +The **rtla** is a meta-tool that includes a set of commands that aims to +analyze the real-time properties of Linux. But instead of testing Linux +as a black box, **rtla** leverages kernel tracing capabilities to provide +precise information about the properties and root causes of unexpected +results. + +COMMANDS +======== +**osnoise** + + Gives information about the operating system noise (osnoise). + +**timerlat** + + Measures the IRQ and thread timer latency. + +OPTIONS +======= +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding command. + +SEE ALSO +======== +**rtla-osnoise**\(1), **rtla-timerlat**\(1) + +AUTHOR +====== +Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index 8ddb9b09451c..c47f381d0c00 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes prev_comm ~ "*sh*" prev_comm ~ "ba*sh" +If the field is a pointer that points into user space (for example +"filename" from sys_enter_openat), then you have to append ".ustring" to the +field name:: + + filename.ustring ~ "password" + +As the kernel will have to know how to retrieve the memory that the pointer +is at from user space. + 5.2 Setting filters ------------------- @@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of the filter string; the error message should still be useful though even without more accurate position info. +5.2.1 Filter limitations +------------------------ + +If a filter is placed on a string pointer ``(char *)`` that does not point +to a string on the ring buffer, but instead points to kernel or user space +memory, then, for safety reasons, at most 1024 bytes of the content is +copied onto a temporary buffer to do the compare. If the copy of the memory +faults (the pointer points to memory that should not be accessed), then the +string compare will be treated as not matching. + 5.3 Clearing filters -------------------- diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 3616839c5c4b..bafc02bf8220 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -109,12 +109,12 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) * also the directory that is being deleted. */ inode_unlock(inode); - inode_unlock(dentry->d_inode); + inode_unlock(d_inode(dentry)); ret = tracefs_ops.rmdir(name); inode_lock_nested(inode, I_MUTEX_PARENT); - inode_lock(dentry->d_inode); + inode_lock(d_inode(dentry)); kfree(name); @@ -284,7 +284,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) static int tracefs_apply_options(struct super_block *sb) { struct tracefs_fs_info *fsi = sb->s_fs_info; - struct inode *inode = sb->s_root->d_inode; + struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; inode->i_mode &= ~S_IALLUGO; @@ -403,18 +403,18 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) if (!parent) parent = tracefs_mount->mnt_root; - inode_lock(parent->d_inode); - if (unlikely(IS_DEADDIR(parent->d_inode))) + inode_lock(d_inode(parent)); + if (unlikely(IS_DEADDIR(d_inode(parent)))) dentry = ERR_PTR(-ENOENT); else dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && dentry->d_inode) { + if (!IS_ERR(dentry) && d_inode(dentry)) { dput(dentry); dentry = ERR_PTR(-EEXIST); } if (IS_ERR(dentry)) { - inode_unlock(parent->d_inode); + inode_unlock(d_inode(parent)); simple_release_fs(&tracefs_mount, &tracefs_mount_count); } @@ -423,7 +423,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) static struct dentry *failed_creating(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); + inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&tracefs_mount, &tracefs_mount_count); return NULL; @@ -431,7 +431,7 @@ static struct dentry *failed_creating(struct dentry *dentry) static struct dentry *end_creating(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); + inode_unlock(d_inode(dentry->d_parent)); return dentry; } @@ -489,7 +489,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; d_instantiate(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); + fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } @@ -516,8 +516,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); - inc_nlink(dentry->d_parent->d_inode); - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + inc_nlink(d_inode(dentry->d_parent)); + fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2d167ac3452c..70c069aef02c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -172,6 +172,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -782,6 +783,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, FILTER_COMM, diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a8e97f84b652..7660a7846586 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (c) diff --git a/include/trace/perf.h b/include/trace/perf.h index dbc6c74defc3..ea4405de175a 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 08810a463880..8c6f7c433518 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -293,6 +317,19 @@ TRACE_MAKE_SYSTEM_STR(); #undef __get_str #define __get_str(field) ((char *)__get_dynamic_array(field)) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + #undef __get_bitmask #define __get_bitmask(field) \ ({ \ @@ -302,6 +339,15 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) \ + ({ \ + void *__bitmask = __get_rel_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_rel_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -471,6 +517,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(_type, _item, _len) { \ + .type = "__rel_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -519,6 +580,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data) - \ + offsetof(typeof(*entry), __rel_loc_##item) - \ + sizeof(u32); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). @@ -542,6 +619,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -706,6 +787,37 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __entry->__rel_loc_##item = __data_offsets.item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __assign_rel_str +#define __assign_rel_str(dst, src) \ + strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + +#undef __assign_rel_str_len +#define __assign_rel_str_len(dst, src, len) \ + do { \ + memcpy(__get_rel_str(dst), (src), (len)); \ + __get_rel_str(dst)[len] = '\0'; \ + } while (0) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_bitmask +#define __assign_rel_bitmask(dst, src, nr_bits) \ + memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -770,6 +882,10 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __get_rel_dynamic_array +#undef __get_rel_dynamic_array_len +#undef __get_rel_str +#undef __get_rel_bitmask #undef __print_array #undef __print_hex_dump diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 420ff4bc67fd..f468767bc287 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -915,6 +915,20 @@ config EVENT_TRACE_TEST_SYSCALLS TBD - enable a way to actually call the syscalls as we test their events +config FTRACE_SORT_STARTUP_TEST + bool "Verify compile time sorting of ftrace functions" + depends on DYNAMIC_FTRACE + depends on BUILDTIME_TABLE_SORT + help + Sorting of the mcount_loc sections that is used to find the + where the ftrace knows where to patch functions for tracing + and other callbacks is done at compile time. But if the sort + is not done correctly, it will cause non-deterministic failures. + When this is set, the sorted sections will be verified that they + are in deed sorted and will warn if they are not. + + If unsure, say N + config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index be5f6b32a012..6163b6f762f7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6394,6 +6394,27 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } +#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST +static void test_is_sorted(unsigned long *start, unsigned long count) +{ + int i; + + for (i = 1; i < count; i++) { + if (WARN(start[i - 1] > start[i], + "[%d] %pS at %lx is not sorted with %pS at %lx\n", i, + (void *)start[i - 1], start[i - 1], + (void *)start[i], start[i])) + break; + } + if (i == count) + pr_info("ftrace section at %px sorted properly\n", start); +} +#else +static void test_is_sorted(unsigned long *start, unsigned long count) +{ +} +#endif + static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -6412,8 +6433,17 @@ static int ftrace_process_locs(struct module *mod, if (!count) return 0; - sort(start, count, sizeof(*start), - ftrace_cmp_ips, NULL); + /* + * Sorting mcount in vmlinux at build time depend on + * CONFIG_BUILDTIME_TABLE_SORT, while mcount loc in + * modules can not be sorted at build time. + */ + if (!IS_ENABLED(CONFIG_BUILDTIME_TABLE_SORT) || mod) { + sort(start, count, sizeof(*start), + ftrace_cmp_ips, NULL); + } else { + test_is_sorted(start, count); + } start_pg = ftrace_allocate_pages(count); if (!start_pg) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 78ea542ce3bc..a569a0cb81ee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -980,6 +980,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); + /* ring_buffer_unlock_commit() enables preemption */ + preempt_enable_notrace(); } else ring_buffer_unlock_commit(buffer, event); } @@ -2601,6 +2603,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_HARDIRQ; if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; if (tif_need_resched()) trace_flags |= TRACE_FLAG_NEED_RESCHED; @@ -2745,8 +2749,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, *current_rb = tr->array_buffer.buffer; if (!tr->no_filter_buffering_ref && - (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && - (entry = this_cpu_read(trace_buffered_event))) { + (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { + preempt_disable_notrace(); /* * Filtering is on, so try to use the per cpu buffer first. * This buffer will simulate a ring_buffer_event, @@ -2764,33 +2768,38 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, * is still quicker than no copy on match, but having * to discard out of the ring buffer on a failed match. */ - int max_len = PAGE_SIZE - struct_size(entry, array, 1); + if ((entry = __this_cpu_read(trace_buffered_event))) { + int max_len = PAGE_SIZE - struct_size(entry, array, 1); - val = this_cpu_inc_return(trace_buffered_event_cnt); + val = this_cpu_inc_return(trace_buffered_event_cnt); - /* - * Preemption is disabled, but interrupts and NMIs - * can still come in now. If that happens after - * the above increment, then it will have to go - * back to the old method of allocating the event - * on the ring buffer, and if the filter fails, it - * will have to call ring_buffer_discard_commit() - * to remove it. - * - * Need to also check the unlikely case that the - * length is bigger than the temp buffer size. - * If that happens, then the reserve is pretty much - * guaranteed to fail, as the ring buffer currently - * only allows events less than a page. But that may - * change in the future, so let the ring buffer reserve - * handle the failure in that case. - */ - if (val == 1 && likely(len <= max_len)) { - trace_event_setup(entry, type, trace_ctx); - entry->array[0] = len; - return entry; + /* + * Preemption is disabled, but interrupts and NMIs + * can still come in now. If that happens after + * the above increment, then it will have to go + * back to the old method of allocating the event + * on the ring buffer, and if the filter fails, it + * will have to call ring_buffer_discard_commit() + * to remove it. + * + * Need to also check the unlikely case that the + * length is bigger than the temp buffer size. + * If that happens, then the reserve is pretty much + * guaranteed to fail, as the ring buffer currently + * only allows events less than a page. But that may + * change in the future, so let the ring buffer reserve + * handle the failure in that case. + */ + if (val == 1 && likely(len <= max_len)) { + trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + /* Return with preemption disabled */ + return entry; + } + this_cpu_dec(trace_buffered_event_cnt); } - this_cpu_dec(trace_buffered_event_cnt); + /* __trace_buffer_lock_reserve() disables preemption */ + preempt_enable_notrace(); } entry = __trace_buffer_lock_reserve(*current_rb, type, len, @@ -4183,7 +4192,7 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" + "# / _-----=> irqs-off/BH-disabled\n" "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -4224,7 +4233,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); @@ -4834,6 +4843,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } +static int tracing_mark_open(struct inode *inode, struct file *filp) +{ + stream_open(inode, filp); + return tracing_open_generic_tr(inode, filp); +} + static int tracing_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -5635,7 +5650,7 @@ static const char readme_msg[] = "\t - a numeric literal: e.g. ms_per_sec=1000,\n" "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" "\n" - "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n" + "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" "\t multiplication(*) and division(/) operators. An operand can be either a\n" "\t variable reference, field or numeric literal.\n" "\n" @@ -6718,10 +6733,9 @@ waitagain: cnt = PAGE_SIZE - 1; /* reset all but tr, trace, and overruns */ - memset_startat(iter, 0, seq); + trace_iterator_reset(iter); cpumask_clear(iter->started); trace_seq_init(&iter->seq); - iter->pos = -1; trace_event_read_lock(); trace_access_lock(iter->cpu_file); @@ -7110,9 +7124,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tt) event_triggers_post_call(tr->trace_marker_file, tt); - if (written > 0) - *fpos += written; - return written; } @@ -7171,9 +7182,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, __buffer_unlock_commit(buffer, event); - if (written > 0) - *fpos += written; - return written; } @@ -7573,16 +7581,14 @@ static const struct file_operations tracing_free_buffer_fops = { }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_mark_raw_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_raw_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 38715aa6cfdf..d038ddbf1bea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -83,6 +83,9 @@ enum trace_type { #undef __dynamic_array #define __dynamic_array(type, item) type item[]; +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item) type item[]; + #undef F_STRUCT #define F_STRUCT(args...) args @@ -1334,10 +1337,12 @@ __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { - /* Simply release the temp buffer */ + /* Simply release the temp buffer and enable preemption */ this_cpu_dec(trace_buffered_event_cnt); + preempt_enable_notrace(); return; } + /* ring_buffer_discard_commit() enables preemption */ ring_buffer_discard_commit(buffer, event); } @@ -1465,6 +1470,7 @@ struct filter_pred { static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; @@ -1572,15 +1578,13 @@ extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); -extern int event_enable_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +extern int event_enable_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); @@ -1606,6 +1610,30 @@ get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); +extern bool event_trigger_check_remove(const char *glob); +extern bool event_trigger_empty_param(const char *param); +extern int event_trigger_separate_filter(char *param_and_filter, char **param, + char **filter, bool param_required); +extern struct event_trigger_data * +event_trigger_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data); +extern int event_trigger_parse_num(char *trigger, + struct event_trigger_data *trigger_data); +extern int event_trigger_set_filter(struct event_command *cmd_ops, + struct trace_event_file *file, + char *param, + struct event_trigger_data *trigger_data); +extern void event_trigger_reset_filter(struct event_command *cmd_ops, + struct event_trigger_data *trigger_data); +extern int event_trigger_register(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + char *cmd, + char *trigger, + struct event_trigger_data *trigger_data, + int *n_registered); /** * struct event_trigger_ops - callbacks for trace event triggers @@ -1613,10 +1641,20 @@ extern int register_trigger_hist_enable_disable_cmds(void); * The methods in this structure provide per-event trigger hooks for * various trigger operations. * + * The @init and @free methods are used during trigger setup and + * teardown, typically called from an event_command's @parse() + * function implementation. + * + * The @print method is used to print the trigger spec. + * + * The @trigger method is the function that actually implements the + * trigger and is called in the context of the triggering event + * whenever that event occurs. + * * All the methods below, except for @init() and @free(), must be * implemented. * - * @func: The trigger 'probe' function called when the triggering + * @trigger: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with @@ -1645,9 +1683,10 @@ extern int register_trigger_hist_enable_disable_cmds(void); * (see trace_event_triggers.c). */ struct event_trigger_ops { - void (*func)(struct event_trigger_data *data, - struct trace_buffer *buffer, void *rec, - struct ring_buffer_event *rbe); + void (*trigger)(struct event_trigger_data *data, + struct trace_buffer *buffer, + void *rec, + struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, @@ -1696,7 +1735,7 @@ struct event_trigger_ops { * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * - * @func: The callback function responsible for parsing and + * @parse: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other @@ -1731,21 +1770,24 @@ struct event_trigger_ops { * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. + * This callback function allows a single event_command to + * support multiple trigger implementations via different sets of + * event_trigger_ops, depending on the value of the @param + * string. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; - int (*func)(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *params); + int (*parse)(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, + char *param_and_filter); int (*reg)(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); @@ -1926,14 +1968,7 @@ extern struct trace_iterator *tracepoint_print_iter; */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { - const size_t offset = offsetof(struct trace_iterator, seq); - - /* - * Keep gcc from complaining about overwriting more than just one - * member in the structure. - */ - memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); - + memset_startat(iter, 0, seq); iter->pos = -1; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 928867f527e7..191db32dec46 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -489,18 +489,12 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) if (trace_trigger_soft_disabled(edata->file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = edata->file; - dsize = get_eprobe_size(&edata->ep->tp, rec); - fbuffer.regs = NULL; - - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file, - call->event.type, - sizeof(*entry) + edata->ep->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + + entry = trace_event_buffer_reserve(&fbuffer, edata->file, + sizeof(*entry) + edata->ep->tp.size + dsize); + + if (!entry) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); @@ -549,29 +543,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data, } static struct event_trigger_ops eprobe_trigger_ops = { - .func = eprobe_trigger_func, + .trigger = eprobe_trigger_func, .print = eprobe_trigger_print, .init = eprobe_trigger_init, .free = eprobe_trigger_free, }; -static int eprobe_trigger_cmd_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { return -1; } -static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) +static int eprobe_trigger_reg_func(char *glob, + struct event_trigger_data *data, + struct trace_event_file *file) { return -1; } -static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) +static void eprobe_trigger_unreg_func(char *glob, + struct event_trigger_data *data, + struct trace_event_file *file) { } @@ -586,7 +580,7 @@ static struct event_command event_trigger_cmd = { .name = "eprobe", .trigger_type = ETT_EVENT_EPROBE, .flags = EVENT_CMD_FL_NEEDS_REC, - .func = eprobe_trigger_cmd_func, + .parse = eprobe_trigger_cmd_parse, .reg = eprobe_trigger_reg_func, .unreg = eprobe_trigger_unreg_func, .unreg_all = NULL, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 92be9cb1d7d4..3147614c1812 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3461,10 +3461,8 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, tr, &ftrace_tr_enable_fops); - if (!entry) { - pr_warn("Could not create tracefs 'enable' entry\n"); + if (!entry) return -ENOMEM; - } /* There are not as crucial, just warn if they are not created */ @@ -3480,17 +3478,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); /* ring buffer internal formats */ - entry = trace_create_file("header_page", TRACE_MODE_READ, d_events, + trace_create_file("header_page", TRACE_MODE_READ, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); - if (!entry) - pr_warn("Could not create tracefs 'header_page' entry\n"); - entry = trace_create_file("header_event", TRACE_MODE_READ, d_events, + trace_create_file("header_event", TRACE_MODE_READ, d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); - if (!entry) - pr_warn("Could not create tracefs 'header_event' entry\n"); tr->event_dir = d_events; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c9124038b140..b458a9afa2c0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> */ +#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 1024 + +struct ustring_buffer { + char buffer[USTRING_BUF_SIZE]; +}; + +static __percpu struct ustring_buffer *ustring_per_cpu; + +static __always_inline char *test_string(char *str) +{ + struct ustring_buffer *ubuf; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* For safety, do not trust the string pointer */ + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + return NULL; + return kstr; +} + +static __always_inline char *test_ustring(char *str) +{ + struct ustring_buffer *ubuf; + char __user *ustr; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* user space address? */ + ustr = (char __user *)str; + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + return NULL; + + return kstr; +} + /* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { @@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } -/* Filter predicate for char * pointers */ -static int filter_pred_pchar(struct filter_pred *pred, void *event) +static __always_inline int filter_pchar(struct filter_pred *pred, char *str) { - char **addr = (char **)(event + pred->offset); int cmp, match; - int len = strlen(*addr) + 1; /* including tailing '\0' */ + int len; - cmp = pred->regex.match(*addr, &pred->regex, len); + len = strlen(str) + 1; /* including tailing '\0' */ + cmp = pred->regex.match(str, &pred->regex, len); match = cmp ^ pred->not; return match; } +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_string(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} + +/* Filter predicate for char * pointers in user space*/ +static int filter_pred_pchar_user(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_ustring(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} /* * Filter predicate for dynamic sized arrays of characters. @@ -706,6 +777,29 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) return match; } +/* + * Filter predicate for relative dynamic sized arrays of characters. + * These are implemented through a list of strings at the end + * of the entry as same as dynamic string. + * The difference is that the relative one records the location offset + * from the field itself, not the event entry. + */ +static int filter_pred_strrelloc(struct filter_pred *pred, void *event) +{ + u32 *item = (u32 *)(event + pred->offset); + u32 str_item = *item; + int str_loc = str_item & 0xffff; + int str_len = str_item >> 16; + char *addr = (char *)(&item[1]) + str_loc; + int cmp, match; + + cmp = pred->regex.match(addr, &pred->regex, str_len); + + match = cmp ^ pred->not; + + return match; +} + /* Filter predicate for CPUs. */ static int filter_pred_cpu(struct filter_pred *pred, void *event) { @@ -756,7 +850,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event) * * Note: * - @str might not be NULL-terminated if it's of type DYN_STRING - * or STATIC_STRING, unless @len is zero. + * RDYN_STRING, or STATIC_STRING, unless @len is zero. */ static int regex_match_full(char *str, struct regex *r, int len) @@ -1083,6 +1177,9 @@ int filter_assign_type(const char *type) if (strstr(type, "__data_loc") && strstr(type, "char")) return FILTER_DYN_STRING; + if (strstr(type, "__rel_loc") && strstr(type, "char")) + return FILTER_RDYN_STRING; + if (strchr(type, '[') && strstr(type, "char")) return FILTER_STATIC_STRING; @@ -1158,6 +1255,7 @@ static int parse_pred(const char *str, void *data, struct filter_pred *pred = NULL; char num_buf[24]; /* Big enough to hold an address */ char *field_name; + bool ustring = false; char q; u64 val; int len; @@ -1192,6 +1290,12 @@ static int parse_pred(const char *str, void *data, return -EINVAL; } + /* See if the field is a user space string */ + if ((len = str_has_prefix(str + i, ".ustring"))) { + ustring = true; + i += len; + } + while (isspace(str[i])) i++; @@ -1318,10 +1422,24 @@ static int parse_pred(const char *str, void *data, pred->fn = filter_pred_string; pred->regex.field_len = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) + } else if (field->filter_type == FILTER_DYN_STRING) { pred->fn = filter_pred_strloc; - else - pred->fn = filter_pred_pchar; + } else if (field->filter_type == FILTER_RDYN_STRING) + pred->fn = filter_pred_strrelloc; + else { + + if (!ustring_per_cpu) { + /* Once allocated, keep it around for good */ + ustring_per_cpu = alloc_percpu(struct ustring_buffer); + if (!ustring_per_cpu) + goto err_mem; + } + + if (ustring) + pred->fn = filter_pred_pchar_user; + else + pred->fn = filter_pred_pchar; + } /* go past the last quote */ i++; @@ -1387,6 +1505,9 @@ static int parse_pred(const char *str, void *data, err_free: kfree(pred); return -EINVAL; +err_mem: + kfree(pred); + return -ENOMEM; } enum { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 319f9c8ca7e7..5e6a988a8a51 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -217,6 +217,20 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, return (u64)(unsigned long)addr; } +static u64 hist_field_reldynstring(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + u32 *item = event + hist_field->field->offset; + u32 str_item = *item; + int str_loc = str_item & 0xffff; + char *addr = (char *)&item[1] + str_loc; + + return (u64)(unsigned long)addr; +} + static u64 hist_field_pstring(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -1956,8 +1970,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (field->filter_type == FILTER_STATIC_STRING) { hist_field->fn = hist_field_string; hist_field->size = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) + } else if (field->filter_type == FILTER_DYN_STRING) { hist_field->fn = hist_field_dynstring; + } else if (field->filter_type == FILTER_RDYN_STRING) + hist_field->fn = hist_field_reldynstring; else hist_field->fn = hist_field_pstring; } else { @@ -2745,9 +2761,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, } static struct event_command trigger_hist_cmd; -static int event_hist_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +static int event_hist_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); static bool compatible_keys(struct hist_trigger_data *target_hist_data, struct hist_trigger_data *hist_data, @@ -2950,8 +2966,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, var_hist->hist_data = hist_data; /* Create the new histogram with our variable */ - ret = event_hist_trigger_func(&trigger_hist_cmd, file, - "", "hist", cmd); + ret = event_hist_trigger_parse(&trigger_hist_cmd, file, + "", "hist", cmd); if (ret) { kfree(cmd); kfree(var_hist->cmd); @@ -4961,7 +4977,8 @@ static inline void add_to_key(char *compound_key, void *key, struct ftrace_event_field *field; field = key_field->field; - if (field->filter_type == FILTER_DYN_STRING) + if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) size = *(u32 *)(rec + field->offset) >> 16; else if (field->filter_type == FILTER_STATIC_STRING) size = field->size; @@ -5712,8 +5729,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data) for (i = 0; i < hist_data->n_field_var_hists; i++) { file = hist_data->field_var_hists[i]->hist_data->event_file; cmd = hist_data->field_var_hists[i]->cmd; - ret = event_hist_trigger_func(&trigger_hist_cmd, file, - "!hist", "hist", cmd); + ret = event_hist_trigger_parse(&trigger_hist_cmd, file, + "!hist", "hist", cmd); WARN_ON_ONCE(ret < 0); } } @@ -5742,7 +5759,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_ops = { - .func = event_hist_trigger, + .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_init, .free = event_hist_trigger_free, @@ -5776,7 +5793,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_named_ops = { - .func = event_hist_trigger, + .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_named_init, .free = event_hist_trigger_named_free, @@ -5893,7 +5910,7 @@ static bool hist_trigger_match(struct event_trigger_data *data, return true; } -static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, +static int hist_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6045,7 +6062,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, return false; } -static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, +static void hist_unregister_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6129,9 +6146,9 @@ static void hist_unreg_all(struct trace_event_file *file) } } -static int event_hist_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int event_hist_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT; struct event_trigger_data *trigger_data; @@ -6245,7 +6262,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, goto out_free; } - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); se_name = trace_event_name(file->event_call); se = find_synth_event(se_name); if (se) @@ -6254,7 +6271,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, goto out_free; } - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of triggers registered, * but if it didn't register any it returns zero. Consider no @@ -6297,7 +6314,7 @@ enable: return ret; out_unreg: - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); @@ -6314,7 +6331,7 @@ static struct event_command trigger_hist_cmd = { .name = "hist", .trigger_type = ETT_EVENT_HIST, .flags = EVENT_CMD_FL_NEEDS_REC, - .func = event_hist_trigger_func, + .parse = event_hist_trigger_parse, .reg = hist_register_trigger, .unreg = hist_unregister_trigger, .unreg_all = hist_unreg_all, @@ -6366,28 +6383,28 @@ hist_enable_count_trigger(struct event_trigger_data *data, } static struct event_trigger_ops hist_enable_trigger_ops = { - .func = hist_enable_trigger, + .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_enable_count_trigger_ops = { - .func = hist_enable_count_trigger, + .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_trigger_ops = { - .func = hist_enable_trigger, + .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_count_trigger_ops = { - .func = hist_enable_count_trigger, + .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, @@ -6429,7 +6446,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file) static struct event_command trigger_hist_enable_cmd = { .name = ENABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, @@ -6440,7 +6457,7 @@ static struct event_command trigger_hist_enable_cmd = { static struct event_command trigger_hist_disable_cmd = { .name = DISABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index c188045c5f97..d6b4935a78c0 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -168,10 +168,14 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) continue; if (field->filter_type == FILTER_STATIC_STRING) continue; - if (field->filter_type == FILTER_DYN_STRING) { + if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) { u32 *str_item; int str_loc = entry_size & 0xffff; + if (field->filter_type == FILTER_RDYN_STRING) + str_loc -= field->offset + field->size; + str_item = (u32 *)(entry + field->offset); *str_item = str_loc; /* string length is 0. */ } else { @@ -214,7 +218,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (field->filter_type == FILTER_STATIC_STRING) { strlcpy(entry + field->offset, addr, field->size); - } else if (field->filter_type == FILTER_DYN_STRING) { + } else if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) { int str_len = strlen(addr) + 1; int str_loc = entry_size & 0xffff; u32 *str_item; @@ -229,6 +234,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) strlcpy(entry + (entry_size - str_len), addr, str_len); str_item = (u32 *)(entry + field->offset); + if (field->filter_type == FILTER_RDYN_STRING) + str_loc -= field->offset + field->size; *str_item = (str_len << 16) | str_loc; } else { char **paddr; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index ca9c13b2ecf4..154db74dadbc 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1979,7 +1979,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val); /** * synth_event_add_val - Add a named field's value to an open synth trace * @field_name: The name of the synthetic event field value to set - * @val: The value to set the next field to + * @val: The value to set the named field to * @trace_state: A pointer to object tracking the piecewise trace state * * Set the value of the named field in an event that's been opened by @@ -2054,6 +2054,13 @@ static int create_synth_event(const char *raw_command) last_cmd_set(raw_command); + name = raw_command; + + /* Don't try to process if not our system */ + if (name[0] != 's' || name[1] != ':') + return -ECANCELED; + name += 2; + p = strpbrk(raw_command, " \t"); if (!p) { synth_err(SYNTH_ERR_INVALID_CMD, 0); @@ -2062,12 +2069,6 @@ static int create_synth_event(const char *raw_command) fields = skip_spaces(p); - name = raw_command; - - if (name[0] != 's' || name[1] != ':') - return -ECANCELED; - name += 2; - /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 3d5c07239a2a..d00fee705f9c 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file, if (data->paused) continue; if (!rec) { - data->ops->func(data, buffer, rec, event); + data->ops->trigger(data, buffer, rec, event); continue; } filter = rcu_dereference_sched(data->filter); @@ -78,7 +78,7 @@ event_triggers_call(struct trace_event_file *file, tt |= data->cmd_ops->trigger_type; continue; } - data->ops->func(data, buffer, rec, event); + data->ops->trigger(data, buffer, rec, event); } return tt; } @@ -106,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file, if (data->paused) continue; if (data->cmd_ops->trigger_type & tt) - data->ops->func(data, NULL, NULL, NULL); + data->ops->trigger(data, NULL, NULL, NULL); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -245,7 +245,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff) mutex_lock(&trigger_cmd_mutex); list_for_each_entry(p, &trigger_commands, list) { if (strcmp(p->name, command) == 0) { - ret = p->func(p, file, buff, command, next); + ret = p->parse(p, file, buff, command, next); goto out_unlock; } } @@ -540,7 +540,6 @@ void update_cond_flag(struct trace_event_file *file) /** * register_trigger - Generic event_command @reg implementation * @glob: The raw string used to register the trigger - * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data to associate with the trigger * @file: The trace_event_file associated with the event * @@ -551,7 +550,7 @@ void update_cond_flag(struct trace_event_file *file) * * Return: 0 on success, errno otherwise */ -static int register_trigger(char *glob, struct event_trigger_ops *ops, +static int register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -589,7 +588,6 @@ out: /** * unregister_trigger - Generic event_command @unreg implementation * @glob: The raw string used to register the trigger - * @ops: The trigger ops associated with the trigger * @test: Trigger-specific data used to find the trigger to remove * @file: The trace_event_file associated with the event * @@ -598,7 +596,7 @@ out: * Usually used directly as the @unreg method in event command * implementations. */ -static void unregister_trigger(char *glob, struct event_trigger_ops *ops, +static void unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file) { @@ -621,8 +619,350 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, data->ops->free(data->ops, data); } +/* + * Event trigger parsing helper functions. + * + * These functions help make it easier to write an event trigger + * parsing function i.e. the struct event_command.parse() callback + * function responsible for parsing and registering a trigger command + * written to the 'trigger' file. + * + * A trigger command (or just 'trigger' for short) takes the form: + * [trigger] [if filter] + * + * The struct event_command.parse() callback (and other struct + * event_command functions) refer to several components of a trigger + * command. Those same components are referenced by the event trigger + * parsing helper functions defined below. These components are: + * + * cmd - the trigger command name + * glob - the trigger command name optionally prefaced with '!' + * param_and_filter - text following cmd and ':' + * param - text following cmd and ':' and stripped of filter + * filter - the optional filter text following (and including) 'if' + * + * To illustrate the use of these componenents, here are some concrete + * examples. For the following triggers: + * + * echo 'traceon:5 if pid == 0' > trigger + * - 'traceon' is both cmd and glob + * - '5 if pid == 0' is the param_and_filter + * - '5' is the param + * - 'if pid == 0' is the filter + * + * echo 'enable_event:sys:event:n' > trigger + * - 'enable_event' is both cmd and glob + * - 'sys:event:n' is the param_and_filter + * - 'sys:event:n' is the param + * - there is no filter + * + * echo 'hist:keys=pid if prio > 50' > trigger + * - 'hist' is both cmd and glob + * - 'keys=pid if prio > 50' is the param_and_filter + * - 'keys=pid' is the param + * - 'if prio > 50' is the filter + * + * echo '!enable_event:sys:event:n' > trigger + * - 'enable_event' the cmd + * - '!enable_event' is the glob + * - 'sys:event:n' is the param_and_filter + * - 'sys:event:n' is the param + * - there is no filter + * + * echo 'traceoff' > trigger + * - 'traceoff' is both cmd and glob + * - there is no param_and_filter + * - there is no param + * - there is no filter + * + * There are a few different categories of event trigger covered by + * these helpers: + * + * - triggers that don't require a parameter e.g. traceon + * - triggers that do require a parameter e.g. enable_event and hist + * - triggers that though they may not require a param may support an + * optional 'n' param (n = number of times the trigger should fire) + * e.g.: traceon:5 or enable_event:sys:event:n + * - triggers that do not support an 'n' param e.g. hist + * + * These functions can be used or ignored as necessary - it all + * depends on the complexity of the trigger, and the granularity of + * the functions supported reflects the fact that some implementations + * may need to customize certain aspects of their implementations and + * won't need certain functions. For instance, the hist trigger + * implementation doesn't use event_trigger_separate_filter() because + * it has special requirements for handling the filter. + */ + +/** + * event_trigger_check_remove - check whether an event trigger specifies remove + * @glob: The trigger command string, with optional remove(!) operator + * + * The event trigger callback implementations pass in 'glob' as a + * parameter. This is the command name either with or without a + * remove(!) operator. This function simply parses the glob and + * determines whether the command corresponds to a trigger removal or + * a trigger addition. + * + * Return: true if this is a remove command, false otherwise + */ +bool event_trigger_check_remove(const char *glob) +{ + return (glob && glob[0] == '!') ? true : false; +} + +/** + * event_trigger_empty_param - check whether the param is empty + * @param: The trigger param string + * + * The event trigger callback implementations pass in 'param' as a + * parameter. This corresponds to the string following the command + * name minus the command name. This function can be called by a + * callback implementation for any command that requires a param; a + * callback that doesn't require a param can ignore it. + * + * Return: true if this is an empty param, false otherwise + */ +bool event_trigger_empty_param(const char *param) +{ + return !param; +} + +/** + * event_trigger_separate_filter - separate an event trigger from a filter + * @param: The param string containing trigger and possibly filter + * @trigger: outparam, will be filled with a pointer to the trigger + * @filter: outparam, will be filled with a pointer to the filter + * @param_required: Specifies whether or not the param string is required + * + * Given a param string of the form '[trigger] [if filter]', this + * function separates the filter from the trigger and returns the + * trigger in *trigger and the filter in *filter. Either the *trigger + * or the *filter may be set to NULL by this function - if not set to + * NULL, they will contain strings corresponding to the trigger and + * filter. + * + * There are two cases that need to be handled with respect to the + * passed-in param: either the param is required, or it is not + * required. If @param_required is set, and there's no param, it will + * return -EINVAL. If @param_required is not set and there's a param + * that starts with a number, that corresponds to the case of a + * trigger with :n (n = number of times the trigger should fire) and + * the parsing continues normally; otherwise the function just returns + * and assumes param just contains a filter and there's nothing else + * to do. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_separate_filter(char *param_and_filter, char **param, + char **filter, bool param_required) +{ + int ret = 0; + + *param = *filter = NULL; + + if (!param_and_filter) { + if (param_required) + ret = -EINVAL; + goto out; + } + + /* + * Here we check for an optional param. The only legal + * optional param is :n, and if that's the case, continue + * below. Otherwise we assume what's left is a filter and + * return it as the filter string for the caller to deal with. + */ + if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) { + *filter = param_and_filter; + goto out; + } + + /* + * Separate the param from the filter (param [if filter]). + * Here we have either an optional :n param or a required + * param and an optional filter. + */ + *param = strsep(¶m_and_filter, " \t"); + + /* + * Here we have a filter, though it may be empty. + */ + if (param_and_filter) { + *filter = skip_spaces(param_and_filter); + if (!**filter) + *filter = NULL; + } +out: + return ret; +} + +/** + * event_trigger_alloc - allocate and init event_trigger_data for a trigger + * @cmd_ops: The event_command operations for the trigger + * @cmd: The cmd string + * @param: The param string + * @private_data: User data to associate with the event trigger + * + * Allocate an event_trigger_data instance and initialize it. The + * @cmd_ops are used along with the @cmd and @param to get the + * trigger_ops to assign to the event_trigger_data. @private_data can + * also be passed in and associated with the event_trigger_data. + * + * Use event_trigger_free() to free an event_trigger_data object. + * + * Return: The trigger_data object success, NULL otherwise + */ +struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data) +{ + struct event_trigger_data *trigger_data; + struct event_trigger_ops *trigger_ops; + + trigger_ops = cmd_ops->get_trigger_ops(cmd, param); + + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + if (!trigger_data) + return NULL; + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; + trigger_data->cmd_ops = cmd_ops; + trigger_data->private_data = private_data; + + INIT_LIST_HEAD(&trigger_data->list); + INIT_LIST_HEAD(&trigger_data->named_list); + RCU_INIT_POINTER(trigger_data->filter, NULL); + + return trigger_data; +} + +/** + * event_trigger_parse_num - parse and return the number param for a trigger + * @param: The param string + * @trigger_data: The trigger_data for the trigger + * + * Parse the :n (n = number of times the trigger should fire) param + * and set the count variable in the trigger_data to the parsed count. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_parse_num(char *param, + struct event_trigger_data *trigger_data) +{ + char *number; + int ret = 0; + + if (param) { + number = strsep(¶m, ":"); + + if (!strlen(number)) + return -EINVAL; + + /* + * We use the callback data field (which is a pointer) + * as our counter. + */ + ret = kstrtoul(number, 0, &trigger_data->count); + } + + return ret; +} + +/** + * event_trigger_set_filter - set an event trigger's filter + * @cmd_ops: The event_command operations for the trigger + * @file: The event file for the trigger's event + * @param: The string containing the filter + * @trigger_data: The trigger_data for the trigger + * + * Set the filter for the trigger. If the filter is NULL, just return + * without error. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_set_filter(struct event_command *cmd_ops, + struct trace_event_file *file, + char *param, + struct event_trigger_data *trigger_data) +{ + if (param && cmd_ops->set_filter) + return cmd_ops->set_filter(param, trigger_data, file); + + return 0; +} + +/** + * event_trigger_reset_filter - reset an event trigger's filter + * @cmd_ops: The event_command operations for the trigger + * @trigger_data: The trigger_data for the trigger + * + * Reset the filter for the trigger to no filter. + */ +void event_trigger_reset_filter(struct event_command *cmd_ops, + struct event_trigger_data *trigger_data) +{ + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); +} + +/** + * event_trigger_register - register an event trigger + * @cmd_ops: The event_command operations for the trigger + * @file: The event file for the trigger's event + * @glob: The trigger command string, with optional remove(!) operator + * @cmd: The cmd string + * @param: The param string + * @trigger_data: The trigger_data for the trigger + * @n_registered: optional outparam, the number of triggers registered + * + * Register an event trigger. The @cmd_ops are used to call the + * cmd_ops->reg() function which actually does the registration. The + * cmd_ops->reg() function returns the number of triggers registered, + * which is assigned to n_registered, if n_registered is non-NULL. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_register(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + char *cmd, + char *param, + struct event_trigger_data *trigger_data, + int *n_registered) +{ + int ret; + + if (n_registered) + *n_registered = 0; + + ret = cmd_ops->reg(glob, trigger_data, file); + /* + * The above returns on success the # of functions enabled, + * but if it didn't find any functions it returns zero. + * Consider no functions a failure too. + */ + if (!ret) { + cmd_ops->unreg(glob, trigger_data, file); + ret = -ENOENT; + } else if (ret > 0) { + if (n_registered) + *n_registered = ret; + /* Just return zero, not the number of enabled functions */ + ret = 0; + } + + return ret; +} + +/* + * End event trigger parsing helper functions. + */ + /** - * event_trigger_callback - Generic event_command @func implementation + * event_trigger_parse - Generic event_command @parse implementation * @cmd_ops: The command ops, used for trigger registration * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger @@ -632,15 +972,15 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, * Common implementation for event command parsing and trigger * instantiation. * - * Usually used directly as the @func method in event command + * Usually used directly as the @parse method in event command * implementations. * * Return: 0 on success, errno otherwise */ static int -event_trigger_callback(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +event_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct event_trigger_data *trigger_data; struct event_trigger_ops *trigger_ops; @@ -673,7 +1013,7 @@ event_trigger_callback(struct event_command *cmd_ops, INIT_LIST_HEAD(&trigger_data->named_list); if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); kfree(trigger_data); ret = 0; goto out; @@ -708,14 +1048,14 @@ event_trigger_callback(struct event_command *cmd_ops, out_reg: /* Up the trigger_data count to make sure reg doesn't free it on failure */ event_trigger_init(trigger_ops, trigger_data); - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. */ if (!ret) { - cmd_ops->unreg(glob, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob, trigger_data, file); ret = -ENOENT; } else if (ret > 0) ret = 0; @@ -1023,28 +1363,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops traceon_trigger_ops = { - .func = traceon_trigger, + .trigger = traceon_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceon_count_trigger_ops = { - .func = traceon_count_trigger, + .trigger = traceon_count_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_trigger_ops = { - .func = traceoff_trigger, + .trigger = traceoff_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_count_trigger_ops = { - .func = traceoff_count_trigger, + .trigger = traceoff_count_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1069,7 +1409,7 @@ onoff_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_traceon_cmd = { .name = "traceon", .trigger_type = ETT_TRACE_ONOFF, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1080,7 +1420,7 @@ static struct event_command trigger_traceoff_cmd = { .name = "traceoff", .trigger_type = ETT_TRACE_ONOFF, .flags = EVENT_CMD_FL_POST_TRIGGER, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1116,14 +1456,14 @@ snapshot_count_trigger(struct event_trigger_data *data, } static int -register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, +register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { if (tracing_alloc_snapshot_instance(file->tr) != 0) return 0; - return register_trigger(glob, ops, data, file); + return register_trigger(glob, data, file); } static int @@ -1135,14 +1475,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops snapshot_trigger_ops = { - .func = snapshot_trigger, + .trigger = snapshot_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops snapshot_count_trigger_ops = { - .func = snapshot_count_trigger, + .trigger = snapshot_count_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1157,7 +1497,7 @@ snapshot_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_snapshot_cmd = { .name = "snapshot", .trigger_type = ETT_SNAPSHOT, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_snapshot_trigger, .unreg = unregister_trigger, .get_trigger_ops = snapshot_get_trigger_ops, @@ -1226,14 +1566,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops stacktrace_trigger_ops = { - .func = stacktrace_trigger, + .trigger = stacktrace_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops stacktrace_count_trigger_ops = { - .func = stacktrace_count_trigger, + .trigger = stacktrace_count_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1249,7 +1589,7 @@ static struct event_command trigger_stacktrace_cmd = { .name = "stacktrace", .trigger_type = ETT_STACKTRACE, .flags = EVENT_CMD_FL_POST_TRIGGER, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = stacktrace_get_trigger_ops, @@ -1353,36 +1693,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_enable_trigger_ops = { - .func = event_enable_trigger, + .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_enable_count_trigger_ops = { - .func = event_enable_count_trigger, + .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_trigger_ops = { - .func = event_enable_trigger, + .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_count_trigger_ops = { - .func = event_enable_count_trigger, + .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -int event_enable_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +int event_enable_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; @@ -1455,7 +1795,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, trigger_data->private_data = enable_data; if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); kfree(trigger_data); kfree(enable_data); ret = 0; @@ -1502,7 +1842,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, ret = trace_event_enable_disable(event_enable_file, 1, 1); if (ret < 0) goto out_put; - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. @@ -1532,7 +1872,6 @@ int event_enable_trigger_func(struct event_command *cmd_ops, } int event_enable_register_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -1574,7 +1913,6 @@ out: } void event_enable_unregister_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file) { @@ -1628,7 +1966,7 @@ event_enable_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_enable_cmd = { .name = ENABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, @@ -1638,7 +1976,7 @@ static struct event_command trigger_enable_cmd = { static struct event_command trigger_disable_cmd = { .name = DISABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 4e1257f50aa3..508f14af4f2c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -328,11 +328,9 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) static void __disable_trace_kprobe(struct trace_probe *tp) { - struct trace_probe *pos; struct trace_kprobe *tk; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tk = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { if (!trace_kprobe_is_registered(tk)) continue; if (trace_kprobe_is_return(tk)) @@ -349,7 +347,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp) static int enable_trace_kprobe(struct trace_event_call *call, struct trace_event_file *file) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_kprobe *tk; bool enabled; int ret = 0; @@ -370,8 +368,7 @@ static int enable_trace_kprobe(struct trace_event_call *call, if (enabled) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tk = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { if (trace_kprobe_has_gone(tk)) continue; ret = __enable_trace_kprobe(tk); @@ -560,11 +557,9 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, struct trace_kprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; - struct trace_probe *pos; int i; - list_for_each_entry(pos, &tpe->probes, list) { - orig = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(orig, &tpe->probes, tp.list) { if (strcmp(trace_kprobe_symbol(orig), trace_kprobe_symbol(comp)) || trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) @@ -1176,15 +1171,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_kprobe *tk; + unsigned long nmissed; if (!is_trace_kprobe(ev)) return 0; tk = to_trace_kprobe(ev); + nmissed = trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), - tk->rp.kp.nmissed); + nmissed); return 0; } @@ -1384,17 +1382,11 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = trace_file; - dsize = __get_data_size(&tk->tp, regs); - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, - call->event.type, - sizeof(*entry) + tk->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, + sizeof(*entry) + tk->tp.size + dsize); + if (!entry) return; fbuffer.regs = regs; @@ -1431,16 +1423,11 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = trace_file; - dsize = __get_data_size(&tk->tp, regs); - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, - call->event.type, - sizeof(*entry) + tk->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + + entry = trace_event_buffer_reserve(&fbuffer, trace_file, + sizeof(*entry) + tk->tp.size + dsize); + if (!entry) return; fbuffer.regs = regs; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 89d6cbac6f10..870a08da5b48 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -138,8 +138,7 @@ static void osnoise_unregister_instance(struct trace_array *tr) if (!found) return; - synchronize_rcu(); - kfree(inst); + kvfree_rcu(inst); } /* @@ -2122,6 +2121,13 @@ out_unhook_irq: return -EINVAL; } +static void osnoise_unhook_events(void) +{ + unhook_thread_events(); + unhook_softirq_events(); + unhook_irq_events(); +} + /* * osnoise_workload_start - start the workload and hook to events */ @@ -2154,7 +2160,14 @@ static int osnoise_workload_start(void) retval = start_per_cpu_kthreads(); if (retval) { - unhook_irq_events(); + trace_osnoise_callback_enabled = false; + /* + * Make sure that ftrace_nmi_enter/exit() see + * trace_osnoise_callback_enabled as false before continuing. + */ + barrier(); + + osnoise_unhook_events(); return retval; } @@ -2185,9 +2198,7 @@ static void osnoise_workload_stop(void) stop_per_cpu_kthreads(); - unhook_irq_events(); - unhook_softirq_events(); - unhook_thread_events(); + osnoise_unhook_events(); } static void osnoise_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3547e7176ff7..8aa493d25c73 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -445,14 +445,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) char irqs_off; int hardirq; int softirq; + int bh_off; int nmi; nmi = entry->flags & TRACE_FLAG_NMI; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + bh_off = entry->flags & TRACE_FLAG_BH_OFF; irqs_off = + (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + bh_off ? 'b' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.'; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 3ed2a3f37297..73d90179b51b 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -356,6 +356,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs) return -EINVAL; } *pbuf = kstrndup(str, len - 1, GFP_KERNEL); + if (!*pbuf) + return -ENOMEM; return 0; } @@ -1138,8 +1140,7 @@ int trace_probe_remove_file(struct trace_probe *tp, return -ENOENT; list_del_rcu(&link->list); - synchronize_rcu(); - kfree(link); + kvfree_rcu(link); if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, TP_FLAG_TRACE); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8bfcd3b09422..f755bde42fd0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -323,8 +323,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -367,8 +366,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, sys_data->exit_event->event.type, sizeof(*entry), trace_ctx); if (!event) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 4f35514a48f3..9711589273cd 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -410,12 +410,10 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, struct trace_uprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; - struct trace_probe *pos; struct inode *comp_inode = d_real_inode(comp->path.dentry); int i; - list_for_each_entry(pos, &tpe->probes, list) { - orig = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(orig, &tpe->probes, tp.list) { if (comp_inode != d_real_inode(orig->path.dentry) || comp->offset != orig->offset) continue; @@ -950,8 +948,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; - struct trace_buffer *buffer; - struct ring_buffer_event *event; + struct trace_event_buffer fbuffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); @@ -966,12 +963,10 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0); - if (!event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); + if (!entry) return; - entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { entry->vaddr[0] = func; entry->vaddr[1] = instruction_pointer(regs); @@ -983,7 +978,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); + trace_event_buffer_commit(&fbuffer); } /* uprobe handler */ @@ -1076,14 +1071,12 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) static void __probe_event_disable(struct trace_probe *tp) { - struct trace_probe *pos; struct trace_uprobe *tu; tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { if (!tu->inode) continue; @@ -1095,7 +1088,7 @@ static void __probe_event_disable(struct trace_probe *tp) static int probe_event_enable(struct trace_event_call *call, struct trace_event_file *file, filter_func_t filter) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; bool enabled; int ret; @@ -1130,8 +1123,7 @@ static int probe_event_enable(struct trace_event_call *call, if (ret) goto err_flags; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = trace_uprobe_enable(tu, filter); if (ret) { __probe_event_disable(tp); @@ -1276,7 +1268,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, static int uprobe_perf_close(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; int ret = 0; @@ -1288,8 +1280,7 @@ static int uprobe_perf_close(struct trace_event_call *call, if (trace_uprobe_filter_remove(tu->tp.event->filter, event)) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (ret) break; @@ -1301,7 +1292,7 @@ static int uprobe_perf_close(struct trace_event_call *call, static int uprobe_perf_open(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; int err = 0; @@ -1313,8 +1304,7 @@ static int uprobe_perf_open(struct trace_event_call *call, if (trace_uprobe_filter_add(tu->tp.event->filter, event)) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); @@ -1620,6 +1610,11 @@ create_local_trace_uprobe(char *name, unsigned long offs, tu->path = path; tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); + if (!tu->filename) { + ret = -ENOMEM; + goto error; + } + init_trace_event_call(tu); ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1a72b7d95cdc..4d34dc0b0fee 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -21,6 +21,7 @@ static const char *random_strings[] = { static void simple_thread_func(int cnt) { + unsigned long bitmask[1] = {0xdeadbeefUL}; int array[6]; int len = cnt % 5; int i; @@ -43,6 +44,8 @@ static void simple_thread_func(int cnt) trace_foo_with_template_cond("prints other times", cnt); trace_foo_with_template_print("I have to be different", cnt); + + trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); } static int simple_thread(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index e61471ab7d14..cbbbb83beced 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -155,7 +155,7 @@ * * To assign this string, use the helper macro __assign_str_len(). * - * __assign_str(foo, bar, len); + * __assign_str_len(foo, bar, len); * * Then len + 1 is allocated to the ring buffer, and a nul terminating * byte is added. This is similar to: @@ -506,6 +506,39 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, TP_ARGS(foo, bar), TP_printk("bar %s %d", __get_str(foo), __entry->bar)); +/* + * There are yet another __rel_loc dynamic data attribute. If you + * use __rel_dynamic_array() and __rel_string() etc. macros, you + * can use this attribute. There is no difference from the viewpoint + * of functionality with/without 'rel' but the encoding is a bit + * different. This is expected to be used with user-space event, + * there is no reason that the kernel event use this, but only for + * testing. + */ + +TRACE_EVENT(foo_rel_loc, + + TP_PROTO(const char *foo, int bar, unsigned long *mask), + + TP_ARGS(foo, bar, mask), + + TP_STRUCT__entry( + __rel_string( foo, foo ) + __field( int, bar ) + __rel_bitmask( bitmask, + BITS_PER_BYTE * sizeof(unsigned long) ) + ), + + TP_fast_assign( + __assign_rel_str(foo, foo); + __entry->bar = bar; + __assign_rel_bitmask(bitmask, mask, + BITS_PER_BYTE * sizeof(unsigned long)); + ), + + TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar, + __get_rel_bitmask(bitmask)) +); #endif /***** NOTICE! The #if protection ends here. *****/ diff --git a/scripts/Makefile b/scripts/Makefile index 9adb6d247818..b082d2f93357 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -17,6 +17,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include +HOSTLDLIBS_sorttable = -lpthread HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include HOSTCFLAGS_sign-file.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_sign-file = $(CRYPTO_LIBS) @@ -29,7 +30,10 @@ ARCH := x86 endif HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED -HOSTLDLIBS_sorttable = -lpthread +endif + +ifdef CONFIG_DYNAMIC_FTRACE +HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 9716f285e404..82bc736532f1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -403,6 +403,9 @@ if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then ${RESOLVE_BTFIDS} vmlinux fi +info SYSMAP System.map +mksysmap vmlinux System.map + if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then info SORTTAB vmlinux if ! sorttable vmlinux; then @@ -411,9 +414,6 @@ if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then fi fi -info SYSMAP System.map -mksysmap vmlinux System.map - # step a (see comment above) if [ -n "${CONFIG_KALLSYMS}" ]; then mksysmap ${kallsyms_vmlinux} .tmp_System.map diff --git a/scripts/sorttable.c b/scripts/sorttable.c index b7c2ad71f9cf..70bdc787ddfb 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -30,6 +30,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <errno.h> +#include <pthread.h> #include <tools/be_byteshift.h> #include <tools/le_byteshift.h> diff --git a/scripts/sorttable.h b/scripts/sorttable.h index a2baa2fefb13..1e8b77928fa4 100644 --- a/scripts/sorttable.h +++ b/scripts/sorttable.h @@ -19,6 +19,9 @@ #undef extable_ent_size #undef compare_extable +#undef get_mcount_loc +#undef sort_mcount_loc +#undef elf_mcount_loc #undef do_sort #undef Elf_Addr #undef Elf_Ehdr @@ -41,6 +44,9 @@ #ifdef SORTTABLE_64 # define extable_ent_size 16 # define compare_extable compare_extable_64 +# define get_mcount_loc get_mcount_loc_64 +# define sort_mcount_loc sort_mcount_loc_64 +# define elf_mcount_loc elf_mcount_loc_64 # define do_sort do_sort_64 # define Elf_Addr Elf64_Addr # define Elf_Ehdr Elf64_Ehdr @@ -62,6 +68,9 @@ #else # define extable_ent_size 8 # define compare_extable compare_extable_32 +# define get_mcount_loc get_mcount_loc_32 +# define sort_mcount_loc sort_mcount_loc_32 +# define elf_mcount_loc elf_mcount_loc_32 # define do_sort do_sort_32 # define Elf_Addr Elf32_Addr # define Elf_Ehdr Elf32_Ehdr @@ -84,8 +93,6 @@ #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* ORC unwinder only support X86_64 */ -#include <errno.h> -#include <pthread.h> #include <asm/orc_types.h> #define ERRSTR_MAXSZ 256 @@ -191,7 +198,64 @@ static int compare_extable(const void *a, const void *b) return 1; return 0; } +#ifdef MCOUNT_SORT_ENABLED +struct elf_mcount_loc { + Elf_Ehdr *ehdr; + Elf_Shdr *init_data_sec; + uint_t start_mcount_loc; + uint_t stop_mcount_loc; +}; + +/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */ +static void *sort_mcount_loc(void *arg) +{ + struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg; + uint_t offset = emloc->start_mcount_loc - _r(&(emloc->init_data_sec)->sh_addr) + + _r(&(emloc->init_data_sec)->sh_offset); + uint_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc; + unsigned char *start_loc = (void *)emloc->ehdr + offset; + + qsort(start_loc, count/sizeof(uint_t), sizeof(uint_t), compare_extable); + return NULL; +} + +/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */ +static void get_mcount_loc(uint_t *_start, uint_t *_stop) +{ + FILE *file_start, *file_stop; + char start_buff[20]; + char stop_buff[20]; + int len = 0; + + file_start = popen(" grep start_mcount System.map | awk '{print $1}' ", "r"); + if (!file_start) { + fprintf(stderr, "get start_mcount_loc error!"); + return; + } + + file_stop = popen(" grep stop_mcount System.map | awk '{print $1}' ", "r"); + if (!file_stop) { + fprintf(stderr, "get stop_mcount_loc error!"); + pclose(file_start); + return; + } + + while (fgets(start_buff, sizeof(start_buff), file_start) != NULL) { + len = strlen(start_buff); + start_buff[len - 1] = '\0'; + } + *_start = strtoul(start_buff, NULL, 16); + + while (fgets(stop_buff, sizeof(stop_buff), file_stop) != NULL) { + len = strlen(stop_buff); + stop_buff[len - 1] = '\0'; + } + *_stop = strtoul(stop_buff, NULL, 16); + pclose(file_start); + pclose(file_stop); +} +#endif static int do_sort(Elf_Ehdr *ehdr, char const *const fname, table_sort_t custom_sort) @@ -217,6 +281,12 @@ static int do_sort(Elf_Ehdr *ehdr, int idx; unsigned int shnum; unsigned int shstrndx; +#ifdef MCOUNT_SORT_ENABLED + struct elf_mcount_loc mstruct; + uint_t _start_mcount_loc = 0; + uint_t _stop_mcount_loc = 0; + pthread_t mcount_sort_thread; +#endif #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) unsigned int orc_ip_size = 0; unsigned int orc_size = 0; @@ -253,6 +323,17 @@ static int do_sort(Elf_Ehdr *ehdr, symtab_shndx = (Elf32_Word *)((const char *)ehdr + _r(&s->sh_offset)); +#ifdef MCOUNT_SORT_ENABLED + /* locate the .init.data section in vmlinux */ + if (!strcmp(secstrings + idx, ".init.data")) { + get_mcount_loc(&_start_mcount_loc, &_stop_mcount_loc); + mstruct.ehdr = ehdr; + mstruct.init_data_sec = s; + mstruct.start_mcount_loc = _start_mcount_loc; + mstruct.stop_mcount_loc = _stop_mcount_loc; + } +#endif + #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* locate the ORC unwind tables */ if (!strcmp(secstrings + idx, ".orc_unwind_ip")) { @@ -294,6 +375,23 @@ static int do_sort(Elf_Ehdr *ehdr, goto out; } #endif + +#ifdef MCOUNT_SORT_ENABLED + if (!mstruct.init_data_sec || !_start_mcount_loc || !_stop_mcount_loc) { + fprintf(stderr, + "incomplete mcount's sort in file: %s\n", + fname); + goto out; + } + + /* create thread to sort mcount_loc concurrently */ + if (pthread_create(&mcount_sort_thread, NULL, &sort_mcount_loc, &mstruct)) { + fprintf(stderr, + "pthread_create mcount_sort_thread failed '%s': %s\n", + strerror(errno), fname); + goto out; + } +#endif if (!extab_sec) { fprintf(stderr, "no __ex_table in file: %s\n", fname); goto out; @@ -364,11 +462,11 @@ out: void *retval = NULL; /* wait for ORC tables sort done */ rc = pthread_join(orc_sort_thread, &retval); - if (rc) + if (rc) { fprintf(stderr, "pthread_join failed '%s': %s\n", strerror(errno), fname); - else if (retval) { + } else if (retval) { rc = -1; fprintf(stderr, "failed to sort ORC tables '%s': %s\n", @@ -376,5 +474,23 @@ out: } } #endif + +#ifdef MCOUNT_SORT_ENABLED + if (mcount_sort_thread) { + void *retval = NULL; + /* wait for mcount sort done */ + rc = pthread_join(mcount_sort_thread, &retval); + if (rc) { + fprintf(stderr, + "pthread_join failed '%s': %s\n", + strerror(errno), fname); + } else if (retval) { + rc = -1; + fprintf(stderr, + "failed to sort mcount '%s': %s\n", + (char *)retval, fname); + } + } +#endif return rc; } diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fe58843d047c..8e24c4c78c7f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field) return 0; } +static int field_is_relative_dynamic(struct tep_format_field *field) +{ + if (strncmp(field->type, "__rel_loc", 9) == 0) + return 1; + + return 0; +} + static int field_is_long(struct tep_format_field *field) { /* includes long long */ @@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_STRING; if (field_is_dynamic(field)) field->flags |= TEP_FIELD_IS_DYNAMIC; + if (field_is_relative_dynamic(field)) + field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; if (field_is_long(field)) field->flags |= TEP_FIELD_IS_LONG; @@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, arg->type = TEP_PRINT_STRING; arg->string.string = token; - arg->string.offset = -1; + arg->string.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar arg->type = TEP_PRINT_BITMASK; arg->bitmask.bitmask = token; - arg->bitmask.offset = -1; + arg->bitmask.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_int_array(event, arg, tok); } - if (strcmp(token, "__get_str") == 0) { + if (strcmp(token, "__get_str") == 0 || + strcmp(token, "__get_rel_str") == 0) { free_token(token); return process_str(event, arg, tok); } - if (strcmp(token, "__get_bitmask") == 0) { + if (strcmp(token, "__get_bitmask") == 0 || + strcmp(token, "__get_rel_bitmask") == 0) { free_token(token); return process_bitmask(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array") == 0) { + if (strcmp(token, "__get_dynamic_array") == 0 || + strcmp(token, "__get_rel_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array_len") == 0) { + if (strcmp(token, "__get_dynamic_array_len") == 0 || + strcmp(token, "__get_rel_dynamic_array_len") == 0) { free_token(token); return process_dynamic_array_len(event, arg, tok); } @@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_STRING: { int str_offset; - if (arg->string.offset == -1) { - struct tep_format_field *f; + if (!arg->string.field) + arg->string.field = tep_find_any_field(event, arg->string.string); + if (!arg->string.field) + break; - f = tep_find_any_field(event, arg->string.string); - arg->string.offset = f->offset; - } - str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, + *(unsigned int *)(data + arg->string.field->offset)); str_offset &= 0xffff; + if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) + str_offset += arg->string.field->offset + arg->string.field->size; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; } @@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, int bitmask_offset; int bitmask_size; - if (arg->bitmask.offset == -1) { - struct tep_format_field *f; - - f = tep_find_any_field(event, arg->bitmask.bitmask); - arg->bitmask.offset = f->offset; - } - bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + if (!arg->bitmask.field) + arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); + if (!arg->bitmask.field) + break; + bitmask_offset = data2host4(tep, + *(unsigned int *)(data + arg->bitmask.field->offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; + if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) + bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; @@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, data + offset, field->size); *len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else *len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a67ad9a5b835..41d4f9f6a843 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,6 +125,7 @@ enum tep_format_flags { TEP_FIELD_IS_LONG = 32, TEP_FIELD_IS_FLAG = 64, TEP_FIELD_IS_SYMBOLIC = 128, + TEP_FIELD_IS_RELATIVE = 256, }; struct tep_format_field { @@ -153,12 +154,12 @@ struct tep_print_arg_atom { struct tep_print_arg_string { char *string; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_bitmask { char *bitmask; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 368826bb5a57..5df177070d53 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { addr = *(unsigned int *)val; - val = record->data + (addr & 0xffff); size = addr >> 16; + addr &= 0xffff; + if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) + addr += arg->str.field->offset + arg->str.field->size; + val = record->data + addr; } /* diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index df9fc00b4cd6..273237a9c5e6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } val = (uintptr_t)(sample->raw_data + offset); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8f7705bbc2da..9e0aee276df8 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; + if (flags & TEP_FIELD_IS_RELATIVE) + offset += fmtf->offset + fmtf->size; } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac0127be0459..f29d37004f55 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2706,6 +2706,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } return sample->raw_data + offset; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7f782a31bda3..82c7f034d91a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 32a721b3e9a5..a5d945415bbc 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c0c010350bc2..d1f1501ce7fc 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -942,6 +942,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a111065b484e..d9a106f0edb2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2365,6 +2365,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; /* record max width for output */ if (size > hde->dynamic_len) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 000000000000..2d52ff0bff7d --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,102 @@ +NAME := rtla +VERSION := 0.5 + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) +LDFLAGS := -ggdb +LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rtla/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ +endif + +.PHONY: all +all: rtla + +rtla: $(OBJ) doc + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) + +static: $(OBJ) + $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl + +.PHONY: install +install: doc_install + $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) + $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 000000000000..6c88446f7e74 --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,36 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla is a meta-tool that includes a set of commands that +aims to analyze the real-time properties of Linux. But, instead of +testing Linux as a black box, rtla leverages kernel tracing +capabilities to provide precise information about the properties +and root causes of unexpected results. + +Installing RTLA + +RTLA depends on some libraries and tools. More precisely, it depends on the +following libraries: + + - libtracefs + - libtraceevent + - procps + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 000000000000..7b73d1eccd0e --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise_get_cpus - return the original "osnoise/cpus" content + * + * It also saves the value to be restored. + */ +char *osnoise_get_cpus(struct osnoise_context *context) +{ + if (context->curr_cpus) + return context->curr_cpus; + + if (context->orig_cpus) + return context->orig_cpus; + + context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL); + + /* + * The error value (NULL) is the same for tracefs_instance_file_read() + * and this functions, so: + */ + return context->orig_cpus; +} + +/* + * osnoise_set_cpus - configure osnoise to run on *cpus + * + * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat + * will run. This function opens this file, saves the current value, + * and set the cpus passed as argument. + */ +int osnoise_set_cpus(struct osnoise_context *context, char *cpus) +{ + char *orig_cpus = osnoise_get_cpus(context); + char buffer[1024]; + int retval; + + if (!orig_cpus) + return -1; + + context->curr_cpus = strdup(cpus); + if (!context->curr_cpus) + return -1; + + snprintf(buffer, 1024, "%s\n", cpus); + + debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer); + if (retval < 0) { + free(context->curr_cpus); + context->curr_cpus = NULL; + return -1; + } + + return 0; +} + +/* + * osnoise_restore_cpus - restore the original "osnoise/cpus" + * + * osnoise_set_cpus() saves the original data for the "osnoise/cpus" + * file. This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. + */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. + * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? */ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) +{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * enable_osnoise - enable osnoise tracer in the trace_instance + */ +int enable_osnoise(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "osnoise"); +} + +/* + * enable_timerlat - enable timerlat tracer in the trace_instance + */ +int enable_timerlat(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "timerlat"); +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. + */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable osnoiser tracer for tracing\n"); + goto out_err; + } + + return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +static void osnoise_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + osnoise_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + osnoise_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + osnoise_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + osnoise_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 000000000000..9e4b2e2a4559 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "trace.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. + */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; +}; + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. + */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long print_stack); + +/* + * osnoise_tool - osnoise based tool definition. + */ +struct osnoise_tool { + struct trace_instance trace; + struct osnoise_context *context; + void *data; + void *params; + time_t start_time; +}; + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(char *tracer); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 000000000000..180fcbe423cd --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" + +struct osnoise_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int set_sched; + int output_divisor; + struct sched_attr sched_param; + + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if (!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + duration = duration / params->output_divisor; + + if (data->bucket_size) + bucket = duration / data->bucket_size; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. + */ + bucket_size = params->output_divisor * params->bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_hist_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_sample / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct osnoise_hist_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'e'}, + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + osnoise_hist_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_hist_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_hist_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->no_header = 1; + break; + case '1': /* no summary */ + params->no_summary = 1; + break; + case '2': /* no index */ + params->no_index = 1; + break; + case '3': /* with zeros */ + params->with_zeros = 1; + break; + default: + osnoise_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_index && !params->with_zeros) + osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + + return params; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct osnoise_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_hist_set_signals - handles the signal to stop the tool + */ +static void +osnoise_hist_set_signals(struct osnoise_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int osnoise_hist_main(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = osnoise_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_destroy; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_destroy; + } + + retval = osnoise_init_trace_hist(tool); + if (retval) + goto out_destroy; + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + osnoise_read_trace_hist(tool); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + osnoise_free_histogram(tool->data); +out_destroy: + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 000000000000..332b2ac205fc --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise top parameters + */ +struct osnoise_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void +osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Operating System Noise"); + trace_seq_printf(s, " "); + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI IRQ Softirq Thread"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void +osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +void osnoise_top_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters + */ +struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::", + long_options, &option_index); + + /* Detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + osnoise_top_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_top_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_top_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + default: + osnoise_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("osnoise needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; + +out_err: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_top_set_signals - handles the signal to stop the tool + */ +static void osnoise_top_set_signals(struct osnoise_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int osnoise_top_main(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_top_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_top(params); + if (!tool) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = osnoise_top_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_top_set_signals(params); + + do { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + osnoise_print_stats(params, tool); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + } while (!stop_tracing); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("osnoise hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c new file mode 100644 index 000000000000..09bd21b8af81 --- /dev/null +++ b/tools/tracing/rtla/src/rtla.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "osnoise.h" +#include "timerlat.h" + +/* + * rtla_usage - print rtla usage + */ +static void rtla_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "rtla version " VERSION, + "", + " usage: rtla COMMAND ...", + "", + " commands:", + " osnoise - gives information about the operating system noise (osnoise)", + " timerlat - measures the timer irq and thread latency", + "", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * run_command - try to run a rtla tool command + * + * It returns 0 if it fails. The tool's main will generally not + * return as they should call exit(). + */ +int run_command(int argc, char **argv, int start_position) +{ + if (strcmp(argv[start_position], "osnoise") == 0) { + osnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "timerlat") == 0) { + timerlat_main(argc-start_position, &argv[start_position]); + goto ran; + } + + return 0; +ran: + return 1; +} + +int main(int argc, char *argv[]) +{ + int retval; + + /* is it an alias? */ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(); + exit(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(); + exit(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 000000000000..97abbf494fee --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "timerlat.h" + +static void timerlat_usage(void) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + timerlat_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + timerlat_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + timerlat_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + timerlat_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 000000000000..88561bfd14f3 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +int timerlat_hist_main(int argc, char *argv[]); +int timerlat_top_main(int argc, char *argv[]); +int timerlat_main(int argc, char *argv[]); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 000000000000..235f9620ef3d --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int set_sched; + struct sched_attr sched_param; + + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct timerlat_hist_cpu { + int *irq; + int *thread; + + int irq_count; + int thread_count; + + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_hist_data { + struct timerlat_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * timerlat_free_histogram - free runtime data + */ +static void +timerlat_free_histogram(struct timerlat_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].irq) + free(data->hist[cpu].irq); + + if (data->hist[cpu].thread) + free(data->hist[cpu].thread); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_hist_data +*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct timerlat_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + /* one histogram for IRQ and one for thread, per cpu */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); + if (!data->hist[cpu].irq) + goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); + if (!data->hist[cpu].thread) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].min_irq = ~0; + data->hist[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_histogram(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + latency = latency / params->output_divisor; + + if (data->bucket_size) + bucket = latency / data->bucket_size; + + if (!thread) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long thread, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_thread); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_irq / data->hist[cpu].irq_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for all CPUs + */ +static void +timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + total += data->hist[cpu].irq[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[bucket]); + } + + if (!params->no_thread) { + total += data->hist[cpu].thread[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[bucket]); + } + + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[data->entries]); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + timerlat_print_summary(params, trace, data); +} + +/* + * timerlat_hist_usage - prints timerlat top usage message + */ +static void timerlat_hist_usage(char *usage) +{ + int i; + + char *msg[] = { + "", + " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_hist_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'e'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + timerlat_hist_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_hist_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + case '0': /* no irq */ + params->no_irq = 1; + break; + case '1': /* no thread */ + params->no_thread = 1; + break; + case '2': /* no header */ + params->no_header = 1; + break; + case '3': /* no summary */ + params->no_summary = 1; + break; + case '4': /* no index */ + params->no_index = 1; + break; + case '5': /* with zeros */ + params->with_zeros = 1; + break; + default: + timerlat_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_irq && params->no_thread) + timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + + if (params->no_index && !params->with_zeros) + timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + + return params; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + if (params->print_stack) { + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct timerlat_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_hist_set_signals - handles the signal to stop the tool + */ +static void +timerlat_hist_set_signals(struct timerlat_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int timerlat_hist_main(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = timerlat_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = timerlat_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_hist; + } + + trace = &tool->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + timerlat_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + timerlat_free_histogram(tool->data); + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c new file mode 100644 index 000000000000..1ebd5291539c --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct timerlat_top_cpu { + int irq_count; + int thread_count; + + unsigned long long cur_irq; + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long cur_thread; + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_top_data { + struct timerlat_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * timerlat_free_top - free runtime data + */ +static void +timerlat_free_top(struct timerlat_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) +{ + struct timerlat_top_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->cpu_data[cpu].min_irq = ~0; + data->cpu_data[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_top(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_top_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + if (!thread) { + cpu_data->irq_count++; + cpu_data->cur_irq = latency; + update_min(&cpu_data->min_irq, &latency); + update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct osnoise_tool *top) +{ + struct timerlat_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, + params->output_divisor == 1 ? "ns" : "us", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + + struct timerlat_top_params *params = top->params; + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. + */ + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - |"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_thread / cpu_data->thread_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + timerlat_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + timerlat_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(char *usage) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_top_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct timerlat_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + timerlat_top_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_top_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + default: + timerlat_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(top->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(top->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + + if (params->print_stack) { + retval = osnoise_set_print_stack(top->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct timerlat_top_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + top->params = params; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_top_set_signals - handles the signal to stop the tool + */ +static void +timerlat_top_set_signals(struct timerlat_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int timerlat_top_main(int argc, char *argv[]) +{ + struct timerlat_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *top; + int return_value = 1; + int retval; + + params = timerlat_top_parse_args(argc, argv); + if (!params) + exit(1); + + top = timerlat_init_top(params); + if (!top) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = timerlat_top_apply_config(top, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &top->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + timerlat_print_stats(params, top); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + }; + + timerlat_print_stats(params, top); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + timerlat_free_top(top->data); + osnoise_destroy_tool(top); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 000000000000..107a0c6387f7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("tracer %s not found!\n", tracer_name); + + err_msg("failed to enable the tracer %s\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + goto out_close; + + retval = write(out_fd, buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + } + + if (trace->seq) + free(trace->seq); + + if (trace->tep) + tep_free(trace->tep); +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 000000000000..0ea1df0ad9a7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +int enable_osnoise(struct trace_instance *trace); +int enable_timerlat(struct trace_instance *trace); + +struct tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 000000000000..1c9f0eea6166 --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <proc/readproc.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> + +#include "utils.h" + +#define MAX_MSG_LENGTH 1024 +int config_debug; + +/* + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * get_llong_from_str - get a long long int from a string + */ +long long get_llong_from_str(char *start) +{ + long long value; + char *end; + + errno = 0; + value = strtoll(start, &end, 10); + if (errno || start == end) + return -1; + + return value; +} + +/* + * get_duration - fill output with a human readable duration since start_time + */ +void get_duration(time_t start_time, char *output, int output_size) +{ + time_t now = time(NULL); + struct tm *tm_info; + time_t duration; + + duration = difftime(now, start_time); + tm_info = localtime(&duration); + + snprintf(output, output_size, "%3d %02d:%02d:%02d", + tm_info->tm_yday, + tm_info->tm_hour - 1, + tm_info->tm_min, + tm_info->tm_sec); +} + +/* + * parse_cpu_list - parse a cpu_list filling a char vector with cpus set + * + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char + * in the monitored_cpus. + * + * XXX: convert to a bitmask. + */ +int parse_cpu_list(char *cpu_list, char **monitored_cpus) +{ + char *mon_cpus; + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + mon_cpus = malloc(nr_cpus * sizeof(char)); + memset(mon_cpus, 0, (nr_cpus * sizeof(char))); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_list: adding cpu %d\n", cpu); + mon_cpus[cpu] = 1; + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_list: adding cpu %d\n", i); + mon_cpus[i] = 1; + } + } + + if (*p == ',') + p++; + } + + *monitored_cpus = mon_cpus; + + return 0; + +err: + debug_msg("Error parsing the cpu list %s", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. + */ +#ifdef __x86_64__ +# define __NR_sched_setattr 314 +# define __NR_sched_getattr 315 +#elif __i386__ +# define __NR_sched_setattr 351 +# define __NR_sched_getattr 352 +#elif __arm__ +# define __NR_sched_setattr 380 +# define __NR_sched_getattr 381 +#elif __aarch64__ +# define __NR_sched_setattr 274 +# define __NR_sched_getattr 275 +#elif __powerpc__ +# define __NR_sched_setattr 355 +# define __NR_sched_getattr 356 +#elif __s390x__ +# define __NR_sched_setattr 345 +# define __NR_sched_getattr 346 +#endif + +#define SCHED_DEADLINE 6 + +static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + unsigned int size, unsigned int flags) +{ + return syscall(__NR_sched_getattr, pid, attr, size, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("boost_with_deadline failed to boost pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm + * + * This function uses procps to list the currently running threads and then + * set the sched_attr *attr to the threads that start with char *comm. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. + */ +int set_comm_sched_attr(const char *comm, struct sched_attr *attr) +{ + int flags = PROC_FILLCOM | PROC_FILLSTAT; + PROCTAB *ptp; + proc_t task; + int retval; + + ptp = openproc(flags); + if (!ptp) { + err_msg("error openproc()\n"); + return -ENOENT; + } + + memset(&task, 0, sizeof(task)); + + while (readproc(ptp, &task)) { + retval = strncmp(comm, task.cmd, strlen(comm)); + if (retval) + continue; + retval = __set_sched_attr(task.tid, attr); + if (retval) + goto out_err; + } + + closeproc(ptp); + return 0; + +out_err: + closeproc(ptp); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_OTHER)) + return -1; + if (prio > sched_get_priority_max(SCHED_OTHER)) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_priority = prio; + break; + default: + return -1; + } + return 0; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 000000000000..9aa962319ca2 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <time.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; + +int parse_prio(char *arg, struct sched_attr *sched_param); +int set_comm_sched_attr(const char *comm, struct sched_attr *attr); |