diff options
70 files changed, 6706 insertions, 292 deletions
diff --git a/Documentation/tools/rtla/Makefile b/Documentation/tools/rtla/Makefile new file mode 100644 index 000000000000..9f2b84af1a6c --- /dev/null +++ b/Documentation/tools/rtla/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Based on bpftool's Documentation Makefile + +INSTALL ?= install +RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty + +PREFIX ?= /usr/share +MANDIR ?= $(PREFIX)/man +MAN1DIR = $(MANDIR)/man1 + +MAN1_RST = $(wildcard rtla*.rst) + +_DOC_MAN1 = $(patsubst %.rst,%.1,$(MAN1_RST)) +DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1)) + +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) +RST2MAN_OPTS += --verbose + +$(OUTPUT)%.1: %.rst +ifndef RST2MAN_DEP + $(error "rst2man not found, but required to generate man pages") +endif + rst2man $(RST2MAN_OPTS) $< > $@ + +man1: $(DOC_MAN1) +man: man1 + +clean: + $(RM) $(DOC_MAN1) + +install: man + $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR) + $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(MAN1DIR) + +uninstall: + $(RM) $(addprefix $(DESTDIR)$(MAN1DIR)/,$(_DOC_MAN1)) + $(RMDIR) $(DESTDIR)$(MAN1DIR) + +.PHONY: man man1 clean install uninstall +.DEFAULT_GOAL := man diff --git a/Documentation/tools/rtla/common_appendix.rst b/Documentation/tools/rtla/common_appendix.rst new file mode 100644 index 000000000000..b494084acccd --- /dev/null +++ b/Documentation/tools/rtla/common_appendix.rst @@ -0,0 +1,12 @@ +REPORTING BUGS +============== +Report bugs to <lkml@vger.kernel.org> + +LICENSE +======= +**rtla** is Free Software licensed under the GNU GPLv2 + +COPYING +======= +Copyright \(C) 2021 Red Hat, Inc. Free use of this software is granted under +the terms of the GNU Public License (GPL). diff --git a/Documentation/tools/rtla/common_hist_options.rst b/Documentation/tools/rtla/common_hist_options.rst new file mode 100644 index 000000000000..0266cd08a6c9 --- /dev/null +++ b/Documentation/tools/rtla/common_hist_options.rst @@ -0,0 +1,23 @@ +**-b**, **--bucket-size** *N* + + Set the histogram bucket size (default *1*). + +**-e**, **--entries** *N* + + Set the number of entries of the histogram (default 256). + +**--no-header** + + Do not print header. + +**--no-summary** + + Do not print summary. + +**--no-index** + + Do not print index. + +**--with-zeros** + + Print zero only entries. diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst new file mode 100644 index 000000000000..721790ad984e --- /dev/null +++ b/Documentation/tools/rtla/common_options.rst @@ -0,0 +1,28 @@ +**-c**, **--cpus** *cpu-list* + + Set the osnoise tracer to run the sample threads in the cpu-list. + +**-d**, **--duration** *time[s|m|h|d]* + + Set the duration of the session. + +**-D**, **--debug** + + Print debug info. + +**-t**, **--trace**\[*=file*] + + Save the stopped trace to [*file|osnoise_trace.txt*]. + +**-P**, **--priority** *o:prio|r:prio|f:prio|d:runtime:period* + + Set scheduling parameters to the osnoise tracer threads, the format to set the priority are: + + - *o:prio* - use SCHED_OTHER with *prio*; + - *r:prio* - use SCHED_RR with *prio*; + - *f:prio* - use SCHED_FIFO with *prio*; + - *d:runtime[us|ms|s]:period[us|ms|s]* - use SCHED_DEADLINE with *runtime* and *period* in nanoseconds. + +**-h**, **--help** + + Print help menu. diff --git a/Documentation/tools/rtla/common_osnoise_description.rst b/Documentation/tools/rtla/common_osnoise_description.rst new file mode 100644 index 000000000000..8973c5df888f --- /dev/null +++ b/Documentation/tools/rtla/common_osnoise_description.rst @@ -0,0 +1,8 @@ +The **rtla osnoise** tool is an interface for the *osnoise* tracer. The +*osnoise* tracer dispatches a kernel thread per-cpu. These threads read the +time in a loop while with preemption, softirq and IRQs enabled, thus +allowing all the sources of operating systme noise during its execution. +The *osnoise*'s tracer threads take note of the delta between each time +read, along with an interference counter of all sources of interference. +At the end of each period, the *osnoise* tracer displays a summary of +the results. diff --git a/Documentation/tools/rtla/common_osnoise_options.rst b/Documentation/tools/rtla/common_osnoise_options.rst new file mode 100644 index 000000000000..d556883e4e26 --- /dev/null +++ b/Documentation/tools/rtla/common_osnoise_options.rst @@ -0,0 +1,17 @@ +**-p**, **--period** *us* + + Set the *osnoise* tracer period in microseconds. + +**-r**, **--runtime** *us* + + Set the *osnoise* tracer runtime in microseconds. + +**-s**, **--stop** *us* + + Stop the trace if a single sample is higher than the argument in microseconds. + If **-T** is set, it will also save the trace to the output. + +**-S**, **--stop-total** *us* + + Stop the trace if the total sample is higher than the argument in microseconds. + If **-T** is set, it will also save the trace to the output. diff --git a/Documentation/tools/rtla/common_timerlat_description.rst b/Documentation/tools/rtla/common_timerlat_description.rst new file mode 100644 index 000000000000..321201cb8597 --- /dev/null +++ b/Documentation/tools/rtla/common_timerlat_description.rst @@ -0,0 +1,10 @@ +The **rtla timerlat** tool is an interface for the *timerlat* tracer. The +*timerlat* tracer dispatches a kernel thread per-cpu. These threads +set a periodic timer to wake themselves up and go back to sleep. After +the wakeup, they collect and generate useful information for the +debugging of operating system timer latency. + +The *timerlat* tracer outputs information in two ways. It periodically +prints the timer latency at the timer *IRQ* handler and the *Thread* +handler. It also enable the trace of the most relevant information via +**osnoise:** tracepoints. diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst new file mode 100644 index 000000000000..e9c1bfd55d48 --- /dev/null +++ b/Documentation/tools/rtla/common_timerlat_options.rst @@ -0,0 +1,16 @@ +**-p**, **--period** *us* + + Set the *timerlat* tracer period in microseconds. + +**-i**, **--irq** *us* + + Stop trace if the *IRQ* latency is higher than the argument in us. + +**-T**, **--thread** *us* + + Stop trace if the *Thread* latency is higher than the argument in us. + +**-s**, **--stack** *us* + + Save the stack trace at the *IRQ* if a *Thread* latency is higher than the + argument in us. diff --git a/Documentation/tools/rtla/common_top_options.rst b/Documentation/tools/rtla/common_top_options.rst new file mode 100644 index 000000000000..f48878938f84 --- /dev/null +++ b/Documentation/tools/rtla/common_top_options.rst @@ -0,0 +1,3 @@ +**-q**, **--quiet** + + Print only a summary at the end of the session. diff --git a/Documentation/tools/rtla/rtla-osnoise-hist.rst b/Documentation/tools/rtla/rtla-osnoise-hist.rst new file mode 100644 index 000000000000..52298ddd8701 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise-hist.rst @@ -0,0 +1,66 @@ +=================== +rtla-osnoise-hist +=================== +------------------------------------------------------ +Display a histogram of the osnoise tracer samples +------------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise hist** [*OPTIONS*] + +DESCRIPTION +=========== +.. include:: common_osnoise_description.rst + +The **rtla osnoise hist** tool collects all **osnoise:sample_threshold** +occurrence in a histogram, displaying the results in a user-friendly way. +The tool also allows many configurations of the *osnoise* tracer and the +collection of the tracer output. + +OPTIONS +======= +.. include:: common_osnoise_options.rst + +.. include:: common_hist_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, *osnoise* tracer threads are set to run with real-time +priority *FIFO:1*, on CPUs *0-11*, for *900ms* at each period (*1s* by +default). The reason for reducing the runtime is to avoid starving the +**rtla** tool. The tool is also set to run for *one minute*. The output +histogram is set to group outputs in buckets of *10us* and *25* entries:: + + [root@f34 ~/]# rtla osnoise hist -P F:1 -c 0-11 -r 900000 -d 1M -b 10 -e 25 + # RTLA osnoise histogram + # Time unit is microseconds (us) + # Duration: 0 00:01:00 + Index CPU-000 CPU-001 CPU-002 CPU-003 CPU-004 CPU-005 CPU-006 CPU-007 CPU-008 CPU-009 CPU-010 CPU-011 + 0 42982 46287 51779 53740 52024 44817 49898 36500 50408 50128 49523 52377 + 10 12224 8356 2912 878 2667 10155 4573 18894 4214 4836 5708 2413 + 20 8 5 12 2 13 24 20 41 29 53 39 39 + 30 1 1 0 0 10 3 6 19 15 31 30 38 + 40 0 0 0 0 0 4 2 7 2 3 8 11 + 50 0 0 0 0 0 0 0 0 0 1 1 2 + over: 0 0 0 0 0 0 0 0 0 0 0 0 + count: 55215 54649 54703 54620 54714 55003 54499 55461 54668 55052 55309 54880 + min: 0 0 0 0 0 0 0 0 0 0 0 0 + avg: 0 0 0 0 0 0 0 0 0 0 0 0 + max: 30 30 20 20 30 40 40 40 40 50 50 50 + +SEE ALSO +======== +**rtla-osnoise**\(1), **rtla-osnoise-top**\(1) + +*osnoise* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-osnoise-top.rst b/Documentation/tools/rtla/rtla-osnoise-top.rst new file mode 100644 index 000000000000..5d75d1394516 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise-top.rst @@ -0,0 +1,61 @@ +=================== +rtla-osnoise-top +=================== +----------------------------------------------- +Display a summary of the operating system noise +----------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise top** [*OPTIONS*] + +DESCRIPTION +=========== +.. include:: common_osnoise_description.rst + +**rtla osnoise top** collects the periodic summary from the *osnoise* tracer, +including the counters of the occurrence of the interference source, +displaying the results in a user-friendly format. + +The tool also allows many configurations of the *osnoise* tracer and the +collection of the tracer output. + +OPTIONS +======= +.. include:: common_osnoise_options.rst + +.. include:: common_top_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, the **rtla osnoise top** tool is set to run with a +real-time priority *FIFO:1*, on CPUs *0-3*, for *900ms* at each period +(*1s* by default). The reason for reducing the runtime is to avoid starving +the rtla tool. The tool is also set to run for *one minute* and to display +a summary of the report at the end of the session:: + + [root@f34 ~]# rtla osnoise top -P F:1 -c 0-3 -r 900000 -d 1M -q + Operating System Noise + duration: 0 00:01:00 | time is in us + CPU Period Runtime Noise % CPU Aval Max Noise Max Single HW NMI IRQ Softirq Thread + 0 #59 53100000 304896 99.42580 6978 56 549 0 53111 1590 13 + 1 #59 53100000 338339 99.36282 8092 24 399 0 53130 1448 31 + 2 #59 53100000 290842 99.45227 6582 39 855 0 53110 1406 12 + 3 #59 53100000 204935 99.61405 6251 33 290 0 53156 1460 12 + +SEE ALSO +======== + +**rtla-osnoise**\(1), **rtla-osnoise-hist**\(1) + +Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-osnoise.rst b/Documentation/tools/rtla/rtla-osnoise.rst new file mode 100644 index 000000000000..c129b206ce34 --- /dev/null +++ b/Documentation/tools/rtla/rtla-osnoise.rst @@ -0,0 +1,59 @@ +=============== +rtla-osnoise +=============== +------------------------------------------------------------------ +Measure the operating system noise +------------------------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla osnoise** [*MODE*] ... + +DESCRIPTION +=========== + +.. include:: common_osnoise_description.rst + +The *osnoise* tracer outputs information in two ways. It periodically prints +a summary of the noise of the operating system, including the counters of +the occurrence of the source of interference. It also provides information +for each noise via the **osnoise:** tracepoints. The **rtla osnoise top** +mode displays information about the periodic summary from the *osnoise* tracer. +The **rtla osnoise hist** mode displays information about the noise using +the **osnoise:** tracepoints. For further details, please refer to the +respective man page. + +MODES +===== +**top** + + Prints the summary from osnoise tracer. + +**hist** + + Prints a histogram of osnoise samples. + +If no MODE is given, the top mode is called, passing the arguments. + +OPTIONS +======= + +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding mode. + +SEE ALSO +======== +**rtla-osnoise-top**\(1), **rtla-osnoise-hist**\(1) + +Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-timerlat-hist.rst b/Documentation/tools/rtla/rtla-timerlat-hist.rst new file mode 100644 index 000000000000..e12eae1f3301 --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat-hist.rst @@ -0,0 +1,106 @@ +===================== +rtla-timerlat-hist +===================== +------------------------------------------------ +Histograms of the operating system timer latency +------------------------------------------------ + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat hist** [*OPTIONS*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The **rtla timerlat hist** displays a histogram of each tracer event +occurrence. This tool uses the periodic information, and the +**osnoise:** tracepoints are enabled when using the **-T** option. + +OPTIONS +======= + +.. include:: common_timerlat_options.rst + +.. include:: common_hist_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= +In the example below, **rtla timerlat hist** is set to run for *10* minutes, +in the cpus *0-4*, *skipping zero* only lines. Moreover, **rtla timerlat +hist** will change the priority of the *timelat* threads to run under +*SCHED_DEADLINE* priority, with a *10us* runtime every *1ms* period. The +*1ms* period is also passed to the *timerlat* tracer:: + + [root@alien ~]# timerlat hist -d 10m -c 0-4 -P d:100us:1ms -p 1ms + # RTLA timerlat histogram + # Time unit is microseconds (us) + # Duration: 0 00:10:00 + Index IRQ-000 Thr-000 IRQ-001 Thr-001 IRQ-002 Thr-002 IRQ-003 Thr-003 IRQ-004 Thr-004 + 0 276489 0 206089 0 466018 0 481102 0 205546 0 + 1 318327 35487 388149 30024 94531 48382 83082 71078 388026 55730 + 2 3282 122584 4019 126527 28231 109012 23311 89309 4568 98739 + 3 940 11815 837 9863 6209 16227 6895 17196 910 9780 + 4 444 17287 424 11574 2097 38443 2169 36736 462 13476 + 5 206 43291 255 25581 1223 101908 1304 101137 236 28913 + 6 132 101501 96 64584 635 213774 757 215471 99 73453 + 7 74 169347 65 124758 350 57466 441 53639 69 148573 + 8 53 85183 31 156751 229 9052 306 9026 39 139907 + 9 22 10387 12 42762 161 2554 225 2689 19 26192 + 10 13 1898 8 5770 114 1247 128 1405 13 3772 + 11 9 560 9 924 71 686 76 765 8 713 + 12 4 256 2 360 50 411 64 474 3 278 + 13 2 167 2 172 43 256 53 350 4 180 + 14 1 88 1 116 15 198 42 223 0 115 + 15 2 63 3 94 11 139 20 150 0 58 + 16 2 37 0 56 5 78 10 102 0 39 + 17 0 18 0 28 4 57 8 80 0 15 + 18 0 8 0 17 2 50 6 56 0 12 + 19 0 9 0 5 0 19 0 48 0 18 + 20 0 4 0 8 0 11 2 27 0 4 + 21 0 2 0 3 1 9 1 18 0 6 + 22 0 1 0 3 1 7 0 3 0 5 + 23 0 2 0 4 0 2 0 7 0 2 + 24 0 2 0 2 1 3 0 3 0 5 + 25 0 0 0 1 0 1 0 1 0 3 + 26 0 1 0 0 0 2 0 2 0 0 + 27 0 0 0 3 0 1 0 0 0 1 + 28 0 0 0 3 0 0 0 1 0 0 + 29 0 0 0 2 0 2 0 1 0 3 + 30 0 1 0 0 0 0 0 0 0 0 + 31 0 1 0 0 0 0 0 2 0 2 + 32 0 0 0 1 0 2 0 0 0 0 + 33 0 0 0 2 0 0 0 0 0 1 + 34 0 0 0 0 0 0 0 0 0 2 + 35 0 1 0 1 0 0 0 0 0 1 + 36 0 1 0 0 0 1 0 1 0 0 + 37 0 0 0 1 0 0 0 0 0 0 + 40 0 0 0 0 0 1 0 1 0 0 + 41 0 0 0 0 0 0 0 0 0 1 + 42 0 0 0 0 0 0 0 0 0 1 + 44 0 0 0 0 0 1 0 0 0 0 + 46 0 0 0 0 0 0 0 1 0 0 + 47 0 0 0 0 0 0 0 0 0 1 + 50 0 0 0 0 0 0 0 0 0 1 + 54 0 0 0 1 0 0 0 0 0 0 + 58 0 0 0 1 0 0 0 0 0 0 + over: 0 0 0 0 0 0 0 0 0 0 + count: 600002 600002 600002 600002 600002 600002 600002 600002 600002 600002 + min: 0 1 0 1 0 1 0 1 0 1 + avg: 0 5 0 5 0 4 0 4 0 5 + max: 16 36 15 58 24 44 21 46 13 50 + +SEE ALSO +======== +**rtla-timerlat**\(1), **rtla-timerlat-top**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> diff --git a/Documentation/tools/rtla/rtla-timerlat-top.rst b/Documentation/tools/rtla/rtla-timerlat-top.rst new file mode 100644 index 000000000000..1c321de1c171 --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat-top.rst @@ -0,0 +1,145 @@ +==================== +rtla-timerlat-top +==================== +------------------------------------------- +Measures the operating system timer latency +------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat top** [*OPTIONS*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The **rtla timerlat top** displays a summary of the periodic output +from the *timerlat* tracer. It also provides information for each +operating system noise via the **osnoise:** tracepoints that can be +seem with the option **-T**. + +OPTIONS +======= + +.. include:: common_timerlat_options.rst + +.. include:: common_top_options.rst + +.. include:: common_options.rst + +EXAMPLE +======= + +In the example below, the *timerlat* tracer is set to capture the stack trace at +the IRQ handler, printing it to the buffer if the *Thread* timer latency is +higher than *30 us*. It is also set to stop the session if a *Thread* timer +latency higher than *30 us* is hit. Finally, it is set to save the trace +buffer if the stop condition is hit:: + + [root@alien ~]# rtla timerlat top -s 30 -t 30 -T + Timer Latency + 0 00:00:59 | IRQ Timer Latency (us) | Thread Timer Latency (us) + CPU COUNT | cur min avg max | cur min avg max + 0 #58634 | 1 0 1 10 | 11 2 10 23 + 1 #58634 | 1 0 1 9 | 12 2 9 23 + 2 #58634 | 0 0 1 11 | 10 2 9 23 + 3 #58634 | 1 0 1 11 | 11 2 9 24 + 4 #58634 | 1 0 1 10 | 11 2 9 26 + 5 #58634 | 1 0 1 8 | 10 2 9 25 + 6 #58634 | 12 0 1 12 | 30 2 10 30 <--- CPU with spike + 7 #58634 | 1 0 1 9 | 11 2 9 23 + 8 #58633 | 1 0 1 9 | 11 2 9 26 + 9 #58633 | 1 0 1 9 | 10 2 9 26 + 10 #58633 | 1 0 1 13 | 11 2 9 28 + 11 #58633 | 1 0 1 13 | 12 2 9 24 + 12 #58633 | 1 0 1 8 | 10 2 9 23 + 13 #58633 | 1 0 1 10 | 10 2 9 22 + 14 #58633 | 1 0 1 18 | 12 2 9 27 + 15 #58633 | 1 0 1 10 | 11 2 9 28 + 16 #58633 | 0 0 1 11 | 7 2 9 26 + 17 #58633 | 1 0 1 13 | 10 2 9 24 + 18 #58633 | 1 0 1 9 | 13 2 9 22 + 19 #58633 | 1 0 1 10 | 11 2 9 23 + 20 #58633 | 1 0 1 12 | 11 2 9 28 + 21 #58633 | 1 0 1 14 | 11 2 9 24 + 22 #58633 | 1 0 1 8 | 11 2 9 22 + 23 #58633 | 1 0 1 10 | 11 2 9 27 + timerlat hit stop tracing + saving trace to timerlat_trace.txt + [root@alien bristot]# tail -60 timerlat_trace.txt + [...] + timerlat/5-79755 [005] ....... 426.271226: #58634 context thread timer_latency 10823 ns + sh-109404 [006] dnLh213 426.271247: #58634 context irq timer_latency 12505 ns + sh-109404 [006] dNLh313 426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns + sh-109404 [006] d...313 426.271263: thread_noise: sh:109404 start 426.271245853 duration 4769 ns + timerlat/6-79756 [006] ....... 426.271264: #58634 context thread timer_latency 30328 ns + timerlat/6-79756 [006] ....1.. 426.271265: <stack trace> + => timerlat_irq + => __hrtimer_run_queues + => hrtimer_interrupt + => __sysvec_apic_timer_interrupt + => sysvec_apic_timer_interrupt + => asm_sysvec_apic_timer_interrupt + => _raw_spin_unlock_irqrestore <---- spinlock that disabled interrupt. + => try_to_wake_up + => autoremove_wake_function + => __wake_up_common + => __wake_up_common_lock + => ep_poll_callback + => __wake_up_common + => __wake_up_common_lock + => fsnotify_add_event + => inotify_handle_inode_event + => fsnotify + => __fsnotify_parent + => __fput + => task_work_run + => exit_to_user_mode_prepare + => syscall_exit_to_user_mode + => do_syscall_64 + => entry_SYSCALL_64_after_hwframe + => 0x7265000001378c + => 0x10000cea7 + => 0x25a00000204a + => 0x12e302d00000000 + => 0x19b51010901b6 + => 0x283ce00726500 + => 0x61ea308872 + => 0x00000fe3 + bash-109109 [007] d..h... 426.271265: #58634 context irq timer_latency 1211 ns + timerlat/6-79756 [006] ....... 426.271267: timerlat_main: stop tracing hit on cpu 6 + +In the trace, it is possible the notice that the *IRQ* timer latency was +already high, accounting *12505 ns*. The IRQ delay was caused by the +*bash-109109* process that disabled IRQs in the wake-up path +(*_try_to_wake_up()* function). The duration of the IRQ handler that woke +up the timerlat thread, informed with the **osnoise:irq_noise** event, was +also high and added more *12553 ns* to the Thread latency. Finally, the +**osnoise:thread_noise** added by the currently running thread (including +the scheduling overhead) added more *4769 ns*. Summing up these values, +the *Thread* timer latency accounted for *30328 ns*. + +The primary reason for this high value is the wake-up path that was hit +twice during this case: when the *bash-109109* was waking up a thread +and then when the *timerlat* thread was awakened. This information can +then be used as the starting point of a more fine-grained analysis. + +Note that **rtla timerlat** was dispatched without changing *timerlat* tracer +threads' priority. That is generally not needed because these threads hava +priority *FIFO:95* by default, which is a common priority used by real-time +kernel developers to analyze scheduling delays. + +SEE ALSO +-------- +**rtla-timerlat**\(1), **rtla-timerlat-hist**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +------ +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla-timerlat.rst b/Documentation/tools/rtla/rtla-timerlat.rst new file mode 100644 index 000000000000..44a49e6f302b --- /dev/null +++ b/Documentation/tools/rtla/rtla-timerlat.rst @@ -0,0 +1,57 @@ +================ +rtla-timerlat +================ +------------------------------------------- +Measures the operating system timer latency +------------------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla timerlat** [*MODE*] ... + +DESCRIPTION +=========== + +.. include:: common_timerlat_description.rst + +The *timerlat* tracer outputs information in two ways. It periodically +prints the timer latency at the timer *IRQ* handler and the *Thread* handler. +It also provides information for each noise via the **osnoise:** tracepoints. +The **rtla timerlat top** mode displays a summary of the periodic output +from the *timerlat* tracer. The **rtla hist hist** mode displays a histogram +of each tracer event occurrence. For further details, please refer to the +respective man page. + +MODES +===== +**top** + + Prints the summary from *timerlat* tracer. + +**hist** + + Prints a histogram of timerlat samples. + +If no *MODE* is given, the top mode is called, passing the arguments. + +OPTIONS +======= +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding mode. + +SEE ALSO +======== +**rtla-timerlat-top**\(1), **rtla-timerlat-hist**\(1) + +*timerlat* tracer documentation: <https://www.kernel.org/doc/html/latest/trace/timerlat-tracer.html> + +AUTHOR +====== +Written by Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/tools/rtla/rtla.rst b/Documentation/tools/rtla/rtla.rst new file mode 100644 index 000000000000..fc0d233efcd5 --- /dev/null +++ b/Documentation/tools/rtla/rtla.rst @@ -0,0 +1,48 @@ +========= +rtla +========= +-------------------------------- +Real-time Linux Analysis tool +-------------------------------- + +:Manual section: 1 + +SYNOPSIS +======== +**rtla** *COMMAND* [*OPTIONS*] + +DESCRIPTION +=========== +The **rtla** is a meta-tool that includes a set of commands that aims to +analyze the real-time properties of Linux. But instead of testing Linux +as a black box, **rtla** leverages kernel tracing capabilities to provide +precise information about the properties and root causes of unexpected +results. + +COMMANDS +======== +**osnoise** + + Gives information about the operating system noise (osnoise). + +**timerlat** + + Measures the IRQ and thread timer latency. + +OPTIONS +======= +**-h**, **--help** + + Display the help text. + +For other options, see the man page for the corresponding command. + +SEE ALSO +======== +**rtla-osnoise**\(1), **rtla-timerlat**\(1) + +AUTHOR +====== +Daniel Bristot de Oliveira <bristot@kernel.org> + +.. include:: common_appendix.rst diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index 8ddb9b09451c..c47f381d0c00 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes prev_comm ~ "*sh*" prev_comm ~ "ba*sh" +If the field is a pointer that points into user space (for example +"filename" from sys_enter_openat), then you have to append ".ustring" to the +field name:: + + filename.ustring ~ "password" + +As the kernel will have to know how to retrieve the memory that the pointer +is at from user space. + 5.2 Setting filters ------------------- @@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of the filter string; the error message should still be useful though even without more accurate position info. +5.2.1 Filter limitations +------------------------ + +If a filter is placed on a string pointer ``(char *)`` that does not point +to a string on the ring buffer, but instead points to kernel or user space +memory, then, for safety reasons, at most 1024 bytes of the content is +copied onto a temporary buffer to do the compare. If the copy of the memory +faults (the pointer points to memory that should not be accessed), then the +string compare will be treated as not matching. + 5.3 Clearing filters -------------------- diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 3616839c5c4b..bafc02bf8220 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -109,12 +109,12 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) * also the directory that is being deleted. */ inode_unlock(inode); - inode_unlock(dentry->d_inode); + inode_unlock(d_inode(dentry)); ret = tracefs_ops.rmdir(name); inode_lock_nested(inode, I_MUTEX_PARENT); - inode_lock(dentry->d_inode); + inode_lock(d_inode(dentry)); kfree(name); @@ -284,7 +284,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) static int tracefs_apply_options(struct super_block *sb) { struct tracefs_fs_info *fsi = sb->s_fs_info; - struct inode *inode = sb->s_root->d_inode; + struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; inode->i_mode &= ~S_IALLUGO; @@ -403,18 +403,18 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) if (!parent) parent = tracefs_mount->mnt_root; - inode_lock(parent->d_inode); - if (unlikely(IS_DEADDIR(parent->d_inode))) + inode_lock(d_inode(parent)); + if (unlikely(IS_DEADDIR(d_inode(parent)))) dentry = ERR_PTR(-ENOENT); else dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && dentry->d_inode) { + if (!IS_ERR(dentry) && d_inode(dentry)) { dput(dentry); dentry = ERR_PTR(-EEXIST); } if (IS_ERR(dentry)) { - inode_unlock(parent->d_inode); + inode_unlock(d_inode(parent)); simple_release_fs(&tracefs_mount, &tracefs_mount_count); } @@ -423,7 +423,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) static struct dentry *failed_creating(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); + inode_unlock(d_inode(dentry->d_parent)); dput(dentry); simple_release_fs(&tracefs_mount, &tracefs_mount_count); return NULL; @@ -431,7 +431,7 @@ static struct dentry *failed_creating(struct dentry *dentry) static struct dentry *end_creating(struct dentry *dentry) { - inode_unlock(dentry->d_parent->d_inode); + inode_unlock(d_inode(dentry->d_parent)); return dentry; } @@ -489,7 +489,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; d_instantiate(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); + fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } @@ -516,8 +516,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); d_instantiate(dentry, inode); - inc_nlink(dentry->d_parent->d_inode); - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + inc_nlink(d_inode(dentry->d_parent)); + fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2d167ac3452c..70c069aef02c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -172,6 +172,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -782,6 +783,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_RDYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, FILTER_COMM, diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a8e97f84b652..7660a7846586 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (c) diff --git a/include/trace/perf.h b/include/trace/perf.h index dbc6c74defc3..ea4405de175a 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -21,6 +21,22 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 08810a463880..8c6f7c433518 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR(); #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __bitmask +#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) -#undef __bitmask -#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) u32 item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -293,6 +317,19 @@ TRACE_MAKE_SYSTEM_STR(); #undef __get_str #define __get_str(field) ((char *)__get_dynamic_array(field)) +#undef __get_rel_dynamic_array +#define __get_rel_dynamic_array(field) \ + ((void *)(&__entry->__rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ + (__entry->__rel_loc_##field & 0xffff)) + +#undef __get_rel_dynamic_array_len +#define __get_rel_dynamic_array_len(field) \ + ((__entry->__rel_loc_##field >> 16) & 0xffff) + +#undef __get_rel_str +#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) + #undef __get_bitmask #define __get_bitmask(field) \ ({ \ @@ -302,6 +339,15 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ }) +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) \ + ({ \ + void *__bitmask = __get_rel_dynamic_array(field); \ + unsigned int __bitmask_size; \ + __bitmask_size = __get_rel_dynamic_array_len(field); \ + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ @@ -471,6 +517,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(_type, _item, _len) { \ + .type = "__rel_loc " #_type "[]", .name = #_item, \ + .size = 4, .align = 4, \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static struct trace_event_fields trace_event_fields_##call[] = { \ @@ -519,6 +580,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __item_length = (len) * sizeof(type); \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data) - \ + offsetof(typeof(*entry), __rel_loc_##item) - \ + sizeof(u32); \ + __data_offsets->item |= __item_length << 16; \ + __data_size += __item_length; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, \ + strlen((src) ? (const char *)(src) : "(null)") + 1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). @@ -542,6 +619,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ __bitmask_size_in_longs(nr_bits)) +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ @@ -706,6 +787,37 @@ static inline notrace int trace_event_get_offsets_##call( \ #define __assign_bitmask(dst, src, nr_bits) \ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item, len) \ + __entry->__rel_loc_##item = __data_offsets.item; + +#undef __rel_string +#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) + +#undef __rel_string_len +#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) + +#undef __assign_rel_str +#define __assign_rel_str(dst, src) \ + strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); + +#undef __assign_rel_str_len +#define __assign_rel_str_len(dst, src, len) \ + do { \ + memcpy(__get_rel_str(dst), (src), (len)); \ + __get_rel_str(dst)[len] = '\0'; \ + } while (0) + +#undef __rel_bitmask +#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + +#undef __get_rel_bitmask +#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) + +#undef __assign_rel_bitmask +#define __assign_rel_bitmask(dst, src, nr_bits) \ + memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + #undef TP_fast_assign #define TP_fast_assign(args...) args @@ -770,6 +882,10 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __get_rel_dynamic_array +#undef __get_rel_dynamic_array_len +#undef __get_rel_str +#undef __get_rel_bitmask #undef __print_array #undef __print_hex_dump diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 420ff4bc67fd..f468767bc287 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -915,6 +915,20 @@ config EVENT_TRACE_TEST_SYSCALLS TBD - enable a way to actually call the syscalls as we test their events +config FTRACE_SORT_STARTUP_TEST + bool "Verify compile time sorting of ftrace functions" + depends on DYNAMIC_FTRACE + depends on BUILDTIME_TABLE_SORT + help + Sorting of the mcount_loc sections that is used to find the + where the ftrace knows where to patch functions for tracing + and other callbacks is done at compile time. But if the sort + is not done correctly, it will cause non-deterministic failures. + When this is set, the sorted sections will be verified that they + are in deed sorted and will warn if they are not. + + If unsure, say N + config RING_BUFFER_STARTUP_TEST bool "Ring buffer startup self test" depends on RING_BUFFER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index be5f6b32a012..6163b6f762f7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6394,6 +6394,27 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } +#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST +static void test_is_sorted(unsigned long *start, unsigned long count) +{ + int i; + + for (i = 1; i < count; i++) { + if (WARN(start[i - 1] > start[i], + "[%d] %pS at %lx is not sorted with %pS at %lx\n", i, + (void *)start[i - 1], start[i - 1], + (void *)start[i], start[i])) + break; + } + if (i == count) + pr_info("ftrace section at %px sorted properly\n", start); +} +#else +static void test_is_sorted(unsigned long *start, unsigned long count) +{ +} +#endif + static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -6412,8 +6433,17 @@ static int ftrace_process_locs(struct module *mod, if (!count) return 0; - sort(start, count, sizeof(*start), - ftrace_cmp_ips, NULL); + /* + * Sorting mcount in vmlinux at build time depend on + * CONFIG_BUILDTIME_TABLE_SORT, while mcount loc in + * modules can not be sorted at build time. + */ + if (!IS_ENABLED(CONFIG_BUILDTIME_TABLE_SORT) || mod) { + sort(start, count, sizeof(*start), + ftrace_cmp_ips, NULL); + } else { + test_is_sorted(start, count); + } start_pg = ftrace_allocate_pages(count); if (!start_pg) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 78ea542ce3bc..a569a0cb81ee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -980,6 +980,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */ this_cpu_dec(trace_buffered_event_cnt); + /* ring_buffer_unlock_commit() enables preemption */ + preempt_enable_notrace(); } else ring_buffer_unlock_commit(buffer, event); } @@ -2601,6 +2603,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_HARDIRQ; if (in_serving_softirq()) trace_flags |= TRACE_FLAG_SOFTIRQ; + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; if (tif_need_resched()) trace_flags |= TRACE_FLAG_NEED_RESCHED; @@ -2745,8 +2749,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, *current_rb = tr->array_buffer.buffer; if (!tr->no_filter_buffering_ref && - (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && - (entry = this_cpu_read(trace_buffered_event))) { + (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { + preempt_disable_notrace(); /* * Filtering is on, so try to use the per cpu buffer first. * This buffer will simulate a ring_buffer_event, @@ -2764,33 +2768,38 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, * is still quicker than no copy on match, but having * to discard out of the ring buffer on a failed match. */ - int max_len = PAGE_SIZE - struct_size(entry, array, 1); + if ((entry = __this_cpu_read(trace_buffered_event))) { + int max_len = PAGE_SIZE - struct_size(entry, array, 1); - val = this_cpu_inc_return(trace_buffered_event_cnt); + val = this_cpu_inc_return(trace_buffered_event_cnt); - /* - * Preemption is disabled, but interrupts and NMIs - * can still come in now. If that happens after - * the above increment, then it will have to go - * back to the old method of allocating the event - * on the ring buffer, and if the filter fails, it - * will have to call ring_buffer_discard_commit() - * to remove it. - * - * Need to also check the unlikely case that the - * length is bigger than the temp buffer size. - * If that happens, then the reserve is pretty much - * guaranteed to fail, as the ring buffer currently - * only allows events less than a page. But that may - * change in the future, so let the ring buffer reserve - * handle the failure in that case. - */ - if (val == 1 && likely(len <= max_len)) { - trace_event_setup(entry, type, trace_ctx); - entry->array[0] = len; - return entry; + /* + * Preemption is disabled, but interrupts and NMIs + * can still come in now. If that happens after + * the above increment, then it will have to go + * back to the old method of allocating the event + * on the ring buffer, and if the filter fails, it + * will have to call ring_buffer_discard_commit() + * to remove it. + * + * Need to also check the unlikely case that the + * length is bigger than the temp buffer size. + * If that happens, then the reserve is pretty much + * guaranteed to fail, as the ring buffer currently + * only allows events less than a page. But that may + * change in the future, so let the ring buffer reserve + * handle the failure in that case. + */ + if (val == 1 && likely(len <= max_len)) { + trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + /* Return with preemption disabled */ + return entry; + } + this_cpu_dec(trace_buffered_event_cnt); } - this_cpu_dec(trace_buffered_event_cnt); + /* __trace_buffer_lock_reserve() disables preemption */ + preempt_enable_notrace(); } entry = __trace_buffer_lock_reserve(*current_rb, type, len, @@ -4183,7 +4192,7 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" + "# / _-----=> irqs-off/BH-disabled\n" "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -4224,7 +4233,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); @@ -4834,6 +4843,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } +static int tracing_mark_open(struct inode *inode, struct file *filp) +{ + stream_open(inode, filp); + return tracing_open_generic_tr(inode, filp); +} + static int tracing_release(struct inode *inode, struct file *file) { struct trace_array *tr = inode->i_private; @@ -5635,7 +5650,7 @@ static const char readme_msg[] = "\t - a numeric literal: e.g. ms_per_sec=1000,\n" "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" "\n" - "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n" + "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" "\t multiplication(*) and division(/) operators. An operand can be either a\n" "\t variable reference, field or numeric literal.\n" "\n" @@ -6718,10 +6733,9 @@ waitagain: cnt = PAGE_SIZE - 1; /* reset all but tr, trace, and overruns */ - memset_startat(iter, 0, seq); + trace_iterator_reset(iter); cpumask_clear(iter->started); trace_seq_init(&iter->seq); - iter->pos = -1; trace_event_read_lock(); trace_access_lock(iter->cpu_file); @@ -7110,9 +7124,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tt) event_triggers_post_call(tr->trace_marker_file, tt); - if (written > 0) - *fpos += written; - return written; } @@ -7171,9 +7182,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, __buffer_unlock_commit(buffer, event); - if (written > 0) - *fpos += written; - return written; } @@ -7573,16 +7581,14 @@ static const struct file_operations tracing_free_buffer_fops = { }; static const struct file_operations tracing_mark_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; static const struct file_operations tracing_mark_raw_fops = { - .open = tracing_open_generic_tr, + .open = tracing_mark_open, .write = tracing_mark_raw_write, - .llseek = generic_file_llseek, .release = tracing_release_generic_tr, }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 38715aa6cfdf..d038ddbf1bea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -83,6 +83,9 @@ enum trace_type { #undef __dynamic_array #define __dynamic_array(type, item) type item[]; +#undef __rel_dynamic_array +#define __rel_dynamic_array(type, item) type item[]; + #undef F_STRUCT #define F_STRUCT(args...) args @@ -1334,10 +1337,12 @@ __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) { if (this_cpu_read(trace_buffered_event) == event) { - /* Simply release the temp buffer */ + /* Simply release the temp buffer and enable preemption */ this_cpu_dec(trace_buffered_event_cnt); + preempt_enable_notrace(); return; } + /* ring_buffer_discard_commit() enables preemption */ ring_buffer_discard_commit(buffer, event); } @@ -1465,6 +1470,7 @@ struct filter_pred { static inline bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING || field->filter_type == FILTER_STATIC_STRING || field->filter_type == FILTER_PTR_STRING || field->filter_type == FILTER_COMM; @@ -1572,15 +1578,13 @@ extern int event_enable_trigger_print(struct seq_file *m, struct event_trigger_data *data); extern void event_enable_trigger_free(struct event_trigger_ops *ops, struct event_trigger_data *data); -extern int event_enable_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +extern int event_enable_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); extern int event_enable_register_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); extern void event_enable_unregister_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file); extern void trigger_data_free(struct event_trigger_data *data); @@ -1606,6 +1610,30 @@ get_named_trigger_data(struct event_trigger_data *data); extern int register_event_command(struct event_command *cmd); extern int unregister_event_command(struct event_command *cmd); extern int register_trigger_hist_enable_disable_cmds(void); +extern bool event_trigger_check_remove(const char *glob); +extern bool event_trigger_empty_param(const char *param); +extern int event_trigger_separate_filter(char *param_and_filter, char **param, + char **filter, bool param_required); +extern struct event_trigger_data * +event_trigger_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data); +extern int event_trigger_parse_num(char *trigger, + struct event_trigger_data *trigger_data); +extern int event_trigger_set_filter(struct event_command *cmd_ops, + struct trace_event_file *file, + char *param, + struct event_trigger_data *trigger_data); +extern void event_trigger_reset_filter(struct event_command *cmd_ops, + struct event_trigger_data *trigger_data); +extern int event_trigger_register(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + char *cmd, + char *trigger, + struct event_trigger_data *trigger_data, + int *n_registered); /** * struct event_trigger_ops - callbacks for trace event triggers @@ -1613,10 +1641,20 @@ extern int register_trigger_hist_enable_disable_cmds(void); * The methods in this structure provide per-event trigger hooks for * various trigger operations. * + * The @init and @free methods are used during trigger setup and + * teardown, typically called from an event_command's @parse() + * function implementation. + * + * The @print method is used to print the trigger spec. + * + * The @trigger method is the function that actually implements the + * trigger and is called in the context of the triggering event + * whenever that event occurs. + * * All the methods below, except for @init() and @free(), must be * implemented. * - * @func: The trigger 'probe' function called when the triggering + * @trigger: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that * registered the trigger (see struct event_command) along with @@ -1645,9 +1683,10 @@ extern int register_trigger_hist_enable_disable_cmds(void); * (see trace_event_triggers.c). */ struct event_trigger_ops { - void (*func)(struct event_trigger_data *data, - struct trace_buffer *buffer, void *rec, - struct ring_buffer_event *rbe); + void (*trigger)(struct event_trigger_data *data, + struct trace_buffer *buffer, + void *rec, + struct ring_buffer_event *rbe); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, @@ -1696,7 +1735,7 @@ struct event_trigger_ops { * All the methods below, except for @set_filter() and @unreg_all(), * must be implemented. * - * @func: The callback function responsible for parsing and + * @parse: The callback function responsible for parsing and * registering the trigger written to the 'trigger' file by the * user. It allocates the trigger instance and registers it with * the appropriate trace event. It makes use of the other @@ -1731,21 +1770,24 @@ struct event_trigger_ops { * * @get_trigger_ops: The callback function invoked to retrieve the * event_trigger_ops implementation associated with the command. + * This callback function allows a single event_command to + * support multiple trigger implementations via different sets of + * event_trigger_ops, depending on the value of the @param + * string. */ struct event_command { struct list_head list; char *name; enum event_trigger_type trigger_type; int flags; - int (*func)(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *params); + int (*parse)(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, + char *param_and_filter); int (*reg)(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg)(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file); void (*unreg_all)(struct trace_event_file *file); @@ -1926,14 +1968,7 @@ extern struct trace_iterator *tracepoint_print_iter; */ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) { - const size_t offset = offsetof(struct trace_iterator, seq); - - /* - * Keep gcc from complaining about overwriting more than just one - * member in the structure. - */ - memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset); - + memset_startat(iter, 0, seq); iter->pos = -1; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 928867f527e7..191db32dec46 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -489,18 +489,12 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec) if (trace_trigger_soft_disabled(edata->file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = edata->file; - dsize = get_eprobe_size(&edata->ep->tp, rec); - fbuffer.regs = NULL; - - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file, - call->event.type, - sizeof(*entry) + edata->ep->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + + entry = trace_event_buffer_reserve(&fbuffer, edata->file, + sizeof(*entry) + edata->ep->tp.size + dsize); + + if (!entry) return; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); @@ -549,29 +543,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data, } static struct event_trigger_ops eprobe_trigger_ops = { - .func = eprobe_trigger_func, + .trigger = eprobe_trigger_func, .print = eprobe_trigger_print, .init = eprobe_trigger_init, .free = eprobe_trigger_free, }; -static int eprobe_trigger_cmd_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { return -1; } -static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) +static int eprobe_trigger_reg_func(char *glob, + struct event_trigger_data *data, + struct trace_event_file *file) { return -1; } -static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) +static void eprobe_trigger_unreg_func(char *glob, + struct event_trigger_data *data, + struct trace_event_file *file) { } @@ -586,7 +580,7 @@ static struct event_command event_trigger_cmd = { .name = "eprobe", .trigger_type = ETT_EVENT_EPROBE, .flags = EVENT_CMD_FL_NEEDS_REC, - .func = eprobe_trigger_cmd_func, + .parse = eprobe_trigger_cmd_parse, .reg = eprobe_trigger_reg_func, .unreg = eprobe_trigger_unreg_func, .unreg_all = NULL, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 92be9cb1d7d4..3147614c1812 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3461,10 +3461,8 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, tr, &ftrace_tr_enable_fops); - if (!entry) { - pr_warn("Could not create tracefs 'enable' entry\n"); + if (!entry) return -ENOMEM; - } /* There are not as crucial, just warn if they are not created */ @@ -3480,17 +3478,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); /* ring buffer internal formats */ - entry = trace_create_file("header_page", TRACE_MODE_READ, d_events, + trace_create_file("header_page", TRACE_MODE_READ, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); - if (!entry) - pr_warn("Could not create tracefs 'header_page' entry\n"); - entry = trace_create_file("header_event", TRACE_MODE_READ, d_events, + trace_create_file("header_event", TRACE_MODE_READ, d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); - if (!entry) - pr_warn("Could not create tracefs 'header_event' entry\n"); tr->event_dir = d_events; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index c9124038b140..b458a9afa2c0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> */ +#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 1024 + +struct ustring_buffer { + char buffer[USTRING_BUF_SIZE]; +}; + +static __percpu struct ustring_buffer *ustring_per_cpu; + +static __always_inline char *test_string(char *str) +{ + struct ustring_buffer *ubuf; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* For safety, do not trust the string pointer */ + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + return NULL; + return kstr; +} + +static __always_inline char *test_ustring(char *str) +{ + struct ustring_buffer *ubuf; + char __user *ustr; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* user space address? */ + ustr = (char __user *)str; + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + return NULL; + + return kstr; +} + /* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { @@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } -/* Filter predicate for char * pointers */ -static int filter_pred_pchar(struct filter_pred *pred, void *event) +static __always_inline int filter_pchar(struct filter_pred *pred, char *str) { - char **addr = (char **)(event + pred->offset); int cmp, match; - int len = strlen(*addr) + 1; /* including tailing '\0' */ + int len; - cmp = pred->regex.match(*addr, &pred->regex, len); + len = strlen(str) + 1; /* including tailing '\0' */ + cmp = pred->regex.match(str, &pred->regex, len); match = cmp ^ pred->not; return match; } +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_string(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} + +/* Filter predicate for char * pointers in user space*/ +static int filter_pred_pchar_user(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_ustring(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} /* * Filter predicate for dynamic sized arrays of characters. @@ -706,6 +777,29 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) return match; } +/* + * Filter predicate for relative dynamic sized arrays of characters. + * These are implemented through a list of strings at the end + * of the entry as same as dynamic string. + * The difference is that the relative one records the location offset + * from the field itself, not the event entry. + */ +static int filter_pred_strrelloc(struct filter_pred *pred, void *event) +{ + u32 *item = (u32 *)(event + pred->offset); + u32 str_item = *item; + int str_loc = str_item & 0xffff; + int str_len = str_item >> 16; + char *addr = (char *)(&item[1]) + str_loc; + int cmp, match; + + cmp = pred->regex.match(addr, &pred->regex, str_len); + + match = cmp ^ pred->not; + + return match; +} + /* Filter predicate for CPUs. */ static int filter_pred_cpu(struct filter_pred *pred, void *event) { @@ -756,7 +850,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event) * * Note: * - @str might not be NULL-terminated if it's of type DYN_STRING - * or STATIC_STRING, unless @len is zero. + * RDYN_STRING, or STATIC_STRING, unless @len is zero. */ static int regex_match_full(char *str, struct regex *r, int len) @@ -1083,6 +1177,9 @@ int filter_assign_type(const char *type) if (strstr(type, "__data_loc") && strstr(type, "char")) return FILTER_DYN_STRING; + if (strstr(type, "__rel_loc") && strstr(type, "char")) + return FILTER_RDYN_STRING; + if (strchr(type, '[') && strstr(type, "char")) return FILTER_STATIC_STRING; @@ -1158,6 +1255,7 @@ static int parse_pred(const char *str, void *data, struct filter_pred *pred = NULL; char num_buf[24]; /* Big enough to hold an address */ char *field_name; + bool ustring = false; char q; u64 val; int len; @@ -1192,6 +1290,12 @@ static int parse_pred(const char *str, void *data, return -EINVAL; } + /* See if the field is a user space string */ + if ((len = str_has_prefix(str + i, ".ustring"))) { + ustring = true; + i += len; + } + while (isspace(str[i])) i++; @@ -1318,10 +1422,24 @@ static int parse_pred(const char *str, void *data, pred->fn = filter_pred_string; pred->regex.field_len = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) + } else if (field->filter_type == FILTER_DYN_STRING) { pred->fn = filter_pred_strloc; - else - pred->fn = filter_pred_pchar; + } else if (field->filter_type == FILTER_RDYN_STRING) + pred->fn = filter_pred_strrelloc; + else { + + if (!ustring_per_cpu) { + /* Once allocated, keep it around for good */ + ustring_per_cpu = alloc_percpu(struct ustring_buffer); + if (!ustring_per_cpu) + goto err_mem; + } + + if (ustring) + pred->fn = filter_pred_pchar_user; + else + pred->fn = filter_pred_pchar; + } /* go past the last quote */ i++; @@ -1387,6 +1505,9 @@ static int parse_pred(const char *str, void *data, err_free: kfree(pred); return -EINVAL; +err_mem: + kfree(pred); + return -ENOMEM; } enum { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 319f9c8ca7e7..5e6a988a8a51 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -217,6 +217,20 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, return (u64)(unsigned long)addr; } +static u64 hist_field_reldynstring(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + u32 *item = event + hist_field->field->offset; + u32 str_item = *item; + int str_loc = str_item & 0xffff; + char *addr = (char *)&item[1] + str_loc; + + return (u64)(unsigned long)addr; +} + static u64 hist_field_pstring(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -1956,8 +1970,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (field->filter_type == FILTER_STATIC_STRING) { hist_field->fn = hist_field_string; hist_field->size = field->size; - } else if (field->filter_type == FILTER_DYN_STRING) + } else if (field->filter_type == FILTER_DYN_STRING) { hist_field->fn = hist_field_dynstring; + } else if (field->filter_type == FILTER_RDYN_STRING) + hist_field->fn = hist_field_reldynstring; else hist_field->fn = hist_field_pstring; } else { @@ -2745,9 +2761,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, } static struct event_command trigger_hist_cmd; -static int event_hist_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param); +static int event_hist_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param); static bool compatible_keys(struct hist_trigger_data *target_hist_data, struct hist_trigger_data *hist_data, @@ -2950,8 +2966,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, var_hist->hist_data = hist_data; /* Create the new histogram with our variable */ - ret = event_hist_trigger_func(&trigger_hist_cmd, file, - "", "hist", cmd); + ret = event_hist_trigger_parse(&trigger_hist_cmd, file, + "", "hist", cmd); if (ret) { kfree(cmd); kfree(var_hist->cmd); @@ -4961,7 +4977,8 @@ static inline void add_to_key(char *compound_key, void *key, struct ftrace_event_field *field; field = key_field->field; - if (field->filter_type == FILTER_DYN_STRING) + if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) size = *(u32 *)(rec + field->offset) >> 16; else if (field->filter_type == FILTER_STATIC_STRING) size = field->size; @@ -5712,8 +5729,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data) for (i = 0; i < hist_data->n_field_var_hists; i++) { file = hist_data->field_var_hists[i]->hist_data->event_file; cmd = hist_data->field_var_hists[i]->cmd; - ret = event_hist_trigger_func(&trigger_hist_cmd, file, - "!hist", "hist", cmd); + ret = event_hist_trigger_parse(&trigger_hist_cmd, file, + "!hist", "hist", cmd); WARN_ON_ONCE(ret < 0); } } @@ -5742,7 +5759,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_ops = { - .func = event_hist_trigger, + .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_init, .free = event_hist_trigger_free, @@ -5776,7 +5793,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_hist_trigger_named_ops = { - .func = event_hist_trigger, + .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_named_init, .free = event_hist_trigger_named_free, @@ -5893,7 +5910,7 @@ static bool hist_trigger_match(struct event_trigger_data *data, return true; } -static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, +static int hist_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6045,7 +6062,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, return false; } -static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, +static void hist_unregister_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -6129,9 +6146,9 @@ static void hist_unreg_all(struct trace_event_file *file) } } -static int event_hist_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +static int event_hist_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT; struct event_trigger_data *trigger_data; @@ -6245,7 +6262,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, goto out_free; } - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); se_name = trace_event_name(file->event_call); se = find_synth_event(se_name); if (se) @@ -6254,7 +6271,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, goto out_free; } - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of triggers registered, * but if it didn't register any it returns zero. Consider no @@ -6297,7 +6314,7 @@ enable: return ret; out_unreg: - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); @@ -6314,7 +6331,7 @@ static struct event_command trigger_hist_cmd = { .name = "hist", .trigger_type = ETT_EVENT_HIST, .flags = EVENT_CMD_FL_NEEDS_REC, - .func = event_hist_trigger_func, + .parse = event_hist_trigger_parse, .reg = hist_register_trigger, .unreg = hist_unregister_trigger, .unreg_all = hist_unreg_all, @@ -6366,28 +6383,28 @@ hist_enable_count_trigger(struct event_trigger_data *data, } static struct event_trigger_ops hist_enable_trigger_ops = { - .func = hist_enable_trigger, + .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_enable_count_trigger_ops = { - .func = hist_enable_count_trigger, + .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_trigger_ops = { - .func = hist_enable_trigger, + .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops hist_disable_count_trigger_ops = { - .func = hist_enable_count_trigger, + .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, @@ -6429,7 +6446,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file) static struct event_command trigger_hist_enable_cmd = { .name = ENABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, @@ -6440,7 +6457,7 @@ static struct event_command trigger_hist_enable_cmd = { static struct event_command trigger_hist_disable_cmd = { .name = DISABLE_HIST_STR, .trigger_type = ETT_HIST_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .unreg_all = hist_enable_unreg_all, diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index c188045c5f97..d6b4935a78c0 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -168,10 +168,14 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) continue; if (field->filter_type == FILTER_STATIC_STRING) continue; - if (field->filter_type == FILTER_DYN_STRING) { + if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) { u32 *str_item; int str_loc = entry_size & 0xffff; + if (field->filter_type == FILTER_RDYN_STRING) + str_loc -= field->offset + field->size; + str_item = (u32 *)(entry + field->offset); *str_item = str_loc; /* string length is 0. */ } else { @@ -214,7 +218,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) if (field->filter_type == FILTER_STATIC_STRING) { strlcpy(entry + field->offset, addr, field->size); - } else if (field->filter_type == FILTER_DYN_STRING) { + } else if (field->filter_type == FILTER_DYN_STRING || + field->filter_type == FILTER_RDYN_STRING) { int str_len = strlen(addr) + 1; int str_loc = entry_size & 0xffff; u32 *str_item; @@ -229,6 +234,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) strlcpy(entry + (entry_size - str_len), addr, str_len); str_item = (u32 *)(entry + field->offset); + if (field->filter_type == FILTER_RDYN_STRING) + str_loc -= field->offset + field->size; *str_item = (str_len << 16) | str_loc; } else { char **paddr; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index ca9c13b2ecf4..154db74dadbc 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1979,7 +1979,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val); /** * synth_event_add_val - Add a named field's value to an open synth trace * @field_name: The name of the synthetic event field value to set - * @val: The value to set the next field to + * @val: The value to set the named field to * @trace_state: A pointer to object tracking the piecewise trace state * * Set the value of the named field in an event that's been opened by @@ -2054,6 +2054,13 @@ static int create_synth_event(const char *raw_command) last_cmd_set(raw_command); + name = raw_command; + + /* Don't try to process if not our system */ + if (name[0] != 's' || name[1] != ':') + return -ECANCELED; + name += 2; + p = strpbrk(raw_command, " \t"); if (!p) { synth_err(SYNTH_ERR_INVALID_CMD, 0); @@ -2062,12 +2069,6 @@ static int create_synth_event(const char *raw_command) fields = skip_spaces(p); - name = raw_command; - - if (name[0] != 's' || name[1] != ':') - return -ECANCELED; - name += 2; - /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 3d5c07239a2a..d00fee705f9c 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file, if (data->paused) continue; if (!rec) { - data->ops->func(data, buffer, rec, event); + data->ops->trigger(data, buffer, rec, event); continue; } filter = rcu_dereference_sched(data->filter); @@ -78,7 +78,7 @@ event_triggers_call(struct trace_event_file *file, tt |= data->cmd_ops->trigger_type; continue; } - data->ops->func(data, buffer, rec, event); + data->ops->trigger(data, buffer, rec, event); } return tt; } @@ -106,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file, if (data->paused) continue; if (data->cmd_ops->trigger_type & tt) - data->ops->func(data, NULL, NULL, NULL); + data->ops->trigger(data, NULL, NULL, NULL); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -245,7 +245,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff) mutex_lock(&trigger_cmd_mutex); list_for_each_entry(p, &trigger_commands, list) { if (strcmp(p->name, command) == 0) { - ret = p->func(p, file, buff, command, next); + ret = p->parse(p, file, buff, command, next); goto out_unlock; } } @@ -540,7 +540,6 @@ void update_cond_flag(struct trace_event_file *file) /** * register_trigger - Generic event_command @reg implementation * @glob: The raw string used to register the trigger - * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data to associate with the trigger * @file: The trace_event_file associated with the event * @@ -551,7 +550,7 @@ void update_cond_flag(struct trace_event_file *file) * * Return: 0 on success, errno otherwise */ -static int register_trigger(char *glob, struct event_trigger_ops *ops, +static int register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { @@ -589,7 +588,6 @@ out: /** * unregister_trigger - Generic event_command @unreg implementation * @glob: The raw string used to register the trigger - * @ops: The trigger ops associated with the trigger * @test: Trigger-specific data used to find the trigger to remove * @file: The trace_event_file associated with the event * @@ -598,7 +596,7 @@ out: * Usually used directly as the @unreg method in event command * implementations. */ -static void unregister_trigger(char *glob, struct event_trigger_ops *ops, +static void unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file) { @@ -621,8 +619,350 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, data->ops->free(data->ops, data); } +/* + * Event trigger parsing helper functions. + * + * These functions help make it easier to write an event trigger + * parsing function i.e. the struct event_command.parse() callback + * function responsible for parsing and registering a trigger command + * written to the 'trigger' file. + * + * A trigger command (or just 'trigger' for short) takes the form: + * [trigger] [if filter] + * + * The struct event_command.parse() callback (and other struct + * event_command functions) refer to several components of a trigger + * command. Those same components are referenced by the event trigger + * parsing helper functions defined below. These components are: + * + * cmd - the trigger command name + * glob - the trigger command name optionally prefaced with '!' + * param_and_filter - text following cmd and ':' + * param - text following cmd and ':' and stripped of filter + * filter - the optional filter text following (and including) 'if' + * + * To illustrate the use of these componenents, here are some concrete + * examples. For the following triggers: + * + * echo 'traceon:5 if pid == 0' > trigger + * - 'traceon' is both cmd and glob + * - '5 if pid == 0' is the param_and_filter + * - '5' is the param + * - 'if pid == 0' is the filter + * + * echo 'enable_event:sys:event:n' > trigger + * - 'enable_event' is both cmd and glob + * - 'sys:event:n' is the param_and_filter + * - 'sys:event:n' is the param + * - there is no filter + * + * echo 'hist:keys=pid if prio > 50' > trigger + * - 'hist' is both cmd and glob + * - 'keys=pid if prio > 50' is the param_and_filter + * - 'keys=pid' is the param + * - 'if prio > 50' is the filter + * + * echo '!enable_event:sys:event:n' > trigger + * - 'enable_event' the cmd + * - '!enable_event' is the glob + * - 'sys:event:n' is the param_and_filter + * - 'sys:event:n' is the param + * - there is no filter + * + * echo 'traceoff' > trigger + * - 'traceoff' is both cmd and glob + * - there is no param_and_filter + * - there is no param + * - there is no filter + * + * There are a few different categories of event trigger covered by + * these helpers: + * + * - triggers that don't require a parameter e.g. traceon + * - triggers that do require a parameter e.g. enable_event and hist + * - triggers that though they may not require a param may support an + * optional 'n' param (n = number of times the trigger should fire) + * e.g.: traceon:5 or enable_event:sys:event:n + * - triggers that do not support an 'n' param e.g. hist + * + * These functions can be used or ignored as necessary - it all + * depends on the complexity of the trigger, and the granularity of + * the functions supported reflects the fact that some implementations + * may need to customize certain aspects of their implementations and + * won't need certain functions. For instance, the hist trigger + * implementation doesn't use event_trigger_separate_filter() because + * it has special requirements for handling the filter. + */ + +/** + * event_trigger_check_remove - check whether an event trigger specifies remove + * @glob: The trigger command string, with optional remove(!) operator + * + * The event trigger callback implementations pass in 'glob' as a + * parameter. This is the command name either with or without a + * remove(!) operator. This function simply parses the glob and + * determines whether the command corresponds to a trigger removal or + * a trigger addition. + * + * Return: true if this is a remove command, false otherwise + */ +bool event_trigger_check_remove(const char *glob) +{ + return (glob && glob[0] == '!') ? true : false; +} + +/** + * event_trigger_empty_param - check whether the param is empty + * @param: The trigger param string + * + * The event trigger callback implementations pass in 'param' as a + * parameter. This corresponds to the string following the command + * name minus the command name. This function can be called by a + * callback implementation for any command that requires a param; a + * callback that doesn't require a param can ignore it. + * + * Return: true if this is an empty param, false otherwise + */ +bool event_trigger_empty_param(const char *param) +{ + return !param; +} + +/** + * event_trigger_separate_filter - separate an event trigger from a filter + * @param: The param string containing trigger and possibly filter + * @trigger: outparam, will be filled with a pointer to the trigger + * @filter: outparam, will be filled with a pointer to the filter + * @param_required: Specifies whether or not the param string is required + * + * Given a param string of the form '[trigger] [if filter]', this + * function separates the filter from the trigger and returns the + * trigger in *trigger and the filter in *filter. Either the *trigger + * or the *filter may be set to NULL by this function - if not set to + * NULL, they will contain strings corresponding to the trigger and + * filter. + * + * There are two cases that need to be handled with respect to the + * passed-in param: either the param is required, or it is not + * required. If @param_required is set, and there's no param, it will + * return -EINVAL. If @param_required is not set and there's a param + * that starts with a number, that corresponds to the case of a + * trigger with :n (n = number of times the trigger should fire) and + * the parsing continues normally; otherwise the function just returns + * and assumes param just contains a filter and there's nothing else + * to do. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_separate_filter(char *param_and_filter, char **param, + char **filter, bool param_required) +{ + int ret = 0; + + *param = *filter = NULL; + + if (!param_and_filter) { + if (param_required) + ret = -EINVAL; + goto out; + } + + /* + * Here we check for an optional param. The only legal + * optional param is :n, and if that's the case, continue + * below. Otherwise we assume what's left is a filter and + * return it as the filter string for the caller to deal with. + */ + if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) { + *filter = param_and_filter; + goto out; + } + + /* + * Separate the param from the filter (param [if filter]). + * Here we have either an optional :n param or a required + * param and an optional filter. + */ + *param = strsep(¶m_and_filter, " \t"); + + /* + * Here we have a filter, though it may be empty. + */ + if (param_and_filter) { + *filter = skip_spaces(param_and_filter); + if (!**filter) + *filter = NULL; + } +out: + return ret; +} + +/** + * event_trigger_alloc - allocate and init event_trigger_data for a trigger + * @cmd_ops: The event_command operations for the trigger + * @cmd: The cmd string + * @param: The param string + * @private_data: User data to associate with the event trigger + * + * Allocate an event_trigger_data instance and initialize it. The + * @cmd_ops are used along with the @cmd and @param to get the + * trigger_ops to assign to the event_trigger_data. @private_data can + * also be passed in and associated with the event_trigger_data. + * + * Use event_trigger_free() to free an event_trigger_data object. + * + * Return: The trigger_data object success, NULL otherwise + */ +struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data) +{ + struct event_trigger_data *trigger_data; + struct event_trigger_ops *trigger_ops; + + trigger_ops = cmd_ops->get_trigger_ops(cmd, param); + + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); + if (!trigger_data) + return NULL; + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; + trigger_data->cmd_ops = cmd_ops; + trigger_data->private_data = private_data; + + INIT_LIST_HEAD(&trigger_data->list); + INIT_LIST_HEAD(&trigger_data->named_list); + RCU_INIT_POINTER(trigger_data->filter, NULL); + + return trigger_data; +} + +/** + * event_trigger_parse_num - parse and return the number param for a trigger + * @param: The param string + * @trigger_data: The trigger_data for the trigger + * + * Parse the :n (n = number of times the trigger should fire) param + * and set the count variable in the trigger_data to the parsed count. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_parse_num(char *param, + struct event_trigger_data *trigger_data) +{ + char *number; + int ret = 0; + + if (param) { + number = strsep(¶m, ":"); + + if (!strlen(number)) + return -EINVAL; + + /* + * We use the callback data field (which is a pointer) + * as our counter. + */ + ret = kstrtoul(number, 0, &trigger_data->count); + } + + return ret; +} + +/** + * event_trigger_set_filter - set an event trigger's filter + * @cmd_ops: The event_command operations for the trigger + * @file: The event file for the trigger's event + * @param: The string containing the filter + * @trigger_data: The trigger_data for the trigger + * + * Set the filter for the trigger. If the filter is NULL, just return + * without error. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_set_filter(struct event_command *cmd_ops, + struct trace_event_file *file, + char *param, + struct event_trigger_data *trigger_data) +{ + if (param && cmd_ops->set_filter) + return cmd_ops->set_filter(param, trigger_data, file); + + return 0; +} + +/** + * event_trigger_reset_filter - reset an event trigger's filter + * @cmd_ops: The event_command operations for the trigger + * @trigger_data: The trigger_data for the trigger + * + * Reset the filter for the trigger to no filter. + */ +void event_trigger_reset_filter(struct event_command *cmd_ops, + struct event_trigger_data *trigger_data) +{ + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); +} + +/** + * event_trigger_register - register an event trigger + * @cmd_ops: The event_command operations for the trigger + * @file: The event file for the trigger's event + * @glob: The trigger command string, with optional remove(!) operator + * @cmd: The cmd string + * @param: The param string + * @trigger_data: The trigger_data for the trigger + * @n_registered: optional outparam, the number of triggers registered + * + * Register an event trigger. The @cmd_ops are used to call the + * cmd_ops->reg() function which actually does the registration. The + * cmd_ops->reg() function returns the number of triggers registered, + * which is assigned to n_registered, if n_registered is non-NULL. + * + * Return: 0 on success, errno otherwise + */ +int event_trigger_register(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, + char *cmd, + char *param, + struct event_trigger_data *trigger_data, + int *n_registered) +{ + int ret; + + if (n_registered) + *n_registered = 0; + + ret = cmd_ops->reg(glob, trigger_data, file); + /* + * The above returns on success the # of functions enabled, + * but if it didn't find any functions it returns zero. + * Consider no functions a failure too. + */ + if (!ret) { + cmd_ops->unreg(glob, trigger_data, file); + ret = -ENOENT; + } else if (ret > 0) { + if (n_registered) + *n_registered = ret; + /* Just return zero, not the number of enabled functions */ + ret = 0; + } + + return ret; +} + +/* + * End event trigger parsing helper functions. + */ + /** - * event_trigger_callback - Generic event_command @func implementation + * event_trigger_parse - Generic event_command @parse implementation * @cmd_ops: The command ops, used for trigger registration * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger @@ -632,15 +972,15 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, * Common implementation for event command parsing and trigger * instantiation. * - * Usually used directly as the @func method in event command + * Usually used directly as the @parse method in event command * implementations. * * Return: 0 on success, errno otherwise */ static int -event_trigger_callback(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +event_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct event_trigger_data *trigger_data; struct event_trigger_ops *trigger_ops; @@ -673,7 +1013,7 @@ event_trigger_callback(struct event_command *cmd_ops, INIT_LIST_HEAD(&trigger_data->named_list); if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); kfree(trigger_data); ret = 0; goto out; @@ -708,14 +1048,14 @@ event_trigger_callback(struct event_command *cmd_ops, out_reg: /* Up the trigger_data count to make sure reg doesn't free it on failure */ event_trigger_init(trigger_ops, trigger_data); - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. * Consider no functions a failure too. */ if (!ret) { - cmd_ops->unreg(glob, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob, trigger_data, file); ret = -ENOENT; } else if (ret > 0) ret = 0; @@ -1023,28 +1363,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops traceon_trigger_ops = { - .func = traceon_trigger, + .trigger = traceon_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceon_count_trigger_ops = { - .func = traceon_count_trigger, + .trigger = traceon_count_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_trigger_ops = { - .func = traceoff_trigger, + .trigger = traceoff_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops traceoff_count_trigger_ops = { - .func = traceoff_count_trigger, + .trigger = traceoff_count_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1069,7 +1409,7 @@ onoff_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_traceon_cmd = { .name = "traceon", .trigger_type = ETT_TRACE_ONOFF, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1080,7 +1420,7 @@ static struct event_command trigger_traceoff_cmd = { .name = "traceoff", .trigger_type = ETT_TRACE_ONOFF, .flags = EVENT_CMD_FL_POST_TRIGGER, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = onoff_get_trigger_ops, @@ -1116,14 +1456,14 @@ snapshot_count_trigger(struct event_trigger_data *data, } static int -register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, +register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { if (tracing_alloc_snapshot_instance(file->tr) != 0) return 0; - return register_trigger(glob, ops, data, file); + return register_trigger(glob, data, file); } static int @@ -1135,14 +1475,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops snapshot_trigger_ops = { - .func = snapshot_trigger, + .trigger = snapshot_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops snapshot_count_trigger_ops = { - .func = snapshot_count_trigger, + .trigger = snapshot_count_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1157,7 +1497,7 @@ snapshot_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_snapshot_cmd = { .name = "snapshot", .trigger_type = ETT_SNAPSHOT, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_snapshot_trigger, .unreg = unregister_trigger, .get_trigger_ops = snapshot_get_trigger_ops, @@ -1226,14 +1566,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, } static struct event_trigger_ops stacktrace_trigger_ops = { - .func = stacktrace_trigger, + .trigger = stacktrace_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; static struct event_trigger_ops stacktrace_count_trigger_ops = { - .func = stacktrace_count_trigger, + .trigger = stacktrace_count_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, @@ -1249,7 +1589,7 @@ static struct event_command trigger_stacktrace_cmd = { .name = "stacktrace", .trigger_type = ETT_STACKTRACE, .flags = EVENT_CMD_FL_POST_TRIGGER, - .func = event_trigger_callback, + .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, .get_trigger_ops = stacktrace_get_trigger_ops, @@ -1353,36 +1693,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops, } static struct event_trigger_ops event_enable_trigger_ops = { - .func = event_enable_trigger, + .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_enable_count_trigger_ops = { - .func = event_enable_count_trigger, + .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_trigger_ops = { - .func = event_enable_trigger, + .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; static struct event_trigger_ops event_disable_count_trigger_ops = { - .func = event_enable_count_trigger, + .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -int event_enable_trigger_func(struct event_command *cmd_ops, - struct trace_event_file *file, - char *glob, char *cmd, char *param) +int event_enable_trigger_parse(struct event_command *cmd_ops, + struct trace_event_file *file, + char *glob, char *cmd, char *param) { struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; @@ -1455,7 +1795,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, trigger_data->private_data = enable_data; if (glob[0] == '!') { - cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + cmd_ops->unreg(glob+1, trigger_data, file); kfree(trigger_data); kfree(enable_data); ret = 0; @@ -1502,7 +1842,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, ret = trace_event_enable_disable(event_enable_file, 1, 1); if (ret < 0) goto out_put; - ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); + ret = cmd_ops->reg(glob, trigger_data, file); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. @@ -1532,7 +1872,6 @@ int event_enable_trigger_func(struct event_command *cmd_ops, } int event_enable_register_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { @@ -1574,7 +1913,6 @@ out: } void event_enable_unregister_trigger(char *glob, - struct event_trigger_ops *ops, struct event_trigger_data *test, struct trace_event_file *file) { @@ -1628,7 +1966,7 @@ event_enable_get_trigger_ops(char *cmd, char *param) static struct event_command trigger_enable_cmd = { .name = ENABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, @@ -1638,7 +1976,7 @@ static struct event_command trigger_enable_cmd = { static struct event_command trigger_disable_cmd = { .name = DISABLE_EVENT_STR, .trigger_type = ETT_EVENT_ENABLE, - .func = event_enable_trigger_func, + .parse = event_enable_trigger_parse, .reg = event_enable_register_trigger, .unreg = event_enable_unregister_trigger, .get_trigger_ops = event_enable_get_trigger_ops, diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 4e1257f50aa3..508f14af4f2c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -328,11 +328,9 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk) static void __disable_trace_kprobe(struct trace_probe *tp) { - struct trace_probe *pos; struct trace_kprobe *tk; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tk = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { if (!trace_kprobe_is_registered(tk)) continue; if (trace_kprobe_is_return(tk)) @@ -349,7 +347,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp) static int enable_trace_kprobe(struct trace_event_call *call, struct trace_event_file *file) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_kprobe *tk; bool enabled; int ret = 0; @@ -370,8 +368,7 @@ static int enable_trace_kprobe(struct trace_event_call *call, if (enabled) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tk = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) { if (trace_kprobe_has_gone(tk)) continue; ret = __enable_trace_kprobe(tk); @@ -560,11 +557,9 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, struct trace_kprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; - struct trace_probe *pos; int i; - list_for_each_entry(pos, &tpe->probes, list) { - orig = container_of(pos, struct trace_kprobe, tp); + list_for_each_entry(orig, &tpe->probes, tp.list) { if (strcmp(trace_kprobe_symbol(orig), trace_kprobe_symbol(comp)) || trace_kprobe_offset(orig) != trace_kprobe_offset(comp)) @@ -1176,15 +1171,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_kprobe *tk; + unsigned long nmissed; if (!is_trace_kprobe(ev)) return 0; tk = to_trace_kprobe(ev); + nmissed = trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), - tk->rp.kp.nmissed); + nmissed); return 0; } @@ -1384,17 +1382,11 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, if (trace_trigger_soft_disabled(trace_file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = trace_file; - dsize = __get_data_size(&tk->tp, regs); - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, - call->event.type, - sizeof(*entry) + tk->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, + sizeof(*entry) + tk->tp.size + dsize); + if (!entry) return; fbuffer.regs = regs; @@ -1431,16 +1423,11 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, if (trace_trigger_soft_disabled(trace_file)) return; - fbuffer.trace_ctx = tracing_gen_ctx(); - fbuffer.trace_file = trace_file; - dsize = __get_data_size(&tk->tp, regs); - fbuffer.event = - trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, - call->event.type, - sizeof(*entry) + tk->tp.size + dsize, - fbuffer.trace_ctx); - if (!fbuffer.event) + + entry = trace_event_buffer_reserve(&fbuffer, trace_file, + sizeof(*entry) + tk->tp.size + dsize); + if (!entry) return; fbuffer.regs = regs; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 89d6cbac6f10..870a08da5b48 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -138,8 +138,7 @@ static void osnoise_unregister_instance(struct trace_array *tr) if (!found) return; - synchronize_rcu(); - kfree(inst); + kvfree_rcu(inst); } /* @@ -2122,6 +2121,13 @@ out_unhook_irq: return -EINVAL; } +static void osnoise_unhook_events(void) +{ + unhook_thread_events(); + unhook_softirq_events(); + unhook_irq_events(); +} + /* * osnoise_workload_start - start the workload and hook to events */ @@ -2154,7 +2160,14 @@ static int osnoise_workload_start(void) retval = start_per_cpu_kthreads(); if (retval) { - unhook_irq_events(); + trace_osnoise_callback_enabled = false; + /* + * Make sure that ftrace_nmi_enter/exit() see + * trace_osnoise_callback_enabled as false before continuing. + */ + barrier(); + + osnoise_unhook_events(); return retval; } @@ -2185,9 +2198,7 @@ static void osnoise_workload_stop(void) stop_per_cpu_kthreads(); - unhook_irq_events(); - unhook_softirq_events(); - unhook_thread_events(); + osnoise_unhook_events(); } static void osnoise_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3547e7176ff7..8aa493d25c73 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -445,14 +445,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) char irqs_off; int hardirq; int softirq; + int bh_off; int nmi; nmi = entry->flags & TRACE_FLAG_NMI; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + bh_off = entry->flags & TRACE_FLAG_BH_OFF; irqs_off = + (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' : (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + bh_off ? 'b' : (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.'; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 3ed2a3f37297..73d90179b51b 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -356,6 +356,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs) return -EINVAL; } *pbuf = kstrndup(str, len - 1, GFP_KERNEL); + if (!*pbuf) + return -ENOMEM; return 0; } @@ -1138,8 +1140,7 @@ int trace_probe_remove_file(struct trace_probe *tp, return -ENOENT; list_del_rcu(&link->list); - synchronize_rcu(); - kfree(link); + kvfree_rcu(link); if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, TP_FLAG_TRACE); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8bfcd3b09422..f755bde42fd0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -323,8 +323,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, sys_data->enter_event->event.type, size, trace_ctx); if (!event) return; @@ -367,8 +366,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, sys_data->exit_event->event.type, sizeof(*entry), trace_ctx); if (!event) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 4f35514a48f3..9711589273cd 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -410,12 +410,10 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, struct trace_uprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; - struct trace_probe *pos; struct inode *comp_inode = d_real_inode(comp->path.dentry); int i; - list_for_each_entry(pos, &tpe->probes, list) { - orig = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(orig, &tpe->probes, tp.list) { if (comp_inode != d_real_inode(orig->path.dentry) || comp->offset != orig->offset) continue; @@ -950,8 +948,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; - struct trace_buffer *buffer; - struct ring_buffer_event *event; + struct trace_event_buffer fbuffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); @@ -966,12 +963,10 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, trace_file, - call->event.type, size, 0); - if (!event) + entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); + if (!entry) return; - entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { entry->vaddr[0] = func; entry->vaddr[1] = instruction_pointer(regs); @@ -983,7 +978,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); + trace_event_buffer_commit(&fbuffer); } /* uprobe handler */ @@ -1076,14 +1071,12 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) static void __probe_event_disable(struct trace_probe *tp) { - struct trace_probe *pos; struct trace_uprobe *tu; tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { if (!tu->inode) continue; @@ -1095,7 +1088,7 @@ static void __probe_event_disable(struct trace_probe *tp) static int probe_event_enable(struct trace_event_call *call, struct trace_event_file *file, filter_func_t filter) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; bool enabled; int ret; @@ -1130,8 +1123,7 @@ static int probe_event_enable(struct trace_event_call *call, if (ret) goto err_flags; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = trace_uprobe_enable(tu, filter); if (ret) { __probe_event_disable(tp); @@ -1276,7 +1268,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, static int uprobe_perf_close(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; int ret = 0; @@ -1288,8 +1280,7 @@ static int uprobe_perf_close(struct trace_event_call *call, if (trace_uprobe_filter_remove(tu->tp.event->filter, event)) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (ret) break; @@ -1301,7 +1292,7 @@ static int uprobe_perf_close(struct trace_event_call *call, static int uprobe_perf_open(struct trace_event_call *call, struct perf_event *event) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_uprobe *tu; int err = 0; @@ -1313,8 +1304,7 @@ static int uprobe_perf_open(struct trace_event_call *call, if (trace_uprobe_filter_add(tu->tp.event->filter, event)) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - tu = container_of(pos, struct trace_uprobe, tp); + list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); @@ -1620,6 +1610,11 @@ create_local_trace_uprobe(char *name, unsigned long offs, tu->path = path; tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); + if (!tu->filename) { + ret = -ENOMEM; + goto error; + } + init_trace_event_call(tu); ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1a72b7d95cdc..4d34dc0b0fee 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -21,6 +21,7 @@ static const char *random_strings[] = { static void simple_thread_func(int cnt) { + unsigned long bitmask[1] = {0xdeadbeefUL}; int array[6]; int len = cnt % 5; int i; @@ -43,6 +44,8 @@ static void simple_thread_func(int cnt) trace_foo_with_template_cond("prints other times", cnt); trace_foo_with_template_print("I have to be different", cnt); + + trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); } static int simple_thread(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index e61471ab7d14..cbbbb83beced 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -155,7 +155,7 @@ * * To assign this string, use the helper macro __assign_str_len(). * - * __assign_str(foo, bar, len); + * __assign_str_len(foo, bar, len); * * Then len + 1 is allocated to the ring buffer, and a nul terminating * byte is added. This is similar to: @@ -506,6 +506,39 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, TP_ARGS(foo, bar), TP_printk("bar %s %d", __get_str(foo), __entry->bar)); +/* + * There are yet another __rel_loc dynamic data attribute. If you + * use __rel_dynamic_array() and __rel_string() etc. macros, you + * can use this attribute. There is no difference from the viewpoint + * of functionality with/without 'rel' but the encoding is a bit + * different. This is expected to be used with user-space event, + * there is no reason that the kernel event use this, but only for + * testing. + */ + +TRACE_EVENT(foo_rel_loc, + + TP_PROTO(const char *foo, int bar, unsigned long *mask), + + TP_ARGS(foo, bar, mask), + + TP_STRUCT__entry( + __rel_string( foo, foo ) + __field( int, bar ) + __rel_bitmask( bitmask, + BITS_PER_BYTE * sizeof(unsigned long) ) + ), + + TP_fast_assign( + __assign_rel_str(foo, foo); + __entry->bar = bar; + __assign_rel_bitmask(bitmask, mask, + BITS_PER_BYTE * sizeof(unsigned long)); + ), + + TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar, + __get_rel_bitmask(bitmask)) +); #endif /***** NOTICE! The #if protection ends here. *****/ diff --git a/scripts/Makefile b/scripts/Makefile index 9adb6d247818..b082d2f93357 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -17,6 +17,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include +HOSTLDLIBS_sorttable = -lpthread HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include HOSTCFLAGS_sign-file.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_sign-file = $(CRYPTO_LIBS) @@ -29,7 +30,10 @@ ARCH := x86 endif HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED -HOSTLDLIBS_sorttable = -lpthread +endif + +ifdef CONFIG_DYNAMIC_FTRACE +HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 9716f285e404..82bc736532f1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -403,6 +403,9 @@ if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then ${RESOLVE_BTFIDS} vmlinux fi +info SYSMAP System.map +mksysmap vmlinux System.map + if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then info SORTTAB vmlinux if ! sorttable vmlinux; then @@ -411,9 +414,6 @@ if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then fi fi -info SYSMAP System.map -mksysmap vmlinux System.map - # step a (see comment above) if [ -n "${CONFIG_KALLSYMS}" ]; then mksysmap ${kallsyms_vmlinux} .tmp_System.map diff --git a/scripts/sorttable.c b/scripts/sorttable.c index b7c2ad71f9cf..70bdc787ddfb 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -30,6 +30,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <errno.h> +#include <pthread.h> #include <tools/be_byteshift.h> #include <tools/le_byteshift.h> diff --git a/scripts/sorttable.h b/scripts/sorttable.h index a2baa2fefb13..1e8b77928fa4 100644 --- a/scripts/sorttable.h +++ b/scripts/sorttable.h @@ -19,6 +19,9 @@ #undef extable_ent_size #undef compare_extable +#undef get_mcount_loc +#undef sort_mcount_loc +#undef elf_mcount_loc #undef do_sort #undef Elf_Addr #undef Elf_Ehdr @@ -41,6 +44,9 @@ #ifdef SORTTABLE_64 # define extable_ent_size 16 # define compare_extable compare_extable_64 +# define get_mcount_loc get_mcount_loc_64 +# define sort_mcount_loc sort_mcount_loc_64 +# define elf_mcount_loc elf_mcount_loc_64 # define do_sort do_sort_64 # define Elf_Addr Elf64_Addr # define Elf_Ehdr Elf64_Ehdr @@ -62,6 +68,9 @@ #else # define extable_ent_size 8 # define compare_extable compare_extable_32 +# define get_mcount_loc get_mcount_loc_32 +# define sort_mcount_loc sort_mcount_loc_32 +# define elf_mcount_loc elf_mcount_loc_32 # define do_sort do_sort_32 # define Elf_Addr Elf32_Addr # define Elf_Ehdr Elf32_Ehdr @@ -84,8 +93,6 @@ #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* ORC unwinder only support X86_64 */ -#include <errno.h> -#include <pthread.h> #include <asm/orc_types.h> #define ERRSTR_MAXSZ 256 @@ -191,7 +198,64 @@ static int compare_extable(const void *a, const void *b) return 1; return 0; } +#ifdef MCOUNT_SORT_ENABLED +struct elf_mcount_loc { + Elf_Ehdr *ehdr; + Elf_Shdr *init_data_sec; + uint_t start_mcount_loc; + uint_t stop_mcount_loc; +}; + +/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */ +static void *sort_mcount_loc(void *arg) +{ + struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg; + uint_t offset = emloc->start_mcount_loc - _r(&(emloc->init_data_sec)->sh_addr) + + _r(&(emloc->init_data_sec)->sh_offset); + uint_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc; + unsigned char *start_loc = (void *)emloc->ehdr + offset; + + qsort(start_loc, count/sizeof(uint_t), sizeof(uint_t), compare_extable); + return NULL; +} + +/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */ +static void get_mcount_loc(uint_t *_start, uint_t *_stop) +{ + FILE *file_start, *file_stop; + char start_buff[20]; + char stop_buff[20]; + int len = 0; + + file_start = popen(" grep start_mcount System.map | awk '{print $1}' ", "r"); + if (!file_start) { + fprintf(stderr, "get start_mcount_loc error!"); + return; + } + + file_stop = popen(" grep stop_mcount System.map | awk '{print $1}' ", "r"); + if (!file_stop) { + fprintf(stderr, "get stop_mcount_loc error!"); + pclose(file_start); + return; + } + + while (fgets(start_buff, sizeof(start_buff), file_start) != NULL) { + len = strlen(start_buff); + start_buff[len - 1] = '\0'; + } + *_start = strtoul(start_buff, NULL, 16); + + while (fgets(stop_buff, sizeof(stop_buff), file_stop) != NULL) { + len = strlen(stop_buff); + stop_buff[len - 1] = '\0'; + } + *_stop = strtoul(stop_buff, NULL, 16); + pclose(file_start); + pclose(file_stop); +} +#endif static int do_sort(Elf_Ehdr *ehdr, char const *const fname, table_sort_t custom_sort) @@ -217,6 +281,12 @@ static int do_sort(Elf_Ehdr *ehdr, int idx; unsigned int shnum; unsigned int shstrndx; +#ifdef MCOUNT_SORT_ENABLED + struct elf_mcount_loc mstruct; + uint_t _start_mcount_loc = 0; + uint_t _stop_mcount_loc = 0; + pthread_t mcount_sort_thread; +#endif #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) unsigned int orc_ip_size = 0; unsigned int orc_size = 0; @@ -253,6 +323,17 @@ static int do_sort(Elf_Ehdr *ehdr, symtab_shndx = (Elf32_Word *)((const char *)ehdr + _r(&s->sh_offset)); +#ifdef MCOUNT_SORT_ENABLED + /* locate the .init.data section in vmlinux */ + if (!strcmp(secstrings + idx, ".init.data")) { + get_mcount_loc(&_start_mcount_loc, &_stop_mcount_loc); + mstruct.ehdr = ehdr; + mstruct.init_data_sec = s; + mstruct.start_mcount_loc = _start_mcount_loc; + mstruct.stop_mcount_loc = _stop_mcount_loc; + } +#endif + #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED) /* locate the ORC unwind tables */ if (!strcmp(secstrings + idx, ".orc_unwind_ip")) { @@ -294,6 +375,23 @@ static int do_sort(Elf_Ehdr *ehdr, goto out; } #endif + +#ifdef MCOUNT_SORT_ENABLED + if (!mstruct.init_data_sec || !_start_mcount_loc || !_stop_mcount_loc) { + fprintf(stderr, + "incomplete mcount's sort in file: %s\n", + fname); + goto out; + } + + /* create thread to sort mcount_loc concurrently */ + if (pthread_create(&mcount_sort_thread, NULL, &sort_mcount_loc, &mstruct)) { + fprintf(stderr, + "pthread_create mcount_sort_thread failed '%s': %s\n", + strerror(errno), fname); + goto out; + } +#endif if (!extab_sec) { fprintf(stderr, "no __ex_table in file: %s\n", fname); goto out; @@ -364,11 +462,11 @@ out: void *retval = NULL; /* wait for ORC tables sort done */ rc = pthread_join(orc_sort_thread, &retval); - if (rc) + if (rc) { fprintf(stderr, "pthread_join failed '%s': %s\n", strerror(errno), fname); - else if (retval) { + } else if (retval) { rc = -1; fprintf(stderr, "failed to sort ORC tables '%s': %s\n", @@ -376,5 +474,23 @@ out: } } #endif + +#ifdef MCOUNT_SORT_ENABLED + if (mcount_sort_thread) { + void *retval = NULL; + /* wait for mcount sort done */ + rc = pthread_join(mcount_sort_thread, &retval); + if (rc) { + fprintf(stderr, + "pthread_join failed '%s': %s\n", + strerror(errno), fname); + } else if (retval) { + rc = -1; + fprintf(stderr, + "failed to sort mcount '%s': %s\n", + (char *)retval, fname); + } + } +#endif return rc; } diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fe58843d047c..8e24c4c78c7f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field) return 0; } +static int field_is_relative_dynamic(struct tep_format_field *field) +{ + if (strncmp(field->type, "__rel_loc", 9) == 0) + return 1; + + return 0; +} + static int field_is_long(struct tep_format_field *field) { /* includes long long */ @@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_STRING; if (field_is_dynamic(field)) field->flags |= TEP_FIELD_IS_DYNAMIC; + if (field_is_relative_dynamic(field)) + field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; if (field_is_long(field)) field->flags |= TEP_FIELD_IS_LONG; @@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, arg->type = TEP_PRINT_STRING; arg->string.string = token; - arg->string.offset = -1; + arg->string.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar arg->type = TEP_PRINT_BITMASK; arg->bitmask.bitmask = token; - arg->bitmask.offset = -1; + arg->bitmask.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_int_array(event, arg, tok); } - if (strcmp(token, "__get_str") == 0) { + if (strcmp(token, "__get_str") == 0 || + strcmp(token, "__get_rel_str") == 0) { free_token(token); return process_str(event, arg, tok); } - if (strcmp(token, "__get_bitmask") == 0) { + if (strcmp(token, "__get_bitmask") == 0 || + strcmp(token, "__get_rel_bitmask") == 0) { free_token(token); return process_bitmask(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array") == 0) { + if (strcmp(token, "__get_dynamic_array") == 0 || + strcmp(token, "__get_rel_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array_len") == 0) { + if (strcmp(token, "__get_dynamic_array_len") == 0 || + strcmp(token, "__get_rel_dynamic_array_len") == 0) { free_token(token); return process_dynamic_array_len(event, arg, tok); } @@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_STRING: { int str_offset; - if (arg->string.offset == -1) { - struct tep_format_field *f; + if (!arg->string.field) + arg->string.field = tep_find_any_field(event, arg->string.string); + if (!arg->string.field) + break; - f = tep_find_any_field(event, arg->string.string); - arg->string.offset = f->offset; - } - str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, + *(unsigned int *)(data + arg->string.field->offset)); str_offset &= 0xffff; + if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) + str_offset += arg->string.field->offset + arg->string.field->size; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; } @@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, int bitmask_offset; int bitmask_size; - if (arg->bitmask.offset == -1) { - struct tep_format_field *f; - - f = tep_find_any_field(event, arg->bitmask.bitmask); - arg->bitmask.offset = f->offset; - } - bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + if (!arg->bitmask.field) + arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); + if (!arg->bitmask.field) + break; + bitmask_offset = data2host4(tep, + *(unsigned int *)(data + arg->bitmask.field->offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; + if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) + bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; @@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, data + offset, field->size); *len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else *len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a67ad9a5b835..41d4f9f6a843 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,6 +125,7 @@ enum tep_format_flags { TEP_FIELD_IS_LONG = 32, TEP_FIELD_IS_FLAG = 64, TEP_FIELD_IS_SYMBOLIC = 128, + TEP_FIELD_IS_RELATIVE = 256, }; struct tep_format_field { @@ -153,12 +154,12 @@ struct tep_print_arg_atom { struct tep_print_arg_string { char *string; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_bitmask { char *bitmask; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 368826bb5a57..5df177070d53 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { addr = *(unsigned int *)val; - val = record->data + (addr & 0xffff); size = addr >> 16; + addr &= 0xffff; + if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) + addr += arg->str.field->offset + arg->str.field->size; + val = record->data + addr; } /* diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index df9fc00b4cd6..273237a9c5e6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } val = (uintptr_t)(sample->raw_data + offset); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8f7705bbc2da..9e0aee276df8 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; + if (flags & TEP_FIELD_IS_RELATIVE) + offset += fmtf->offset + fmtf->size; } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac0127be0459..f29d37004f55 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2706,6 +2706,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } return sample->raw_data + offset; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7f782a31bda3..82c7f034d91a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 32a721b3e9a5..a5d945415bbc 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c0c010350bc2..d1f1501ce7fc 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -942,6 +942,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a111065b484e..d9a106f0edb2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2365,6 +2365,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; /* record max width for output */ if (size > hde->dynamic_len) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 000000000000..2d52ff0bff7d --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,102 @@ +NAME := rtla +VERSION := 0.5 + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) +LDFLAGS := -ggdb +LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rtla/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ +endif + +.PHONY: all +all: rtla + +rtla: $(OBJ) doc + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) + +static: $(OBJ) + $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl + +.PHONY: install +install: doc_install + $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) + $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 000000000000..6c88446f7e74 --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,36 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla is a meta-tool that includes a set of commands that +aims to analyze the real-time properties of Linux. But, instead of +testing Linux as a black box, rtla leverages kernel tracing +capabilities to provide precise information about the properties +and root causes of unexpected results. + +Installing RTLA + +RTLA depends on some libraries and tools. More precisely, it depends on the +following libraries: + + - libtracefs + - libtraceevent + - procps + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 000000000000..7b73d1eccd0e --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise_get_cpus - return the original "osnoise/cpus" content + * + * It also saves the value to be restored. + */ +char *osnoise_get_cpus(struct osnoise_context *context) +{ + if (context->curr_cpus) + return context->curr_cpus; + + if (context->orig_cpus) + return context->orig_cpus; + + context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL); + + /* + * The error value (NULL) is the same for tracefs_instance_file_read() + * and this functions, so: + */ + return context->orig_cpus; +} + +/* + * osnoise_set_cpus - configure osnoise to run on *cpus + * + * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat + * will run. This function opens this file, saves the current value, + * and set the cpus passed as argument. + */ +int osnoise_set_cpus(struct osnoise_context *context, char *cpus) +{ + char *orig_cpus = osnoise_get_cpus(context); + char buffer[1024]; + int retval; + + if (!orig_cpus) + return -1; + + context->curr_cpus = strdup(cpus); + if (!context->curr_cpus) + return -1; + + snprintf(buffer, 1024, "%s\n", cpus); + + debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer); + if (retval < 0) { + free(context->curr_cpus); + context->curr_cpus = NULL; + return -1; + } + + return 0; +} + +/* + * osnoise_restore_cpus - restore the original "osnoise/cpus" + * + * osnoise_set_cpus() saves the original data for the "osnoise/cpus" + * file. This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. + */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. + * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? */ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) +{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * enable_osnoise - enable osnoise tracer in the trace_instance + */ +int enable_osnoise(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "osnoise"); +} + +/* + * enable_timerlat - enable timerlat tracer in the trace_instance + */ +int enable_timerlat(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "timerlat"); +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. + */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable osnoiser tracer for tracing\n"); + goto out_err; + } + + return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +static void osnoise_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + osnoise_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + osnoise_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + osnoise_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + osnoise_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 000000000000..9e4b2e2a4559 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "trace.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. + */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; +}; + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. + */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long print_stack); + +/* + * osnoise_tool - osnoise based tool definition. + */ +struct osnoise_tool { + struct trace_instance trace; + struct osnoise_context *context; + void *data; + void *params; + time_t start_time; +}; + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(char *tracer); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 000000000000..180fcbe423cd --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" + +struct osnoise_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int set_sched; + int output_divisor; + struct sched_attr sched_param; + + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if (!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + duration = duration / params->output_divisor; + + if (data->bucket_size) + bucket = duration / data->bucket_size; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. + */ + bucket_size = params->output_divisor * params->bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_hist_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_sample / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct osnoise_hist_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'e'}, + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + osnoise_hist_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_hist_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_hist_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->no_header = 1; + break; + case '1': /* no summary */ + params->no_summary = 1; + break; + case '2': /* no index */ + params->no_index = 1; + break; + case '3': /* with zeros */ + params->with_zeros = 1; + break; + default: + osnoise_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_index && !params->with_zeros) + osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + + return params; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct osnoise_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_hist_set_signals - handles the signal to stop the tool + */ +static void +osnoise_hist_set_signals(struct osnoise_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int osnoise_hist_main(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = osnoise_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_destroy; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_destroy; + } + + retval = osnoise_init_trace_hist(tool); + if (retval) + goto out_destroy; + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + osnoise_read_trace_hist(tool); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + osnoise_free_histogram(tool->data); +out_destroy: + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 000000000000..332b2ac205fc --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise top parameters + */ +struct osnoise_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void +osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Operating System Noise"); + trace_seq_printf(s, " "); + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI IRQ Softirq Thread"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void +osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +void osnoise_top_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters + */ +struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::", + long_options, &option_index); + + /* Detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + osnoise_top_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_top_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_top_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + default: + osnoise_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("osnoise needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; + +out_err: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_top_set_signals - handles the signal to stop the tool + */ +static void osnoise_top_set_signals(struct osnoise_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int osnoise_top_main(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_top_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_top(params); + if (!tool) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = osnoise_top_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_top_set_signals(params); + + do { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + osnoise_print_stats(params, tool); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + } while (!stop_tracing); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("osnoise hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c new file mode 100644 index 000000000000..09bd21b8af81 --- /dev/null +++ b/tools/tracing/rtla/src/rtla.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "osnoise.h" +#include "timerlat.h" + +/* + * rtla_usage - print rtla usage + */ +static void rtla_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "rtla version " VERSION, + "", + " usage: rtla COMMAND ...", + "", + " commands:", + " osnoise - gives information about the operating system noise (osnoise)", + " timerlat - measures the timer irq and thread latency", + "", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * run_command - try to run a rtla tool command + * + * It returns 0 if it fails. The tool's main will generally not + * return as they should call exit(). + */ +int run_command(int argc, char **argv, int start_position) +{ + if (strcmp(argv[start_position], "osnoise") == 0) { + osnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "timerlat") == 0) { + timerlat_main(argc-start_position, &argv[start_position]); + goto ran; + } + + return 0; +ran: + return 1; +} + +int main(int argc, char *argv[]) +{ + int retval; + + /* is it an alias? */ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(); + exit(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(); + exit(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 000000000000..97abbf494fee --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "timerlat.h" + +static void timerlat_usage(void) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + timerlat_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + timerlat_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + timerlat_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + timerlat_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 000000000000..88561bfd14f3 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +int timerlat_hist_main(int argc, char *argv[]); +int timerlat_top_main(int argc, char *argv[]); +int timerlat_main(int argc, char *argv[]); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 000000000000..235f9620ef3d --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int set_sched; + struct sched_attr sched_param; + + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct timerlat_hist_cpu { + int *irq; + int *thread; + + int irq_count; + int thread_count; + + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_hist_data { + struct timerlat_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * timerlat_free_histogram - free runtime data + */ +static void +timerlat_free_histogram(struct timerlat_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].irq) + free(data->hist[cpu].irq); + + if (data->hist[cpu].thread) + free(data->hist[cpu].thread); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_hist_data +*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct timerlat_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + /* one histogram for IRQ and one for thread, per cpu */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); + if (!data->hist[cpu].irq) + goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); + if (!data->hist[cpu].thread) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].min_irq = ~0; + data->hist[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_histogram(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + latency = latency / params->output_divisor; + + if (data->bucket_size) + bucket = latency / data->bucket_size; + + if (!thread) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long thread, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_thread); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_irq / data->hist[cpu].irq_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for all CPUs + */ +static void +timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + total += data->hist[cpu].irq[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[bucket]); + } + + if (!params->no_thread) { + total += data->hist[cpu].thread[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[bucket]); + } + + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[data->entries]); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + timerlat_print_summary(params, trace, data); +} + +/* + * timerlat_hist_usage - prints timerlat top usage message + */ +static void timerlat_hist_usage(char *usage) +{ + int i; + + char *msg[] = { + "", + " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_hist_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'e'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + timerlat_hist_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_hist_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + case '0': /* no irq */ + params->no_irq = 1; + break; + case '1': /* no thread */ + params->no_thread = 1; + break; + case '2': /* no header */ + params->no_header = 1; + break; + case '3': /* no summary */ + params->no_summary = 1; + break; + case '4': /* no index */ + params->no_index = 1; + break; + case '5': /* with zeros */ + params->with_zeros = 1; + break; + default: + timerlat_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_irq && params->no_thread) + timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + + if (params->no_index && !params->with_zeros) + timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + + return params; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + if (params->print_stack) { + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct timerlat_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_hist_set_signals - handles the signal to stop the tool + */ +static void +timerlat_hist_set_signals(struct timerlat_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int timerlat_hist_main(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = timerlat_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = timerlat_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_hist; + } + + trace = &tool->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + timerlat_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + timerlat_free_histogram(tool->data); + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c new file mode 100644 index 000000000000..1ebd5291539c --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct timerlat_top_cpu { + int irq_count; + int thread_count; + + unsigned long long cur_irq; + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long cur_thread; + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_top_data { + struct timerlat_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * timerlat_free_top - free runtime data + */ +static void +timerlat_free_top(struct timerlat_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) +{ + struct timerlat_top_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->cpu_data[cpu].min_irq = ~0; + data->cpu_data[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_top(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_top_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + if (!thread) { + cpu_data->irq_count++; + cpu_data->cur_irq = latency; + update_min(&cpu_data->min_irq, &latency); + update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct osnoise_tool *top) +{ + struct timerlat_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, + params->output_divisor == 1 ? "ns" : "us", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + + struct timerlat_top_params *params = top->params; + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. + */ + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - |"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_thread / cpu_data->thread_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + timerlat_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + timerlat_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(char *usage) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_top_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct timerlat_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + timerlat_top_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_top_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + default: + timerlat_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(top->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(top->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + + if (params->print_stack) { + retval = osnoise_set_print_stack(top->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct timerlat_top_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + top->params = params; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_top_set_signals - handles the signal to stop the tool + */ +static void +timerlat_top_set_signals(struct timerlat_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int timerlat_top_main(int argc, char *argv[]) +{ + struct timerlat_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *top; + int return_value = 1; + int retval; + + params = timerlat_top_parse_args(argc, argv); + if (!params) + exit(1); + + top = timerlat_init_top(params); + if (!top) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = timerlat_top_apply_config(top, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &top->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + timerlat_print_stats(params, top); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + }; + + timerlat_print_stats(params, top); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + timerlat_free_top(top->data); + osnoise_destroy_tool(top); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 000000000000..107a0c6387f7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("tracer %s not found!\n", tracer_name); + + err_msg("failed to enable the tracer %s\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + goto out_close; + + retval = write(out_fd, buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + } + + if (trace->seq) + free(trace->seq); + + if (trace->tep) + tep_free(trace->tep); +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 000000000000..0ea1df0ad9a7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +int enable_osnoise(struct trace_instance *trace); +int enable_timerlat(struct trace_instance *trace); + +struct tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 000000000000..1c9f0eea6166 --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <proc/readproc.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> + +#include "utils.h" + +#define MAX_MSG_LENGTH 1024 +int config_debug; + +/* + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * get_llong_from_str - get a long long int from a string + */ +long long get_llong_from_str(char *start) +{ + long long value; + char *end; + + errno = 0; + value = strtoll(start, &end, 10); + if (errno || start == end) + return -1; + + return value; +} + +/* + * get_duration - fill output with a human readable duration since start_time + */ +void get_duration(time_t start_time, char *output, int output_size) +{ + time_t now = time(NULL); + struct tm *tm_info; + time_t duration; + + duration = difftime(now, start_time); + tm_info = localtime(&duration); + + snprintf(output, output_size, "%3d %02d:%02d:%02d", + tm_info->tm_yday, + tm_info->tm_hour - 1, + tm_info->tm_min, + tm_info->tm_sec); +} + +/* + * parse_cpu_list - parse a cpu_list filling a char vector with cpus set + * + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char + * in the monitored_cpus. + * + * XXX: convert to a bitmask. + */ +int parse_cpu_list(char *cpu_list, char **monitored_cpus) +{ + char *mon_cpus; + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + mon_cpus = malloc(nr_cpus * sizeof(char)); + memset(mon_cpus, 0, (nr_cpus * sizeof(char))); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_list: adding cpu %d\n", cpu); + mon_cpus[cpu] = 1; + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_list: adding cpu %d\n", i); + mon_cpus[i] = 1; + } + } + + if (*p == ',') + p++; + } + + *monitored_cpus = mon_cpus; + + return 0; + +err: + debug_msg("Error parsing the cpu list %s", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. + */ +#ifdef __x86_64__ +# define __NR_sched_setattr 314 +# define __NR_sched_getattr 315 +#elif __i386__ +# define __NR_sched_setattr 351 +# define __NR_sched_getattr 352 +#elif __arm__ +# define __NR_sched_setattr 380 +# define __NR_sched_getattr 381 +#elif __aarch64__ +# define __NR_sched_setattr 274 +# define __NR_sched_getattr 275 +#elif __powerpc__ +# define __NR_sched_setattr 355 +# define __NR_sched_getattr 356 +#elif __s390x__ +# define __NR_sched_setattr 345 +# define __NR_sched_getattr 346 +#endif + +#define SCHED_DEADLINE 6 + +static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + unsigned int size, unsigned int flags) +{ + return syscall(__NR_sched_getattr, pid, attr, size, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("boost_with_deadline failed to boost pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm + * + * This function uses procps to list the currently running threads and then + * set the sched_attr *attr to the threads that start with char *comm. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. + */ +int set_comm_sched_attr(const char *comm, struct sched_attr *attr) +{ + int flags = PROC_FILLCOM | PROC_FILLSTAT; + PROCTAB *ptp; + proc_t task; + int retval; + + ptp = openproc(flags); + if (!ptp) { + err_msg("error openproc()\n"); + return -ENOENT; + } + + memset(&task, 0, sizeof(task)); + + while (readproc(ptp, &task)) { + retval = strncmp(comm, task.cmd, strlen(comm)); + if (retval) + continue; + retval = __set_sched_attr(task.tid, attr); + if (retval) + goto out_err; + } + + closeproc(ptp); + return 0; + +out_err: + closeproc(ptp); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_OTHER)) + return -1; + if (prio > sched_get_priority_max(SCHED_OTHER)) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_priority = prio; + break; + default: + return -1; + } + return 0; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 000000000000..9aa962319ca2 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <time.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; + +int parse_prio(char *arg, struct sched_attr *sched_param); +int set_comm_sched_attr(const char *comm, struct sched_attr *attr); |