diff options
Diffstat (limited to 'tools/perf/Documentation/perf-intel-pt.txt')
-rw-r--r-- | tools/perf/Documentation/perf-intel-pt.txt | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index ff58bd4c381b..238ab9d3cb93 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -468,6 +468,8 @@ ptw Enable PTWRITE packets which are produced when a ptwrite instruction which contains "1" if the feature is supported and "0" otherwise. + As an alternative, refer to "Emulated PTWRITE" further below. + fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet provides the address of the ptwrite instruction. In the absence of fup_on_ptw, the decoder will use the address of the previous branch @@ -1398,6 +1400,76 @@ There were none. :17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax +Tracing Virtual Machines - Guest Code +------------------------------------- + +A common case for KVM test programs is that the test program acts as the +hypervisor, creating, running and destroying the virtual machine, and +providing the guest object code from its own object code. In this case, +the VM is not running an OS, but only the functions loaded into it by the +hypervisor test program, and conveniently, loaded at the same virtual +addresses. To support that, option "--guest-code" has been added to perf script +and perf kvm report. + +Here is an example tracing a test program from the kernel's KVM selftests: + + # perf record --kcore -e intel_pt/cyc/ -- tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.280 MB perf.data ] + # perf script --guest-code --itrace=bep --ns -F-period,+addr,+flags + [SNIP] + tsc_msrs_test 18436 [007] 10897.962087733: branches: call ffffffffc13b2ff5 __vmx_vcpu_run+0x15 (vmlinux) => ffffffffc13b2f50 vmx_update_host_rsp+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962087733: branches: return ffffffffc13b2f5d vmx_update_host_rsp+0xd (vmlinux) => ffffffffc13b2ffa __vmx_vcpu_run+0x1a (vmlinux) + tsc_msrs_test 18436 [007] 10897.962087733: branches: call ffffffffc13b303b __vmx_vcpu_run+0x5b (vmlinux) => ffffffffc13b2f80 vmx_vmenter+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962087836: branches: vmentry ffffffffc13b2f82 vmx_vmenter+0x2 (vmlinux) => 0 [unknown] ([unknown]) + [guest/18436] 18436 [007] 10897.962087836: branches: vmentry 0 [unknown] ([unknown]) => 402c81 guest_code+0x131 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962087836: branches: call 402c81 guest_code+0x131 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 40dba0 ucall+0x0 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962088248: branches: vmexit 40dba0 ucall+0x0 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 0 [unknown] ([unknown]) + tsc_msrs_test 18436 [007] 10897.962088248: branches: vmexit 0 [unknown] ([unknown]) => ffffffffc13b2fa0 vmx_vmexit+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962088248: branches: jmp ffffffffc13b2fa0 vmx_vmexit+0x0 (vmlinux) => ffffffffc13b2fd2 vmx_vmexit+0x32 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962088256: branches: return ffffffffc13b2fd2 vmx_vmexit+0x32 (vmlinux) => ffffffffc13b3040 __vmx_vcpu_run+0x60 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962088270: branches: return ffffffffc13b30b6 __vmx_vcpu_run+0xd6 (vmlinux) => ffffffffc13b2f2e vmx_vcpu_enter_exit+0x4e (vmlinux) + [SNIP] + tsc_msrs_test 18436 [007] 10897.962089321: branches: call ffffffffc13b2ff5 __vmx_vcpu_run+0x15 (vmlinux) => ffffffffc13b2f50 vmx_update_host_rsp+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089321: branches: return ffffffffc13b2f5d vmx_update_host_rsp+0xd (vmlinux) => ffffffffc13b2ffa __vmx_vcpu_run+0x1a (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089321: branches: call ffffffffc13b303b __vmx_vcpu_run+0x5b (vmlinux) => ffffffffc13b2f80 vmx_vmenter+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089424: branches: vmentry ffffffffc13b2f82 vmx_vmenter+0x2 (vmlinux) => 0 [unknown] ([unknown]) + [guest/18436] 18436 [007] 10897.962089424: branches: vmentry 0 [unknown] ([unknown]) => 40dba0 ucall+0x0 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962089701: branches: jmp 40dc1b ucall+0x7b (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 40dc39 ucall+0x99 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962089701: branches: jcc 40dc3c ucall+0x9c (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 40dc20 ucall+0x80 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962089701: branches: jcc 40dc3c ucall+0x9c (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 40dc20 ucall+0x80 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962089701: branches: jcc 40dc37 ucall+0x97 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 40dc50 ucall+0xb0 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) + [guest/18436] 18436 [007] 10897.962089878: branches: vmexit 40dc55 ucall+0xb5 (/home/user/git/work/tools/testing/selftests/kselftest_install/kvm/tsc_msrs_test) => 0 [unknown] ([unknown]) + tsc_msrs_test 18436 [007] 10897.962089878: branches: vmexit 0 [unknown] ([unknown]) => ffffffffc13b2fa0 vmx_vmexit+0x0 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089878: branches: jmp ffffffffc13b2fa0 vmx_vmexit+0x0 (vmlinux) => ffffffffc13b2fd2 vmx_vmexit+0x32 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089887: branches: return ffffffffc13b2fd2 vmx_vmexit+0x32 (vmlinux) => ffffffffc13b3040 __vmx_vcpu_run+0x60 (vmlinux) + tsc_msrs_test 18436 [007] 10897.962089901: branches: return ffffffffc13b30b6 __vmx_vcpu_run+0xd6 (vmlinux) => ffffffffc13b2f2e vmx_vcpu_enter_exit+0x4e (vmlinux) + [SNIP] + + # perf kvm --guest-code --guest --host report -i perf.data --stdio | head -20 + + # To display the perf.data header info, please use --header/--header-only options. + # + # + # Total Lost Samples: 0 + # + # Samples: 12 of event 'instructions' + # Event count (approx.): 2274583 + # + # Children Self Command Shared Object Symbol + # ........ ........ ............. .................... ........................................... + # + 54.70% 0.00% tsc_msrs_test [kernel.vmlinux] [k] entry_SYSCALL_64_after_hwframe + | + ---entry_SYSCALL_64_after_hwframe + do_syscall_64 + | + |--29.44%--syscall_exit_to_user_mode + | exit_to_user_mode_prepare + | task_work_run + | __fput + + Event Trace ----------- @@ -1471,6 +1543,99 @@ In that case the --itrace q option is forced because walking executable code to reconstruct the control flow is not possible. +Emulated PTWRITE +---------------- + +Later perf tools support a method to emulate the ptwrite instruction, which +can be useful if hardware does not support the ptwrite instruction. + +Instead of using the ptwrite instruction, a function is used which produces +a trace that encodes the payload data into TNT packets. Here is an example +of the function: + + #include <stdint.h> + + void perf_emulate_ptwrite(uint64_t x) + __attribute__((externally_visible, noipa, no_instrument_function, naked)); + + #define PERF_EMULATE_PTWRITE_8_BITS \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" \ + "1: shl %rax\n" \ + " jc 1f\n" + + /* Undefined instruction */ + #define PERF_EMULATE_PTWRITE_UD2 ".byte 0x0f, 0x0b\n" + + #define PERF_EMULATE_PTWRITE_MAGIC PERF_EMULATE_PTWRITE_UD2 ".ascii \"perf,ptwrite \"\n" + + void perf_emulate_ptwrite(uint64_t x __attribute__ ((__unused__))) + { + /* Assumes SysV ABI : x passed in rdi */ + __asm__ volatile ( + "jmp 1f\n" + PERF_EMULATE_PTWRITE_MAGIC + "1: mov %rdi, %rax\n" + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + PERF_EMULATE_PTWRITE_8_BITS + "1: ret\n" + ); + } + +For example, a test program with the function above: + + #include <stdio.h> + #include <stdint.h> + #include <stdlib.h> + + #include "perf_emulate_ptwrite.h" + + int main(int argc, char *argv[]) + { + uint64_t x = 0; + + if (argc > 1) + x = strtoull(argv[1], NULL, 0); + perf_emulate_ptwrite(x); + return 0; + } + +Can be compiled and traced: + + $ gcc -Wall -Wextra -O3 -g -o eg_ptw eg_ptw.c + $ perf record -e intel_pt//u ./eg_ptw 0x1234567890abcdef + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.017 MB perf.data ] + $ perf script --itrace=ew + eg_ptw 19875 [007] 8061.235912: ptwrite: IP: 0 payload: 0x1234567890abcdef 55701249a196 perf_emulate_ptwrite+0x16 (/home/user/eg_ptw) + $ + + +EXAMPLE +------- + +Examples can be found on perf wiki page "Perf tools support for IntelĀ® Processor Trace": + +https://perf.wiki.kernel.org/index.php/Perf_tools_support_for_Intel%C2%AE_Processor_Trace + SEE ALSO -------- |