aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-06-29 11:53:17 +0200
committerIngo Molnar <mingo@kernel.org>2019-06-29 11:53:17 +0200
commitcd140267a70dca999ac86edb78715e08d626df6e (patch)
tree3fd86ad02073dafbd5085d087a28fb5df50a6267 /tools
parent1e5ba93fb9073c6891e4f8371c389e8364ed3e73 (diff)
parentfd7d55172d1e2e501e6da0a5c1de25f06612dc2e (diff)
downloadtip-cd140267a70dca999ac86edb78715e08d626df6e.tar.gz
tip-cd140267a70dca999ac86edb78715e08d626df6e.tar.xz
tip-cd140267a70dca999ac86edb78715e08d626df6e.zip
Merge branch 'perf/core'
Diffstat (limited to 'tools')
-rw-r--r--tools/build/Makefile.feature3
-rw-r--r--tools/build/feature/Makefile10
-rw-r--r--tools/build/feature/test-all.c7
-rw-r--r--tools/build/feature/test-fortify-source.c1
-rw-r--r--tools/build/feature/test-gettid.c11
-rw-r--r--tools/build/feature/test-hello.c1
-rw-r--r--tools/build/feature/test-libslang-include-subdir.c7
-rw-r--r--tools/build/feature/test-setns.c1
-rw-r--r--tools/include/linux/kernel.h1
-rw-r--r--tools/lib/vsprintf.c19
-rw-r--r--tools/perf/Documentation/db-export.txt41
-rw-r--r--tools/perf/Documentation/intel-pt.txt40
-rw-r--r--tools/perf/Documentation/perf-config.txt9
-rw-r--r--tools/perf/Documentation/perf-diff.txt14
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-report.txt9
-rw-r--r--tools/perf/Documentation/perf-script.txt17
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Documentation/perf-top.txt5
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt97
-rw-r--r--tools/perf/Makefile.config19
-rw-r--r--tools/perf/Makefile.perf44
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c309
-rw-r--r--tools/perf/arch/arm64/Build2
-rw-r--r--tools/perf/arch/arm64/tests/Build2
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build2
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c304
-rw-r--r--tools/perf/builtin-record.c4
-rw-r--r--tools/perf/builtin-report.c8
-rw-r--r--tools/perf/builtin-script.c74
-rw-r--r--tools/perf/builtin-stat.c87
-rw-r--r--tools/perf/builtin-top.c7
-rw-r--r--tools/perf/builtin-trace.c137
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c268
-rw-r--r--tools/perf/jvmti/jvmti_agent.c2
-rw-r--r--tools/perf/jvmti/libjvmti.c4
-rw-r--r--tools/perf/perf-with-kcore.sh5
-rw-r--r--tools/perf/perf.h2
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py79
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py80
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py345
-rw-r--r--tools/perf/tests/Build4
-rw-r--r--tools/perf/tests/bp_account.c1
-rw-r--r--tools/perf/tests/bpf-script-example.c1
-rw-r--r--tools/perf/tests/bpf-script-test-kbuild.c1
-rw-r--r--tools/perf/tests/bpf-script-test-prologue.c1
-rw-r--r--tools/perf/tests/bpf-script-test-relocation.c1
-rw-r--r--tools/perf/tests/bpf.c1
-rw-r--r--tools/perf/tests/builtin-test.c8
-rw-r--r--tools/perf/tests/map_groups.c121
-rw-r--r--tools/perf/tests/mem.c1
-rw-r--r--tools/perf/tests/mem2node.c1
-rw-r--r--tools/perf/tests/parse-events.c27
-rw-r--r--tools/perf/tests/shell/lib/probe.sh1
-rwxr-xr-xtools/perf/tests/shell/probe_vfs_getname.sh3
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh1
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh1
-rwxr-xr-xtools/perf/tests/shell/record+zstd_comp_decomp.sh2
-rwxr-xr-xtools/perf/tests/shell/trace+probe_vfs_getname.sh1
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/tests/time-utils-test.c251
-rw-r--r--tools/perf/trace/beauty/Build4
-rw-r--r--tools/perf/trace/beauty/beauty.h15
-rw-r--r--tools/perf/trace/beauty/clone.c1
-rwxr-xr-xtools/perf/trace/beauty/fsconfig.sh17
-rw-r--r--tools/perf/trace/beauty/fsmount.c34
-rwxr-xr-xtools/perf/trace/beauty/fsmount.sh22
-rw-r--r--tools/perf/trace/beauty/fspick.c24
-rwxr-xr-xtools/perf/trace/beauty/fspick.sh17
-rw-r--r--tools/perf/trace/beauty/move_mount.c24
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh17
-rw-r--r--tools/perf/trace/beauty/sync_file_range.c31
-rwxr-xr-xtools/perf/trace/beauty/sync_file_range.sh17
-rw-r--r--tools/perf/ui/browsers/annotate.c5
-rw-r--r--tools/perf/ui/libslang.h5
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/annotate.c5
-rw-r--r--tools/perf/util/auxtrace.c3
-rw-r--r--tools/perf/util/auxtrace.h34
-rw-r--r--tools/perf/util/config.c8
-rw-r--r--tools/perf/util/cpumap.c64
-rw-r--r--tools/perf/util/cpumap.h10
-rw-r--r--tools/perf/util/cputopo.c84
-rw-r--r--tools/perf/util/cputopo.h2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c268
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h39
-rw-r--r--tools/perf/util/cs-etm.c1026
-rw-r--r--tools/perf/util/cs-etm.h94
-rw-r--r--tools/perf/util/dso.c125
-rw-r--r--tools/perf/util/env.c1
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/event.h2
-rw-r--r--tools/perf/util/evsel.c24
-rw-r--r--tools/perf/util/header.c96
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c443
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h143
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c140
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h21
-rw-r--r--tools/perf/util/intel-pt.c697
-rw-r--r--tools/perf/util/machine.c8
-rw-r--r--tools/perf/util/map.c6
-rw-r--r--tools/perf/util/map_groups.h2
-rw-r--r--tools/perf/util/perf_regs.h4
-rw-r--r--tools/perf/util/pmu.c28
-rw-r--r--tools/perf/util/s390-cpumsf.c96
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c8
-rw-r--r--tools/perf/util/setup.py2
-rw-r--r--tools/perf/util/smt.c8
-rw-r--r--tools/perf/util/stat-display.c29
-rw-r--r--tools/perf/util/stat-shadow.c1
-rw-r--r--tools/perf/util/stat.c1
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/symbol-elf.c3
-rw-r--r--tools/perf/util/symbol.c97
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/thread-stack.c14
-rw-r--r--tools/perf/util/thread-stack.h4
-rw-r--r--tools/perf/util/thread.c12
-rw-r--r--tools/perf/util/thread.h4
-rw-r--r--tools/perf/util/time-utils.c132
124 files changed, 5557 insertions, 1010 deletions
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3b24231c58a2..86b793dffbc4 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -36,6 +36,7 @@ FEATURE_TESTS_BASIC := \
fortify-source \
sync-compare-and-swap \
get_current_dir_name \
+ gettid \
glibc \
gtk2 \
gtk2-infobar \
@@ -52,6 +53,7 @@ FEATURE_TESTS_BASIC := \
libpython \
libpython-version \
libslang \
+ libslang-include-subdir \
libcrypto \
libunwind \
pthread-attr-setaffinity-np \
@@ -113,7 +115,6 @@ FEATURE_DISPLAY ?= \
numa_num_possible_cpus \
libperl \
libpython \
- libslang \
libcrypto \
libunwind \
libdw-dwarf-unwind \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 4b8244ee65ce..0658b8cd0e53 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -31,6 +31,7 @@ FILES= \
test-libpython.bin \
test-libpython-version.bin \
test-libslang.bin \
+ test-libslang-include-subdir.bin \
test-libcrypto.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
@@ -54,6 +55,7 @@ FILES= \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
+ test-gettid.bin \
test-jvmti.bin \
test-jvmti-cmlr.bin \
test-sched_getcpu.bin \
@@ -181,7 +183,10 @@ $(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
$(OUTPUT)test-libslang.bin:
- $(BUILD) -I/usr/include/slang -lslang
+ $(BUILD) -lslang
+
+$(OUTPUT)test-libslang-include-subdir.bin:
+ $(BUILD) -lslang
$(OUTPUT)test-libcrypto.bin:
$(BUILD) -lcrypto
@@ -267,6 +272,9 @@ $(OUTPUT)test-sdt.bin:
$(OUTPUT)test-cxx.bin:
$(BUILDXX) -std=gnu++11
+$(OUTPUT)test-gettid.bin:
+ $(BUILD)
+
$(OUTPUT)test-jvmti.bin:
$(BUILD)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index a59c53705093..88145e8cde1a 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -38,6 +38,10 @@
# include "test-get_current_dir_name.c"
#undef main
+#define main main_test_gettid
+# include "test-gettid.c"
+#undef main
+
#define main main_test_glibc
# include "test-glibc.c"
#undef main
@@ -182,7 +186,7 @@
# include "test-disassembler-four-args.c"
#undef main
-#define main main_test_zstd
+#define main main_test_libzstd
# include "test-libzstd.c"
#undef main
@@ -195,6 +199,7 @@ int main(int argc, char *argv[])
main_test_libelf();
main_test_libelf_mmap();
main_test_get_current_dir_name();
+ main_test_gettid();
main_test_glibc();
main_test_dwarf();
main_test_dwarf_getlocations();
diff --git a/tools/build/feature/test-fortify-source.c b/tools/build/feature/test-fortify-source.c
index c9f398d87868..c8a57194f9f2 100644
--- a/tools/build/feature/test-fortify-source.c
+++ b/tools/build/feature/test-fortify-source.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
int main(void)
diff --git a/tools/build/feature/test-gettid.c b/tools/build/feature/test-gettid.c
new file mode 100644
index 000000000000..ef24e42d3f1b
--- /dev/null
+++ b/tools/build/feature/test-gettid.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+#define _GNU_SOURCE
+#include <unistd.h>
+
+int main(void)
+{
+ return gettid(); /* feature probe: expected to build only where <unistd.h> provides gettid() */
+}
+
+#undef _GNU_SOURCE
diff --git a/tools/build/feature/test-hello.c b/tools/build/feature/test-hello.c
index c9f398d87868..c8a57194f9f2 100644
--- a/tools/build/feature/test-hello.c
+++ b/tools/build/feature/test-hello.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
int main(void)
diff --git a/tools/build/feature/test-libslang-include-subdir.c b/tools/build/feature/test-libslang-include-subdir.c
new file mode 100644
index 000000000000..3ea47ec7590e
--- /dev/null
+++ b/tools/build/feature/test-libslang-include-subdir.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <slang/slang.h>
+
+int main(void)
+{
+ return SLsmg_init_smg(); /* feature probe: exercises the <slang/slang.h> include layout and -lslang */
+}
diff --git a/tools/build/feature/test-setns.c b/tools/build/feature/test-setns.c
index 4a1581ae7a55..2757c201ed50 100644
--- a/tools/build/feature/test-setns.c
+++ b/tools/build/feature/test-setns.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <sched.h>
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 857d9e22826e..cba226948a0c 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -102,6 +102,7 @@
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
int scnprintf(char * buf, size_t size, const char * fmt, ...);
+int scnprintf_pad(char * buf, size_t size, const char * fmt, ...);
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
diff --git a/tools/lib/vsprintf.c b/tools/lib/vsprintf.c
index e08ee147eab4..8780b4cdab21 100644
--- a/tools/lib/vsprintf.c
+++ b/tools/lib/vsprintf.c
@@ -23,3 +23,22 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...)
return (i >= ssize) ? (ssize - 1) : i;
}
+
+int scnprintf_pad(char * buf, size_t size, const char * fmt, ...)
+{
+ ssize_t ssize = size; /* size is the full capacity of buf, NUL included */
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vscnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ if (i < (int) size - 1) { /* pad only up to size - 1; also skips size == 0 */
+ for (; i < (int) size - 1; i++)
+ buf[i] = ' ';
+ buf[i] = 0x0; /* terminator lands on buf[size - 1], never buf[size] */
+ }
+
+ return (i >= ssize) ? (ssize - 1) : i;
+}
diff --git a/tools/perf/Documentation/db-export.txt b/tools/perf/Documentation/db-export.txt
new file mode 100644
index 000000000000..52ffccb02d55
--- /dev/null
+++ b/tools/perf/Documentation/db-export.txt
@@ -0,0 +1,41 @@
+Database Export
+===============
+
+perf tool's python scripting engine:
+
+ tools/perf/util/scripting-engines/trace-event-python.c
+
+supports scripts:
+
+ tools/perf/scripts/python/export-to-sqlite.py
+ tools/perf/scripts/python/export-to-postgresql.py
+
+which export data to a SQLite3 or PostgreSQL database.
+
+The export process provides records with unique sequential ids which allows the
+data to be imported directly to a database and provides the relationships
+between tables.
+
+Over time it is possible to continue to expand the export while maintaining
+backward and forward compatibility, by following some simple rules:
+
+1. Because of the nature of SQL, existing tables and columns can continue to be
+used so long as the names and meanings (and to some extent data types) remain
+the same.
+
+2. New tables and columns can be added, without affecting existing SQL queries,
+so long as the new names are unique.
+
+3. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain
+backward compatibility by testing for the presence of new tables and columns
+before using them. e.g. function IsSelectable() in exported-sql-viewer.py
+
+4. The export scripts themselves maintain forward compatibility (i.e. an existing
+script will continue to work with new versions of perf) by accepting a variable
+number of arguments (e.g. def call_return_table(*x)) i.e. perf can pass more
+arguments which old scripts will ignore.
+
+5. The scripting engine tests for the existence of script handler functions
+before calling them. The scripting engine can also test for the support of new
+or optional features by checking for the existence and value of script global
+variables.
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 115eaacc455f..50c5b60101bd 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -88,21 +88,51 @@ smaller.
To represent software control flow, "branches" samples are produced. By default
a branch sample is synthesized for every single branch. To get an idea what
-data is available you can use the 'perf script' tool with no parameters, which
-will list all the samples.
+data is available you can use the 'perf script' tool with all itrace sampling
+options, which will list all the samples.
perf record -e intel_pt//u ls
- perf script
+ perf script --itrace=ibxwpe
An interesting field that is not printed by default is 'flags' which can be
displayed as follows:
- perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
+ perf script --itrace=ibxwpe -F+flags
The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
in transaction, respectively.
+Another interesting field that is not printed by default is 'ipc' which can be
+displayed as follows:
+
+ perf script --itrace=be -F+ipc
+
+There are two ways that instructions-per-cycle (IPC) can be calculated depending
+on the recording.
+
+If the 'cyc' config term (see config terms section below) was used, then IPC is
+calculated using the cycle count from CYC packets, otherwise MTC packets are
+used - refer to the 'mtc' config term. When MTC is used, however, the values
+are less accurate because the timing is less accurate.
+
+Because Intel PT does not update the cycle count on every branch or instruction,
+the values will often be zero. When there are values, they will be the number
+of instructions and number of cycles since the last update, and thus represent
+the average IPC since the last IPC for that event type. Note IPC for "branches"
+events is calculated separately from IPC for "instructions" events.
+
+Also note that the IPC instruction count may or may not include the current
+instruction. If the cycle count is associated with an asynchronous branch
+(e.g. page fault or interrupt), then the instruction count does not include the
+current instruction, otherwise it does. That is consistent with whether or not
+that instruction has retired when the cycle count is updated.
+
+Another note, in the case of "branches" events, non-taken branches are not
+presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
+TNT packet that starts with a non-taken branch. To see every possible IPC
+value, "instructions" events can be used e.g. --itrace=i0ns
+
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
@@ -713,7 +743,7 @@ Having no option is the same as
which, in turn, is the same as
- --itrace=ibxwpe
+ --itrace=cepwx
The letters are:
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 462b3cde0675..e4aa268d2e38 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -564,9 +564,12 @@ llvm.*::
llvm.clang-bpf-cmd-template::
Cmdline template. Below lines show its default value. Environment
variable is used to pass options.
- "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \
- -Wno-unused-value -Wno-pointer-sign -working-directory \
- $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -"
+ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
+ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \
+ "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
+ "-Wno-unused-value -Wno-pointer-sign " \
+ "-working-directory $WORKING_DIR " \
+ "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
llvm.clang-opt::
Options passed to clang.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index da7809b15cc9..facd91e4e945 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -142,12 +142,14 @@ OPTIONS
perf diff --time 0%-10%,30%-40%
It also supports analyzing samples within a given time window
- <start>,<stop>. Times have the format seconds.microseconds. If 'start'
- is not given (i.e., time string is ',x.y') then analysis starts at
- the beginning of the file. If stop time is not given (i.e, time
- string is 'x.y,') then analysis goes to the end of the file. Time string is
- 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different
- perf.data files.
+ <start>,<stop>. Times have the format seconds.nanoseconds. If 'start'
+ is not given (i.e. time string is ',x.y') then analysis starts at
+ the beginning of the file. If stop time is not given (i.e. time
+ string is 'x.y,') then analysis goes to the end of the file.
+ Multiple ranges can be separated by spaces, which requires the argument
+ to be quoted e.g. --time "1234.567,1234.789 1235,"
+ Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps
+ for different perf.data files.
For example, we get the timestamp information from 'perf script'.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index de269430720a..15e0fa87241b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -490,6 +490,17 @@ Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
+--kernel-callchains::
+Collect callchains only from kernel space. I.e. this option sets
+perf_event_attr.exclude_callchain_user to 1.
+
+--user-callchains::
+Collect callchains only from user space. I.e. this option sets
+perf_event_attr.exclude_callchain_kernel to 1.
+
+Don't use both --kernel-callchains and --user-callchains at the same time or no
+callchains will be collected.
+
--timestamp-filename
Append timestamp to output file name.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f441baa794ce..8c4372819e11 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -412,12 +412,13 @@ OPTIONS
--time::
Only analyze samples within given time window: <start>,<stop>. Times
- have the format seconds.microseconds. If start is not given (i.e., time
+ have the format seconds.nanoseconds. If start is not given (i.e. time
string is ',x.y') then analysis starts at the beginning of the file. If
- stop time is not given (i.e, time string is 'x.y,') then analysis goes
- to end of file.
+ stop time is not given (i.e. time string is 'x.y,') then analysis goes
+ to end of file. Multiple ranges can be separated by spaces, which
+ requires the argument to be quoted e.g. --time "1234.567,1234.789 1235,"
- Also support time percent with multiple time range. Time string is
+ Also support time percent with multiple time ranges. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
For example:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 9b0d04dd2a61..d4e2e18a5881 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
- brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode.
+ brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -203,6 +203,9 @@ OPTIONS
The synth field is used by synthesized events which may be created when
Instruction Trace decoding.
+ The ipc (instructions per cycle) field is synthesized and may have a value when
+ Instruction Trace decoding.
+
Finally, a user may not set fields to none for all event types.
i.e., -F "" is not allowed.
@@ -313,6 +316,9 @@ OPTIONS
--show-round-events
Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
+--show-bpf-events
+ Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -355,12 +361,13 @@ include::itrace.txt[]
--time::
Only analyze samples within given time window: <start>,<stop>. Times
- have the format seconds.microseconds. If start is not given (i.e., time
+ have the format seconds.nanoseconds. If start is not given (i.e. time
string is ',x.y') then analysis starts at the beginning of the file. If
- stop time is not given (i.e, time string is 'x.y,') then analysis goes
- to end of file.
+ stop time is not given (i.e. time string is 'x.y,') then analysis goes
+ to end of file. Multiple ranges can be separated by spaces, which
+ requires the argument to be quoted e.g. --time "1234.567,1234.789 1235,"
- Also support time percent with multipe time range. Time string is
+ Also support time percent with multiple time ranges. Time string is
'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
For example:
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 1e312c2672e4..930c51c01201 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -200,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the
socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation.
+--per-die::
+Aggregate counts per processor die for system-wide mode measurements. This
+is a useful mode to detect imbalance between dies. To enable this mode,
+use --per-die in addition to -a. (system-wide). The output includes the
+die number and the number of online processors on that die. This is
+useful to gauge the amount of aggregation.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements. This
is a useful mode to detect imbalance between physical cores. To enable this mode,
@@ -239,6 +246,9 @@ Input file name.
--per-socket::
Aggregate counts per processor socket for system-wide mode measurements.
+--per-die::
+Aggregate counts per processor die for system-wide mode measurements.
+
--per-core::
Aggregate counts per physical processor for system-wide mode measurements.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 44d89fb9c788..cfea87c6f38e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -262,6 +262,11 @@ Default is to monitor all CPUS.
The number of threads to run when synthesizing events for existing processes.
By default, the number of threads equals to the number of online CPUs.
+--namespaces::
+ Record events of type PERF_RECORD_NAMESPACES and display it with the
+ 'cgroup_id' sort key.
+
+
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 6967e9b02be5..5f54feb19977 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -151,25 +151,45 @@ struct {
HEADER_CPU_TOPOLOGY = 13,
-String lists defining the core and CPU threads topology.
-The string lists are followed by a variable length array
-which contains core_id and socket_id of each cpu.
-The number of entries can be determined by the size of the
-section minus the sizes of both string lists.
-
struct {
+ /*
+ * First revision of HEADER_CPU_TOPOLOGY
+ *
+ * See 'struct perf_header_string_list' definition earlier
+ * in this file.
+ */
+
struct perf_header_string_list cores; /* Variable length */
struct perf_header_string_list threads; /* Variable length */
+
+ /*
+ * Second revision of HEADER_CPU_TOPOLOGY, older tools
+ * will not consider what comes next
+ */
+
struct {
uint32_t core_id;
uint32_t socket_id;
} cpus[nr]; /* Variable length records */
+ /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpus_avail */
+
+ /*
+ * Third revision of HEADER_CPU_TOPOLOGY, older tools
+ * will not consider what comes next
+ */
+
+ struct perf_header_string_list dies; /* Variable length */
+ uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NRCPUS, VLA */
};
Example:
- sibling cores : 0-3
+ sibling sockets : 0-8
+ sibling dies : 0-3
+ sibling dies : 4-7
sibling threads : 0-1
sibling threads : 2-3
+ sibling threads : 4-5
+ sibling threads : 6-7
HEADER_NUMA_TOPOLOGY = 14,
@@ -272,6 +292,69 @@ struct {
Two uint64_t for the time of first sample and the time of last sample.
+ HEADER_MEM_TOPOLOGY = 22,
+
+Physical memory map and its node assignments.
+
+The format of data in MEM_TOPOLOGY is as follows:
+
+ 0 - version | for future changes
+ 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ 16 - count | number of nodes
+
+For each node we store map of physical indexes:
+
+ 32 - node id | node index
+ 40 - size | size of bitmap
+ 48 - bitmap | bitmap of memory indexes that belongs to node
+ | /sys/devices/system/node/node<NODE>/memory<INDEX>
+
+The MEM_TOPOLOGY can be displayed with following command:
+
+$ perf report --header-only -I
+...
+# memory nodes (nr 1, block size 0x8000000):
+# 0 [7G]: 0-23,32-69
+
+ HEADER_CLOCKID = 23,
+
+One uint64_t for the clockid frequency, specified, for instance, via 'perf
+record -k' (see clock_gettime()), to enable timestamps derived metrics
+conversion into wall clock time on the reporting stage.
+
+ HEADER_DIR_FORMAT = 24,
+
+The data files layout is described by HEADER_DIR_FORMAT feature. Currently it
+holds only version number (1):
+
+ uint64_t version;
+
+The current version holds only the version value (1), which means that data files:
+
+- Follow the 'data.*' name format.
+
+- Contain raw events data in standard perf format as read from kernel (and need
+ to be sorted)
+
+Future versions are expected to describe different data files layout according
+to special needs.
+
+ HEADER_BPF_PROG_INFO = 25,
+
+struct bpf_prog_info_linear, which contains detailed information about
+a BPF program, including type, id, tag, jited/xlated instructions, etc.
+
+ HEADER_BPF_BTF = 26,
+
+Contains BPF Type Format (BTF). For more information about BTF, please
+refer to Documentation/bpf/btf.rst.
+
+struct {
+ u32 id;
+ u32 data_size;
+ char data[];
+};
+
HEADER_COMPRESSED = 27,
struct {
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 85fbcd265351..89ac5a1f1550 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -332,6 +332,10 @@ ifeq ($(feature-get_current_dir_name), 1)
CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
endif
+ifeq ($(feature-gettid), 1)
+ CFLAGS += -DHAVE_GETTID
+endif
+
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
@@ -413,6 +417,9 @@ ifdef CORESIGHT
$(call feature_check,libopencsd)
ifeq ($(feature-libopencsd), 1)
CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
+ ifeq ($(feature-reallocarray), 0)
+ CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+ endif
LDFLAGS += $(LIBOPENCSD_LDFLAGS)
EXTLIBS += $(OPENCSDLIBS)
$(call detected,CONFIG_LIBOPENCSD)
@@ -637,11 +644,15 @@ endif
ifndef NO_SLANG
ifneq ($(feature-libslang), 1)
- msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
- NO_SLANG := 1
- else
+ ifneq ($(feature-libslang-include-subdir), 1)
+ msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
+ NO_SLANG := 1
+ else
+ CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
+ endif
+ endif
+ ifndef NO_SLANG
# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
- CFLAGS += -I/usr/include/slang
CFLAGS += -DHAVE_SLANG_SUPPORT
EXTLIBS += -lslang
$(call detected,CONFIG_SLANG)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4d46ca6d7e20..0fffd2bb6cd9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -420,6 +420,24 @@ fadvise_advice_tbl := $(srctree)/tools/perf/trace/beauty/fadvise.sh
$(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl)
$(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@
+fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c
+fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh
+
+$(fsmount_arrays): $(linux_uapi_dir)/fs.h $(fsmount_tbls)
+ $(Q)$(SHELL) '$(fsmount_tbls)' $(linux_uapi_dir) > $@
+
+fspick_arrays := $(beauty_outdir)/fspick_arrays.c
+fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh
+
+$(fspick_arrays): $(linux_uapi_dir)/fs.h $(fspick_tbls)
+ $(Q)$(SHELL) '$(fspick_tbls)' $(linux_uapi_dir) > $@
+
+fsconfig_arrays := $(beauty_outdir)/fsconfig_arrays.c
+fsconfig_tbls := $(srctree)/tools/perf/trace/beauty/fsconfig.sh
+
+$(fsconfig_arrays): $(linux_uapi_dir)/fs.h $(fsconfig_tbls)
+ $(Q)$(SHELL) '$(fsconfig_tbls)' $(linux_uapi_dir) > $@
+
pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -494,6 +512,12 @@ mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
$(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+move_mount_flags_array := $(beauty_outdir)/move_mount_flags_array.c
+move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh
+
+$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl)
+ $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@
+
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -526,6 +550,12 @@ arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
$(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
+sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
+
+$(sync_file_range_arrays): $(linux_uapi_dir)/fs.h $(sync_file_range_tbls)
+ $(Q)$(SHELL) '$(sync_file_range_tbls)' $(linux_uapi_dir) > $@
+
all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
# Create python binding output directory if not already present
@@ -629,6 +659,9 @@ build-dir = $(if $(__build-dir),$(__build-dir),.)
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
$(fadvise_advice_array) \
+ $(fsconfig_arrays) \
+ $(fsmount_arrays) \
+ $(fspick_arrays) \
$(pkey_alloc_access_rights_array) \
$(sndrv_pcm_ioctl_array) \
$(sndrv_ctl_ioctl_array) \
@@ -639,12 +672,14 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(madvise_behavior_array) \
$(mmap_flags_array) \
$(mount_flags_array) \
+ $(move_mount_flags_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
$(usbdevfs_ioctl_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
- $(arch_errno_name_array)
+ $(arch_errno_name_array) \
+ $(sync_file_range_arrays)
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -923,9 +958,13 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
$(OUTPUT)$(fadvise_advice_array) \
+ $(OUTPUT)$(fsconfig_arrays) \
+ $(OUTPUT)$(fsmount_arrays) \
+ $(OUTPUT)$(fspick_arrays) \
$(OUTPUT)$(madvise_behavior_array) \
$(OUTPUT)$(mmap_flags_array) \
$(OUTPUT)$(mount_flags_array) \
+ $(OUTPUT)$(move_mount_flags_array) \
$(OUTPUT)$(drm_ioctl_array) \
$(OUTPUT)$(pkey_alloc_access_rights_array) \
$(OUTPUT)$(sndrv_ctl_ioctl_array) \
@@ -939,7 +978,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(usbdevfs_ioctl_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
- $(OUTPUT)$(arch_errno_name_array)
+ $(OUTPUT)$(arch_errno_name_array) \
+ $(OUTPUT)$(sync_file_range_arrays)
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
#
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 911426721170..c6f1ab5499b5 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -31,12 +31,158 @@ struct cs_etm_recording {
struct auxtrace_record itr;
struct perf_pmu *cs_etm_pmu;
struct perf_evlist *evlist;
+ int wrapped_cnt;
+ bool *wrapped;
bool snapshot_mode;
size_t snapshot_size;
};
+/*
+ * File names of the ETMv3 registers, indexed by metadata slot (the "_ro"
+ * suffix presumably stands for read-only - TODO confirm).
+ */
+static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
+ [CS_ETM_ETMCCER] = "mgmt/etmccer",
+ [CS_ETM_ETMIDR] = "mgmt/etmidr",
+};
+
+/*
+ * File names of the ETMv4 registers, indexed by metadata slot.  The users in
+ * this file append them to a "cpu%d/" prefix before handing the path to
+ * perf_pmu__scan_file().
+ */
+static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+ [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
+ [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
+ [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
+ [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
+ [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
+};
+
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+/*
+ * Request context ID tracing on @evsel if the tracer of @cpu supports it.
+ *
+ * TRCIDR2 of @cpu is read through the PMU attached to @itr and its CIDSIZE
+ * field is checked.
+ *
+ * Returns 0 after setting the ETM_OPT_CTXTID bit in evsel->attr.config,
+ * -EINVAL if @cpu is not an ETMv4, if TRCIDR2 can't be read or if the tracer
+ * does not implement 32-bit context ID tracing.
+ */
+static int cs_etm_set_context_id(struct auxtrace_record *itr,
+				 struct perf_evsel *evsel, int cpu)
+{
+	struct cs_etm_recording *ptr;
+	struct perf_pmu *cs_etm_pmu;
+	char path[PATH_MAX];
+	int err = -EINVAL;
+	u32 val;
+
+	ptr = container_of(itr, struct cs_etm_recording, itr);
+	cs_etm_pmu = ptr->cs_etm_pmu;
+
+	if (!cs_etm_is_etmv4(itr, cpu))
+		goto out;
+
+	/* Get a handle on TRCIDR2 */
+	snprintf(path, PATH_MAX, "cpu%d/%s",
+		 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+	err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
+
+	/*
+	 * Anything but exactly one scanned item is a failure.  Don't hand the
+	 * raw scan result back to the caller: a result of 0 (nothing could be
+	 * converted) would be mistaken for success.
+	 */
+	if (err != 1) {
+		pr_err("%s: can't read file %s\n",
+		       CORESIGHT_ETM_PMU_NAME, path);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
+	 * is supported:
+	 *  0b00000 Context ID tracing is not supported.
+	 *  0b00100 Maximum of 32-bit Context ID size.
+	 *  All other values are reserved.
+	 */
+	val = BMVAL(val, 5, 9);
+	if (val != 0x4) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* All good, let the kernel know */
+	evsel->attr.config |= (1 << ETM_OPT_CTXTID);
+	err = 0;
+
+out:
+	return err;
+}
+
+/*
+ * Request global timestamping on @evsel if the tracer of @cpu supports it.
+ *
+ * TRCIDR0 of @cpu is read through the PMU attached to @itr and its TSSIZE
+ * field is checked.
+ *
+ * Returns 0 after setting the ETM_OPT_TS bit in evsel->attr.config, -EINVAL
+ * if @cpu is not an ETMv4, if TRCIDR0 can't be read or if the tracer does not
+ * implement global timestamping.
+ */
+static int cs_etm_set_timestamp(struct auxtrace_record *itr,
+				struct perf_evsel *evsel, int cpu)
+{
+	struct cs_etm_recording *ptr;
+	struct perf_pmu *cs_etm_pmu;
+	char path[PATH_MAX];
+	int err = -EINVAL;
+	u32 val;
+
+	ptr = container_of(itr, struct cs_etm_recording, itr);
+	cs_etm_pmu = ptr->cs_etm_pmu;
+
+	if (!cs_etm_is_etmv4(itr, cpu))
+		goto out;
+
+	/* Get a handle on TRCIDR0 */
+	snprintf(path, PATH_MAX, "cpu%d/%s",
+		 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+	err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
+
+	/*
+	 * Anything but exactly one scanned item is a failure.  Don't hand the
+	 * raw scan result back to the caller: a result of 0 (nothing could be
+	 * converted) would be mistaken for success.
+	 */
+	if (err != 1) {
+		pr_err("%s: can't read file %s\n",
+		       CORESIGHT_ETM_PMU_NAME, path);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * TRCIDR0.TSSIZE, bit [28-24], indicates whether global timestamping
+	 * is supported:
+	 *  0b00000 Global timestamping is not implemented
+	 *  0b00110 Implementation supports a maximum timestamp of 48bits.
+	 *  0b01000 Implementation supports a maximum timestamp of 64bits.
+	 */
+	val &= GENMASK(28, 24);
+	if (!val) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* All good, let the kernel know */
+	evsel->attr.config |= (1 << ETM_OPT_TS);
+	err = 0;
+
+out:
+	return err;
+}
+
+/*
+ * Apply @option to @evsel for every CPU that is both part of the event's CPU
+ * map and currently online.
+ *
+ * Returns 0 on success, -ENOMEM if the online CPU map can't be created, or
+ * the error of the first per-CPU helper that fails.  An @option carrying
+ * anything besides ETM_OPT_CTXTID and ETM_OPT_TS yields -EINVAL.
+ *
+ * NOTE(review): ETM_OPT_CTXTID and ETM_OPT_TS are used as bit positions
+ * elsewhere in this file (1 << ETM_OPT_..., BIT(ETM_OPT_...)) but are tested
+ * as masks here.  That presumably only works because of the particular
+ * values of the two constants - confirm, and consider dedicated mask macros
+ * shared with the caller.
+ */
+static int cs_etm_set_option(struct auxtrace_record *itr,
+			     struct perf_evsel *evsel, u32 option)
+{
+	int i, err = -EINVAL;
+	struct cpu_map *event_cpus = evsel->evlist->cpus;
+	struct cpu_map *online_cpus = cpu_map__new(NULL);
+
+	/* Presumably NULL on allocation failure; don't hand it to cpu_map__has() */
+	if (!online_cpus)
+		return -ENOMEM;
+
+	/* Set option of each CPU we have */
+	for (i = 0; i < cpu__max_cpu(); i++) {
+		if (!cpu_map__has(event_cpus, i) ||
+		    !cpu_map__has(online_cpus, i))
+			continue;
+
+		if (option & ETM_OPT_CTXTID) {
+			err = cs_etm_set_context_id(itr, evsel, i);
+			if (err)
+				goto out;
+		}
+		if (option & ETM_OPT_TS) {
+			err = cs_etm_set_timestamp(itr, evsel, i);
+			if (err)
+				goto out;
+		}
+		if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) {
+			/*
+			 * Nothing else is currently supported.  @err may hold
+			 * 0 from a helper that just succeeded, so set the
+			 * failure code explicitly.
+			 */
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = 0;
+out:
+	cpu_map__put(online_cpus);
+	return err;
+}
+
static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
struct record_opts *opts,
const char *str)
@@ -105,12 +251,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
struct perf_evsel *evsel, *cs_etm_evsel = NULL;
- const struct cpu_map *cpus = evlist->cpus;
+ struct cpu_map *cpus = evlist->cpus;
bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+ int err = 0;
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+ if (perf_can_record_switch_events())
+ opts->record_switch_events = true;
+
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == cs_etm_pmu->type) {
if (cs_etm_evsel) {
@@ -241,19 +391,25 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
/*
* In the case of per-cpu mmaps, we need the CPU on the
- * AUX event.
+ * AUX event. We also need the contextID in order to be notified
+ * when a context switch happened.
*/
- if (!cpu_map__empty(cpus))
+ if (!cpu_map__empty(cpus)) {
perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+ err = cs_etm_set_option(itr, cs_etm_evsel,
+ ETM_OPT_CTXTID | ETM_OPT_TS);
+ if (err)
+ goto out;
+ }
+
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
struct perf_evsel *tracking_evsel;
- int err;
err = parse_events(evlist, "dummy:u", NULL);
if (err)
- return err;
+ goto out;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
@@ -266,7 +422,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
perf_evsel__set_sample_bit(tracking_evsel, TIME);
}
- return 0;
+out:
+ return err;
}
static u64 cs_etm_get_config(struct auxtrace_record *itr)
@@ -314,6 +471,8 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
config_opts = cs_etm_get_config(itr);
if (config_opts & BIT(ETM_OPT_CYCACC))
config |= BIT(ETM4_CFG_BIT_CYCACC);
+ if (config_opts & BIT(ETM_OPT_CTXTID))
+ config |= BIT(ETM4_CFG_BIT_CTXTID);
if (config_opts & BIT(ETM_OPT_TS))
config |= BIT(ETM4_CFG_BIT_TS);
if (config_opts & BIT(ETM_OPT_RETSTK))
@@ -363,19 +522,6 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
(etmv3 * CS_ETMV3_PRIV_SIZE));
}
-static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
- [CS_ETM_ETMCCER] = "mgmt/etmccer",
- [CS_ETM_ETMIDR] = "mgmt/etmidr",
-};
-
-static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
- [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
- [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
- [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
- [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
- [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
-};
-
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
{
bool ret = false;
@@ -536,16 +682,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
return 0;
}
-static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
+/*
+ * Grow @ptr->wrapped so that @idx becomes a valid index.
+ *
+ * Every slot added by the call starts out as false ("not wrapped yet"), not
+ * only the last one: @idx may be more than one past the current end and
+ * realloc() does not initialise the memory it adds.
+ *
+ * Returns 0 on success (including when the array is already large enough),
+ * -ENOMEM if the array can't be grown; the existing array is left untouched
+ * in that case.
+ */
+static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
+{
+	bool *wrapped;
+	int i, cnt = idx + 1;
+
+	/* Already big enough, nothing to do */
+	if (idx < ptr->wrapped_cnt)
+		return 0;
+
+	/*
+	 * Free'ed in cs_etm_recording_free().  Using realloc() to avoid
+	 * cross compilation problems where the host's system supports
+	 * reallocarray() but not the target.
+	 */
+	wrapped = realloc(ptr->wrapped, cnt * sizeof(*wrapped));
+	if (!wrapped)
+		return -ENOMEM;
+
+	/* Slots below the old count keep their state, new ones start clear */
+	for (i = ptr->wrapped_cnt; i < cnt; i++)
+		wrapped[i] = false;
+
+	ptr->wrapped_cnt = cnt;
+	ptr->wrapped = wrapped;
+
+	return 0;
+}
+
+/*
+ * Tell whether the ring buffer @buffer, @buffer_size bytes long, has been
+ * filled all the way to its end at least once, given the write position
+ * @head.
+ */
+static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
+				      size_t buffer_size, u64 head)
+{
+	u64 *words = (u64 *)buffer;
+	size_t nr_words;
+	u64 first, pos;
+
+	/*
+	 * @head only ever grows, so once it is at or past the size of the
+	 * ring buffer a wrap around has necessarily happened.
+	 */
+	if (head >= buffer_size)
+		return true;
+
+	/*
+	 * @head lies within the ring buffer.  Either not enough data was
+	 * produced to fill the buffer, or tracing ran for so long that @head
+	 * itself wrapped numerically.  The two cases are told apart by looking
+	 * for data at the very end of the buffer, which is reliable because
+	 * mmap'ed pages are zeroed out and every session gets a fresh mapping.
+	 *
+	 * Only the last 512 bytes (an arbitrary amount) are inspected, or
+	 * less when @head already sits inside that window.
+	 */
+	first = buffer_size - 512;
+	if (head > first)
+		first = head;
+
+	/* Walk the tail 64 bits at a time to speed things up */
+	first >>= 3;
+	nr_words = buffer_size >> 3;
+
+	/*
+	 * Any trace data found here means @head has been at the end of the
+	 * buffer before and has wrapped around at least once.
+	 */
+	for (pos = first; pos < nr_words; pos++) {
+		if (words[pos])
+			return true;
+	}
+
+	return false;
+}
+
+static int cs_etm_find_snapshot(struct auxtrace_record *itr,
int idx, struct auxtrace_mmap *mm,
- unsigned char *data __maybe_unused,
+ unsigned char *data,
u64 *head, u64 *old)
{
+ int err;
+ bool wrapped;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+
+ /*
+ * Allocate memory to keep track of wrapping if this is the first
+ * time we deal with this *mm.
+ */
+ if (idx >= ptr->wrapped_cnt) {
+ err = cs_etm_alloc_wrapped_array(ptr, idx);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Check to see if *head has wrapped around. If it hasn't only the
+ * amount of data between *head and *old is snapshot'ed to avoid
+ * bloating the perf.data file with zeros. But as soon as *head has
+ * wrapped around the entire size of the AUX ring buffer is taken.
+ */
+ wrapped = ptr->wrapped[idx];
+ if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
+ wrapped = true;
+ ptr->wrapped[idx] = true;
+ }
+
pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
__func__, idx, (size_t)*old, (size_t)*head, mm->len);
- *old = *head;
- *head += mm->len;
+ /* No wrap has occurred, we can just use *head and *old. */
+ if (!wrapped)
+ return 0;
+
+ /*
+ * *head has wrapped around - adjust *head and *old to pickup the
+ * entire content of the AUX buffer.
+ */
+ if (*head >= mm->len) {
+ *old = *head - mm->len;
+ } else {
+ *head += mm->len;
+ *old = *head - mm->len;
+ }
return 0;
}
@@ -586,6 +847,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
{
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
+
+ zfree(&ptr->wrapped);
free(ptr);
}
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 36222e64bbf7..a7dd46a5b678 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1,2 +1,2 @@
perf-y += util/
-perf-$(CONFIG_DWARF_UNWIND) += tests/
+perf-y += tests/
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
index 41707fea74b3..a61c06bdb757 100644
--- a/tools/perf/arch/arm64/tests/Build
+++ b/tools/perf/arch/arm64/tests/Build
@@ -1,4 +1,4 @@
perf-y += regs_load.o
-perf-y += dwarf-unwind.o
+perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 613709cfbbd0..c41c5affe4be 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,6 +9,7 @@ struct test;
int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 3d83d0c6982d..2997c506550c 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -4,5 +4,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += rdpmc.o
perf-y += perf-time-to-tsc.o
-perf-$(CONFIG_AUXTRACE) += insn-x86.o
+perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index d47d3f8e3c8e..6763135aec17 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -23,6 +23,10 @@ struct test arch_tests[] = {
.desc = "x86 instruction decoder - new instructions",
.func = test__insn_x86,
},
+ {
+ .desc = "Intel PT packet decoder",
+ .func = test__intel_pt_pkt_decoder,
+ },
#endif
#if defined(__x86_64__)
{
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
new file mode 100644
index 000000000000..901bf1f449c4
--- /dev/null
+++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+/**
+ * struct test_data - Test data.
+ * @len: number of bytes to decode
+ * @bytes: bytes to decode
+ * @ctx: packet context to decode
+ * @packet: expected packet
+ * @new_ctx: expected new packet context
+ * @ctx_unchanged: the packet context must not change
+ *
+ * The table is private to this file (static) so that its generic name does
+ * not end up as a global symbol.  It ends with an entry whose @len is 0.
+ */
+static struct test_data {
+	int len;
+	u8 bytes[INTEL_PT_PKT_MAX_SZ];
+	enum intel_pt_pkt_ctx ctx;
+	struct intel_pt_pkt packet;
+	enum intel_pt_pkt_ctx new_ctx;
+	int ctx_unchanged;
+} data[] = {
+	/* Padding Packet */
+	{1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 },
+	/* Short Taken/Not Taken Packet */
+	{1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 },
+	{1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 },
+	{1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 },
+	{1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 },
+	/* Long Taken/Not Taken Packet */
+	{8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 },
+	{8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 },
+	{8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 },
+	{8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 },
+	/* Target IP Packet */
+	{1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 },
+	{3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 },
+	{5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 },
+	{7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 },
+	{7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 },
+	{9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 },
+	/* Packet Generation Enable */
+	{1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 0, 0 },
+	{3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 },
+	{5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 },
+	{7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 },
+	{7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 },
+	{9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 },
+	/* Packet Generation Disable */
+	{1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 },
+	{3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 },
+	{5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 },
+	{7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 },
+	{7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 },
+	{9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 },
+	/* Flow Update Packet */
+	{1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 },
+	{3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 },
+	{5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 },
+	{7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 },
+	{7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 },
+	{9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 },
+	/* Paging Information Packet */
+	{8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 },
+	{8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 63)}, 0, 0 },
+	/* Mode Exec Packet */
+	{2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 },
+	{2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 },
+	{2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 },
+	/* Mode TSX Packet */
+	{2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 },
+	{2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 },
+	{2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 0, 2}, 0, 0 },
+	/* Trace Stop Packet */
+	{2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 },
+	/* Core:Bus Ratio Packet */
+	{4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 },
+	/* Timestamp Counter Packet */
+	{8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 },
+	/* Mini Time Counter Packet */
+	{2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 },
+	/* TSC / MTC Alignment Packet */
+	{7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 },
+	{7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 },
+	{7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 },
+	{7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 },
+	/* Cycle Count Packet */
+	{1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 },
+	{1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 },
+	{1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 },
+	{2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 },
+	{2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 },
+	{3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 },
+	{3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 },
+	{4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 },
+	{4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 },
+	{5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 },
+	{5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 },
+	{6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 },
+	{6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 },
+	{7, {0x07, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 },
+	{7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 },
+	{8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 },
+	{8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3fffffffffffff}, 0, 1 },
+	{9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 },
+	{9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 },
+	{10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 },
+	{10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 },
+	/* Virtual-Machine Control Structure Packet */
+	{7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 },
+	/* Overflow Packet */
+	{2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 },
+	{2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 },
+	{2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 },
+	/* Packet Stream Boundary */
+	{16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 },
+	{16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 },
+	{16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 },
+	/* PSB End Packet */
+	{2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 },
+	/* Maintenance Packet */
+	{11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 },
+	/* Write Data to PT Packet */
+	{6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 },
+	{10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 },
+	{6, {0x02, 0x92, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 },
+	{10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 },
+	/* Execution Stop Packet */
+	{2, {0x02, 0x62}, 0, {INTEL_PT_EXSTOP, 0, 0}, 0, 1 },
+	{2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 },
+	/* Monitor Wait Packet */
+	{10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 },
+	{10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 },
+	{10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 },
+	/* Power Entry Packet */
+	{4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 },
+	{4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 },
+	{4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 },
+	{4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 },
+	/* Power Exit Packet */
+	{7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 },
+	{7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 },
+	{7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 },
+	/* Block Begin Packet */
+	{3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 },
+	{3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 },
+	{3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 },
+	{3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 },
+	/* 4-byte Block Item Packet */
+	{5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 },
+	{5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 },
+	{5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 },
+	{5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 },
+	/* 8-byte Block Item Packet */
+	{9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 },
+	{9, {0xfc}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 },
+	{9, {0x04, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 },
+	{9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 },
+	/* Block End Packet */
+	{2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
+	{2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
+	{2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
+	{2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
+	/* Terminator */
+	{0, {0}, 0, {0, 0, 0}, 0, 0 },
+};
+
+/*
+ * Print the first @len bytes of @bytes in hex, pad the output up to
+ * INTEL_PT_PKT_MAX_SZ columns, then print the description of @packet.
+ *
+ * Returns TEST_OK, or TEST_FAIL if intel_pt_pkt_desc() reports an error.
+ */
+static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
+{
+	char desc[INTEL_PT_PKT_DESC_MAX];
+	int pos = 0;
+
+	/*
+	 * The index is advanced outside of pr_debug() so that the loops do
+	 * not depend on how the macro treats its arguments.
+	 */
+	while (pos < len) {
+		pr_debug(" %02x", bytes[pos]);
+		pos++;
+	}
+	while (pos < INTEL_PT_PKT_MAX_SZ) {
+		pr_debug(" ");
+		pos++;
+	}
+	pr_debug(" ");
+
+	if (intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX) < 0) {
+		pr_debug("intel_pt_pkt_desc failed!\n");
+		return TEST_FAIL;
+	}
+
+	pr_debug("%s\n", desc);
+	return TEST_OK;
+}
+
+/* Announce a failure on test vector @d and dump its bytes and expected packet */
+static void decoding_failed(struct test_data *d)
+{
+	struct intel_pt_pkt *expected = &d->packet;
+
+	pr_debug("Decoding failed!\n");
+	pr_debug("Decoding: ");
+	/* The dump is purely informational here, its result is not needed */
+	(void)dump_packet(expected, d->bytes, d->len);
+}
+
+/*
+ * Report a mismatch between test vector @d and what was actually decoded:
+ * @packet, the consumed length @len and the resulting context @new_ctx.
+ * Only the fields that differ are printed.
+ *
+ * Always returns TEST_FAIL so that callers can return the result directly.
+ */
+static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
+		enum intel_pt_pkt_ctx new_ctx)
+{
+	struct intel_pt_pkt *expected = &d->packet;
+
+	decoding_failed(d);
+
+	if (d->len != len)
+		pr_debug("Expected length: %d Decoded length %d\n",
+			 d->len, len);
+
+	if (expected->type != packet->type)
+		pr_debug("Expected type: %d Decoded type %d\n",
+			 expected->type, packet->type);
+
+	if (expected->count != packet->count)
+		pr_debug("Expected count: %d Decoded count %d\n",
+			 expected->count, packet->count);
+
+	if (expected->payload != packet->payload)
+		pr_debug("Expected payload: 0x%llx Decoded payload 0x%llx\n",
+			 (unsigned long long)expected->payload,
+			 (unsigned long long)packet->payload);
+
+	if (d->new_ctx != new_ctx)
+		pr_debug("Expected packet context: %d Decoded packet context %d\n",
+			 d->new_ctx, new_ctx);
+
+	return TEST_FAIL;
+}
+
+/*
+ * Check that applying @packet to the packet context @ctx leaves the context
+ * as it was.
+ *
+ * Returns TEST_OK if so, otherwise reports the failure for @d and returns
+ * TEST_FAIL.
+ */
+static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
+			      enum intel_pt_pkt_ctx ctx)
+{
+	enum intel_pt_pkt_ctx updated = ctx;
+
+	intel_pt_upd_pkt_ctx(packet, &updated);
+	if (updated == ctx)
+		return TEST_OK;
+
+	decoding_failed(d);
+	pr_debug("Packet context changed!\n");
+	return TEST_FAIL;
+}
+
+/*
+ * Decode the bytes of test vector @d and compare the outcome (length, packet
+ * type, count, payload and new packet context) with the expected values.
+ *
+ * Returns TEST_OK on a match, TEST_FAIL otherwise.
+ */
+static int test_one(struct test_data *d)
+{
+	static const enum intel_pt_pkt_ctx contexts[] = {
+		INTEL_PT_NO_CTX,
+		INTEL_PT_BLK_4_CTX,
+		INTEL_PT_BLK_8_CTX,
+	};
+	enum intel_pt_pkt_ctx ctx = d->ctx;
+	struct intel_pt_pkt packet;
+	size_t i;
+	int len;
+
+	/* Poison the packet so that fields left unset by the decoder show up */
+	memset(&packet, 0xff, sizeof(packet));
+
+	/* Decode a packet */
+	len = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx);
+	if (len < 0 || len > INTEL_PT_PKT_MAX_SZ) {
+		decoding_failed(d);
+		pr_debug("intel_pt_get_packet returned %d\n", len);
+		return TEST_FAIL;
+	}
+
+	/* Some packets must always leave the packet context unchanged */
+	if (d->ctx_unchanged) {
+		for (i = 0; i < sizeof(contexts) / sizeof(contexts[0]); i++) {
+			int err = test_ctx_unchanged(d, &packet, contexts[i]);
+
+			if (err)
+				return err;
+		}
+	}
+
+	/* Compare to the expected values */
+	if (len != d->len ||
+	    packet.type != d->packet.type ||
+	    packet.count != d->packet.count ||
+	    packet.payload != d->packet.payload ||
+	    ctx != d->new_ctx)
+		return fail(d, &packet, len, ctx);
+
+	pr_debug("Decoded ok:");
+	return dump_packet(&d->packet, d->bytes, d->len);
+}
+
+/*
+ * Run every vector of the data[] table through the Intel PT packet decoder
+ * and check both the decoded packet and the resulting packet context.  The
+ * walk ends at the first entry whose length is 0, or at the first failure.
+ */
+int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int i;
+
+	for (i = 0; data[i].len; i++) {
+		int ret = test_one(&data[i]);
+
+		if (ret)
+			return ret;
+	}
+
+	return TEST_OK;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e2c3a585a61e..dca55997934e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2191,6 +2191,10 @@ static struct option __record_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
+ "collect kernel callchains"),
+ OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
+ "collect user callchains"),
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1ca533f06a4c..91c40808380d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1428,6 +1428,10 @@ repeat:
&report.range_num);
if (ret < 0)
goto error;
+
+ itrace_synth_opts__set_time_range(&itrace_synth_opts,
+ report.ptime_range,
+ report.range_num);
}
if (session->tevent.pevent &&
@@ -1449,8 +1453,10 @@ repeat:
ret = 0;
error:
- if (report.ptime_range)
+ if (report.ptime_range) {
+ itrace_synth_opts__clear_time_range(&itrace_synth_opts);
zfree(&report.ptime_range);
+ }
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 61cfd8f70989..61f00055476a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -102,6 +102,7 @@ enum perf_output_field {
PERF_OUTPUT_METRIC = 1U << 28,
PERF_OUTPUT_MISC = 1U << 29,
PERF_OUTPUT_SRCCODE = 1U << 30,
+ PERF_OUTPUT_IPC = 1U << 31,
};
struct output_option {
@@ -139,6 +140,7 @@ struct output_option {
{.str = "metric", .field = PERF_OUTPUT_METRIC},
{.str = "misc", .field = PERF_OUTPUT_MISC},
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
+ {.str = "ipc", .field = PERF_OUTPUT_IPC},
};
enum {
@@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
return printed;
}
+/*
+ * Print the instructions-per-cycle figure of @sample to @fp, with two
+ * decimals, followed by the raw instruction and cycle counts.
+ *
+ * Nothing is printed, and 0 is returned, when the IPC field is not selected
+ * or when either count is zero.  Otherwise the result of fprintf() is
+ * returned.
+ */
+static int perf_sample__fprintf_ipc(struct perf_sample *sample,
+				    struct perf_event_attr *attr, FILE *fp)
+{
+	unsigned int ipc_x100;
+
+	if (!PRINT_FIELD(IPC))
+		return 0;
+
+	if (!sample->cyc_cnt || !sample->insn_cnt)
+		return 0;
+
+	/* Scaled by 100 so that the two decimals come out of integer math */
+	ipc_x100 = (sample->insn_cnt * 100) / sample->cyc_cnt;
+
+	return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ",
+		       ipc_x100 / 100, ipc_x100 % 100,
+		       sample->insn_cnt, sample->cyc_cnt);
+}
+
static int perf_sample__fprintf_bts(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
@@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
}
+ printed += perf_sample__fprintf_ipc(sample, attr, fp);
+
if (print_srcline_last)
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
@@ -1606,6 +1624,7 @@ struct perf_script {
bool show_namespace_events;
bool show_lost_events;
bool show_round_events;
+ bool show_bpf_events;
bool allocated;
bool per_event_dump;
struct cpu_map *cpus;
@@ -1858,6 +1877,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
+
+ perf_sample__fprintf_ipc(sample, attr, fp);
+
fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
@@ -2318,6 +2340,41 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
return 0;
}
+static int
+process_bpf_events(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (machine__process_ksymbol(machine, event, sample) < 0)
+ return -1;
+
+ if (!evsel->attr.sample_id_all) {
+ perf_event__fprintf(event, stdout);
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL) {
+ pr_debug("problem processing MMAP event, skipping it.\n");
+ return -1;
+ }
+
+ if (!filter_cpu(sample)) {
+ perf_sample__fprintf_start(sample, thread, evsel,
+ event->header.type, stdout);
+ perf_event__fprintf(event, stdout);
+ }
+
+ thread__put(thread);
+ return 0;
+}
+
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -2420,6 +2477,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.ordered_events = false;
script->tool.finished_round = process_finished_round_event;
}
+ if (script->show_bpf_events) {
+ script->tool.ksymbol = process_bpf_events;
+ script->tool.bpf_event = process_bpf_events;
+ }
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
@@ -3297,6 +3358,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused,
parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
itrace_parse_synth_opts(opt, "cewp", 0);
symbol_conf.nanosecs = true;
+ symbol_conf.pad_output_len_dso = 50;
return 0;
}
@@ -3392,7 +3454,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
- "callindent,insn,insnlen,synth,phys_addr,metric,misc",
+ "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -3438,6 +3500,8 @@ int cmd_script(int argc, const char **argv)
"Show lost events (if recorded)"),
OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
"Show round events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
+ "Show bpf related events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
@@ -3765,6 +3829,10 @@ int cmd_script(int argc, const char **argv)
&script.range_num);
if (err < 0)
goto out_delete;
+
+ itrace_synth_opts__set_time_range(&itrace_synth_opts,
+ script.ptime_range,
+ script.range_num);
}
err = __cmd_script(&script);
@@ -3772,8 +3840,10 @@ int cmd_script(int argc, const char **argv)
flush_scripting();
out_delete:
- if (script.ptime_range)
+ if (script.ptime_range) {
+ itrace_synth_opts__clear_time_range(&itrace_synth_opts);
zfree(&script.ptime_range);
+ }
perf_evlist__free_stats(session->evlist);
perf_session__delete(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1ae66f09dc7d..8a35fc5a7281 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -776,6 +776,8 @@ static struct option stat_options[] = {
"stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
+ "aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -800,6 +802,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
return cpu_map__get_socket(map, cpu, NULL);
}
+static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
+ struct cpu_map *map, int cpu)
+{
+ return cpu_map__get_die(map, cpu, NULL);
+}
+
static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int cpu)
{
@@ -840,6 +848,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config,
return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
}
+static int perf_stat__get_die_cached(struct perf_stat_config *config,
+ struct cpu_map *map, int idx)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
+}
+
static int perf_stat__get_core_cached(struct perf_stat_config *config,
struct cpu_map *map, int idx)
{
@@ -870,6 +884,13 @@ static int perf_stat_init_aggr_mode(void)
}
stat_config.aggr_get_id = perf_stat__get_socket_cached;
break;
+ case AGGR_DIE:
+ if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
+ perror("cannot build die map");
+ return -1;
+ }
+ stat_config.aggr_get_id = perf_stat__get_die_cached;
+ break;
case AGGR_CORE:
if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
@@ -935,21 +956,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}
+static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+ if (cpu != -1) {
+ /*
+ * Encode socket in bit range 15:8
+ * die_id is relative to socket,
+ * we need a global id. So we combine
+ * socket + die id
+ */
+ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
+ return -1;
+
+ if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
+ return -1;
+
+ die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
+ }
+
+ return die_id;
+}
+
static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
struct perf_env *env = data;
int core = -1, cpu = perf_env__get_cpu(env, map, idx);
if (cpu != -1) {
- int socket_id = env->cpu[cpu].socket_id;
-
/*
- * Encode socket in upper 16 bits
- * core_id is relative to socket, and
+ * Encode socket in bit range 31:24
+ * encode die id in bit range 23:16
+ * core_id is relative to socket and die,
* we need a global id. So we combine
- * socket + core id.
+ * socket + die id + core id
*/
- core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+ if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
+ return -1;
+
+ if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
+ return -1;
+
+ if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
+ return -1;
+
+ core = (env->cpu[cpu].socket_id << 24) |
+ (env->cpu[cpu].die_id << 16) |
+ (env->cpu[cpu].core_id & 0xffff);
}
return core;
@@ -961,6 +1016,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus
return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
+static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **diep)
+{
+ return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
+}
+
static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
struct cpu_map **corep)
{
@@ -972,6 +1033,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un
{
return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}
+static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
+ struct cpu_map *map, int idx)
+{
+ return perf_env__get_die(map, idx, &perf_stat.session->header.env);
+}
static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct cpu_map *map, int idx)
@@ -991,6 +1057,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
}
stat_config.aggr_get_id = perf_stat__get_socket_file;
break;
+ case AGGR_DIE:
+ if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
+ perror("cannot build die map");
+ return -1;
+ }
+ stat_config.aggr_get_id = perf_stat__get_die_file;
+ break;
case AGGR_CORE:
if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
@@ -1541,6 +1614,8 @@ static int __cmd_report(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
+ "aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 466621cd1017..12b6b15a9675 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1207,11 +1207,14 @@ static int __cmd_top(struct perf_top *top)
init_process_thread(top);
+ if (opts->record_namespaces)
+ top->tool.namespace_events = true;
+
ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
&top->session->machines.host,
&top->record_opts);
if (ret < 0)
- pr_warning("Couldn't synthesize bpf events.\n");
+ pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false,
@@ -1499,6 +1502,8 @@ int cmd_top(int argc, const char **argv)
OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
"number of thread to run event synthesize"),
+ OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
+ "Record namespaces events"),
OPT_END()
};
struct perf_evlist *sb_evlist = NULL;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 52fadc858ef0..f3532b081b31 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -402,6 +402,11 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
+}
+
size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
{
size_t printed;
@@ -481,6 +486,15 @@ static const char *bpf_cmd[] = {
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");
+static const char *fsmount_flags[] = {
+ [1] = "CLOEXEC",
+};
+static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
+
+#include "trace/beauty/generated/fsconfig_arrays.c"
+
+static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
+
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
@@ -641,6 +655,10 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
{ .scnprintf = SCA_STRARRAY, \
.parm = &strarray__##array, }
+#define STRARRAY_FLAGS(name, array) \
+ { .scnprintf = SCA_STRARRAY_FLAGS, \
+ .parm = &strarray__##array, }
+
#include "trace/beauty/arch_errno_names.c"
#include "trace/beauty/eventfd.c"
#include "trace/beauty/futex_op.c"
@@ -712,6 +730,15 @@ static struct syscall_fmt {
[2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
{ .name = "flock",
.arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
+ { .name = "fsconfig",
+ .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
+ { .name = "fsmount",
+ .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
+ [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
+ { .name = "fspick",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* path */ },
+ [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
{ .name = "fstat", .alias = "newfstat", },
{ .name = "fstatat", .alias = "newfstatat", },
{ .name = "futex",
@@ -774,6 +801,12 @@ static struct syscall_fmt {
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
[3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
.mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
+ { .name = "move_mount",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
+ [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
+ [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
+ [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
@@ -878,6 +911,8 @@ static struct syscall_fmt {
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
{ .name = "symlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "sync_file_range",
+ .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
{ .name = "tgkill",
.arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "tkill",
@@ -936,8 +971,14 @@ struct syscall {
struct syscall_arg_fmt *arg_fmt;
};
+/*
+ * Must match what is in the BPF program:
+ *
+ * tools/perf/examples/bpf/augmented_raw_syscalls.c
+ */
struct bpf_map_syscall_entry {
bool enabled;
+ u16 string_args_len[6];
};
/*
@@ -1191,8 +1232,17 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf,
static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
{
struct augmented_arg *augmented_arg = arg->augmented.args;
+ size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
+ /*
+ * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
+ * we would have two strings, each prefixed by its size.
+ */
+ int consumed = sizeof(*augmented_arg) + augmented_arg->size;
+
+ arg->augmented.args = ((void *)arg->augmented.args) + consumed;
+ arg->augmented.size -= consumed;
- return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
+ return printed;
}
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
@@ -1380,10 +1430,11 @@ static int syscall__set_arg_fmts(struct syscall *sc)
if (sc->fmt && sc->fmt->arg[idx].scnprintf)
continue;
+ len = strlen(field->name);
+
if (strcmp(field->type, "const char *") == 0 &&
- (strcmp(field->name, "filename") == 0 ||
- strcmp(field->name, "path") == 0 ||
- strcmp(field->name, "pathname") == 0))
+ ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
+ strstr(field->name, "path") != NULL))
sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
sc->arg_fmt[idx].scnprintf = SCA_PTR;
@@ -1394,8 +1445,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
else if ((strcmp(field->type, "int") == 0 ||
strcmp(field->type, "unsigned int") == 0 ||
strcmp(field->type, "long") == 0) &&
- (len = strlen(field->name)) >= 2 &&
- strcmp(field->name + len - 2, "fd") == 0) {
+ len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
/*
* /sys/kernel/tracing/events/syscalls/sys_enter*
* egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
@@ -1477,12 +1527,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
static int trace__validate_ev_qualifier(struct trace *trace)
{
- int err = 0, i;
- size_t nr_allocated;
+ int err = 0;
+ bool printed_invalid_prefix = false;
struct str_node *pos;
+ size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
- trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
- trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
+ trace->ev_qualifier_ids.entries = malloc(nr_allocated *
sizeof(trace->ev_qualifier_ids.entries[0]));
if (trace->ev_qualifier_ids.entries == NULL) {
@@ -1492,9 +1542,6 @@ static int trace__validate_ev_qualifier(struct trace *trace)
goto out;
}
- nr_allocated = trace->ev_qualifier_ids.nr;
- i = 0;
-
strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s;
int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
@@ -1504,17 +1551,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
if (id >= 0)
goto matches;
- if (err == 0) {
- fputs("Error:\tInvalid syscall ", trace->output);
- err = -EINVAL;
+ if (!printed_invalid_prefix) {
+ pr_debug("Skipping unknown syscalls: ");
+ printed_invalid_prefix = true;
} else {
- fputs(", ", trace->output);
+ pr_debug(", ");
}
- fputs(sc, trace->output);
+ pr_debug("%s", sc);
+ continue;
}
matches:
- trace->ev_qualifier_ids.entries[i++] = id;
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
if (match_next == -1)
continue;
@@ -1522,7 +1570,7 @@ matches:
id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
if (id < 0)
break;
- if (nr_allocated == trace->ev_qualifier_ids.nr) {
+ if (nr_allocated == nr_used) {
void *entries;
nr_allocated += 8;
@@ -1535,20 +1583,19 @@ matches:
}
trace->ev_qualifier_ids.entries = entries;
}
- trace->ev_qualifier_ids.nr++;
- trace->ev_qualifier_ids.entries[i++] = id;
+ trace->ev_qualifier_ids.entries[nr_used++] = id;
}
}
- if (err < 0) {
- fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
- "\nHint:\tand: 'man syscalls'\n", trace->output);
-out_free:
- zfree(&trace->ev_qualifier_ids.entries);
- trace->ev_qualifier_ids.nr = 0;
- }
+ trace->ev_qualifier_ids.nr = nr_used;
out:
+ if (printed_invalid_prefix)
+ pr_debug("\n");
return err;
+out_free:
+ zfree(&trace->ev_qualifier_ids.entries);
+ trace->ev_qualifier_ids.nr = 0;
+ goto out;
}
/*
@@ -2675,6 +2722,25 @@ out_enomem:
}
#ifdef HAVE_LIBBPF_SUPPORT
+static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
+{
+ struct syscall *sc = trace__syscall_info(trace, NULL, id);
+ int arg = 0;
+
+ if (sc == NULL)
+ goto out;
+
+ for (; arg < sc->nr_args; ++arg) {
+ entry->string_args_len[arg] = 0;
+ if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
+ /* Should be set like strace -s strsize */
+ entry->string_args_len[arg] = PATH_MAX;
+ }
+ }
+out:
+ for (; arg < 6; ++arg)
+ entry->string_args_len[arg] = 0;
+}
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
{
int fd = bpf_map__fd(trace->syscalls.map);
@@ -2687,6 +2753,9 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
int key = trace->ev_qualifier_ids.entries[i];
+ if (value.enabled)
+ trace__init_bpf_map_syscall_args(trace, key, &value);
+
err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
if (err)
break;
@@ -2704,6 +2773,9 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
int err = 0, key;
for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
+ if (enabled)
+ trace__init_bpf_map_syscall_args(trace, key, &value);
+
err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
if (err)
break;
@@ -3627,7 +3699,12 @@ static int trace__config(const char *var, const char *value, void *arg)
struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option);
- err = parse_events_option(&o, value, 0);
+ /*
+ * We can't propagate parse_events_option() return, as it is 1
+ * for failure while perf_config() expects -1.
+ */
+ if (parse_events_option(&o, value, 0))
+ err = -1;
} else if (!strcmp(var, "trace.show_timestamp")) {
trace->show_tstamp = perf_config_bool(var, value);
} else if (!strcmp(var, "trace.show_duration")) {
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 2422894a8194..2f822bb51717 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -21,8 +21,14 @@
/* bpf-output associated map */
bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
+/*
+ * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
+ * PATH_MAX for copying everything, any other value to limit
+ * it a la 'strace -s strsize'.
+ */
struct syscall {
bool enabled;
+ u16 string_args_len[6];
};
bpf_map(syscalls, ARRAY, int, struct syscall, 512);
@@ -41,83 +47,10 @@ struct syscall_exit_args {
struct augmented_filename {
unsigned int size;
- int reserved;
+ int err;
char value[PATH_MAX];
};
-/* syscalls where the first arg is a string */
-#define SYS_OPEN 2
-#define SYS_STAT 4
-#define SYS_LSTAT 6
-#define SYS_ACCESS 21
-#define SYS_EXECVE 59
-#define SYS_TRUNCATE 76
-#define SYS_CHDIR 80
-#define SYS_RENAME 82
-#define SYS_MKDIR 83
-#define SYS_RMDIR 84
-#define SYS_CREAT 85
-#define SYS_LINK 86
-#define SYS_UNLINK 87
-#define SYS_SYMLINK 88
-#define SYS_READLINK 89
-#define SYS_CHMOD 90
-#define SYS_CHOWN 92
-#define SYS_LCHOWN 94
-#define SYS_MKNOD 133
-#define SYS_STATFS 137
-#define SYS_PIVOT_ROOT 155
-#define SYS_CHROOT 161
-#define SYS_ACCT 163
-#define SYS_SWAPON 167
-#define SYS_SWAPOFF 168
-#define SYS_DELETE_MODULE 176
-#define SYS_SETXATTR 188
-#define SYS_LSETXATTR 189
-#define SYS_GETXATTR 191
-#define SYS_LGETXATTR 192
-#define SYS_LISTXATTR 194
-#define SYS_LLISTXATTR 195
-#define SYS_REMOVEXATTR 197
-#define SYS_LREMOVEXATTR 198
-#define SYS_MQ_OPEN 240
-#define SYS_MQ_UNLINK 241
-#define SYS_ADD_KEY 248
-#define SYS_REQUEST_KEY 249
-#define SYS_SYMLINKAT 266
-#define SYS_MEMFD_CREATE 319
-
-/* syscalls where the first arg is a string */
-
-#define SYS_PWRITE64 18
-#define SYS_EXECVE 59
-#define SYS_RENAME 82
-#define SYS_QUOTACTL 179
-#define SYS_FSETXATTR 190
-#define SYS_FGETXATTR 193
-#define SYS_FREMOVEXATTR 199
-#define SYS_MQ_TIMEDSEND 242
-#define SYS_REQUEST_KEY 249
-#define SYS_INOTIFY_ADD_WATCH 254
-#define SYS_OPENAT 257
-#define SYS_MKDIRAT 258
-#define SYS_MKNODAT 259
-#define SYS_FCHOWNAT 260
-#define SYS_FUTIMESAT 261
-#define SYS_NEWFSTATAT 262
-#define SYS_UNLINKAT 263
-#define SYS_RENAMEAT 264
-#define SYS_LINKAT 265
-#define SYS_READLINKAT 267
-#define SYS_FCHMODAT 268
-#define SYS_FACCESSAT 269
-#define SYS_UTIMENSAT 280
-#define SYS_NAME_TO_HANDLE_AT 303
-#define SYS_FINIT_MODULE 313
-#define SYS_RENAMEAT2 316
-#define SYS_EXECVEAT 322
-#define SYS_STATX 332
-
pid_filter(pids_filtered);
struct augmented_args_filename {
@@ -127,12 +60,48 @@ struct augmented_args_filename {
bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
+static inline
+unsigned int augmented_filename__read(struct augmented_filename *augmented_filename,
+ const void *filename_arg, unsigned int filename_len)
+{
+ unsigned int len = sizeof(*augmented_filename);
+ int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);
+
+ augmented_filename->size = augmented_filename->err = 0;
+ /*
+ * probe_read_str may return < 0, e.g. -EFAULT. In that case ->size stays
+ * zero and the error is left in augmented_filename->err for userspace.
+ */
+ if (size > 0) {
+ len -= sizeof(augmented_filename->value) - size;
+ len &= sizeof(augmented_filename->value) - 1;
+ augmented_filename->size = size;
+ } else {
+ /*
+ * So that userspace notices the error while still being able
+ * to skip this augmented arg record
+ */
+ augmented_filename->err = size;
+ len = offsetof(struct augmented_filename, value);
+ }
+
+ return len;
+}
+
SEC("raw_syscalls:sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
struct augmented_args_filename *augmented_args;
- unsigned int len = sizeof(*augmented_args);
- const void *filename_arg = NULL;
+ /*
+ * We start len, the amount of data that will be in the perf ring
+ * buffer, if this is not filtered out by one of pid_filter__has(),
+ * syscall->enabled, etc, with the non-augmented raw syscall payload,
+ * i.e. sizeof(augmented_args->args).
+ *
+ * We'll add to this as we add augmented syscalls right after that
+ * initial, non-augmented raw_syscalls:sys_enter payload.
+ */
+ unsigned int len = sizeof(augmented_args->args);
struct syscall *syscall;
int key = 0;
@@ -189,102 +158,67 @@ int sys_enter(struct syscall_enter_args *args)
* after the ctx memory access to prevent their down stream merging.
*/
/*
- * This table of what args are strings will be provided by userspace,
- * in the syscalls map, i.e. we will already have to do the lookup to
- * see if this specific syscall is filtered, so we can as well get more
- * info about what syscall args are strings or pointers, and how many
- * bytes to copy, per arg, etc.
+ * For now copy just the first string arg, we need to improve the protocol
+ * and have more than one.
*
- * For now hard code it, till we have all the basic mechanisms in place
- * to automate everything and make the kernel part be completely driven
- * by information obtained in userspace for each kernel version and
- * processor architecture, making the kernel part the same no matter what
- * kernel version or processor architecture it runs on.
- */
- switch (augmented_args->args.syscall_nr) {
- case SYS_ACCT:
- case SYS_ADD_KEY:
- case SYS_CHDIR:
- case SYS_CHMOD:
- case SYS_CHOWN:
- case SYS_CHROOT:
- case SYS_CREAT:
- case SYS_DELETE_MODULE:
- case SYS_EXECVE:
- case SYS_GETXATTR:
- case SYS_LCHOWN:
- case SYS_LGETXATTR:
- case SYS_LINK:
- case SYS_LISTXATTR:
- case SYS_LLISTXATTR:
- case SYS_LREMOVEXATTR:
- case SYS_LSETXATTR:
- case SYS_LSTAT:
- case SYS_MEMFD_CREATE:
- case SYS_MKDIR:
- case SYS_MKNOD:
- case SYS_MQ_OPEN:
- case SYS_MQ_UNLINK:
- case SYS_PIVOT_ROOT:
- case SYS_READLINK:
- case SYS_REMOVEXATTR:
- case SYS_RENAME:
- case SYS_REQUEST_KEY:
- case SYS_RMDIR:
- case SYS_SETXATTR:
- case SYS_STAT:
- case SYS_STATFS:
- case SYS_SWAPOFF:
- case SYS_SWAPON:
- case SYS_SYMLINK:
- case SYS_SYMLINKAT:
- case SYS_TRUNCATE:
- case SYS_UNLINK:
- case SYS_ACCESS:
- case SYS_OPEN: filename_arg = (const void *)args->args[0];
+ * Using the unrolled loop is not working, only when we do it manually,
+ * check this out later...
+
+ u8 arg;
+#pragma clang loop unroll(full)
+ for (arg = 0; arg < 6; ++arg) {
+ if (syscall->string_args_len[arg] != 0) {
+ filename_len = syscall->string_args_len[arg];
+ filename_arg = (const void *)args->args[arg];
__asm__ __volatile__("": : :"memory");
- break;
- case SYS_EXECVEAT:
- case SYS_FACCESSAT:
- case SYS_FCHMODAT:
- case SYS_FCHOWNAT:
- case SYS_FGETXATTR:
- case SYS_FINIT_MODULE:
- case SYS_FREMOVEXATTR:
- case SYS_FSETXATTR:
- case SYS_FUTIMESAT:
- case SYS_INOTIFY_ADD_WATCH:
- case SYS_LINKAT:
- case SYS_MKDIRAT:
- case SYS_MKNODAT:
- case SYS_MQ_TIMEDSEND:
- case SYS_NAME_TO_HANDLE_AT:
- case SYS_NEWFSTATAT:
- case SYS_PWRITE64:
- case SYS_QUOTACTL:
- case SYS_READLINKAT:
- case SYS_RENAMEAT:
- case SYS_RENAMEAT2:
- case SYS_STATX:
- case SYS_UNLINKAT:
- case SYS_UTIMENSAT:
- case SYS_OPENAT: filename_arg = (const void *)args->args[1];
- break;
- }
-
- if (filename_arg != NULL) {
- augmented_args->filename.reserved = 0;
- augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,
- sizeof(augmented_args->filename.value),
- filename_arg);
- if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
- len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
- len &= sizeof(augmented_args->filename.value) - 1;
+ break;
}
- } else {
- len = sizeof(augmented_args->args);
}
+ verifier log:
+
+; if (syscall->string_args_len[arg] != 0) {
+37: (69) r3 = *(u16 *)(r0 +2)
+ R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
+; if (syscall->string_args_len[arg] != 0) {
+38: (55) if r3 != 0x0 goto pc+5
+ R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
+39: (b7) r1 = 1
+; if (syscall->string_args_len[arg] != 0) {
+40: (bf) r2 = r0
+41: (07) r2 += 4
+42: (69) r3 = *(u16 *)(r0 +4)
+ R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
+; if (syscall->string_args_len[arg] != 0) {
+43: (15) if r3 == 0x0 goto pc+32
+ R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
+; filename_arg = (const void *)args->args[arg];
+44: (67) r1 <<= 3
+45: (bf) r3 = r6
+46: (0f) r3 += r1
+47: (b7) r5 = 64
+48: (79) r3 = *(u64 *)(r3 +16)
+dereference of modified ctx ptr R3 off=8 disallowed
+processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7
+ */
+
+#define __loop_iter(arg) \
+ if (syscall->string_args_len[arg] != 0) { \
+ unsigned int filename_len = syscall->string_args_len[arg]; \
+ const void *filename_arg = (const void *)args->args[arg]; \
+ if (filename_len <= sizeof(augmented_args->filename.value)) \
+ len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len);
+#define loop_iter_first() __loop_iter(0); }
+#define loop_iter(arg) else __loop_iter(arg); }
+#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); }
+
+ loop_iter_first()
+ loop_iter(1)
+ loop_iter(2)
+ loop_iter(3)
+ loop_iter(4)
+ loop_iter_last(5)
+
/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
}
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index f7eb63cbbc65..88108598d6e9 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -45,10 +45,12 @@
static char jit_path[PATH_MAX];
static void *marker_addr;
+#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
return (pid_t)syscall(__NR_gettid);
}
+#endif
static int get_e_machine(struct jitheader *hdr)
{
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index aea7b1fe85aa..c441a34cb1c0 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
+#include <linux/string.h>
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
@@ -162,8 +163,7 @@ copy_class_filename(const char * class_sign, const char * file_name, char * resu
result[i] = '\0';
} else {
/* fallback case */
- size_t file_name_len = strlen(file_name);
- strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length);
+ strlcpy(result, file_name, max_length);
}
}
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
index 74e4627ca278..0b96545c8184 100644
--- a/tools/perf/perf-with-kcore.sh
+++ b/tools/perf/perf-with-kcore.sh
@@ -104,11 +104,6 @@ fix_buildid_cache_permissions()
USER_HOME=$(bash <<< "echo ~$SUDO_USER")
- if [ "$HOME" != "$USER_HOME" ] ; then
- echo "Fix unnecessary because root has a home: $HOME" >&2
- exit 1
- fi
-
echo "Fixing buildid cache permissions"
find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index d59dee61b64d..711e009381ec 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -61,6 +61,8 @@ struct record_opts {
bool record_switch_events;
bool all_kernel;
bool all_user;
+ bool kernel_callchains;
+ bool user_callchains;
bool tail_synthesize;
bool overwrite;
bool ignore_missing_thread;
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index c3eae1d77d36..93225c02117e 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -27,18 +27,31 @@ import datetime
#
# fedora:
#
-# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
+# $ sudo yum install postgresql postgresql-server qt-postgresql
# $ sudo su - postgres -c initdb
# $ sudo service postgresql start
# $ sudo su - postgres
-# $ createuser <your user id here>
+# $ createuser -s <your user id here> # Older versions may not support -s, in which case answer the prompt below:
# Shall the new role be a superuser? (y/n) y
+# $ sudo yum install python-pyside
+#
+# Alternately, to use Python3 and/or pyside 2, one of the following:
+# $ sudo yum install python3-pyside
+# $ pip install --user PySide2
+# $ pip3 install --user PySide2
#
# ubuntu:
#
-# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
+# $ sudo apt-get install postgresql
# $ sudo su - postgres
# $ createuser -s <your user id here>
+# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql
+#
+# Alternately, to use Python3 and/or pyside 2, one of the following:
+#
+# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql
+# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql
+# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql
#
# An example of using this script with Intel PT:
#
@@ -199,7 +212,16 @@ import datetime
# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
# call_path_id = query.value(6)
-from PySide.QtSql import *
+pyside_version_1 = True
+if not "pyside-version-1" in sys.argv:
+ try:
+ from PySide2.QtSql import *
+ pyside_version_1 = False
+ except:
+ pass
+
+if pyside_version_1:
+ from PySide.QtSql import *
if sys.version_info < (3, 0):
def toserverstr(str):
@@ -255,11 +277,12 @@ def printdate(*args, **kw_args):
print(datetime.datetime.today(), *args, sep=' ', **kw_args)
def usage():
- printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]")
- printerr("where: columns 'all' or 'branches'")
- printerr(" calls 'calls' => create calls and call_paths table")
- printerr(" callchains 'callchains' => create call_paths table")
- raise Exception("Too few arguments")
+ printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]");
+ printerr("where: columns 'all' or 'branches'");
+ printerr(" calls 'calls' => create calls and call_paths table");
+ printerr(" callchains 'callchains' => create call_paths table");
+ printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1");
+ raise Exception("Too few or bad arguments")
if (len(sys.argv) < 2):
usage()
@@ -281,6 +304,8 @@ for i in range(3,len(sys.argv)):
perf_db_export_calls = True
elif (sys.argv[i] == "callchains"):
perf_db_export_callchains = True
+ elif (sys.argv[i] == "pyside-version-1"):
+ pass
else:
usage()
@@ -369,7 +394,9 @@ if branches:
'to_ip bigint,'
'branch_type integer,'
'in_tx boolean,'
- 'call_path_id bigint)')
+ 'call_path_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
else:
do_query(query, 'CREATE TABLE samples ('
'id bigint NOT NULL,'
@@ -393,7 +420,9 @@ else:
'data_src bigint,'
'branch_type integer,'
'in_tx boolean,'
- 'call_path_id bigint)')
+ 'call_path_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE TABLE call_paths ('
@@ -414,7 +443,9 @@ if perf_db_export_calls:
'return_id bigint,'
'parent_call_path_id bigint,'
'flags integer,'
- 'parent_id bigint)')
+ 'parent_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
do_query(query, 'CREATE VIEW machines_view AS '
'SELECT '
@@ -496,6 +527,9 @@ if perf_db_export_calls:
'return_time,'
'return_time - call_time AS elapsed_time,'
'branch_count,'
+ 'insn_count,'
+ 'cyc_count,'
+ 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC,'
'call_id,'
'return_id,'
'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,'
@@ -521,7 +555,10 @@ do_query(query, 'CREATE VIEW samples_view AS '
'to_sym_offset,'
'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
- 'in_tx'
+ 'in_tx,'
+ 'insn_count,'
+ 'cyc_count,'
+ 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC'
' FROM samples')
@@ -593,10 +630,10 @@ def trace_begin():
comm_table(0, "unknown")
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
- sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
if perf_db_export_calls or perf_db_export_callchains:
call_path_table(0, 0, 0, 0)
- call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
unhandled_count = 0
@@ -747,11 +784,11 @@ def branch_type_table(branch_type, name, *x):
value = struct.pack(fmt, 2, 4, branch_type, n, name)
branch_type_file.write(value)
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x):
if branches:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiqiqiq", 20, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt)
else:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiqiqiq", 24, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt)
sample_file.write(value)
def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
@@ -759,7 +796,7 @@ def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
call_path_file.write(value)
-def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, *x):
- fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiq"
- value = struct.pack(fmt, 12, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id)
+def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, insn_cnt, cyc_cnt, *x):
+ fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiqiqiq"
+ value = struct.pack(fmt, 14, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id, 8, insn_cnt, 8, cyc_cnt)
call_file.write(value)
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index bf271fbc3a88..4542ce89034b 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -21,6 +21,26 @@ import datetime
# provides LGPL-licensed Python bindings for Qt. You will also need the package
# libqt4-sql-sqlite for Qt sqlite3 support.
#
+# Examples of installing pyside:
+#
+# ubuntu:
+#
+# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql
+#
+# Alternately, to use Python3 and/or pyside 2, one of the following:
+#
+# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql
+# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql
+# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql
+# fedora:
+#
+# $ sudo yum install python-pyside
+#
+# Alternately, to use Python3 and/or pyside 2, one of the following:
+# $ sudo yum install python3-pyside
+# $ pip install --user PySide2
+# $ pip3 install --user PySide2
+#
# An example of using this script with Intel PT:
#
# $ perf record -e intel_pt//u ls
@@ -49,7 +69,16 @@ import datetime
# difference is the 'transaction' column of the 'samples' table which is
# renamed 'transaction_' in sqlite because 'transaction' is a reserved word.
-from PySide.QtSql import *
+pyside_version_1 = True
+if not "pyside-version-1" in sys.argv:
+ try:
+ from PySide2.QtSql import *
+ pyside_version_1 = False
+ except:
+ pass
+
+if pyside_version_1:
+ from PySide.QtSql import *
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -69,11 +98,12 @@ def printdate(*args, **kw_args):
print(datetime.datetime.today(), *args, sep=' ', **kw_args)
def usage():
- printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]");
- printerr("where: columns 'all' or 'branches'");
- printerr(" calls 'calls' => create calls and call_paths table");
- printerr(" callchains 'callchains' => create call_paths table");
- raise Exception("Too few arguments")
+ printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]");
+ printerr("where: columns 'all' or 'branches'");
+ printerr(" calls 'calls' => create calls and call_paths table");
+ printerr(" callchains 'callchains' => create call_paths table");
+ printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1");
+ raise Exception("Too few or bad arguments")
if (len(sys.argv) < 2):
usage()
@@ -95,6 +125,8 @@ for i in range(3,len(sys.argv)):
perf_db_export_calls = True
elif (sys.argv[i] == "callchains"):
perf_db_export_callchains = True
+ elif (sys.argv[i] == "pyside-version-1"):
+ pass
else:
usage()
@@ -186,7 +218,9 @@ if branches:
'to_ip bigint,'
'branch_type integer,'
'in_tx boolean,'
- 'call_path_id bigint)')
+ 'call_path_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
else:
do_query(query, 'CREATE TABLE samples ('
'id integer NOT NULL PRIMARY KEY,'
@@ -210,7 +244,9 @@ else:
'data_src bigint,'
'branch_type integer,'
'in_tx boolean,'
- 'call_path_id bigint)')
+ 'call_path_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE TABLE call_paths ('
@@ -231,7 +267,9 @@ if perf_db_export_calls:
'return_id bigint,'
'parent_call_path_id bigint,'
'flags integer,'
- 'parent_id bigint)')
+ 'parent_id bigint,'
+ 'insn_count bigint,'
+ 'cyc_count bigint)')
# printf was added to sqlite in version 3.8.3
sqlite_has_printf = False
@@ -327,6 +365,9 @@ if perf_db_export_calls:
'return_time,'
'return_time - call_time AS elapsed_time,'
'branch_count,'
+ 'insn_count,'
+ 'cyc_count,'
+ 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,'
'call_id,'
'return_id,'
'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
@@ -352,7 +393,10 @@ do_query(query, 'CREATE VIEW samples_view AS '
'to_sym_offset,'
'(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
'(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
- 'in_tx'
+ 'in_tx,'
+ 'insn_count,'
+ 'cyc_count,'
+ 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC'
' FROM samples')
do_query(query, 'END TRANSACTION')
@@ -375,15 +419,15 @@ branch_type_query = QSqlQuery(db)
branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)")
sample_query = QSqlQuery(db)
if branches:
- sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+ sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
else:
- sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+ sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
if perf_db_export_calls or perf_db_export_callchains:
call_path_query = QSqlQuery(db)
call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)")
if perf_db_export_calls:
call_query = QSqlQuery(db)
- call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+ call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
def trace_begin():
printdate("Writing records...")
@@ -395,10 +439,10 @@ def trace_begin():
comm_table(0, "unknown")
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
- sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
if perf_db_export_calls or perf_db_export_callchains:
call_path_table(0, 0, 0, 0)
- call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
unhandled_count = 0
@@ -454,14 +498,14 @@ def sample_table(*x):
if branches:
for xx in x[0:15]:
sample_query.addBindValue(str(xx))
- for xx in x[19:22]:
+ for xx in x[19:24]:
sample_query.addBindValue(str(xx))
do_query_(sample_query)
else:
- bind_exec(sample_query, 22, x)
+ bind_exec(sample_query, 24, x)
def call_path_table(*x):
bind_exec(call_path_query, 4, x)
def call_return_table(*x):
- bind_exec(call_query, 12, x)
+ bind_exec(call_query, 14, x)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index affed7d149be..6e7934f2ac9a 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
# SPDX-License-Identifier: GPL-2.0
# exported-sql-viewer.py: view data from sql database
# Copyright (c) 2014-2018, Intel Corporation.
@@ -91,6 +91,7 @@
from __future__ import print_function
import sys
+import argparse
import weakref
import threading
import string
@@ -104,10 +105,23 @@ except ImportError:
glb_nsz = 16
import re
import os
-from PySide.QtCore import *
-from PySide.QtGui import *
-from PySide.QtSql import *
+
pyside_version_1 = True
+if not "--pyside-version-1" in sys.argv:
+ try:
+ from PySide2.QtCore import *
+ from PySide2.QtGui import *
+ from PySide2.QtSql import *
+ from PySide2.QtWidgets import *
+ pyside_version_1 = False
+ except:
+ pass
+
+if pyside_version_1:
+ from PySide.QtCore import *
+ from PySide.QtGui import *
+ from PySide.QtSql import *
+
from decimal import *
from ctypes import *
from multiprocessing import Process, Array, Value, Event
@@ -186,9 +200,10 @@ class Thread(QThread):
class TreeModel(QAbstractItemModel):
- def __init__(self, glb, parent=None):
+ def __init__(self, glb, params, parent=None):
super(TreeModel, self).__init__(parent)
self.glb = glb
+ self.params = params
self.root = self.GetRoot()
self.last_row_read = 0
@@ -385,6 +400,7 @@ class FindBar():
def Activate(self):
self.bar.show()
+ self.textbox.lineEdit().selectAll()
self.textbox.setFocus()
def Deactivate(self):
@@ -449,8 +465,9 @@ class FindBar():
class CallGraphLevelItemBase(object):
- def __init__(self, glb, row, parent_item):
+ def __init__(self, glb, params, row, parent_item):
self.glb = glb
+ self.params = params
self.row = row
self.parent_item = parent_item
self.query_done = False;
@@ -489,18 +506,24 @@ class CallGraphLevelItemBase(object):
class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
- def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
- super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+ def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
self.comm_id = comm_id
self.thread_id = thread_id
self.call_path_id = call_path_id
+ self.insn_cnt = insn_cnt
+ self.cyc_cnt = cyc_cnt
self.branch_count = branch_count
self.time = time
def Select(self):
self.query_done = True;
query = QSqlQuery(self.glb.db)
- QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+ if self.params.have_ipc:
+ ipc_str = ", SUM(insn_count), SUM(cyc_count)"
+ else:
+ ipc_str = ""
+ QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)"
" FROM calls"
" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
@@ -511,7 +534,15 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
" GROUP BY call_path_id, name, short_name"
" ORDER BY call_path_id")
while query.next():
- child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+ if self.params.have_ipc:
+ insn_cnt = int(query.value(5))
+ cyc_cnt = int(query.value(6))
+ branch_count = int(query.value(7))
+ else:
+ insn_cnt = 0
+ cyc_cnt = 0
+ branch_count = int(query.value(5))
+ child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self)
self.child_items.append(child_item)
self.child_count += 1
@@ -519,37 +550,57 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
- def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
- super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+ def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
dso = dsoname(dso)
- self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+ if self.params.have_ipc:
+ insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
+ cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
+ br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
+ ipc = CalcIPC(cyc_cnt, insn_cnt)
+ self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
+ else:
+ self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
self.dbid = call_path_id
# Context-sensitive call graph data model level two item
class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
- def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
- super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
- self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+ def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
+ super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, 0, 0, parent_item)
+ if self.params.have_ipc:
+ self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
+ else:
+ self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
self.dbid = thread_id
def Select(self):
super(CallGraphLevelTwoItem, self).Select()
for child_item in self.child_items:
self.time += child_item.time
+ self.insn_cnt += child_item.insn_cnt
+ self.cyc_cnt += child_item.cyc_cnt
self.branch_count += child_item.branch_count
for child_item in self.child_items:
child_item.data[4] = PercentToOneDP(child_item.time, self.time)
- child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+ if self.params.have_ipc:
+ child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt)
+ child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt)
+ child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count)
+ else:
+ child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
# Context-sensitive call graph data model level one item
class CallGraphLevelOneItem(CallGraphLevelItemBase):
- def __init__(self, glb, row, comm_id, comm, parent_item):
- super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
- self.data = [comm, "", "", "", "", "", ""]
+ def __init__(self, glb, params, row, comm_id, comm, parent_item):
+ super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item)
+ if self.params.have_ipc:
+ self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""]
+ else:
+ self.data = [comm, "", "", "", "", "", ""]
self.dbid = comm_id
def Select(self):
@@ -560,7 +611,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
" INNER JOIN threads ON thread_id = threads.id"
" WHERE comm_id = " + str(self.dbid))
while query.next():
- child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+ child_item = CallGraphLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
self.child_items.append(child_item)
self.child_count += 1
@@ -568,8 +619,8 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
class CallGraphRootItem(CallGraphLevelItemBase):
- def __init__(self, glb):
- super(CallGraphRootItem, self).__init__(glb, 0, None)
+ def __init__(self, glb, params):
+ super(CallGraphRootItem, self).__init__(glb, params, 0, None)
self.dbid = 0
self.query_done = True;
query = QSqlQuery(glb.db)
@@ -577,16 +628,23 @@ class CallGraphRootItem(CallGraphLevelItemBase):
while query.next():
if not query.value(0):
continue
- child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+ child_item = CallGraphLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self)
self.child_items.append(child_item)
self.child_count += 1
+# Call graph model parameters
+
+class CallGraphModelParams():
+
+ def __init__(self, glb, parent=None):
+ self.have_ipc = IsSelectable(glb.db, "calls", columns = "insn_count, cyc_count")
+
# Context-sensitive call graph data model base
class CallGraphModelBase(TreeModel):
def __init__(self, glb, parent=None):
- super(CallGraphModelBase, self).__init__(glb, parent)
+ super(CallGraphModelBase, self).__init__(glb, CallGraphModelParams(glb), parent)
def FindSelect(self, value, pattern, query):
if pattern:
@@ -668,17 +726,26 @@ class CallGraphModel(CallGraphModelBase):
super(CallGraphModel, self).__init__(glb, parent)
def GetRoot(self):
- return CallGraphRootItem(self.glb)
+ return CallGraphRootItem(self.glb, self.params)
def columnCount(self, parent=None):
- return 7
+ if self.params.have_ipc:
+ return 12
+ else:
+ return 7
def columnHeader(self, column):
- headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+ if self.params.have_ipc:
+ headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "]
+ else:
+ headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
return headers[column]
def columnAlignment(self, column):
- alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+ if self.params.have_ipc:
+ alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+ else:
+ alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
return alignment[column]
def DoFindSelect(self, query, match):
@@ -715,11 +782,13 @@ class CallGraphModel(CallGraphModelBase):
class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
- def __init__(self, glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item):
- super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+ def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
self.comm_id = comm_id
self.thread_id = thread_id
self.calls_id = calls_id
+ self.insn_cnt = insn_cnt
+ self.cyc_cnt = cyc_cnt
self.branch_count = branch_count
self.time = time
@@ -729,8 +798,12 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id)
else:
comm_thread = ""
+ if self.params.have_ipc:
+ ipc_str = ", insn_count, cyc_count"
+ else:
+ ipc_str = ""
query = QSqlQuery(self.glb.db)
- QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count"
+ QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time" + ipc_str + ", branch_count"
" FROM calls"
" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
@@ -738,7 +811,15 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
" WHERE calls.parent_id = " + str(self.calls_id) + comm_thread +
" ORDER BY call_time, calls.id")
while query.next():
- child_item = CallTreeLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+ if self.params.have_ipc:
+ insn_cnt = int(query.value(5))
+ cyc_cnt = int(query.value(6))
+ branch_count = int(query.value(7))
+ else:
+ insn_cnt = 0
+ cyc_cnt = 0
+ branch_count = int(query.value(5))
+ child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self)
self.child_items.append(child_item)
self.child_count += 1
@@ -746,37 +827,57 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase):
- def __init__(self, glb, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item):
- super(CallTreeLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item)
+ def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+ super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
dso = dsoname(dso)
- self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+ if self.params.have_ipc:
+ insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
+ cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
+ br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
+ ipc = CalcIPC(cyc_cnt, insn_cnt)
+ self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
+ else:
+ self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
self.dbid = calls_id
# Call tree data model level two item
class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase):
- def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
- super(CallTreeLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 0, 0, 0, parent_item)
- self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+ def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
+ super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item)
+ if self.params.have_ipc:
+ self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
+ else:
+ self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
self.dbid = thread_id
def Select(self):
super(CallTreeLevelTwoItem, self).Select()
for child_item in self.child_items:
self.time += child_item.time
+ self.insn_cnt += child_item.insn_cnt
+ self.cyc_cnt += child_item.cyc_cnt
self.branch_count += child_item.branch_count
for child_item in self.child_items:
child_item.data[4] = PercentToOneDP(child_item.time, self.time)
- child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+ if self.params.have_ipc:
+ child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt)
+ child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt)
+ child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count)
+ else:
+ child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
# Call tree data model level one item
class CallTreeLevelOneItem(CallGraphLevelItemBase):
- def __init__(self, glb, row, comm_id, comm, parent_item):
- super(CallTreeLevelOneItem, self).__init__(glb, row, parent_item)
- self.data = [comm, "", "", "", "", "", ""]
+ def __init__(self, glb, params, row, comm_id, comm, parent_item):
+ super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item)
+ if self.params.have_ipc:
+ self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""]
+ else:
+ self.data = [comm, "", "", "", "", "", ""]
self.dbid = comm_id
def Select(self):
@@ -787,7 +888,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
" INNER JOIN threads ON thread_id = threads.id"
" WHERE comm_id = " + str(self.dbid))
while query.next():
- child_item = CallTreeLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+ child_item = CallTreeLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
self.child_items.append(child_item)
self.child_count += 1
@@ -795,8 +896,8 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
class CallTreeRootItem(CallGraphLevelItemBase):
- def __init__(self, glb):
- super(CallTreeRootItem, self).__init__(glb, 0, None)
+ def __init__(self, glb, params):
+ super(CallTreeRootItem, self).__init__(glb, params, 0, None)
self.dbid = 0
self.query_done = True;
query = QSqlQuery(glb.db)
@@ -804,7 +905,7 @@ class CallTreeRootItem(CallGraphLevelItemBase):
while query.next():
if not query.value(0):
continue
- child_item = CallTreeLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+ child_item = CallTreeLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self)
self.child_items.append(child_item)
self.child_count += 1
@@ -816,17 +917,26 @@ class CallTreeModel(CallGraphModelBase):
super(CallTreeModel, self).__init__(glb, parent)
def GetRoot(self):
- return CallTreeRootItem(self.glb)
+ return CallTreeRootItem(self.glb, self.params)
def columnCount(self, parent=None):
- return 7
+ if self.params.have_ipc:
+ return 12
+ else:
+ return 7
def columnHeader(self, column):
- headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+ if self.params.have_ipc:
+ headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "]
+ else:
+ headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
return headers[column]
def columnAlignment(self, column):
- alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+ if self.params.have_ipc:
+ alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+ else:
+ alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
return alignment[column]
def DoFindSelect(self, query, match):
@@ -1355,11 +1465,11 @@ class FetchMoreRecordsBar():
class BranchLevelTwoItem():
- def __init__(self, row, text, parent_item):
+ def __init__(self, row, col, text, parent_item):
self.row = row
self.parent_item = parent_item
- self.data = [""] * 8
- self.data[7] = text
+ self.data = [""] * (col + 1)
+ self.data[col] = text
self.level = 2
def getParentItem(self):
@@ -1391,6 +1501,7 @@ class BranchLevelOneItem():
self.dbid = data[0]
self.level = 1
self.query_done = False
+ self.br_col = len(self.data) - 1
def getChildItem(self, row):
return self.child_items[row]
@@ -1471,7 +1582,7 @@ class BranchLevelOneItem():
while k < 15:
byte_str += " "
k += 1
- self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+ self.child_items.append(BranchLevelTwoItem(0, self.br_col, byte_str + " " + text, self))
self.child_count += 1
else:
return
@@ -1522,16 +1633,37 @@ class BranchRootItem():
def getData(self, column):
return ""
+# Calculate instructions per cycle
+
+def CalcIPC(cyc_cnt, insn_cnt):
+ if cyc_cnt and insn_cnt:
+ ipc = Decimal(float(insn_cnt) / cyc_cnt)
+ ipc = str(ipc.quantize(Decimal(".01"), rounding=ROUND_HALF_UP))
+ else:
+ ipc = "0"
+ return ipc
+
# Branch data preparation
-def BranchDataPrep(query):
- data = []
- for i in xrange(0, 8):
- data.append(query.value(i))
+def BranchDataPrepBr(query, data):
data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
" (" + dsoname(query.value(11)) + ")" + " -> " +
tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
" (" + dsoname(query.value(15)) + ")")
+
+def BranchDataPrepIPC(query, data):
+ insn_cnt = query.value(16)
+ cyc_cnt = query.value(17)
+ ipc = CalcIPC(cyc_cnt, insn_cnt)
+ data.append(insn_cnt)
+ data.append(cyc_cnt)
+ data.append(ipc)
+
+def BranchDataPrep(query):
+ data = []
+ for i in xrange(0, 8):
+ data.append(query.value(i))
+ BranchDataPrepBr(query, data)
return data
def BranchDataPrepWA(query):
@@ -1541,10 +1673,26 @@ def BranchDataPrepWA(query):
data.append("{:>19}".format(query.value(1)))
for i in xrange(2, 8):
data.append(query.value(i))
- data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
- " (" + dsoname(query.value(11)) + ")" + " -> " +
- tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
- " (" + dsoname(query.value(15)) + ")")
+ BranchDataPrepBr(query, data)
+ return data
+
+def BranchDataWithIPCPrep(query):
+ data = []
+ for i in xrange(0, 8):
+ data.append(query.value(i))
+ BranchDataPrepIPC(query, data)
+ BranchDataPrepBr(query, data)
+ return data
+
+def BranchDataWithIPCPrepWA(query):
+ data = []
+ data.append(query.value(0))
+ # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
+ data.append("{:>19}".format(query.value(1)))
+ for i in xrange(2, 8):
+ data.append(query.value(i))
+ BranchDataPrepIPC(query, data)
+ BranchDataPrepBr(query, data)
return data
# Branch data model
@@ -1554,14 +1702,24 @@ class BranchModel(TreeModel):
progress = Signal(object)
def __init__(self, glb, event_id, where_clause, parent=None):
- super(BranchModel, self).__init__(glb, parent)
+ super(BranchModel, self).__init__(glb, None, parent)
self.event_id = event_id
self.more = True
self.populated = 0
+ self.have_ipc = IsSelectable(glb.db, "samples", columns = "insn_count, cyc_count")
+ if self.have_ipc:
+ select_ipc = ", insn_count, cyc_count"
+ prep_fn = BranchDataWithIPCPrep
+ prep_wa_fn = BranchDataWithIPCPrepWA
+ else:
+ select_ipc = ""
+ prep_fn = BranchDataPrep
+ prep_wa_fn = BranchDataPrepWA
sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
" CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
" ip, symbols.name, sym_offset, dsos.short_name,"
" to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+ + select_ipc +
" FROM samples"
" INNER JOIN comms ON comm_id = comms.id"
" INNER JOIN threads ON thread_id = threads.id"
@@ -1575,9 +1733,9 @@ class BranchModel(TreeModel):
" ORDER BY samples.id"
" LIMIT " + str(glb_chunk_sz))
if pyside_version_1 and sys.version_info[0] == 3:
- prep = BranchDataPrepWA
+ prep = prep_wa_fn # pyside1 + python3 cannot handle large integers (time): use the workaround variant
else:
- prep = BranchDataPrep
+ prep = prep_fn
self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
self.fetcher.done.connect(self.Update)
self.fetcher.Fetch(glb_chunk_sz)
@@ -1586,13 +1744,23 @@ class BranchModel(TreeModel):
return BranchRootItem()
def columnCount(self, parent=None):
- return 8
+ if self.have_ipc:
+ return 11
+ else:
+ return 8
def columnHeader(self, column):
- return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+ if self.have_ipc:
+ return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Insn Cnt", "Cyc Cnt", "IPC", "Branch")[column]
+ else:
+ return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
def columnFont(self, column):
- if column != 7:
+ if self.have_ipc:
+ br_col = 10
+ else:
+ br_col = 7
+ if column != br_col:
return None
return QFont("Monospace")
@@ -2100,10 +2268,10 @@ def GetEventList(db):
# Is a table selectable
-def IsSelectable(db, table, sql = ""):
+def IsSelectable(db, table, sql = "", columns = "*"):
query = QSqlQuery(db)
try:
- QueryExec(query, "SELECT * FROM " + table + " " + sql + " LIMIT 1")
+ QueryExec(query, "SELECT " + columns + " FROM " + table + " " + sql + " LIMIT 1")
except:
return False
return True
@@ -2754,7 +2922,7 @@ class WindowMenu():
action = self.window_menu.addAction(label)
action.setCheckable(True)
action.setChecked(sub_window == self.mdi_area.activeSubWindow())
- action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x))
+ action.triggered.connect(lambda a=None,x=nr: self.setActiveSubWindow(x))
self.window_menu.addAction(action)
nr += 1
@@ -2840,6 +3008,12 @@ cd xed
sudo ./mfile.py --prefix=/usr/local install
sudo ldconfig
</pre>
+<h3>Instructions per Cycle (IPC)</h3>
+If available, IPC information is displayed in columns 'insn_cnt', 'cyc_cnt' and 'IPC'.
+<p><b>Intel PT note:</b> The information applies to the blocks of code ending with, and including, that branch.
+Due to the granularity of timing information, the number of cycles for some code blocks will not be known.
+In that case, 'insn_cnt', 'cyc_cnt' and 'IPC' are zero, but when 'IPC' is displayed it covers the period
+since the previous displayed 'IPC'.
<h3>Find</h3>
Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
Refer to Python documentation for the regular expression syntax.
@@ -3114,14 +3288,14 @@ class MainWindow(QMainWindow):
event = event.split(":")[0]
if event == "branches":
label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
- reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+ reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self))
label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
- reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self))
+ reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self))
def TableMenu(self, tables, menu):
table_menu = menu.addMenu("&Tables")
for table in tables:
- table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self))
+ table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None,t=table: self.NewTableView(t), self))
def NewCallGraph(self):
CallGraphWindow(self.glb, self)
@@ -3361,18 +3535,27 @@ class DBRef():
# Main
def Main():
- if (len(sys.argv) < 2):
- printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}");
- raise Exception("Too few arguments")
-
- dbname = sys.argv[1]
- if dbname == "--help-only":
+ usage_str = "exported-sql-viewer.py [--pyside-version-1] <database name>\n" \
+ " or: exported-sql-viewer.py --help-only"
+ ap = argparse.ArgumentParser(usage = usage_str, add_help = False)
+ ap.add_argument("--pyside-version-1", action='store_true')
+ ap.add_argument("dbname", nargs="?")
+ ap.add_argument("--help-only", action='store_true')
+ args = ap.parse_args()
+
+ if args.help_only:
app = QApplication(sys.argv)
mainwindow = HelpOnlyWindow()
mainwindow.show()
err = app.exec_()
sys.exit(err)
+ dbname = args.dbname
+ if dbname is None:
+ ap.print_usage()
+ print("Too few arguments")
+ sys.exit(1)
+
is_sqlite3 = False
try:
f = open(dbname, "rb")
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 0b2b8305c965..e72accefd669 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
perf-y += builtin-test.o
perf-y += parse-events.o
perf-y += dso-data.o
@@ -50,6 +52,8 @@ perf-y += perf-hooks.o
perf-y += clang.o
perf-y += unit_number__scnprintf.o
perf-y += mem2node.o
+perf-y += map_groups.o
+perf-y += time-utils-test.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index 57fc544aedb0..153624e2d0f5 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
* 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index 1ca5106df5f1..ab4b98b3165d 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-example.c
* Test basic LLVM building
diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c
index ff3ec8337f0a..219673aa278f 100644
--- a/tools/perf/tests/bpf-script-test-kbuild.c
+++ b/tools/perf/tests/bpf-script-test-kbuild.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-kbuild.c
* Test include from kernel header
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index 43f1e16486f4..bd83d364cf30 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-prologue.c
* Test BPF prologue
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
index 93af77421816..74006e4b2d24 100644
--- a/tools/perf/tests/bpf-script-test-relocation.c
+++ b/tools/perf/tests/bpf-script-test-relocation.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bpf-script-test-relocation.c
* Test BPF loader checking relocation
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 79b54f8ddebf..c9e4cdc4c9c8 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9852b5d624a5..cd72ff0f7658 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -290,6 +290,14 @@ static struct test generic_tests[] = {
.func = test__mem2node,
},
{
+ .desc = "time utils",
+ .func = test__time_utils,
+ },
+ {
+ .desc = "map_groups__merge_in",
+ .func = test__map_groups__merge_in,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/map_groups.c b/tools/perf/tests/map_groups.c
new file mode 100644
index 000000000000..594fdaca4f71
--- /dev/null
+++ b/tools/perf/tests/map_groups.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "tests.h"
+#include "map.h"
+#include "map_groups.h"
+#include "dso.h"
+#include "debug.h"
+
+struct map_def {
+ const char *name;
+ u64 start;
+ u64 end;
+};
+
+static int check_maps(struct map_def *merged, unsigned int size, struct map_groups *mg)
+{
+ struct map *map;
+ unsigned int i = 0;
+
+ map = map_groups__first(mg);
+ while (map) {
+ TEST_ASSERT_VAL("wrong map start", map->start == merged[i].start);
+ TEST_ASSERT_VAL("wrong map end", map->end == merged[i].end);
+ TEST_ASSERT_VAL("wrong map name", !strcmp(map->dso->name, merged[i].name));
+ TEST_ASSERT_VAL("wrong map refcnt", refcount_read(&map->refcnt) == 2);
+
+ i++;
+ map = map_groups__next(map);
+
+ TEST_ASSERT_VAL("less maps expected", (map && i < size) || (!map && i == size));
+ }
+
+ return TEST_OK;
+}
+
+int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ struct map_groups mg;
+ unsigned int i;
+ struct map_def bpf_progs[] = {
+ { "bpf_prog_1", 200, 300 },
+ { "bpf_prog_2", 500, 600 },
+ { "bpf_prog_3", 800, 900 },
+ };
+ struct map_def merged12[] = {
+ { "kcore1", 100, 200 },
+ { "bpf_prog_1", 200, 300 },
+ { "kcore1", 300, 500 },
+ { "bpf_prog_2", 500, 600 },
+ { "kcore1", 600, 800 },
+ { "bpf_prog_3", 800, 900 },
+ { "kcore1", 900, 1000 },
+ };
+ struct map_def merged3[] = {
+ { "kcore1", 100, 200 },
+ { "bpf_prog_1", 200, 300 },
+ { "kcore1", 300, 500 },
+ { "bpf_prog_2", 500, 600 },
+ { "kcore1", 600, 800 },
+ { "bpf_prog_3", 800, 900 },
+ { "kcore1", 900, 1000 },
+ { "kcore3", 1000, 1100 },
+ };
+ struct map *map_kcore1, *map_kcore2, *map_kcore3;
+ int ret;
+
+ map_groups__init(&mg, NULL);
+
+ for (i = 0; i < ARRAY_SIZE(bpf_progs); i++) {
+ struct map *map;
+
+ map = dso__new_map(bpf_progs[i].name);
+ TEST_ASSERT_VAL("failed to create map", map);
+
+ map->start = bpf_progs[i].start;
+ map->end = bpf_progs[i].end;
+ map_groups__insert(&mg, map);
+ map__put(map);
+ }
+
+ map_kcore1 = dso__new_map("kcore1");
+ TEST_ASSERT_VAL("failed to create map", map_kcore1);
+
+ map_kcore2 = dso__new_map("kcore2");
+ TEST_ASSERT_VAL("failed to create map", map_kcore2);
+
+ map_kcore3 = dso__new_map("kcore3");
+ TEST_ASSERT_VAL("failed to create map", map_kcore3);
+
+ /* kcore1 map overlaps over all bpf maps */
+ map_kcore1->start = 100;
+ map_kcore1->end = 1000;
+
+ /* kcore2 map hides behind bpf_prog_2 */
+ map_kcore2->start = 550;
+ map_kcore2->end = 570;
+
+ /* kcore3 map hides behind bpf_prog_3, kcore1 and adds new map */
+ map_kcore3->start = 880;
+ map_kcore3->end = 1100;
+
+ ret = map_groups__merge_in(&mg, map_kcore1);
+ TEST_ASSERT_VAL("failed to merge map", !ret);
+
+ ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg);
+ TEST_ASSERT_VAL("merge check failed", !ret);
+
+ ret = map_groups__merge_in(&mg, map_kcore2);
+ TEST_ASSERT_VAL("failed to merge map", !ret);
+
+ ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg);
+ TEST_ASSERT_VAL("merge check failed", !ret);
+
+ ret = map_groups__merge_in(&mg, map_kcore3);
+ TEST_ASSERT_VAL("failed to merge map", !ret);
+
+ ret = check_maps(merged3, ARRAY_SIZE(merged3), &mg);
+ TEST_ASSERT_VAL("merge check failed", !ret);
+ return TEST_OK;
+}
diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c
index 0f82ee9fd3f7..efe3397824d2 100644
--- a/tools/perf/tests/mem.c
+++ b/tools/perf/tests/mem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include "util/mem-events.h"
#include "util/symbol.h"
#include "linux/perf_event.h"
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index 9e9e4d37cc77..d23ff1b68eba 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/bitmap.h>
#include "cpumap.h"
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 4a69c07f4101..8f3c80e13584 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -18,6 +18,32 @@
#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+#if defined(__s390x__)
+/* Return true if kvm module is available and loaded. Test this
+ * and return success when trace point kvm_s390_create_vm
+ * exists. Otherwise this test always fails.
+ */
+static bool kvm_s390_create_vm_valid(void)
+{
+ char *eventfile;
+ bool rc = false;
+
+ eventfile = get_events_file("kvm-s390");
+
+ if (eventfile) {
+ DIR *mydir = opendir(eventfile);
+
+ if (mydir) {
+ rc = true;
+ closedir(mydir);
+ }
+ put_events_file(eventfile);
+ }
+
+ return rc;
+}
+#endif
+
static int test__checkevent_tracepoint(struct perf_evlist *evlist)
{
struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1642,6 +1668,7 @@ static struct evlist_test test__events[] = {
{
.name = "kvm-s390:kvm_s390_create_vm",
.check = test__checkevent_tracepoint,
+ .valid = kvm_s390_create_vm_valid,
.id = 100,
},
#endif
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh
index e37787be672b..51e3f60baba0 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
skip_if_no_perf_probe() {
diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh
index 46e076e3c537..5d1b63d3f3e1 100755
--- a/tools/perf/tests/shell/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/probe_vfs_getname.sh
@@ -1,6 +1,7 @@
#!/bin/sh
# Add vfs_getname probe to get syscall args filenames
-#
+
+# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 61c9f8fc6fa1..9b7632ff70aa 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -7,6 +7,7 @@
# This needs no debuginfo package, all is done using the libc ELF symtab
# and the CFI info in the binaries.
+# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 9b073e7fa88c..54030c18bfc2 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -6,6 +6,7 @@
# checks that that was captured by the vfs_getname probe in the generated
# perf.data file, with the temp file name as the pathname argument.
+# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
index 5dcba800109f..899604d17b85 100755
--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -1,6 +1,8 @@
#!/bin/sh
# Zstd perf.data compression/decompression
+# SPDX-License-Identifier: GPL-2.0
+
trace_file=$(mktemp /tmp/perf.data.XXX)
perf_tool=perf
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 147efeb6b195..45d269b0157e 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -7,6 +7,7 @@
# that already handles "probe:vfs_getname" if present, and used in the
# "open" syscall "filename" argument beautifier.
+# SPDX-License-Identifier: GPL-2.0
# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 399f18ca71a3..72912eb473cb 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -107,6 +107,8 @@ const char *test__clang_subtest_get_desc(int subtest);
int test__clang_subtest_get_nr(void);
int test__unit_number__scnprint(struct test *test, int subtest);
int test__mem2node(struct test *t, int subtest);
+int test__map_groups__merge_in(struct test *t, int subtest);
+int test__time_utils(struct test *t, int subtest);
bool test__bp_signal_is_supported(void);
bool test__wp_is_supported(void);
diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c
new file mode 100644
index 000000000000..4f53006233a1
--- /dev/null
+++ b/tools/perf/tests/time-utils-test.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/time64.h>
+#include <inttypes.h>
+#include <string.h>
+#include "time-utils.h"
+#include "evlist.h"
+#include "session.h"
+#include "debug.h"
+#include "tests.h"
+
+static bool test__parse_nsec_time(const char *str, u64 expected)
+{
+ u64 ptime;
+ int err;
+
+ pr_debug("\nparse_nsec_time(\"%s\")\n", str);
+
+ err = parse_nsec_time(str, &ptime);
+ if (err) {
+ pr_debug("error %d\n", err);
+ return false;
+ }
+
+ if (ptime != expected) {
+ pr_debug("Failed. ptime %" PRIu64 " expected %" PRIu64 "\n",
+ ptime, expected);
+ return false;
+ }
+
+ pr_debug("%" PRIu64 "\n", ptime);
+
+ return true;
+}
+
+static bool test__perf_time__parse_str(const char *ostr, u64 start, u64 end)
+{
+ struct perf_time_interval ptime;
+ int err;
+
+ pr_debug("\nperf_time__parse_str(\"%s\")\n", ostr);
+
+ err = perf_time__parse_str(&ptime, ostr);
+ if (err) {
+ pr_debug("Error %d\n", err);
+ return false;
+ }
+
+ if (ptime.start != start || ptime.end != end) {
+ pr_debug("Failed. Expected %" PRIu64 " to %" PRIu64 "\n",
+ start, end);
+ return false;
+ }
+
+ return true;
+}
+
+#define TEST_MAX 64
+
+struct test_data {
+ const char *str;
+ u64 first;
+ u64 last;
+ struct perf_time_interval ptime[TEST_MAX];
+ int num;
+ u64 skip[TEST_MAX];
+ u64 noskip[TEST_MAX];
+};
+
+static bool test__perf_time__parse_for_ranges(struct test_data *d)
+{
+ struct perf_evlist evlist = {
+ .first_sample_time = d->first,
+ .last_sample_time = d->last,
+ };
+ struct perf_session session = { .evlist = &evlist };
+ struct perf_time_interval *ptime = NULL;
+ int range_size, range_num;
+ bool pass = false;
+ int i, err;
+
+ pr_debug("\nperf_time__parse_for_ranges(\"%s\")\n", d->str);
+
+ if (strchr(d->str, '%'))
+ pr_debug("first_sample_time %" PRIu64 " last_sample_time %" PRIu64 "\n",
+ d->first, d->last);
+
+ err = perf_time__parse_for_ranges(d->str, &session, &ptime, &range_size,
+ &range_num);
+ if (err) {
+ pr_debug("error %d\n", err);
+ goto out;
+ }
+
+ if (range_size < d->num || range_num != d->num) {
+ pr_debug("bad size: range_size %d range_num %d expected num %d\n",
+ range_size, range_num, d->num);
+ goto out;
+ }
+
+ for (i = 0; i < d->num; i++) {
+ if (ptime[i].start != d->ptime[i].start ||
+ ptime[i].end != d->ptime[i].end) {
+ pr_debug("bad range %d expected %" PRIu64 " to %" PRIu64 "\n",
+ i, d->ptime[i].start, d->ptime[i].end);
+ goto out;
+ }
+ }
+
+ if (perf_time__ranges_skip_sample(ptime, d->num, 0)) {
+ pr_debug("failed to keep 0\n");
+ goto out;
+ }
+
+ for (i = 0; i < TEST_MAX; i++) {
+ if (d->skip[i] &&
+ !perf_time__ranges_skip_sample(ptime, d->num, d->skip[i])) {
+ pr_debug("failed to skip %" PRIu64 "\n", d->skip[i]);
+ goto out;
+ }
+ if (d->noskip[i] &&
+ perf_time__ranges_skip_sample(ptime, d->num, d->noskip[i])) {
+ pr_debug("failed to keep %" PRIu64 "\n", d->noskip[i]);
+ goto out;
+ }
+ }
+
+ pass = true;
+out:
+ free(ptime);
+ return pass;
+}
+
+int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ bool pass = true;
+
+ pass &= test__parse_nsec_time("0", 0);
+ pass &= test__parse_nsec_time("1", 1000000000ULL);
+ pass &= test__parse_nsec_time("0.000000001", 1);
+ pass &= test__parse_nsec_time("1.000000001", 1000000001ULL);
+ pass &= test__parse_nsec_time("123456.123456", 123456123456000ULL);
+ pass &= test__parse_nsec_time("1234567.123456789", 1234567123456789ULL);
+ pass &= test__parse_nsec_time("18446744073.709551615",
+ 0xFFFFFFFFFFFFFFFFULL);
+
+ pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456789",
+ 1234567123456789ULL, 1234567123456789ULL);
+ pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456790",
+ 1234567123456789ULL, 1234567123456790ULL);
+ pass &= test__perf_time__parse_str("1234567.123456789,",
+ 1234567123456789ULL, 0);
+ pass &= test__perf_time__parse_str(",1234567.123456789",
+ 0, 1234567123456789ULL);
+ pass &= test__perf_time__parse_str("0,1234567.123456789",
+ 0, 1234567123456789ULL);
+
+ {
+ u64 b = 1234567123456789ULL;
+ struct test_data d = {
+ .str = "1234567.123456789,1234567.123456790",
+ .ptime = { {b, b + 1}, },
+ .num = 1,
+ .skip = { b - 1, b + 2, },
+ .noskip = { b, b + 1, },
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ {
+ u64 b = 1234567123456789ULL;
+ u64 c = 7654321987654321ULL;
+ u64 e = 8000000000000000ULL;
+ struct test_data d = {
+ .str = "1234567.123456789,1234567.123456790 "
+ "7654321.987654321,7654321.987654444 "
+ "8000000,8000000.000000005",
+ .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, },
+ .num = 3,
+ .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 },
+ .noskip = { b, b + 1, c, c + 123, e, e + 5 },
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ {
+ u64 b = 7654321ULL * NSEC_PER_SEC;
+ struct test_data d = {
+ .str = "10%/1",
+ .first = b,
+ .last = b + 100,
+ .ptime = { {b, b + 9}, },
+ .num = 1,
+ .skip = { b - 1, b + 10, },
+ .noskip = { b, b + 9, },
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ {
+ u64 b = 7654321ULL * NSEC_PER_SEC;
+ struct test_data d = {
+ .str = "10%/2",
+ .first = b,
+ .last = b + 100,
+ .ptime = { {b + 10, b + 19}, },
+ .num = 1,
+ .skip = { b + 9, b + 20, },
+ .noskip = { b + 10, b + 19, },
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ {
+ u64 b = 11223344ULL * NSEC_PER_SEC;
+ struct test_data d = {
+ .str = "10%/1,10%/2",
+ .first = b,
+ .last = b + 100,
+ .ptime = { {b, b + 9}, {b + 10, b + 19}, },
+ .num = 2,
+ .skip = { b - 1, b + 20, },
+ .noskip = { b, b + 8, b + 9, b + 10, b + 11, b + 12, b + 19, },
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ {
+ u64 b = 11223344ULL * NSEC_PER_SEC;
+ struct test_data d = {
+ .str = "10%/1,10%/3,10%/10",
+ .first = b,
+ .last = b + 100,
+ .ptime = { {b, b + 9}, {b + 20, b + 29}, { b + 90, b + 100}, },
+ .num = 3,
+ .skip = { b - 1, b + 10, b + 19, b + 30, b + 89, b + 101 },
+ .noskip = { b, b + 9, b + 20, b + 29, b + 90, b + 100},
+ };
+
+ pass &= test__perf_time__parse_for_ranges(&d);
+ }
+
+ pr_debug("\n");
+
+ return pass ? 0 : TEST_FAIL;
+}
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 85f328ddf897..afa75a76f6b8 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -1,11 +1,14 @@
perf-y += clone.o
perf-y += fcntl.o
perf-y += flock.o
+perf-y += fsmount.o
+perf-y += fspick.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
perf-y += ioctl.o
endif
perf-y += kcmp.o
perf-y += mount_flags.o
+perf-y += move_mount.o
perf-y += pkey_alloc.o
perf-y += arch_prctl.o
perf-y += prctl.o
@@ -13,3 +16,4 @@ perf-y += renameat.o
perf-y += sockaddr.o
perf-y += socket.o
perf-y += statx.o
+perf-y += sync_file_range.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 139d485a6f16..7e06605f7c76 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -108,6 +108,9 @@ struct syscall_arg {
unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
+size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags
+
size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
@@ -141,6 +144,12 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar
size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_FLOCK syscall_arg__scnprintf_flock
+size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags
+
+size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FSPICK_FLAGS syscall_arg__scnprintf_fspick_flags
+
size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd
@@ -156,6 +165,9 @@ unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigne
size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOVE_MOUNT_FLAGS syscall_arg__scnprintf_move_mount_flags
+
size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
@@ -189,6 +201,9 @@ size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_
size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask
+size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags
+
size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix);
void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index 6eb9a6636171..1a8d3be2030e 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -25,6 +25,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
P_FLAG(FS);
P_FLAG(FILES);
P_FLAG(SIGHAND);
+ P_FLAG(PIDFD);
P_FLAG(PTRACE);
P_FLAG(VFORK);
P_FLAG(PARENT);
diff --git a/tools/perf/trace/beauty/fsconfig.sh b/tools/perf/trace/beauty/fsconfig.sh
new file mode 100755
index 000000000000..83fb24df05c9
--- /dev/null
+++ b/tools/perf/trace/beauty/fsconfig.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+# Emit the fsconfig_cmds[] string table from the FSCONFIG_* enumerators in mount.h.
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mount=${linux_header_dir}/mount.h
+# Matches "FSCONFIG_<NAME> = <N>," lines; expansions are quoted so the shell never globs the regex.
+printf "static const char *fsconfig_cmds[] = {\n"
+regex='^[[:space:]]*FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*'
+grep -E "$regex" "${linux_mount}" | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c
new file mode 100644
index 000000000000..30c8c082a3c3
--- /dev/null
+++ b/tools/perf/trace/beauty/fsmount.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/fsmount.c
+ *
+ * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/log2.h>
+#include <uapi/linux/mount.h>
+
+static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+#include "trace/beauty/generated/fsmount_arrays.c"
+ static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_");
+ size_t printed = 0;
+
+ if ((flags & ~MOUNT_ATTR__ATIME) != 0)
+ printed += strarray__scnprintf_flags(&strarray__fsmount_attr_flags, bf, size, show_prefix, flags);
+
+ if ((flags & MOUNT_ATTR__ATIME) == MOUNT_ATTR_RELATIME) {
+ printed += scnprintf(bf + printed, size - printed, "%s%s%s",
+ printed ? "|" : "", show_prefix ? "MOUNT_ATTR_" : "", "RELATIME");
+ }
+
+ return printed;
+}
+
+size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return fsmount__scnprintf_attr_flags(flags, bf, size, arg->show_string_prefix);
+}
diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh
new file mode 100755
index 000000000000..615cc0fcf4f9
--- /dev/null
+++ b/tools/perf/trace/beauty/fsmount.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+# Emit the fsmount_attr_flags[] string table from the MOUNT_ATTR_* defines in mount.h.
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mount=${linux_header_dir}/mount.h
+
+# Skip MOUNT_ATTR_RELATIME: its value is zero, so the beautifier handles it as a special case.
+# Only match MOUNT_ATTR_ followed directly by a letter or digit: a double underscore marks
+# special names such as MOUNT_ATTR__ATIME, the mask covering the possible ATIME handling
+# bits, which the beautifier special-cases as well.
+
+printf "static const char *fsmount_attr_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E "$regex" "${linux_mount}" | grep -v MOUNT_ATTR_RELATIME | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/fspick.c b/tools/perf/trace/beauty/fspick.c
new file mode 100644
index 000000000000..c402479c96f0
--- /dev/null
+++ b/tools/perf/trace/beauty/fspick.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/fspick.c
+ *
+ * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/log2.h>
+
+static size_t fspick__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+#include "trace/beauty/generated/fspick_arrays.c"
+ static DEFINE_STRARRAY(fspick_flags, "FSPICK_");
+
+ return strarray__scnprintf_flags(&strarray__fspick_flags, bf, size, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return fspick__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
+}
diff --git a/tools/perf/trace/beauty/fspick.sh b/tools/perf/trace/beauty/fspick.sh
new file mode 100755
index 000000000000..b220e07ef452
--- /dev/null
+++ b/tools/perf/trace/beauty/fspick.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+# Emit the fspick_flags[] string table from the FSPICK_* defines in mount.h.
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mount=${linux_header_dir}/mount.h
+# Each 0x-prefixed flag lands at index ilog2(value) + 1; expansions are quoted to avoid globbing.
+printf "static const char *fspick_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E "$regex" "${linux_mount}" | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/move_mount.c b/tools/perf/trace/beauty/move_mount.c
new file mode 100644
index 000000000000..78ed80395406
--- /dev/null
+++ b/tools/perf/trace/beauty/move_mount.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/move_mount.c
+ *
+ * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/log2.h>
+
+static size_t move_mount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+#include "trace/beauty/generated/move_mount_flags_array.c"
+ static DEFINE_STRARRAY(move_mount_flags, "MOVE_MOUNT_");
+
+ return strarray__scnprintf_flags(&strarray__move_mount_flags, bf, size, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return move_mount__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
+}
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
new file mode 100755
index 000000000000..55e59241daa4
--- /dev/null
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+# Emit the move_mount_flags[] string table from the MOVE_MOUNT_* defines in mount.h.
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_mount=${linux_header_dir}/mount.h
+# Only MOVE_MOUNT_F_* and MOVE_MOUNT_T_* defines match; expansions are quoted to avoid globbing.
+printf "static const char *move_mount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E "$regex" "${linux_mount}" | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/sync_file_range.c b/tools/perf/trace/beauty/sync_file_range.c
new file mode 100644
index 000000000000..1c425f04047d
--- /dev/null
+++ b/tools/perf/trace/beauty/sync_file_range.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/sync_file_range.c
+ *
+ * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/log2.h>
+#include <uapi/linux/fs.h>
+
+static size_t sync_file_range__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+#include "trace/beauty/generated/sync_file_range_arrays.c"
+ static DEFINE_STRARRAY(sync_file_range_flags, "SYNC_FILE_RANGE_");
+ size_t printed = 0;
+
+ if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == SYNC_FILE_RANGE_WRITE_AND_WAIT) {
+ printed += scnprintf(bf + printed, size - printed, "%s%s", show_prefix ? "SYNC_FILE_RANGE_" : "", "WRITE_AND_WAIT");
+ flags &= ~SYNC_FILE_RANGE_WRITE_AND_WAIT;
+ }
+
+ return printed + strarray__scnprintf_flags(&strarray__sync_file_range_flags, bf + printed, size - printed, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+
+ return sync_file_range__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
+}
diff --git a/tools/perf/trace/beauty/sync_file_range.sh b/tools/perf/trace/beauty/sync_file_range.sh
new file mode 100755
index 000000000000..7a9282d04e44
--- /dev/null
+++ b/tools/perf/trace/beauty/sync_file_range.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+# Emit the sync_file_range_flags[] string table from the SYNC_FILE_RANGE_* defines in fs.h.
+if [ $# -ne 1 ] ; then
+ linux_header_dir=tools/include/uapi/linux
+else
+ linux_header_dir=$1
+fi
+
+linux_fs=${linux_header_dir}/fs.h
+# The value must start with a hex digit, so defines given as parenthesised expressions are skipped.
+printf "static const char *sync_file_range_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*'
+grep -E "$regex" "${linux_fs}" | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 98d934a36d86..b0d089a95dac 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -97,11 +97,12 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
struct annotation *notes = browser__annotation(browser);
struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+ const bool is_current_entry = ui_browser__is_current_entry(browser, row);
struct annotation_write_ops ops = {
.first_line = row == 0,
- .current_entry = ui_browser__is_current_entry(browser, row),
+ .current_entry = is_current_entry,
.change_color = (!notes->options->hide_src_code &&
- (!ops.current_entry ||
+ (!is_current_entry ||
(browser->use_navkeypressed &&
!browser->navkeypressed))),
.width = browser->width,
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
index c0686cda39a5..991e692b9b46 100644
--- a/tools/perf/ui/libslang.h
+++ b/tools/perf/ui/libslang.h
@@ -10,7 +10,12 @@
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
#endif
+
+#ifdef HAVE_SLANG_INCLUDE_SUBDIR
+#include <slang/slang.h>
+#else
#include <slang.h>
+#endif
#if SLANG_VERSION < 20104
#define slsmg_printf(msg, args...) \
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 3802cee5e188..59241ff342be 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -19,7 +19,7 @@ TAG=
if test -d ../../.git -o -f ../../.git
then
TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
- CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+ CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
elif test -f ../../PERF-VERSION-FILE
then
TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 79db038b56f2..c8ce13419d9b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -931,9 +931,8 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
if (sym == NULL)
return 0;
src = symbol__hists(sym, evsel->evlist->nr_entries);
- if (src == NULL)
- return -ENOMEM;
- return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample);
+ return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx,
+ addr, sample) : 0;
}
static int symbol__account_cycles(u64 addr, u64 start,
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 66e82bd0683e..cfdbf65f1e02 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1001,7 +1001,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
if (!str) {
- itrace_synth_opts__set_default(synth_opts, false);
+ itrace_synth_opts__set_default(synth_opts,
+ synth_opts->default_no_sample);
return 0;
}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d62f60eb5df4..e9b4c5edf78b 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -74,6 +74,8 @@ enum itrace_period_type {
* @period_type: 'instructions' events period type
* @initial_skip: skip N events at the beginning.
* @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ * @ptime_range: time intervals to trace or NULL
+ * @range_num: number of time intervals to trace
*/
struct itrace_synth_opts {
bool set;
@@ -98,6 +100,8 @@ struct itrace_synth_opts {
enum itrace_period_type period_type;
unsigned long initial_skip;
unsigned long *cpu_bitmap;
+ struct perf_time_interval *ptime_range;
+ int range_num;
};
/**
@@ -590,6 +594,21 @@ static inline void auxtrace__free(struct perf_session *session)
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
+ struct perf_time_interval *ptime_range,
+ int range_num)
+{
+ opts->ptime_range = ptime_range;
+ opts->range_num = range_num;
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
+{
+ opts->ptime_range = NULL;
+ opts->range_num = 0;
+}
#else
@@ -733,6 +752,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
#define ITRACE_HELP ""
+static inline
+void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
+ __maybe_unused,
+ struct perf_time_interval *ptime_range
+ __maybe_unused,
+ int range_num __maybe_unused)
+{
+}
+
+static inline
+void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
+ __maybe_unused)
+{
+}
+
#endif
#endif
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 7e3c1b60120c..e7d2c08d263a 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -739,11 +739,15 @@ int perf_config(config_fn_t fn, void *data)
if (ret < 0) {
pr_err("Error: wrong config key-value pair %s=%s\n",
key, value);
- break;
+ /*
+ * Can't be just a 'break', as perf_config_set__for_each_entry()
+ * expands to two nested for() loops.
+ */
+ goto out;
}
}
}
-
+out:
return ret;
}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0b599229bc7e..c11a459ca582 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -373,6 +373,46 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
return 0;
}
+int cpu_map__get_die_id(int cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
+
+ return ret ?: value;
+}
+
+int cpu_map__get_die(struct cpu_map *map, int idx, void *data)
+{
+ int cpu, die_id, s;
+
+ if (idx < 0 || idx >= map->nr) /* valid indexes into map->map[] are 0..nr-1 */
+ return -1;
+
+ cpu = map->map[idx];
+
+ die_id = cpu_map__get_die_id(cpu);
+ /* There is no die_id on legacy systems, treat them as die 0. */
+ if (die_id == -1)
+ die_id = 0;
+
+ s = cpu_map__get_socket(map, idx, data);
+ if (s == -1)
+ return -1;
+
+ /*
+ * die_id is relative to the socket, but we need a
+ * global id, so combine the two: socket in bit
+ * range 15:8, die_id in bit range 7:0. Each must
+ * fit in 8 bits, which is what is checked below.
+ */
+ if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
+ return -1;
+
+ if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
+ return -1;
+
+ return (s << 8) | (die_id & 0xff);
+}
+
int cpu_map__get_core_id(int cpu)
{
int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
@@ -381,7 +421,7 @@ int cpu_map__get_core_id(int cpu)
int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
{
- int cpu, s;
+ int cpu, s_die;
if (idx > map->nr)
return -1;
@@ -390,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
cpu = cpu_map__get_core_id(cpu);
- s = cpu_map__get_socket(map, idx, data);
- if (s == -1)
+ /* s_die is the combination of socket + die id */
+ s_die = cpu_map__get_die(map, idx, data);
+ if (s_die == -1)
return -1;
/*
- * encode socket in upper 16 bits
- * core_id is relative to socket, and
+ * encode socket in bit range 31:24
+ * encode die id in bit range 23:16
+ * core_id is relative to socket and die,
* we need a global id. So we combine
- * socket+ core id
+ * socket + die id + core id
*/
- return (s << 16) | (cpu & 0xffff);
+ if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
+ return -1;
+
+ return (s_die << 16) | (cpu & 0xffff);
}
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
@@ -408,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
+int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep)
+{
+ return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
+}
+
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
{
return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f00ce624b9f7..1265f0e33920 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -25,9 +25,12 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket_id(int cpu);
int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
+int cpu_map__get_die_id(int cpu);
+int cpu_map__get_die(struct cpu_map *map, int idx, void *data);
int cpu_map__get_core_id(int cpu);
int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep);
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
const struct cpu_map *cpu_map__online(void); /* thread unsafe */
@@ -43,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
static inline int cpu_map__id_to_socket(int id)
{
- return id >> 16;
+ return id >> 24;
+}
+
+static inline int cpu_map__id_to_die(int id)
+{
+ return (id >> 16) & 0xff;
}
static inline int cpu_map__id_to_cpu(int id)
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index ece0710249d4..26e73a4bd4fe 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/param.h>
+#include <sys/utsname.h>
#include <inttypes.h>
#include <api/fs/fs.h>
@@ -8,11 +9,14 @@
#include "util.h"
#include "env.h"
-
#define CORE_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define DIE_SIB_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
#define THRD_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+#define THRD_SIB_FMT_NEW \
+ "%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
#define NODE_ONLINE_FMT \
"%s/devices/system/node/online"
#define NODE_MEMINFO_FMT \
@@ -34,12 +38,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
sysfs__mountpoint(), cpu);
fp = fopen(filename, "r");
if (!fp)
- goto try_threads;
+ goto try_dies;
sret = getline(&buf, &len, fp);
fclose(fp);
if (sret <= 0)
- goto try_threads;
+ goto try_dies;
p = strchr(buf, '\n');
if (p)
@@ -57,9 +61,44 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
}
ret = 0;
+try_dies:
+ if (!tp->die_siblings)
+ goto try_threads;
+
+ scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+ sysfs__mountpoint(), cpu);
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto try_threads;
+
+ sret = getline(&buf, &len, fp);
+ fclose(fp);
+ if (sret <= 0)
+ goto try_threads;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->die_sib; i++) {
+ if (!strcmp(buf, tp->die_siblings[i]))
+ break;
+ }
+ if (i == tp->die_sib) {
+ tp->die_siblings[i] = buf;
+ tp->die_sib++;
+ buf = NULL;
+ len = 0;
+ }
+ ret = 0;
+
try_threads:
- scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+ scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
sysfs__mountpoint(), cpu);
+ if (access(filename, F_OK) == -1) {
+ scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+ sysfs__mountpoint(), cpu);
+ }
fp = fopen(filename, "r");
if (!fp)
goto done;
@@ -98,21 +137,46 @@ void cpu_topology__delete(struct cpu_topology *tp)
for (i = 0 ; i < tp->core_sib; i++)
zfree(&tp->core_siblings[i]);
+ if (tp->die_sib) {
+ for (i = 0 ; i < tp->die_sib; i++)
+ zfree(&tp->die_siblings[i]);
+ }
+
for (i = 0 ; i < tp->thread_sib; i++)
zfree(&tp->thread_siblings[i]);
free(tp);
}
+static bool has_die_topology(void)
+{
+ char filename[MAXPATHLEN];
+ struct utsname uts;
+
+ if (uname(&uts) < 0)
+ return false;
+
+ if (strncmp(uts.machine, "x86_64", 6))
+ return false;
+
+ scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+ sysfs__mountpoint(), 0);
+ if (access(filename, F_OK) == -1)
+ return false;
+
+ return true;
+}
+
struct cpu_topology *cpu_topology__new(void)
{
struct cpu_topology *tp = NULL;
void *addr;
- u32 nr, i;
+ u32 nr, i, nr_addr;
size_t sz;
long ncpus;
int ret = -1;
struct cpu_map *map;
+ bool has_die = has_die_topology();
ncpus = cpu__max_present_cpu();
@@ -126,7 +190,11 @@ struct cpu_topology *cpu_topology__new(void)
nr = (u32)(ncpus & UINT_MAX);
sz = nr * sizeof(char *);
- addr = calloc(1, sizeof(*tp) + 2 * sz);
+ if (has_die)
+ nr_addr = 3;
+ else
+ nr_addr = 2;
+ addr = calloc(1, sizeof(*tp) + nr_addr * sz);
if (!addr)
goto out_free;
@@ -134,6 +202,10 @@ struct cpu_topology *cpu_topology__new(void)
addr += sizeof(*tp);
tp->core_siblings = addr;
addr += sz;
+ if (has_die) {
+ tp->die_siblings = addr;
+ addr += sz;
+ }
tp->thread_siblings = addr;
for (i = 0; i < nr; i++) {
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 47a97e71acdf..bae2f1d41856 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -7,8 +7,10 @@
struct cpu_topology {
u32 core_sib;
+ u32 die_sib;
u32 thread_sib;
char **core_siblings;
+ char **die_siblings;
char **thread_siblings;
};
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 39fe21e1cf93..bb45e23018ee 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -18,8 +18,6 @@
#include "intlist.h"
#include "util.h"
-#define MAX_BUFFER 1024
-
/* use raw logging */
#ifdef CS_DEBUG_RAW
#define CS_LOG_RAW_FRAMES
@@ -31,33 +29,26 @@
#endif
#endif
-#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
-
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
ocsd_datapath_resp_t prev_return;
- u32 packet_count;
- u32 head;
- u32 tail;
- struct cs_etm_packet packet_buffer[MAX_BUFFER];
};
static u32
cs_etm_decoder__mem_access(const void *context,
const ocsd_vaddr_t address,
const ocsd_mem_space_acc_t mem_space __maybe_unused,
+ const u8 trace_chan_id,
const u32 req_size,
u8 *buffer)
{
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
- return decoder->mem_access(decoder->data,
- address,
- req_size,
- buffer);
+ return decoder->mem_access(decoder->data, trace_chan_id,
+ address, req_size, buffer);
}
int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
@@ -66,9 +57,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
{
decoder->mem_access = cb_func;
- if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end,
- OCSD_MEM_SPACE_ANY,
- cs_etm_decoder__mem_access, decoder))
+ if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end,
+ OCSD_MEM_SPACE_ANY,
+ cs_etm_decoder__mem_access,
+ decoder))
return -1;
return 0;
@@ -88,14 +80,14 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
return 0;
}
-int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
struct cs_etm_packet *packet)
{
- if (!decoder || !packet)
+ if (!packet_queue || !packet)
return -EINVAL;
/* Nothing to do, might as well just return */
- if (decoder->packet_count == 0)
+ if (packet_queue->packet_count == 0)
return 0;
/*
* The queueing process in function cs_etm_decoder__buffer_packet()
@@ -106,11 +98,12 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
* value. Otherwise the first element of the packet queue is not
* used.
*/
- decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
+ packet_queue->head = (packet_queue->head + 1) &
+ (CS_ETM_PACKET_MAX_BUFFER - 1);
- *packet = decoder->packet_buffer[decoder->head];
+ *packet = packet_queue->packet_buffer[packet_queue->head];
- decoder->packet_count--;
+ packet_queue->packet_count--;
return 1;
}
@@ -276,84 +269,130 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
trace_config);
}
-static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
+ const uint8_t trace_chan_id)
{
- int i;
-
- decoder->head = 0;
- decoder->tail = 0;
- decoder->packet_count = 0;
- for (i = 0; i < MAX_BUFFER; i++) {
- decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
- decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[i].instr_count = 0;
- decoder->packet_buffer[i].last_instr_taken_branch = false;
- decoder->packet_buffer[i].last_instr_size = 0;
- decoder->packet_buffer[i].last_instr_type = 0;
- decoder->packet_buffer[i].last_instr_subtype = 0;
- decoder->packet_buffer[i].last_instr_cond = 0;
- decoder->packet_buffer[i].flags = 0;
- decoder->packet_buffer[i].exception_number = UINT32_MAX;
- decoder->packet_buffer[i].trace_chan_id = UINT8_MAX;
- decoder->packet_buffer[i].cpu = INT_MIN;
+ /* No timestamp packet has been received, nothing to do */
+ if (!packet_queue->timestamp)
+ return OCSD_RESP_CONT;
+
+ packet_queue->timestamp = packet_queue->next_timestamp;
+
+ /* Estimate the timestamp for the next range packet */
+ packet_queue->next_timestamp += packet_queue->instr_count;
+ packet_queue->instr_count = 0;
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ return OCSD_RESP_WAIT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ struct cs_etm_packet_queue *packet_queue;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ /*
+ * We've seen a timestamp packet before - simply record the new value.
+ * Function do_soft_timestamp() will report the value to the front end,
+ * hence asking the decoder to keep decoding rather than stopping.
+ */
+ if (packet_queue->timestamp) {
+ packet_queue->next_timestamp = elem->timestamp;
+ return OCSD_RESP_CONT;
}
+
+ /*
+ * This is the first timestamp we've seen since the beginning of traces
+ * or a discontinuity. Since timestamp packets are generated *after*
+ * range packets have been generated, we need to estimate the time at
+ * which instructions started by subtracting the number of instructions
+ * executed from the timestamp.
+ */
+ packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
+ packet_queue->next_timestamp = elem->timestamp;
+ packet_queue->instr_count = 0;
+
+ /* Tell the front end which traceid_queue needs attention */
+ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
+
+ /* Halt processing until we are being told to proceed */
+ return OCSD_RESP_WAIT;
+}
+
+static void
+cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
+{
+ packet_queue->timestamp = 0;
+ packet_queue->next_timestamp = 0;
+ packet_queue->instr_count = 0;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
u32 et = 0;
int cpu;
- if (decoder->packet_count >= MAX_BUFFER - 1)
+ if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
- et = decoder->tail;
- et = (et + 1) & (MAX_BUFFER - 1);
- decoder->tail = et;
- decoder->packet_count++;
-
- decoder->packet_buffer[et].sample_type = sample_type;
- decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
- decoder->packet_buffer[et].cpu = cpu;
- decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
- decoder->packet_buffer[et].instr_count = 0;
- decoder->packet_buffer[et].last_instr_taken_branch = false;
- decoder->packet_buffer[et].last_instr_size = 0;
- decoder->packet_buffer[et].last_instr_type = 0;
- decoder->packet_buffer[et].last_instr_subtype = 0;
- decoder->packet_buffer[et].last_instr_cond = 0;
- decoder->packet_buffer[et].flags = 0;
- decoder->packet_buffer[et].exception_number = UINT32_MAX;
- decoder->packet_buffer[et].trace_chan_id = trace_chan_id;
-
- if (decoder->packet_count == MAX_BUFFER - 1)
+ et = packet_queue->tail;
+ et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1);
+ packet_queue->tail = et;
+ packet_queue->packet_count++;
+
+ packet_queue->packet_buffer[et].sample_type = sample_type;
+ packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
+ packet_queue->packet_buffer[et].cpu = cpu;
+ packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+ packet_queue->packet_buffer[et].instr_count = 0;
+ packet_queue->packet_buffer[et].last_instr_taken_branch = false;
+ packet_queue->packet_buffer[et].last_instr_size = 0;
+ packet_queue->packet_buffer[et].last_instr_type = 0;
+ packet_queue->packet_buffer[et].last_instr_subtype = 0;
+ packet_queue->packet_buffer[et].last_instr_cond = 0;
+ packet_queue->packet_buffer[et].flags = 0;
+ packet_queue->packet_buffer[et].exception_number = UINT32_MAX;
+ packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id;
+
+ if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
return OCSD_RESP_CONT;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{
int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
CS_ETM_RANGE);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
- packet = &decoder->packet_buffer[decoder->tail];
+ packet = &packet_queue->packet_buffer[packet_queue->tail];
switch (elem->isa) {
case ocsd_isa_aarch64:
@@ -396,43 +435,90 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet->last_instr_size = elem->last_instr_sz;
+ /* per-thread scenario, no need to generate a timestamp */
+ if (cs_etm__etmq_is_timeless(etmq))
+ goto out;
+
+ /*
+ * The packet queue is full and we haven't seen a timestamp (had we
+ * seen one the packet queue wouldn't be full). Let the front end
+ * deal with it.
+ */
+ if (ret == OCSD_RESP_WAIT)
+ goto out;
+
+ packet_queue->instr_count += elem->num_instr_range;
+ /* Tell the front end we have a new timestamp to process */
+ ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue,
+ trace_chan_id);
+out:
return ret;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder,
- const uint8_t trace_chan_id)
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+ const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ /*
+ * Something happened and who knows when we'll get new traces so
+ * reset time statistics.
+ */
+ cs_etm_decoder__reset_timestamp(queue);
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_DISCONTINUITY);
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{ int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_EXCEPTION);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
- packet = &decoder->packet_buffer[decoder->tail];
+ packet = &queue->packet_buffer[queue->tail];
packet->exception_number = elem->exception_number;
return ret;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
CS_ETM_EXCEPTION_RET);
}
+/*
+ * Handle a PE_CONTEXT element: when it carries a valid contextID, hand that
+ * ID to cs_etm__etmq_set_tid() as the tid for @trace_chan_id and reset the
+ * packet queue's timestamp.
+ *
+ * Returns OCSD_RESP_FATAL_SYS_ERR if the tid could not be set, otherwise
+ * OCSD_RESP_CONT.
+ */
+static ocsd_datapath_resp_t
+cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
+			struct cs_etm_packet_queue *packet_queue,
+			const ocsd_generic_trace_elem *elem,
+			const uint8_t trace_chan_id)
+{
+	/* PE_CONTEXT packets without a valid contextID are ignored */
+	if (elem->context.ctxt_id_valid) {
+		pid_t new_tid = elem->context.context_id;
+
+		if (cs_etm__etmq_set_tid(etmq, new_tid, trace_chan_id) != 0)
+			return OCSD_RESP_FATAL_SYS_ERR;
+
+		/*
+		 * A timestamp is generated after a PE_CONTEXT element, so
+		 * drop the current one and rely on the one that follows.
+		 */
+		cs_etm_decoder__reset_timestamp(packet_queue);
+	}
+
+	return OCSD_RESP_CONT;
+}
+
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
const void *context,
const ocsd_trc_index_t indx __maybe_unused,
@@ -441,6 +527,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
{
ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+ struct cs_etm_queue *etmq = decoder->data;
+ struct cs_etm_packet_queue *packet_queue;
+
+ /* First get the packet queue for this traceID */
+ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
+ if (!packet_queue)
+ return OCSD_RESP_FATAL_SYS_ERR;
switch (elem->elem_type) {
case OCSD_GEN_TRC_ELEM_UNKNOWN:
@@ -448,24 +541,30 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_discontinuity(decoder,
+ resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
- resp = cs_etm_decoder__buffer_range(decoder, elem,
+ resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- resp = cs_etm_decoder__buffer_exception(decoder, elem,
+ resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- resp = cs_etm_decoder__buffer_exception_ret(decoder,
+ resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
trace_chan_id);
break;
+ case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+ resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
+ trace_chan_id);
+ break;
case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+ resp = cs_etm_decoder__set_tid(etmq, packet_queue,
+ elem, trace_chan_id);
+ break;
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
- case OCSD_GEN_TRC_ELEM_TIMESTAMP:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
case OCSD_GEN_TRC_ELEM_EVENT:
@@ -554,7 +653,6 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
decoder->data = d_params->data;
decoder->prev_return = OCSD_RESP_CONT;
- cs_etm_decoder__clear_buffer(decoder);
format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
OCSD_TRC_SRC_SINGLE);
flags = 0;
@@ -577,7 +675,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
/* init library print logging support */
ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
if (ret != 0)
- goto err_free_decoder_tree;
+ goto err_free_decoder;
/* init raw frame logging if required */
cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
@@ -587,15 +685,13 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
&t_params[i],
decoder);
if (ret != 0)
- goto err_free_decoder_tree;
+ goto err_free_decoder;
}
return decoder;
-err_free_decoder_tree:
- ocsd_destroy_dcd_tree(decoder->dcd_tree);
err_free_decoder:
- free(decoder);
+ cs_etm_decoder__free(decoder);
return NULL;
}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3ab11dfa92ae..11f3391d06f2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -14,43 +14,12 @@
#include <stdio.h>
struct cs_etm_decoder;
-
-enum cs_etm_sample_type {
- CS_ETM_EMPTY,
- CS_ETM_RANGE,
- CS_ETM_DISCONTINUITY,
- CS_ETM_EXCEPTION,
- CS_ETM_EXCEPTION_RET,
-};
-
-enum cs_etm_isa {
- CS_ETM_ISA_UNKNOWN,
- CS_ETM_ISA_A64,
- CS_ETM_ISA_A32,
- CS_ETM_ISA_T32,
-};
-
-struct cs_etm_packet {
- enum cs_etm_sample_type sample_type;
- enum cs_etm_isa isa;
- u64 start_addr;
- u64 end_addr;
- u32 instr_count;
- u32 last_instr_type;
- u32 last_instr_subtype;
- u32 flags;
- u32 exception_number;
- u8 last_instr_cond;
- u8 last_instr_taken_branch;
- u8 last_instr_size;
- u8 trace_chan_id;
- int cpu;
-};
+struct cs_etm_packet;
+struct cs_etm_packet_queue;
struct cs_etm_queue;
-typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64,
- size_t, u8 *);
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *);
struct cs_etmv3_trace_params {
u32 reg_ctrl;
@@ -119,7 +88,7 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
u64 start, u64 end,
cs_etm_mem_cb_type cb_func);
-int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
struct cs_etm_packet *packet);
int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index de488b43f440..0c7776b51045 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -29,6 +29,7 @@
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
+#include <tools/libc_compat.h>
#include "util.h"
#define MAX_TIMESTAMP (~0ULL)
@@ -60,33 +61,55 @@ struct cs_etm_auxtrace {
unsigned int pmu_type;
};
-struct cs_etm_queue {
- struct cs_etm_auxtrace *etm;
- struct thread *thread;
- struct cs_etm_decoder *decoder;
- struct auxtrace_buffer *buffer;
- union perf_event *event_buf;
- unsigned int queue_nr;
+struct cs_etm_traceid_queue {
+ u8 trace_chan_id;
pid_t pid, tid;
- int cpu;
- u64 offset;
u64 period_instructions;
+ size_t last_branch_pos;
+ union perf_event *event_buf;
+ struct thread *thread;
struct branch_stack *last_branch;
struct branch_stack *last_branch_rb;
- size_t last_branch_pos;
struct cs_etm_packet *prev_packet;
struct cs_etm_packet *packet;
+ struct cs_etm_packet_queue packet_queue;
+};
+
+struct cs_etm_queue {
+ struct cs_etm_auxtrace *etm;
+ struct cs_etm_decoder *decoder;
+ struct auxtrace_buffer *buffer;
+ unsigned int queue_nr;
+ u8 pending_timestamp;
+ u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
+ /* Conversion between traceID and index in traceid_queues array */
+ struct intlist *traceid_queues_list;
+ struct cs_etm_traceid_queue **traceid_queues;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
+/*
+ * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
+ * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
+ * encode the etm queue number as the upper 16 bit and the channel as
+ * the lower 16 bit.
+ *
+ * TO_CS_QUEUE_NR() builds the combined value, TO_QUEUE_NR() and
+ * TO_TRACE_CHAN_ID() extract the two halves again.
+ *
+ * NOTE(review): the expansion of TO_CS_QUEUE_NR() names trace_chan_id while
+ * its second parameter is spelled trace_id_chan, so the second argument is
+ * ignored and the expansion picks up whatever trace_chan_id is in scope
+ * where the macro is used. The macro arguments are also not parenthesized
+ * in any of the three expansions.
+ */
+#define TO_CS_QUEUE_NR(queue_nr, trace_id_chan) \
+ (queue_nr << 16 | trace_chan_id)
+#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
+#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
etmidr &= ETMIDR_PTM_VERSION;
@@ -125,6 +148,216 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
return 0;
}
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id)
+{
+ /*
+ * When a timestamp packet is encountered the backend code
+ * is stopped so that the front end has time to process packets
+ * that were accumulated in the traceID queue. Since there can
+ * be more than one channel per cs_etm_queue, we need to specify
+ * what traceID queue needs servicing.
+ *
+ * The traceID is stored in etmq->pending_timestamp; note that
+ * cs_etm__etmq_get_timestamp() treats a value of 0 there as
+ * "nothing pending".
+ */
+ etmq->pending_timestamp = trace_chan_id;
+}
+
+/*
+ * Consume the pending-timestamp notification of @etmq.
+ *
+ * If a traceID is pending it is stored in *@trace_chan_id (when that pointer
+ * is non-NULL), the pending state is acknowledged and the timestamp held by
+ * that traceID's packet queue is returned.  Returns 0 when nothing is pending
+ * or when the packet queue cannot be obtained; in the latter case the pending
+ * state is left untouched.
+ */
+static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
+				      u8 *trace_chan_id)
+{
+	const u8 pending_id = etmq->pending_timestamp;
+	struct cs_etm_packet_queue *pq;
+
+	if (pending_id == 0)
+		return 0;
+
+	if (trace_chan_id != NULL)
+		*trace_chan_id = pending_id;
+
+	pq = cs_etm__etmq_get_packet_queue(etmq, pending_id);
+	if (pq == NULL)
+		return 0;
+
+	/* Acknowledge pending status */
+	etmq->pending_timestamp = 0;
+
+	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
+	return pq->timestamp;
+}
+
+/*
+ * Empty @queue: put every slot of the packet buffer back to its default
+ * values and rewind head, tail and packet_count to 0.
+ */
+static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
+{
+	int slot;
+
+	for (slot = 0; slot < CS_ETM_PACKET_MAX_BUFFER; slot++) {
+		struct cs_etm_packet *pkt = &queue->packet_buffer[slot];
+
+		pkt->isa = CS_ETM_ISA_UNKNOWN;
+		pkt->start_addr = CS_ETM_INVAL_ADDR;
+		pkt->end_addr = CS_ETM_INVAL_ADDR;
+		pkt->instr_count = 0;
+		pkt->last_instr_taken_branch = false;
+		pkt->last_instr_size = 0;
+		pkt->last_instr_type = 0;
+		pkt->last_instr_subtype = 0;
+		pkt->last_instr_cond = 0;
+		pkt->flags = 0;
+		pkt->exception_number = UINT32_MAX;
+		pkt->trace_chan_id = UINT8_MAX;
+		pkt->cpu = INT_MIN;
+	}
+
+	queue->packet_count = 0;
+	queue->tail = 0;
+	queue->head = 0;
+}
+
+/*
+ * Clear the packet queue of every traceID queue registered on @etmq.  Each
+ * list node's priv field holds the index of its queue in
+ * etmq->traceid_queues.
+ */
+static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
+{
+	struct int_node *node;
+	struct intlist *list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry(node, list) {
+		int slot = (int)(intptr_t)node->priv;
+
+		cs_etm__clear_packet_queue(&etmq->traceid_queues[slot]->packet_queue);
+	}
+}
+
+/*
+ * Set up a traceID queue: clear its packet queue, record the tid of the
+ * auxtrace queue it belongs to together with @trace_chan_id, and allocate
+ * the packet, previous-packet, optional last-branch and event buffers.
+ *
+ * Returns 0 on success or -ENOMEM when an allocation fails, in which case
+ * the buffers allocated so far are released again.
+ */
+static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
+				      struct cs_etm_traceid_queue *tidq,
+				      u8 trace_chan_id)
+{
+	struct cs_etm_auxtrace *etm = etmq->etm;
+	struct auxtrace_queue *aux_queue;
+
+	cs_etm__clear_packet_queue(&tidq->packet_queue);
+
+	aux_queue = &etm->queues.queue_array[etmq->queue_nr];
+	tidq->trace_chan_id = trace_chan_id;
+	tidq->pid = -1;
+	tidq->tid = aux_queue->tid;
+
+	tidq->packet = zalloc(sizeof(*tidq->packet));
+	if (tidq->packet == NULL)
+		return -ENOMEM;
+
+	tidq->prev_packet = zalloc(sizeof(*tidq->prev_packet));
+	if (tidq->prev_packet == NULL)
+		goto err_free;
+
+	if (etm->synth_opts.last_branch) {
+		/* Header plus last_branch_sz entries */
+		size_t stack_sz = sizeof(struct branch_stack) +
+				  etm->synth_opts.last_branch_sz *
+				  sizeof(struct branch_entry);
+
+		tidq->last_branch = zalloc(stack_sz);
+		if (tidq->last_branch == NULL)
+			goto err_free;
+
+		tidq->last_branch_rb = zalloc(stack_sz);
+		if (tidq->last_branch_rb == NULL)
+			goto err_free;
+	}
+
+	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (tidq->event_buf == NULL)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	zfree(&tidq->last_branch_rb);
+	zfree(&tidq->last_branch);
+	zfree(&tidq->prev_packet);
+	zfree(&tidq->packet);
+	return -ENOMEM;
+}
+
+/*
+ * Return the traceID queue that @etmq keeps for @trace_chan_id, creating and
+ * registering a new one the first time a traceID is seen.  When the session
+ * has timeless_decoding set, every traceID is mapped to
+ * CS_ETM_PER_THREAD_TRACEID.
+ *
+ * traceid_queues_list translates a traceID into an index (stored in the
+ * node's priv field) into the etmq->traceid_queues array.
+ *
+ * Returns the queue, or NULL if anything needed to create it could not be
+ * allocated.  On failure nothing is left registered for the traceID.
+ */
+static struct cs_etm_traceid_queue
+*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+	int idx;
+	struct int_node *inode;
+	struct intlist *traceid_queues_list;
+	struct cs_etm_traceid_queue *tidq, **traceid_queues;
+	struct cs_etm_auxtrace *etm = etmq->etm;
+
+	if (etm->timeless_decoding)
+		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
+
+	traceid_queues_list = etmq->traceid_queues_list;
+
+	/*
+	 * Check if the traceid_queue exist for this traceID by looking
+	 * in the queue list.
+	 */
+	inode = intlist__find(traceid_queues_list, trace_chan_id);
+	if (inode) {
+		idx = (int)(intptr_t)inode->priv;
+		return etmq->traceid_queues[idx];
+	}
+
+	/* We couldn't find a traceid_queue for this traceID, allocate one */
+	tidq = zalloc(sizeof(*tidq));
+	if (!tidq)
+		return NULL;
+
+	/* Get a valid index for the new traceid_queue */
+	idx = intlist__nr_entries(traceid_queues_list);
+	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
+	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
+	if (!inode)
+		goto out_free_tidq;	/* no node was added, nothing to unlink */
+
+	/* Associate this traceID with this index */
+	inode->priv = (void *)(intptr_t)idx;
+
+	/* On failure the init function releases its own allocations */
+	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
+		goto out_remove_inode;
+
+	/*
+	 * Grow the traceid_queues array by one unit.  On failure
+	 * reallocarray() returns NULL and the original block of memory is
+	 * left untouched, so etmq->traceid_queues is only updated on success.
+	 */
+	traceid_queues = reallocarray(etmq->traceid_queues,
+				      idx + 1,
+				      sizeof(*traceid_queues));
+	if (!traceid_queues)
+		goto out_free_buffers;
+
+	traceid_queues[idx] = tidq;
+	etmq->traceid_queues = traceid_queues;
+
+	return tidq;
+
+out_free_buffers:
+	/* Undo the allocations made by cs_etm__init_traceid_queue() */
+	zfree(&tidq->event_buf);
+	zfree(&tidq->last_branch_rb);
+	zfree(&tidq->last_branch);
+	zfree(&tidq->prev_packet);
+	zfree(&tidq->packet);
+out_remove_inode:
+	/*
+	 * Function intlist__remove() removes the inode from the list
+	 * and delete the memory associated to it.
+	 */
+	intlist__remove(traceid_queues_list, inode);
+out_free_tidq:
+	free(tidq);
+
+	return NULL;
+}
+
+/*
+ * Return the packet queue embedded in the traceID queue for @trace_chan_id,
+ * or NULL when that traceID queue cannot be obtained.
+ */
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
+{
+	struct cs_etm_traceid_queue *tidq =
+		cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+
+	return tidq ? &tidq->packet_queue : NULL;
+}
+
static void cs_etm__packet_dump(const char *pkt_string)
{
const char *color = PERF_COLOR_BLUE;
@@ -276,15 +509,53 @@ static int cs_etm__flush_events(struct perf_session *session,
if (!tool->ordered_events)
return -EINVAL;
- if (!etm->timeless_decoding)
- return -EINVAL;
-
ret = cs_etm__update_queues(etm);
if (ret < 0)
return ret;
- return cs_etm__process_timeless_queues(etm, -1);
+ if (etm->timeless_decoding)
+ return cs_etm__process_timeless_queues(etm, -1);
+
+ return cs_etm__process_queues(etm);
+}
+
+/*
+ * Release every traceID queue registered on @etmq together with the list
+ * and the array used to look them up.  Both etmq->traceid_queues_list and
+ * etmq->traceid_queues are NULL afterwards.
+ */
+static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
+{
+	struct int_node *node, *next;
+	struct intlist *list = etmq->traceid_queues_list;
+
+	intlist__for_each_entry_safe(node, next, list) {
+		/* The node's priv field is the index into the array */
+		int slot = (uintptr_t)node->priv;
+		struct cs_etm_traceid_queue *tidq = etmq->traceid_queues[slot];
+
+		/* Drop the thread reference, then the queue's buffers */
+		thread__zput(tidq->thread);
+		zfree(&tidq->event_buf);
+		zfree(&tidq->last_branch);
+		zfree(&tidq->last_branch_rb);
+		zfree(&tidq->prev_packet);
+		zfree(&tidq->packet);
+		free(tidq);
+
+		/*
+		 * Function intlist__remove() removes the inode from the list
+		 * and delete the memory associated to it.
+		 */
+		intlist__remove(list, node);
+	}
+
+	/* Then the RB tree itself */
+	intlist__delete(list);
+	etmq->traceid_queues_list = NULL;
+
+	/* finally free the traceid_queues array */
+	zfree(&etmq->traceid_queues);
+}
static void cs_etm__free_queue(void *priv)
@@ -294,13 +565,8 @@ static void cs_etm__free_queue(void *priv)
if (!etmq)
return;
- thread__zput(etmq->thread);
cs_etm_decoder__free(etmq->decoder);
- zfree(&etmq->event_buf);
- zfree(&etmq->last_branch);
- zfree(&etmq->last_branch_rb);
- zfree(&etmq->prev_packet);
- zfree(&etmq->packet);
+ cs_etm__free_traceid_queues(etmq);
free(etmq);
}
@@ -365,23 +631,27 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
}
}
-static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
- size_t size, u8 *buffer)
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
+ u64 address, size_t size, u8 *buffer)
{
u8 cpumode;
u64 offset;
int len;
- struct thread *thread;
- struct machine *machine;
- struct addr_location al;
+ struct thread *thread;
+ struct machine *machine;
+ struct addr_location al;
+ struct cs_etm_traceid_queue *tidq;
if (!etmq)
return 0;
machine = etmq->etm->machine;
cpumode = cs_etm__cpu_mode(etmq, address);
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ return 0;
- thread = etmq->thread;
+ thread = tidq->thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL)
return 0;
@@ -412,35 +682,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq;
- size_t szp = sizeof(struct cs_etm_packet);
etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
- etmq->packet = zalloc(szp);
- if (!etmq->packet)
- goto out_free;
-
- etmq->prev_packet = zalloc(szp);
- if (!etmq->prev_packet)
- goto out_free;
-
- if (etm->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
-
- sz += etm->synth_opts.last_branch_sz *
- sizeof(struct branch_entry);
- etmq->last_branch = zalloc(sz);
- if (!etmq->last_branch)
- goto out_free;
- etmq->last_branch_rb = zalloc(sz);
- if (!etmq->last_branch_rb)
- goto out_free;
- }
-
- etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
- if (!etmq->event_buf)
+ etmq->traceid_queues_list = intlist__new(NULL);
+ if (!etmq->traceid_queues_list)
goto out_free;
/* Use metadata to fill in trace parameters for trace decoder */
@@ -477,12 +725,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
out_free_decoder:
cs_etm_decoder__free(etmq->decoder);
out_free:
- zfree(&t_params);
- zfree(&etmq->event_buf);
- zfree(&etmq->last_branch);
- zfree(&etmq->last_branch_rb);
- zfree(&etmq->prev_packet);
- zfree(&etmq->packet);
+ intlist__delete(etmq->traceid_queues_list);
free(etmq);
return NULL;
@@ -493,6 +736,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
unsigned int queue_nr)
{
int ret = 0;
+ unsigned int cs_queue_nr;
+ u8 trace_chan_id;
+ u64 timestamp;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
@@ -508,12 +754,69 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
- etmq->cpu = queue->cpu;
- etmq->tid = queue->tid;
- etmq->pid = -1;
etmq->offset = 0;
- etmq->period_instructions = 0;
+ if (etm->timeless_decoding)
+ goto out;
+
+ /*
+ * We are under a CPU-wide trace scenario. As such we need to know
+ * when the code that generated the traces started to execute so that
+ * it can be correlated with execution on other CPUs. So we get a
+ * handle on the beginning of traces and decode until we find a
+ * timestamp. The timestamp is then added to the auxtrace min heap
+ * in order to know what nibble (of all the etmqs) to decode first.
+ */
+ while (1) {
+ /*
+ * Fetch an aux_buffer from this etmq. Bail if no more
+ * blocks or an error has been encountered.
+ */
+ ret = cs_etm__get_data_block(etmq);
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * Run decoder on the trace block. The decoder will stop when
+ * encountering a timestamp, a full packet queue or the end of
+ * trace for that block.
+ */
+ ret = cs_etm__decode_data_block(etmq);
+ if (ret)
+ goto out;
+
+ /*
+ * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
+ * the timestamp calculation for us.
+ */
+ timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+ /* We found a timestamp, no need to continue. */
+ if (timestamp)
+ break;
+
+ /*
+ * We didn't find a timestamp so empty all the traceid packet
+ * queues before looking for another timestamp packet, either
+ * in the current data block or a new one. Packets that were
+ * just decoded are useless since no timestamp has been
+ * associated with them. As such simply discard them.
+ */
+ cs_etm__clear_all_packet_queues(etmq);
+ }
+
+ /*
+ * We have a timestamp. Add it to the min heap to reflect when
+ * instructions conveyed by the range packets of this traceID queue
+ * started to execute. Once the same has been done for all the traceID
+ * queues of each etmq, rendering and decoding can start in
+ * chronological order.
+ *
+ * Note that packets decoded above are still in the traceID's packet
+ * queue and will be processed in cs_etm__process_queues().
+ *
+ * trace_chan_id was filled in by cs_etm__etmq_get_timestamp() above.
+ */
+ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+ ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
out:
return ret;
}
@@ -545,10 +848,12 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
return 0;
}
-static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
+static inline
+void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct branch_stack *bs_src = etmq->last_branch_rb;
- struct branch_stack *bs_dst = etmq->last_branch;
+ struct branch_stack *bs_src = tidq->last_branch_rb;
+ struct branch_stack *bs_dst = tidq->last_branch;
size_t nr = 0;
/*
@@ -568,9 +873,9 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
* two steps. First, copy the branches from the most recently inserted
* branch ->last_branch_pos until the end of bs_src->entries buffer.
*/
- nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
+ nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
memcpy(&bs_dst->entries[0],
- &bs_src->entries[etmq->last_branch_pos],
+ &bs_src->entries[tidq->last_branch_pos],
sizeof(struct branch_entry) * nr);
/*
@@ -583,21 +888,24 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
memcpy(&bs_dst->entries[nr],
&bs_src->entries[0],
- sizeof(struct branch_entry) * etmq->last_branch_pos);
+ sizeof(struct branch_entry) * tidq->last_branch_pos);
}
}
-static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
+static inline
+void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
- etmq->last_branch_pos = 0;
- etmq->last_branch_rb->nr = 0;
+ tidq->last_branch_pos = 0;
+ tidq->last_branch_rb->nr = 0;
}
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
- u64 addr) {
+ u8 trace_chan_id, u64 addr)
+{
u8 instrBytes[2];
- cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ ARRAY_SIZE(instrBytes), instrBytes);
/*
* T32 instruction size is indicated by bits[15:11] of the first
* 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
@@ -626,6 +934,7 @@ u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
}
static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+ u64 trace_chan_id,
const struct cs_etm_packet *packet,
u64 offset)
{
@@ -633,7 +942,8 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
u64 addr = packet->start_addr;
while (offset > 0) {
- addr += cs_etm__t32_instr_size(etmq, addr);
+ addr += cs_etm__t32_instr_size(etmq,
+ trace_chan_id, addr);
offset--;
}
return addr;
@@ -643,9 +953,10 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
return packet->start_addr + offset * 4;
}
-static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct branch_stack *bs = etmq->last_branch_rb;
+ struct branch_stack *bs = tidq->last_branch_rb;
struct branch_entry *be;
/*
@@ -654,14 +965,14 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
* buffer down. After writing the first element of the stack, move the
* insert position back to the end of the buffer.
*/
- if (!etmq->last_branch_pos)
- etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+ if (!tidq->last_branch_pos)
+ tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
- etmq->last_branch_pos -= 1;
+ tidq->last_branch_pos -= 1;
- be = &bs->entries[etmq->last_branch_pos];
- be->from = cs_etm__last_executed_instr(etmq->prev_packet);
- be->to = cs_etm__first_executed_instr(etmq->packet);
+ be = &bs->entries[tidq->last_branch_pos];
+ be->from = cs_etm__last_executed_instr(tidq->prev_packet);
+ be->to = cs_etm__first_executed_instr(tidq->packet);
/* No support for mispredict */
be->flags.mispred = 0;
be->flags.predicted = 1;
@@ -725,31 +1036,53 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
}
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
- struct auxtrace_queue *queue)
+ struct cs_etm_traceid_queue *tidq)
{
- struct cs_etm_queue *etmq = queue->priv;
+ if ((!tidq->thread) && (tidq->tid != -1))
+ tidq->thread = machine__find_thread(etm->machine, -1,
+ tidq->tid);
- /* CPU-wide tracing isn't supported yet */
- if (queue->tid == -1)
- return;
+ if (tidq->thread)
+ tidq->pid = tidq->thread->pid_;
+}
- if ((!etmq->thread) && (etmq->tid != -1))
- etmq->thread = machine__find_thread(etm->machine, -1,
- etmq->tid);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+ pid_t tid, u8 trace_chan_id)
+{
+ int cpu, err = -EINVAL;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_traceid_queue *tidq;
- if (etmq->thread) {
- etmq->pid = etmq->thread->pid_;
- if (queue->cpu == -1)
- etmq->cpu = etmq->thread->cpu;
- }
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+ if (!tidq)
+ return err;
+
+ if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ return err;
+
+ err = machine__set_current_tid(etm->machine, cpu, tid, tid);
+ if (err)
+ return err;
+
+ tidq->tid = tid;
+ thread__zput(tidq->thread);
+
+ cs_etm__set_pid_tid_cpu(etm, tidq);
+ return 0;
+}
+
+/* Report whether timeless_decoding is set on the session that owns @etmq. */
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
+{
+	const struct cs_etm_auxtrace *etm = etmq->etm;
+
+	return etm->timeless_decoding ? true : false;
+}
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
u64 addr, u64 period)
{
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
- union perf_event *event = etmq->event_buf;
+ union perf_event *event = tidq->event_buf;
struct perf_sample sample = {.ip = 0,};
event->sample.header.type = PERF_RECORD_SAMPLE;
@@ -757,19 +1090,19 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = addr;
- sample.pid = etmq->pid;
- sample.tid = etmq->tid;
+ sample.pid = tidq->pid;
+ sample.tid = tidq->tid;
sample.id = etmq->etm->instructions_id;
sample.stream_id = etmq->etm->instructions_id;
sample.period = period;
- sample.cpu = etmq->packet->cpu;
- sample.flags = etmq->prev_packet->flags;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
sample.insn_len = 1;
sample.cpumode = event->sample.header.misc;
if (etm->synth_opts.last_branch) {
- cs_etm__copy_last_branch_rb(etmq);
- sample.branch_stack = etmq->last_branch;
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+ sample.branch_stack = tidq->last_branch;
}
if (etm->synth_opts.inject) {
@@ -787,7 +1120,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
ret);
if (etm->synth_opts.last_branch)
- cs_etm__reset_last_branch_rb(etmq);
+ cs_etm__reset_last_branch_rb(tidq);
return ret;
}
@@ -796,33 +1129,34 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
* The cs etm packet encodes an instruction range between a branch target
* and the next taken branch. Generate sample accordingly.
*/
-static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int ret = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
struct perf_sample sample = {.ip = 0,};
- union perf_event *event = etmq->event_buf;
+ union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
struct branch_entry entries;
} dummy_bs;
u64 ip;
- ip = cs_etm__last_executed_instr(etmq->prev_packet);
+ ip = cs_etm__last_executed_instr(tidq->prev_packet);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = ip;
- sample.pid = etmq->pid;
- sample.tid = etmq->tid;
- sample.addr = cs_etm__first_executed_instr(etmq->packet);
+ sample.pid = tidq->pid;
+ sample.tid = tidq->tid;
+ sample.addr = cs_etm__first_executed_instr(tidq->packet);
sample.id = etmq->etm->branches_id;
sample.stream_id = etmq->etm->branches_id;
sample.period = 1;
- sample.cpu = etmq->packet->cpu;
- sample.flags = etmq->prev_packet->flags;
+ sample.cpu = tidq->packet->cpu;
+ sample.flags = tidq->prev_packet->flags;
sample.cpumode = event->sample.header.misc;
/*
@@ -965,33 +1299,35 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
return 0;
}
-static int cs_etm__sample(struct cs_etm_queue *etmq)
+static int cs_etm__sample(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
int ret;
- u64 instrs_executed = etmq->packet->instr_count;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ u64 instrs_executed = tidq->packet->instr_count;
- etmq->period_instructions += instrs_executed;
+ tidq->period_instructions += instrs_executed;
/*
* Record a branch when the last instruction in
* PREV_PACKET is a branch.
*/
if (etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE &&
- etmq->prev_packet->last_instr_taken_branch)
- cs_etm__update_last_branch_rb(etmq);
+ tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
+ cs_etm__update_last_branch_rb(etmq, tidq);
if (etm->sample_instructions &&
- etmq->period_instructions >= etm->instructions_sample_period) {
+ tidq->period_instructions >= etm->instructions_sample_period) {
/*
* Emit instruction sample periodically
* TODO: allow period to be defined in cycles and clock time
*/
/* Get number of instructions executed after the sample point */
- u64 instrs_over = etmq->period_instructions -
+ u64 instrs_over = tidq->period_instructions -
etm->instructions_sample_period;
/*
@@ -1000,31 +1336,32 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* executed, but PC has not advanced to next instruction)
*/
u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
+ u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset);
ret = cs_etm__synth_instruction_sample(
- etmq, addr, etm->instructions_sample_period);
+ etmq, tidq, addr, etm->instructions_sample_period);
if (ret)
return ret;
/* Carry remaining instructions into next sample period */
- etmq->period_instructions = instrs_over;
+ tidq->period_instructions = instrs_over;
}
if (etm->sample_branches) {
bool generate_sample = false;
/* Generate sample for tracing on packet */
- if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+ if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
generate_sample = true;
/* Generate sample for branch taken packet */
- if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
- etmq->prev_packet->last_instr_taken_branch)
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
+ tidq->prev_packet->last_instr_taken_branch)
generate_sample = true;
if (generate_sample) {
- ret = cs_etm__synth_branch_sample(etmq);
+ ret = cs_etm__synth_branch_sample(etmq, tidq);
if (ret)
return ret;
}
@@ -1035,15 +1372,15 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
*/
- tmp = etmq->packet;
- etmq->packet = etmq->prev_packet;
- etmq->prev_packet = tmp;
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
}
return 0;
}
-static int cs_etm__exception(struct cs_etm_queue *etmq)
+static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
/*
* When the exception packet is inserted, whether the last instruction
@@ -1056,24 +1393,25 @@ static int cs_etm__exception(struct cs_etm_queue *etmq)
* swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
* for generating instruction and branch samples.
*/
- if (etmq->prev_packet->sample_type == CS_ETM_RANGE)
- etmq->prev_packet->last_instr_taken_branch = true;
+ if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
+ tidq->prev_packet->last_instr_taken_branch = true;
return 0;
}
-static int cs_etm__flush(struct cs_etm_queue *etmq)
+static int cs_etm__flush(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int err = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
/* Handle start tracing packet */
- if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
+ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
goto swap_packet;
if (etmq->etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
/*
* Generate a last branch event for the branches left in the
* circular buffer at the end of the trace.
@@ -1081,21 +1419,21 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
* Use the address of the end of the last reported execution
* range
*/
- u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+ u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
- etmq, addr,
- etmq->period_instructions);
+ etmq, tidq, addr,
+ tidq->period_instructions);
if (err)
return err;
- etmq->period_instructions = 0;
+ tidq->period_instructions = 0;
}
if (etm->sample_branches &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
- err = cs_etm__synth_branch_sample(etmq);
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ err = cs_etm__synth_branch_sample(etmq, tidq);
if (err)
return err;
}
@@ -1106,15 +1444,16 @@ swap_packet:
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
*/
- tmp = etmq->packet;
- etmq->packet = etmq->prev_packet;
- etmq->prev_packet = tmp;
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
}
return err;
}
-static int cs_etm__end_block(struct cs_etm_queue *etmq)
+static int cs_etm__end_block(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int err;
@@ -1128,20 +1467,20 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
* the trace.
*/
if (etmq->etm->synth_opts.last_branch &&
- etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ tidq->prev_packet->sample_type == CS_ETM_RANGE) {
/*
* Use the address of the end of the last reported execution
* range.
*/
- u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+ u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
- etmq, addr,
- etmq->period_instructions);
+ etmq, tidq, addr,
+ tidq->period_instructions);
if (err)
return err;
- etmq->period_instructions = 0;
+ tidq->period_instructions = 0;
}
return 0;
@@ -1173,12 +1512,13 @@ static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
return etmq->buf_len;
}
-static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
+static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
struct cs_etm_packet *packet,
u64 end_addr)
{
- u16 instr16;
- u32 instr32;
+ /* Initialise to keep compiler happy */
+ u16 instr16 = 0;
+ u32 instr32 = 0;
u64 addr;
switch (packet->isa) {
@@ -1196,7 +1536,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* so below only read 2 bytes as instruction size for T32.
*/
addr = end_addr - 2;
- cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr16), (u8 *)&instr16);
if ((instr16 & 0xFF00) == 0xDF00)
return true;
@@ -1211,7 +1552,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* +---------+---------+-------------------------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr32), (u8 *)&instr32);
if ((instr32 & 0x0F000000) == 0x0F000000 &&
(instr32 & 0xF0000000) != 0xF0000000)
return true;
@@ -1227,7 +1569,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
* +-----------------------+---------+-----------+
*/
addr = end_addr - 4;
- cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+ cs_etm__mem_access(etmq, trace_chan_id, addr,
+ sizeof(instr32), (u8 *)&instr32);
if ((instr32 & 0xFFE0001F) == 0xd4000001)
return true;
@@ -1240,10 +1583,12 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
return false;
}
-static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq, u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_SVC)
@@ -1256,7 +1601,7 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
*/
if (magic == __perf_cs_etmv4_magic) {
if (packet->exception_number == CS_ETMV4_EXC_CALL &&
- cs_etm__is_svc_instr(etmq, prev_packet,
+ cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr))
return true;
}
@@ -1264,9 +1609,10 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
+ u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
+ struct cs_etm_packet *packet = tidq->packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
@@ -1289,10 +1635,13 @@ static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
+static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq,
+ u64 magic)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
if (magic == __perf_cs_etmv3_magic)
if (packet->exception_number == CS_ETMV3_EXC_SMC ||
@@ -1316,7 +1665,7 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
* (SMC, HVC) are taken as sync exceptions.
*/
if (packet->exception_number == CS_ETMV4_EXC_CALL &&
- !cs_etm__is_svc_instr(etmq, prev_packet,
+ !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr))
return true;
@@ -1335,10 +1684,12 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
return false;
}
-static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
+static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
- struct cs_etm_packet *packet = etmq->packet;
- struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ struct cs_etm_packet *packet = tidq->packet;
+ struct cs_etm_packet *prev_packet = tidq->prev_packet;
+ u8 trace_chan_id = tidq->trace_chan_id;
u64 magic;
int ret;
@@ -1419,7 +1770,8 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_INTERRUPT) &&
- cs_etm__is_svc_instr(etmq, packet, packet->start_addr))
+ cs_etm__is_svc_instr(etmq, trace_chan_id,
+ packet, packet->start_addr))
prev_packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_SYSCALLRET;
@@ -1440,7 +1792,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
return ret;
/* The exception is for system call. */
- if (cs_etm__is_syscall(etmq, magic))
+ if (cs_etm__is_syscall(etmq, tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_SYSCALLRET;
@@ -1448,7 +1800,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
* The exceptions are triggered by external signals from bus,
* interrupt controller, debug module, PE reset or halt.
*/
- else if (cs_etm__is_async_exception(etmq, magic))
+ else if (cs_etm__is_async_exception(tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_ASYNC |
@@ -1457,7 +1809,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
* Otherwise, exception is caused by trap, instruction &
* data fault, or alignment errors.
*/
- else if (cs_etm__is_sync_exception(etmq, magic))
+ else if (cs_etm__is_sync_exception(etmq, tidq, magic))
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL |
PERF_IP_FLAG_INTERRUPT;
@@ -1539,75 +1891,106 @@ out:
return ret;
}
-static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
+static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
+ struct cs_etm_traceid_queue *tidq)
{
int ret;
+ struct cs_etm_packet_queue *packet_queue;
- /* Process each packet in this chunk */
- while (1) {
- ret = cs_etm_decoder__get_packet(etmq->decoder,
- etmq->packet);
- if (ret <= 0)
- /*
- * Stop processing this chunk on
- * end of data or error
- */
- break;
+ packet_queue = &tidq->packet_queue;
+ /* Process each packet in this chunk */
+ while (1) {
+ ret = cs_etm_decoder__get_packet(packet_queue,
+ tidq->packet);
+ if (ret <= 0)
/*
- * Since packet addresses are swapped in packet
- * handling within below switch() statements,
- * thus setting sample flags must be called
- * prior to switch() statement to use address
- * information before packets swapping.
+ * Stop processing this chunk on
+ * end of data or error
*/
- ret = cs_etm__set_sample_flags(etmq);
- if (ret < 0)
- break;
-
- switch (etmq->packet->sample_type) {
- case CS_ETM_RANGE:
- /*
- * If the packet contains an instruction
- * range, generate instruction sequence
- * events.
- */
- cs_etm__sample(etmq);
- break;
- case CS_ETM_EXCEPTION:
- case CS_ETM_EXCEPTION_RET:
- /*
- * If the exception packet is coming,
- * make sure the previous instruction
- * range packet to be handled properly.
- */
- cs_etm__exception(etmq);
- break;
- case CS_ETM_DISCONTINUITY:
- /*
- * Discontinuity in trace, flush
- * previous branch stack
- */
- cs_etm__flush(etmq);
- break;
- case CS_ETM_EMPTY:
- /*
- * Should not receive empty packet,
- * report error.
- */
- pr_err("CS ETM Trace: empty packet\n");
- return -EINVAL;
- default:
- break;
- }
+ break;
+
+ /*
+ * Since packet addresses are swapped in packet
+ * handling within below switch() statements,
+ * thus setting sample flags must be called
+ * prior to switch() statement to use address
+ * information before packets swapping.
+ */
+ ret = cs_etm__set_sample_flags(etmq, tidq);
+ if (ret < 0)
+ break;
+
+ switch (tidq->packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * If the packet contains an instruction
+ * range, generate instruction sequence
+ * events.
+ */
+ cs_etm__sample(etmq, tidq);
+ break;
+ case CS_ETM_EXCEPTION:
+ case CS_ETM_EXCEPTION_RET:
+ /*
+ * If the exception packet is coming,
+ * make sure the previous instruction
+ * range packet to be handled properly.
+ */
+ cs_etm__exception(tidq);
+ break;
+ case CS_ETM_DISCONTINUITY:
+ /*
+ * Discontinuity in trace, flush
+ * previous branch stack
+ */
+ cs_etm__flush(etmq, tidq);
+ break;
+ case CS_ETM_EMPTY:
+ /*
+ * Should not receive empty packet,
+ * report error.
+ */
+ pr_err("CS ETM Trace: empty packet\n");
+ return -EINVAL;
+ default:
+ break;
}
+ }
return ret;
}
+/*
+ * Walk etmq->traceid_queues_list and, for every traceID queue it refers
+ * to, process the packets still pending in that queue and then flush it.
+ */
+static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
+{
+	struct intlist *list = etmq->traceid_queues_list;
+	struct int_node *node;
+
+	intlist__for_each_entry(node, list) {
+		/* The node's private data is the index in traceid_queues[] */
+		int slot = (int)(intptr_t)node->priv;
+		struct cs_etm_traceid_queue *tidq = etmq->traceid_queues[slot];
+
+		/* Best effort: the return value is deliberately ignored */
+		cs_etm__process_traceid_queue(etmq, tidq);
+
+		/*
+		 * Emit an instruction sample for whatever is left in the
+		 * branch stack.
+		 */
+		cs_etm__flush(etmq, tidq);
+	}
+}
+
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
int err = 0;
+ struct cs_etm_traceid_queue *tidq;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
+ if (!tidq)
+ return -EINVAL;
/* Go through each buffer in the queue and decode them one by one */
while (1) {
@@ -1626,13 +2009,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
* an error occurs other than hoping the next one will
* be better.
*/
- err = cs_etm__process_decoder_queue(etmq);
+ err = cs_etm__process_traceid_queue(etmq, tidq);
} while (etmq->buf_len);
if (err == 0)
/* Flush any remaining branch stack entries */
- err = cs_etm__end_block(etmq);
+ err = cs_etm__end_block(etmq, tidq);
}
return err;
@@ -1647,9 +2030,19 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
for (i = 0; i < queues->nr_queues; i++) {
struct auxtrace_queue *queue = &etm->queues.queue_array[i];
struct cs_etm_queue *etmq = queue->priv;
+ struct cs_etm_traceid_queue *tidq;
+
+ if (!etmq)
+ continue;
+
+ tidq = cs_etm__etmq_get_traceid_queue(etmq,
+ CS_ETM_PER_THREAD_TRACEID);
+
+ if (!tidq)
+ continue;
- if (etmq && ((tid == -1) || (etmq->tid == tid))) {
- cs_etm__set_pid_tid_cpu(etm, queue);
+ if ((tid == -1) || (tidq->tid == tid)) {
+ cs_etm__set_pid_tid_cpu(etm, tidq);
cs_etm__run_decoder(etmq);
}
}
@@ -1657,6 +2050,164 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
return 0;
}
+/*
+ * Synthesize samples for all queued trace data in timestamp order.
+ *
+ * Each pass takes the entry at the top of etm->heap, processes the packets
+ * already sitting in the matching traceID queue, then decodes more data for
+ * the same queue and re-inserts it in the heap under the next timestamp.
+ *
+ * Returns 0 once the heap is empty, otherwise the error reported by the
+ * helper that failed.
+ */
+static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
+{
+	int ret = 0;
+	unsigned int cs_queue_nr, queue_nr;
+	u8 trace_chan_id;
+	u64 timestamp;
+	struct auxtrace_queue *queue;
+	struct cs_etm_queue *etmq;
+	struct cs_etm_traceid_queue *tidq;
+
+	while (1) {
+		if (!etm->heap.heap_cnt)
+			goto out;
+
+		/* Take the entry at the top of the min heap */
+		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
+		queue_nr = TO_QUEUE_NR(cs_queue_nr);
+		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
+		queue = &etm->queues.queue_array[queue_nr];
+		etmq = queue->priv;
+
+		/*
+		 * Remove the top entry from the heap since we are about
+		 * to process it.
+		 */
+		auxtrace_heap__pop(&etm->heap);
+
+		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
+		if (!tidq) {
+			/*
+			 * No traceID queue has been allocated for this
+			 * traceID, which means something somewhere went
+			 * very wrong. There is no other choice than to
+			 * simply exit.
+			 */
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Packets associated with this timestamp are already in
+		 * the etmq's traceID queue, so process them.
+		 */
+		ret = cs_etm__process_traceid_queue(etmq, tidq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Packets for this timestamp have been processed, time to
+		 * move on to the next timestamp, fetching a new
+		 * auxtrace_buffer if need be.
+		 */
+refetch:
+		ret = cs_etm__get_data_block(etmq);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * No more auxtrace_buffers to process in this etmq, simply
+		 * move on to another entry in the auxtrace_heap.
+		 */
+		if (!ret)
+			continue;
+
+		ret = cs_etm__decode_data_block(etmq);
+		if (ret)
+			goto out;
+
+		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
+
+		if (!timestamp) {
+			/*
+			 * Function cs_etm__decode_data_block() returns when
+			 * there are no more traces to decode in the current
+			 * auxtrace_buffer OR when a timestamp has been
+			 * encountered on any of the traceID queues. Since we
+			 * did not get a timestamp, there are no more traces
+			 * to process in this auxtrace_buffer. As such, empty
+			 * and flush all traceID queues.
+			 */
+			cs_etm__clear_all_traceid_queues(etmq);
+
+			/* Fetch another auxtrace_buffer for this etmq */
+			goto refetch;
+		}
+
+		/*
+		 * Add to the min heap the timestamp for packets that have
+		 * just been decoded. They will be processed and synthesized
+		 * during the next call to cs_etm__process_traceid_queue() for
+		 * this queue/traceID.
+		 */
+		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
+		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
+		/*
+		 * Do not keep looping on a failed insert: the error would be
+		 * overwritten by the next pass and the packets just decoded
+		 * for this queue would never be scheduled.
+		 */
+		if (ret)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Handle an ITRACE_START record: unless decoding is timeless, make sure
+ * the pid/tid it carries is known to etm->machine.
+ *
+ * Returns 0 on success (or when there is nothing to do), -ENOMEM when the
+ * thread could not be found or created.
+ */
+static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
+					union perf_event *event)
+{
+	struct thread *thread;
+	int err = 0;
+
+	if (!etm->timeless_decoding) {
+		/*
+		 * Register the tid/pid now so that a match can be found
+		 * when the decoder later hands us a contextID.
+		 */
+		thread = machine__findnew_thread(etm->machine,
+						 event->itrace_start.pid,
+						 event->itrace_start.tid);
+		if (thread)
+			thread__put(thread);
+		else
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+/*
+ * Handle a SWITCH_CPU_WIDE record: for SWITCH_OUT records seen while
+ * decoding is not timeless, make sure the pid/tid carried in
+ * next_prev_pid/next_prev_tid is known to etm->machine.
+ *
+ * Returns 0 on success (or when the record is ignored), -ENOMEM when the
+ * thread could not be found or created.
+ */
+static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
+					   union perf_event *event)
+{
+	struct thread *thread;
+
+	/*
+	 * Two kinds of records are ignored:
+	 *  - anything seen in timeless (per-thread) mode, where context
+	 *    switches are irrelevant since perf starts/stops tracing as
+	 *    the process is scheduled;
+	 *  - records that are not flagged PERF_RECORD_MISC_SWITCH_OUT,
+	 *    i.e. SWITCH_IN records.
+	 */
+	if (etm->timeless_decoding ||
+	    !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
+		return 0;
+
+	/*
+	 * Register the tid/pid now so that a match can be found when the
+	 * decoder later hands us a contextID.
+	 */
+	thread = machine__findnew_thread(etm->machine,
+					 event->context_switch.next_prev_pid,
+					 event->context_switch.next_prev_tid);
+	if (thread == NULL)
+		return -ENOMEM;
+
+	thread__put(thread);
+
+	return 0;
+}
+
static int cs_etm__process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -1676,9 +2227,6 @@ static int cs_etm__process_event(struct perf_session *session,
return -EINVAL;
}
- if (!etm->timeless_decoding)
- return -EINVAL;
-
if (sample->time && (sample->time != (u64) -1))
timestamp = sample->time;
else
@@ -1690,10 +2238,20 @@ static int cs_etm__process_event(struct perf_session *session,
return err;
}
- if (event->header.type == PERF_RECORD_EXIT)
+ if (etm->timeless_decoding &&
+ event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm,
event->fork.tid);
+ if (event->header.type == PERF_RECORD_ITRACE_START)
+ return cs_etm__process_itrace_start(etm, event);
+ else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+ return cs_etm__process_switch_cpu_wide(etm, event);
+
+ if (!etm->timeless_decoding &&
+ event->header.type == PERF_RECORD_AUX)
+ return cs_etm__process_queues(etm);
+
return 0;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 0e97c196147a..bc848fd095f4 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -9,6 +9,7 @@
#include "util/event.h"
#include "util/session.h"
+#include <linux/bits.h>
/* Versioning header in case things need to change in the future. That way
* decoding of old snapshot is still possible.
@@ -97,12 +98,72 @@ enum {
CS_ETMV4_EXC_END = 31,
};
+enum cs_etm_sample_type { /* packet kinds, stored in cs_etm_packet::sample_type */
+ CS_ETM_EMPTY, /* no packet; cs_etm__process_traceid_queue() rejects it with -EINVAL */
+ CS_ETM_RANGE, /* packet holds an instruction range */
+ CS_ETM_DISCONTINUITY, /* discontinuity in the trace */
+ CS_ETM_EXCEPTION, /* exception packet */
+ CS_ETM_EXCEPTION_RET, /* handled together with CS_ETM_EXCEPTION; presumably exception return - confirm */
+};
+
+enum cs_etm_isa { /* instruction set of a packet, stored in cs_etm_packet::isa */
+ CS_ETM_ISA_UNKNOWN,
+ CS_ETM_ISA_A64, /* NOTE(review): presumably AArch64 - confirm */
+ CS_ETM_ISA_A32, /* NOTE(review): presumably AArch32 ARM - confirm */
+ CS_ETM_ISA_T32, /* NOTE(review): presumably Thumb - confirm */
+};
+
/* RB tree for quick conversion between traceID and metadata pointers */
struct intlist *traceid_list;
+struct cs_etm_queue;
+
+struct cs_etm_packet {
+ enum cs_etm_sample_type sample_type; /* packet kind; drives the switch in cs_etm__process_traceid_queue() */
+ enum cs_etm_isa isa; /* switched on by cs_etm__is_svc_instr() */
+ u64 start_addr; /* address handed to cs_etm__is_svc_instr() by cs_etm__set_sample_flags() */
+ u64 end_addr; /* NOTE(review): looks like one past the last instruction (callers read at end_addr - 2/4) - confirm */
+ u32 instr_count; /* read by cs_etm__sample() as the number of instructions executed */
+ u32 last_instr_type;
+ u32 last_instr_subtype;
+ u32 flags; /* PERF_IP_FLAG_* bits, written by cs_etm__set_sample_flags() */
+ u32 exception_number; /* compared against CS_ETMV3_EXC_* and CS_ETMV4_EXC_* values */
+ u8 last_instr_cond;
+ u8 last_instr_taken_branch; /* tested before recording a branch; forced to true by cs_etm__exception() */
+ u8 last_instr_size;
+ u8 trace_chan_id;
+ int cpu;
+};
+
+#define CS_ETM_PACKET_MAX_BUFFER 1024
+
+/*
+ * When working with per-thread scenarios the process under trace can
+ * be scheduled on any CPU and as such, more than one traceID may be
+ * associated with the same process. Since a traceID of '0' is illegal
+ * as per the CoreSight architecture, use that specific value to
+ * identify the queue where all packets (with any traceID) are
+ * aggregated.
+ */
+#define CS_ETM_PER_THREAD_TRACEID 0
+
+struct cs_etm_packet_queue {
+ u32 packet_count; /* NOTE(review): presumably number of packets currently held - confirm */
+ u32 head; /* NOTE(review): head/tail presumably index into packet_buffer - confirm */
+ u32 tail;
+ u32 instr_count;
+ u64 timestamp;
+ u64 next_timestamp;
+ struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; /* fixed-size packet storage */
+};
+
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
+#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
+
+/* Extract bits [msb:lsb] of @val, shifted down so that bit @lsb lands on bit 0 */
+#define BMVAL(val, lsb, msb)	(((val) & GENMASK(msb, lsb)) >> (lsb))
+
#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
#define __perf_cs_etmv3_magic 0x3030303030303030ULL
@@ -114,6 +175,13 @@ struct intlist *traceid_list;
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
+ pid_t tid, u8 trace_chan_id);
+bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
+void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
+ u8 trace_chan_id);
+struct cs_etm_packet_queue
+*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
#else
static inline int
cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
@@ -127,6 +195,32 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
{
return -1;
}
+
+static inline int cs_etm__etmq_set_tid(
+ struct cs_etm_queue *etmq __maybe_unused,
+ pid_t tid __maybe_unused,
+ u8 trace_chan_id __maybe_unused)
+{
+ return -1; /* stub for the #else branch: arguments are ignored, always fails */
+}
+
+static inline bool cs_etm__etmq_is_timeless(
+ struct cs_etm_queue *etmq __maybe_unused)
+{
+ /* What else to return? */
+ return true; /* stub for the #else branch: the queue is ignored, always true */
+}
+
+static inline void cs_etm__etmq_set_traceid_queue_timestamp(
+ struct cs_etm_queue *etmq __maybe_unused,
+ u8 trace_chan_id __maybe_unused) {} /* stub for the #else branch: intentionally a no-op */
+
+static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
+ struct cs_etm_queue *etmq __maybe_unused,
+ u8 trace_chan_id __maybe_unused)
+{
+ return NULL; /* stub for the #else branch: there is never a packet queue to hand out */
+}
#endif
#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e059976d9d93..1fb18292c2d3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -9,6 +9,8 @@
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
+#include <bpf/libbpf.h>
+#include "bpf-event.h"
#include "compress.h"
#include "namespaces.h"
#include "path.h"
@@ -706,6 +708,44 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
return false;
}
+/*
+ * Copy up to DSO__DATA_CACHE_SIZE bytes of the JITed instructions of the
+ * BPF program behind @dso, starting at @offset, into @data.
+ *
+ * Returns the number of bytes copied, or -1 when the program info cannot
+ * be found (the dso data is then marked DSO_DATA_STATUS_ERROR) or when
+ * @offset lies at or beyond the end of the JITed image.
+ */
+static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
+{
+	struct bpf_prog_info_node *node;
+	const u8 *insns;
+	u64 prog_len, remaining;
+	ssize_t nbytes;
+
+	node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+	if (node == NULL || node->info_linear == NULL) {
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		return -1;
+	}
+
+	prog_len = node->info_linear->info.jited_prog_len;
+	if (offset >= prog_len)
+		return -1;
+
+	insns = (const u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns;
+
+	/* Never hand out more than one cache line worth of data */
+	remaining = prog_len - offset;
+	if (remaining < (u64)DSO__DATA_CACHE_SIZE)
+		nbytes = (ssize_t)remaining;
+	else
+		nbytes = DSO__DATA_CACHE_SIZE;
+
+	memcpy(data, insns + offset, nbytes);
+	return nbytes;
+}
+
+/*
+ * Record the length of the JITed image of the BPF program behind @dso in
+ * dso->data.file_size.
+ *
+ * Returns 0 on success, or -1 (after marking the dso data
+ * DSO_DATA_STATUS_ERROR) when the program info cannot be found.
+ */
+static int bpf_size(struct dso *dso)
+{
+	struct bpf_prog_info_node *node =
+		perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
+
+	if (node && node->info_linear) {
+		dso->data.file_size = node->info_linear->info.jited_prog_len;
+		return 0;
+	}
+
+	dso->data.status = DSO_DATA_STATUS_ERROR;
+	return -1;
+}
+
static void
dso_cache__free(struct dso *dso)
{
@@ -794,48 +834,53 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
return cache_size;
}
-static ssize_t
-dso_cache__read(struct dso *dso, struct machine *machine,
- u64 offset, u8 *data, ssize_t size)
+static ssize_t file_read(struct dso *dso, struct machine *machine,
+ u64 offset, char *data)
{
- struct dso_cache *cache;
- struct dso_cache *old;
ssize_t ret;
- do {
- u64 cache_offset;
+ pthread_mutex_lock(&dso__data_open_lock);
- cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
- if (!cache)
- return -ENOMEM;
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
- pthread_mutex_lock(&dso__data_open_lock);
+ if (dso->data.fd < 0) {
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ ret = -errno;
+ goto out;
+ }
- /*
- * dso->data.fd might be closed if other thread opened another
- * file (dso) due to open file limit (RLIMIT_NOFILE).
- */
- try_to_open_dso(dso, machine);
+ ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
+}
- if (dso->data.fd < 0) {
- ret = -errno;
- dso->data.status = DSO_DATA_STATUS_ERROR;
- break;
- }
+static ssize_t
+dso_cache__read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
+ struct dso_cache *cache;
+ struct dso_cache *old;
+ ssize_t ret;
- cache_offset = offset & DSO__DATA_CACHE_MASK;
+ cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+ if (!cache)
+ return -ENOMEM;
- ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
- if (ret <= 0)
- break;
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ ret = bpf_read(dso, cache_offset, cache->data);
+ else
+ ret = file_read(dso, machine, cache_offset, cache->data);
+ if (ret > 0) {
cache->offset = cache_offset;
cache->size = ret;
- } while (0);
-
- pthread_mutex_unlock(&dso__data_open_lock);
- if (ret > 0) {
old = dso_cache__insert(dso, cache);
if (old) {
/* we lose the race */
@@ -898,18 +943,12 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine,
return r;
}
-int dso__data_file_size(struct dso *dso, struct machine *machine)
+static int file_size(struct dso *dso, struct machine *machine)
{
int ret = 0;
struct stat st;
char sbuf[STRERR_BUFSIZE];
- if (dso->data.file_size)
- return 0;
-
- if (dso->data.status == DSO_DATA_STATUS_ERROR)
- return -1;
-
pthread_mutex_lock(&dso__data_open_lock);
/*
@@ -938,6 +977,20 @@ out:
return ret;
}
+/*
+ * Make sure the size of the data behind @dso has been looked up.
+ *
+ * Returns 0 right away when dso->data.file_size is already set and -1 when
+ * the dso data is already marked DSO_DATA_STATUS_ERROR. Otherwise the
+ * lookup is delegated to bpf_size() for BPF program dsos and to
+ * file_size() for everything else, and their result is returned.
+ */
+int dso__data_file_size(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.file_size)
+		return 0;
+
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	switch (dso->binary_type) {
+	case DSO_BINARY_TYPE__BPF_PROG_INFO:
+		return bpf_size(dso);
+	default:
+		return file_size(dso, machine);
+	}
+}
+
/**
* dso__data_size - Return dso data size
* @dso: dso object
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6a3eaf7d9353..1cc7a1837822 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -246,6 +246,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
for (cpu = 0; cpu < nr_cpus; ++cpu) {
env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
+ env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
}
env->nr_cpus_avail = nr_cpus;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 271a90b326c4..d5d9865aa812 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -9,6 +9,7 @@
struct cpu_topology_map {
int socket_id;
+ int die_id;
int core_id;
};
@@ -49,6 +50,7 @@ struct perf_env {
int nr_cmdline;
int nr_sibling_cores;
+ int nr_sibling_dies;
int nr_sibling_threads;
int nr_numa_nodes;
int nr_memory_nodes;
@@ -57,6 +59,7 @@ struct perf_env {
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
+ char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
struct cpu_topology_map *cpu;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index d1ad6c419724..c9c6857360e4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1486,7 +1486,7 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
+ return fprintf(fp, " addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
event->ksymbol_event.addr, event->ksymbol_event.len,
event->ksymbol_event.ksym_type,
event->ksymbol_event.flags, event->ksymbol_event.name);
@@ -1494,7 +1494,7 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " bpf event with type %u, flags %u, id %u\n",
+ return fprintf(fp, " type %u, flags %u, id %u\n",
event->bpf_event.type, event->bpf_event.flags,
event->bpf_event.id);
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 9e999550f247..1f1da6082806 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -204,6 +204,8 @@ struct perf_sample {
u64 period;
u64 weight;
u64 transaction;
+ u64 insn_cnt;
+ u64 cyc_cnt;
u32 cpu;
u32 raw_size;
u64 data_src;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a5947625c5c..04c4ed1573cb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -589,6 +589,9 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
{
char bf[128];
+ if (!evsel)
+ goto out_unknown;
+
if (evsel->name)
return evsel->name;
@@ -628,7 +631,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
evsel->name = strdup(bf);
- return evsel->name ?: "unknown";
+ if (evsel->name)
+ return evsel->name;
+out_unknown:
+ return "unknown";
}
const char *perf_evsel__group_name(struct perf_evsel *evsel)
@@ -679,6 +685,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
attr->sample_max_stack = param->max_stack;
+ if (opts->kernel_callchains)
+ attr->exclude_callchain_user = 1;
+ if (opts->user_callchains)
+ attr->exclude_callchain_kernel = 1;
if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
@@ -701,7 +711,14 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
if (!function) {
perf_evsel__set_sample_bit(evsel, REGS_USER);
perf_evsel__set_sample_bit(evsel, STACK_USER);
- attr->sample_regs_user |= PERF_REGS_MASK;
+ if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
+ attr->sample_regs_user |= DWARF_MINIMAL_REGS;
+ pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
+ "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
+ "so the minimal registers set (IP, SP) is explicitly forced.\n");
+ } else {
+ attr->sample_regs_user |= PERF_REGS_MASK;
+ }
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
} else {
@@ -1136,9 +1153,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
- if (evsel->system_wide)
- nthreads = 1;
-
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 847ae51a524b..06ddb6618ef3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -599,6 +599,27 @@ static int write_cpu_topology(struct feat_fd *ff,
if (ret < 0)
return ret;
}
+
+ if (!tp->die_sib)
+ goto done;
+
+ ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->die_sib; i++) {
+ ret = do_write_string(ff, tp->die_siblings[i]);
+ if (ret < 0)
+ goto done;
+ }
+
+ for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+ ret = do_write(ff, &perf_env.cpu[j].die_id,
+ sizeof(perf_env.cpu[j].die_id));
+ if (ret < 0)
+ return ret;
+ }
+
done:
cpu_topology__delete(tp);
return ret;
@@ -1439,10 +1460,20 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
str = ph->env.sibling_cores;
for (i = 0; i < nr; i++) {
- fprintf(fp, "# sibling cores : %s\n", str);
+ fprintf(fp, "# sibling sockets : %s\n", str);
str += strlen(str) + 1;
}
+ if (ph->env.nr_sibling_dies) {
+ nr = ph->env.nr_sibling_dies;
+ str = ph->env.sibling_dies;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling dies : %s\n", str);
+ str += strlen(str) + 1;
+ }
+ }
+
nr = ph->env.nr_sibling_threads;
str = ph->env.sibling_threads;
@@ -1451,12 +1482,28 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
str += strlen(str) + 1;
}
- if (ph->env.cpu != NULL) {
- for (i = 0; i < cpu_nr; i++)
- fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i,
- ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id);
- } else
- fprintf(fp, "# Core ID and Socket ID information is not available\n");
+ if (ph->env.nr_sibling_dies) {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Die ID %d, Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].die_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID, Die ID and Socket ID "
+ "information is not available\n");
+ } else {
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, "
+ "Socket ID %d\n",
+ i, ph->env.cpu[i].core_id,
+ ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID and Socket ID "
+ "information is not available\n");
+ }
}
static void print_clockid(struct feat_fd *ff, FILE *fp)
@@ -2214,6 +2261,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
goto free_cpu;
ph->env.cpu[i].core_id = nr;
+ size += sizeof(u32);
if (do_read_u32(ff, &nr))
goto free_cpu;
@@ -2225,6 +2273,40 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
}
ph->env.cpu[i].socket_id = nr;
+ size += sizeof(u32);
+ }
+
+ /*
+ * The header may be from old perf,
+ * which doesn't include die information.
+ */
+ if (ff->size <= size)
+ return 0;
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ph->env.nr_sibling_dies = nr;
+ size += sizeof(u32);
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include the terminating NUL character at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+
+ for (i = 0; i < (u32)cpu_nr; i++) {
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.cpu[i].die_id = nr;
}
return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7ace7a10054d..fb3271fd420c 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2561,7 +2561,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
char unit;
int printed;
const struct dso *dso = hists->dso_filter;
- const struct thread *thread = hists->thread_filter;
+ struct thread *thread = hists->thread_filter;
int socket_id = hists->socket_filter;
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9d189e90fbdc..f8b71bf2bb4c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -95,6 +95,7 @@ struct intel_pt_decoder {
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
void *data;
struct intel_pt_state state;
const unsigned char *buf;
@@ -107,6 +108,7 @@ struct intel_pt_decoder {
bool have_cyc;
bool fixup_last_mtc;
bool have_last_ip;
+ bool in_psb;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -115,6 +117,7 @@ struct intel_pt_decoder {
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
+ uint64_t buf_timestamp;
uint64_t sample_timestamp;
uint64_t ret_addr;
uint64_t ctc_timestamp;
@@ -130,6 +133,10 @@ struct intel_pt_decoder {
int mtc_shift;
struct intel_pt_stack stack;
enum intel_pt_pkt_state pkt_state;
+ enum intel_pt_pkt_ctx pkt_ctx;
+ enum intel_pt_pkt_ctx prev_pkt_ctx;
+ enum intel_pt_blk_type blk_type;
+ int blk_type_pos;
struct intel_pt_pkt packet;
struct intel_pt_pkt tnt;
int pkt_step;
@@ -151,6 +158,11 @@ struct intel_pt_decoder {
uint64_t period_mask;
uint64_t period_ticks;
uint64_t last_masked_timestamp;
+ uint64_t tot_cyc_cnt;
+ uint64_t sample_tot_cyc_cnt;
+ uint64_t base_cyc_cnt;
+ uint64_t cyc_cnt_timestamp;
+ double tsc_to_cyc;
bool continuous_period;
bool overflow;
bool set_fup_tx_flags;
@@ -158,6 +170,8 @@ struct intel_pt_decoder {
bool set_fup_mwait;
bool set_fup_pwre;
bool set_fup_exstop;
+ bool set_fup_bep;
+ bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
uint64_t fup_ptw_payload;
@@ -217,6 +231,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->get_trace = params->get_trace;
decoder->walk_insn = params->walk_insn;
decoder->pgd_ip = params->pgd_ip;
+ decoder->lookahead = params->lookahead;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
@@ -470,7 +485,21 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
return -EBADMSG;
}
-static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
+{
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+}
+
+static void intel_pt_reposition(struct intel_pt_decoder *decoder)
+{
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
{
struct intel_pt_buffer buffer = { .buf = 0, };
int ret;
@@ -487,12 +516,10 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
intel_pt_log("No more data\n");
return -ENODATA;
}
- if (!buffer.consecutive) {
- decoder->ip = 0;
- decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->buf_timestamp = buffer.ref_timestamp;
+ if (!buffer.consecutive || reposition) {
+ intel_pt_reposition(decoder);
decoder->ref_timestamp = buffer.ref_timestamp;
- decoder->timestamp = 0;
- decoder->have_tma = false;
decoder->state.trace_nr = buffer.trace_nr;
intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
decoder->ref_timestamp);
@@ -502,10 +529,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
return 0;
}
-static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
+ bool reposition)
{
if (!decoder->next_buf)
- return intel_pt_get_data(decoder);
+ return intel_pt_get_data(decoder, reposition);
decoder->buf = decoder->next_buf;
decoder->len = decoder->next_len;
@@ -524,7 +552,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
len = decoder->len;
memcpy(buf, decoder->buf, len);
- ret = intel_pt_get_data(decoder);
+ ret = intel_pt_get_data(decoder, false);
if (ret) {
decoder->pos += old_len;
return ret < 0 ? ret : -EINVAL;
@@ -536,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
memcpy(buf + len, decoder->buf, n);
len += n;
- ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
+ ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
if (ret < (int)old_len) {
decoder->next_buf = decoder->buf;
decoder->next_len = decoder->len;
@@ -571,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
{
struct intel_pt_pkt_info pkt_info;
const unsigned char *buf = decoder->buf;
+ enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
size_t len = decoder->len;
int ret;
@@ -589,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
if (!len)
return INTEL_PT_NEED_MORE_BYTES;
- ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
+ &pkt_ctx);
if (!ret)
return INTEL_PT_NEED_MORE_BYTES;
if (ret < 0)
@@ -664,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_MNT:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
return 0;
case INTEL_PT_MTC:
@@ -850,13 +885,14 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
decoder->len -= decoder->pkt_step;
if (!decoder->len) {
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
}
+ decoder->prev_pkt_ctx = decoder->pkt_ctx;
ret = intel_pt_get_packet(decoder->buf, decoder->len,
- &decoder->packet);
+ &decoder->packet, &decoder->pkt_ctx);
if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
@@ -1094,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
decoder->state.to_ip = 0;
ret = true;
}
+ if (decoder->set_fup_bep) {
+ decoder->set_fup_bep = false;
+ decoder->state.type |= INTEL_PT_BLK_ITEMS;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
return ret;
}
@@ -1308,10 +1352,10 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->ip += intel_pt_insn.length;
return 0;
}
+ decoder->sample_cyc = false;
decoder->ip += intel_pt_insn.length;
if (!decoder->tnt.count) {
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
return -EAGAIN;
}
decoder->tnt.payload <<= 1;
@@ -1345,6 +1389,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
return 0;
}
+static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
+{
+ timestamp |= (ref_timestamp & (0xffULL << 56));
+
+ if (timestamp < ref_timestamp) {
+ if (ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+
+ return timestamp;
+}
+
static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t timestamp;
@@ -1352,15 +1411,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
decoder->have_tma = false;
if (decoder->ref_timestamp) {
- timestamp = decoder->packet.payload |
- (decoder->ref_timestamp & (0xffULL << 56));
- if (timestamp < decoder->ref_timestamp) {
- if (decoder->ref_timestamp - timestamp > (1ULL << 55))
- timestamp += (1ULL << 56);
- } else {
- if (timestamp - decoder->ref_timestamp > (1ULL << 55))
- timestamp -= (1ULL << 56);
- }
+ timestamp = intel_pt_8b_tsc(decoder->packet.payload,
+ decoder->ref_timestamp);
decoder->tsc_timestamp = timestamp;
decoder->timestamp = timestamp;
decoder->ref_timestamp = 0;
@@ -1404,6 +1456,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
return -EOVERFLOW;
}
+static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
+{
+ if (decoder->have_cyc)
+ return;
+
+ decoder->cyc_cnt_timestamp = decoder->timestamp;
+ decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
+}
+
+static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
+{
+ decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
+
+ if (decoder->pge)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+}
+
+static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
+{
+ uint64_t tot_cyc_cnt, tsc_delta;
+
+ if (decoder->have_cyc)
+ return;
+
+ decoder->sample_cyc = true;
+
+ if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
+ return;
+
+ tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
+ tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
+
+ if (tot_cyc_cnt > decoder->tot_cyc_cnt)
+ decoder->tot_cyc_cnt = tot_cyc_cnt;
+}
+
static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
{
uint32_t ctc = decoder->packet.payload;
@@ -1413,6 +1501,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
if (!decoder->tsc_ctc_ratio_d)
return;
+ if (decoder->pge && !decoder->in_psb)
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ else
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
if (decoder->tsc_ctc_mult) {
@@ -1468,6 +1561,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
else
decoder->timestamp = timestamp;
+ intel_pt_mtc_cyc_cnt_upd(decoder);
+
decoder->timestamp_insn_cnt = 0;
decoder->last_mtc = mtc;
@@ -1492,6 +1587,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
decoder->cbr = cbr;
decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+
+ intel_pt_mtc_cyc_cnt_cbr(decoder);
}
static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
@@ -1501,6 +1598,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
decoder->have_cyc = true;
decoder->cycle_cnt += decoder->packet.payload;
+ if (decoder->pge)
+ decoder->tot_cyc_cnt += decoder->packet.payload;
+ decoder->sample_cyc = true;
if (!decoder->cyc_ref_timestamp)
return;
@@ -1523,19 +1623,62 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+ if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+ memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+ decoder->state.items.is_32_bit = false;
+ }
+ decoder->blk_type = decoder->packet.payload;
+ decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+ if (decoder->blk_type == INTEL_PT_GP_REGS)
+ decoder->state.items.is_32_bit = decoder->packet.count;
+ if (decoder->blk_type_pos < 0) {
+ intel_pt_log("WARNING: Unknown block type %u\n",
+ decoder->blk_type);
+ } else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+ intel_pt_log("WARNING: Duplicate block type %u\n",
+ decoder->blk_type);
+ }
+}
+
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+ uint32_t id = decoder->packet.count;
+ uint32_t bit = 1 << id;
+ int pos = decoder->blk_type_pos;
+
+ if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+ intel_pt_log("WARNING: Unknown block item %u type %d\n",
+ id, decoder->blk_type);
+ return;
+ }
+
+ if (decoder->state.items.mask[pos] & bit) {
+ intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+ id, decoder->blk_type);
+ }
+
+ decoder->state.items.mask[pos] |= bit;
+ decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
/* Walk PSB+ packets when already in sync. */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
int err;
+ decoder->in_psb = true;
+
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
- return err;
+ goto out;
switch (decoder->packet.type) {
case INTEL_PT_PSBEND:
- return 0;
+ err = 0;
+ goto out;
case INTEL_PT_TIP_PGD:
case INTEL_PT_TIP_PGE:
@@ -1551,12 +1694,18 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
- return -EAGAIN;
+ err = -EAGAIN;
+ goto out;
case INTEL_PT_OVF:
- return intel_pt_overflow(decoder);
+ err = intel_pt_overflow(decoder);
+ goto out;
case INTEL_PT_TSC:
intel_pt_calc_tsc_timestamp(decoder);
@@ -1602,6 +1751,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
}
}
+out:
+ decoder->in_psb = false;
+
+ return err;
}
static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
@@ -1638,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -1675,6 +1832,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->ip;
}
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
return 0;
case INTEL_PT_TIP:
@@ -1745,6 +1903,7 @@ next:
case INTEL_PT_TIP_PGE: {
decoder->pge = true;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
if (decoder->packet.count == 0) {
intel_pt_log_at("Skipping zero TIP.PGE",
decoder->pos);
@@ -1957,6 +2116,33 @@ next:
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
+ case INTEL_PT_BBP:
+ intel_pt_bbp(decoder);
+ break;
+
+ case INTEL_PT_BIP:
+ intel_pt_bip(decoder);
+ break;
+
+ case INTEL_PT_BEP:
+ decoder->state.type = INTEL_PT_BLK_ITEMS;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_BEP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_bep = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after BEP",
+ decoder->pos);
+ }
+ goto next;
+
default:
return intel_pt_bug(decoder);
}
@@ -1975,10 +2161,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
{
int err;
+ decoder->in_psb = true;
+
while (1) {
err = intel_pt_get_next_packet(decoder);
if (err)
- return err;
+ goto out;
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
@@ -1993,8 +2181,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
intel_pt_log("ERROR: Unexpected packet\n");
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case INTEL_PT_FUP:
decoder->pge = true;
@@ -2053,16 +2246,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
decoder->pkt_state = INTEL_PT_STATE_ERR4;
else
decoder->pkt_state = INTEL_PT_STATE_ERR3;
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case INTEL_PT_BAD: /* Does not happen */
- return intel_pt_bug(decoder);
+ err = intel_pt_bug(decoder);
+ goto out;
case INTEL_PT_OVF:
- return intel_pt_overflow(decoder);
+ err = intel_pt_overflow(decoder);
+ goto out;
case INTEL_PT_PSBEND:
- return 0;
+ err = 0;
+ goto out;
case INTEL_PT_PSB:
case INTEL_PT_VMCS:
@@ -2072,6 +2269,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
}
}
+out:
+ decoder->in_psb = false;
+
+ return err;
}
static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
@@ -2086,18 +2287,30 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
switch (decoder->packet.type) {
case INTEL_PT_TIP_PGD:
decoder->continuous_period = false;
- __fallthrough;
+ decoder->pge = false;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ return 0;
+
case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_mtc_cyc_cnt_pge(decoder);
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (!decoder->ip)
+ break;
+ decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ return 0;
+
case INTEL_PT_TIP:
- decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+ decoder->pge = true;
if (intel_pt_have_ip(decoder))
intel_pt_set_ip(decoder);
if (!decoder->ip)
break;
- if (decoder->packet.type == INTEL_PT_TIP_PGE)
- decoder->state.type |= INTEL_PT_TRACE_BEGIN;
- if (decoder->packet.type == INTEL_PT_TIP_PGD)
- decoder->state.type |= INTEL_PT_TRACE_END;
return 0;
case INTEL_PT_FUP:
@@ -2178,6 +2391,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
default:
break;
}
@@ -2193,6 +2410,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
decoder->set_fup_mwait = false;
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
@@ -2250,7 +2468,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
decoder->pos += decoder->len;
decoder->len = 0;
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
@@ -2276,7 +2494,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
intel_pt_log("Scanning for PSB\n");
while (1) {
if (!decoder->len) {
- ret = intel_pt_get_next_data(decoder);
+ ret = intel_pt_get_next_data(decoder, false);
if (ret)
return ret;
}
@@ -2404,8 +2622,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
if (err) {
decoder->state.err = intel_pt_ext_err(err);
decoder->state.from_ip = decoder->ip;
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
} else {
decoder->state.err = 0;
if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
@@ -2414,8 +2632,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.cbr_payload = decoder->cbr_payload;
}
if (intel_pt_sample_time(decoder->pkt_state)) {
- decoder->sample_timestamp = decoder->timestamp;
- decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ intel_pt_update_sample_time(decoder);
+ if (decoder->sample_cyc)
+ decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
}
}
@@ -2423,6 +2642,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+ decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
return &decoder->state;
}
@@ -2526,11 +2746,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
size_t *rem)
{
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
struct intel_pt_pkt packet;
int ret;
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret <= 0)
return false;
if (packet.type == INTEL_PT_TSC) {
@@ -2732,3 +2953,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
return buf_b; /* No overlap */
}
}
+
+/**
+ * struct fast_forward_data - data used by intel_pt_ff_cb().
+ * @timestamp: timestamp to fast forward towards
+ * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
+ * the fast forward timestamp.
+ */
+struct fast_forward_data {
+ uint64_t timestamp;
+ uint64_t buf_timestamp;
+};
+
+/**
+ * intel_pt_ff_cb - fast forward lookahead callback.
+ * @buffer: Intel PT trace buffer
+ * @data: opaque pointer to fast forward data (struct fast_forward_data)
+ *
+ * Determine if @buffer trace is past the fast forward timestamp.
+ *
+ * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
+ * timestamp, and 0 otherwise.
+ */
+static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
+{
+ struct fast_forward_data *d = data;
+ unsigned char *buf;
+ uint64_t tsc;
+ size_t rem;
+ size_t len;
+
+ buf = (unsigned char *)buffer->buf;
+ len = buffer->len;
+
+ if (!intel_pt_next_psb(&buf, &len) ||
+ !intel_pt_next_tsc(buf, len, &tsc, &rem))
+ return 0;
+
+ tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
+
+ intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
+ tsc, buffer->ref_timestamp);
+
+ /*
+ * If the buffer contains a timestamp earlier than the fast forward
+ * timestamp, then record it, else stop.
+ */
+ if (tsc < d->timestamp)
+ d->buf_timestamp = buffer->ref_timestamp;
+ else
+ return 1;
+
+ return 0;
+}
+
+/**
+ * intel_pt_fast_forward - reposition decoder forwards.
+ * @decoder: Intel PT decoder
+ * @timestamp: timestamp to fast forward towards
+ *
+ * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
+{
+ struct fast_forward_data d = { .timestamp = timestamp };
+ unsigned char *buf;
+ size_t len;
+ int err;
+
+ intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
+
+ /* Find buffer timestamp of buffer to fast forward to */
+ err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
+ if (err < 0)
+ return err;
+
+ /* Walk to buffer with same buffer timestamp */
+ if (d.buf_timestamp) {
+ do {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ err = intel_pt_get_next_data(decoder, true);
+ /* -ENOLINK means non-consecutive trace */
+ if (err && err != -ENOLINK)
+ return err;
+ } while (decoder->buf_timestamp != d.buf_timestamp);
+ }
+
+ if (!decoder->buf)
+ return 0;
+
+ buf = (unsigned char *)decoder->buf;
+ len = decoder->len;
+
+ if (!intel_pt_next_psb(&buf, &len))
+ return 0;
+
+ /*
+ * Walk PSBs while the PSB timestamp is less than the fast forward
+ * timestamp.
+ */
+ do {
+ uint64_t tsc;
+ size_t rem;
+
+ if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
+ break;
+ tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
+ /*
+ * A TSC packet can slip past MTC packets but, after fast
+ * forward, decoding starts at the TSC timestamp. That means
+ * the timestamps may not be exactly the same as the timestamps
+ * that would have been decoded without fast forward.
+ */
+ if (tsc < timestamp) {
+ intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
+ decoder->pos += decoder->len - len;
+ decoder->buf = buf;
+ decoder->len = len;
+ intel_pt_reposition(decoder);
+ } else {
+ break;
+ }
+ } while (intel_pt_step_psb(&buf, &len));
+
+ return 0;
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 1e8cfdc7bfab..9957f2ccdca8 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
INTEL_PT_CBR_CHG = 1 << 8,
INTEL_PT_TRACE_BEGIN = 1 << 9,
INTEL_PT_TRACE_END = 1 << 10,
+ INTEL_PT_BLK_ITEMS = 1 << 11,
};
enum intel_pt_period_type {
@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
INTEL_PT_FUP_WITH_NLIP = 1 << 0,
};
+enum intel_pt_blk_type {
+ INTEL_PT_GP_REGS = 1,
+ INTEL_PT_PEBS_BASIC = 4,
+ INTEL_PT_PEBS_MEM = 5,
+ INTEL_PT_LBR_0 = 8,
+ INTEL_PT_LBR_1 = 9,
+ INTEL_PT_LBR_2 = 10,
+ INTEL_PT_XMM = 16,
+ INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+ INTEL_PT_GP_REGS_POS,
+ INTEL_PT_PEBS_BASIC_POS,
+ INTEL_PT_PEBS_MEM_POS,
+ INTEL_PT_LBR_0_POS,
+ INTEL_PT_LBR_1_POS,
+ INTEL_PT_LBR_2_POS,
+ INTEL_PT_XMM_POS,
+ INTEL_PT_BLK_TYPE_CNT
+};
+
+/* Get the array position for a block type */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+ const int map[INTEL_PT_BLK_TYPE_MAX] = {
+ BLK_TYPE(GP_REGS),
+ BLK_TYPE(PEBS_BASIC),
+ BLK_TYPE(PEBS_MEM),
+ BLK_TYPE(LBR_0),
+ BLK_TYPE(LBR_1),
+ BLK_TYPE(LBR_2),
+ BLK_TYPE(XMM),
+ };
+#undef BLK_TYPE
+
+ return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT 32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+ union {
+ uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+ struct {
+ uint32_t has_rflags:1;
+ uint32_t has_rip:1;
+ uint32_t has_rax:1;
+ uint32_t has_rcx:1;
+ uint32_t has_rdx:1;
+ uint32_t has_rbx:1;
+ uint32_t has_rsp:1;
+ uint32_t has_rbp:1;
+ uint32_t has_rsi:1;
+ uint32_t has_rdi:1;
+ uint32_t has_r8:1;
+ uint32_t has_r9:1;
+ uint32_t has_r10:1;
+ uint32_t has_r11:1;
+ uint32_t has_r12:1;
+ uint32_t has_r13:1;
+ uint32_t has_r14:1;
+ uint32_t has_r15:1;
+ uint32_t has_unused_0:14;
+ uint32_t has_ip:1;
+ uint32_t has_applicable_counters:1;
+ uint32_t has_timestamp:1;
+ uint32_t has_unused_1:29;
+ uint32_t has_mem_access_address:1;
+ uint32_t has_mem_aux_info:1;
+ uint32_t has_mem_access_latency:1;
+ uint32_t has_tsx_aux_info:1;
+ uint32_t has_unused_2:28;
+ uint32_t has_lbr_0;
+ uint32_t has_lbr_1;
+ uint32_t has_lbr_2;
+ uint32_t has_xmm;
+ };
+ };
+ union {
+ uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+ struct {
+ struct {
+ uint64_t rflags;
+ uint64_t rip;
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+ };
+ struct {
+ uint64_t ip;
+ uint64_t applicable_counters;
+ uint64_t timestamp;
+ uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+ };
+ struct {
+ uint64_t mem_access_address;
+ uint64_t mem_aux_info;
+ uint64_t mem_access_latency;
+ uint64_t tsx_aux_info;
+ uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+ };
+ uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+ uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+ };
+ };
+ bool is_32_bit;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
int err;
@@ -68,6 +204,7 @@ struct intel_pt_state {
uint64_t to_ip;
uint64_t cr3;
uint64_t tot_insn_cnt;
+ uint64_t tot_cyc_cnt;
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
@@ -80,6 +217,7 @@ struct intel_pt_state {
enum intel_pt_insn_op insn_op;
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
+ struct intel_pt_blk_items items;
};
struct intel_pt_insn;
@@ -92,12 +230,15 @@ struct intel_pt_buffer {
uint64_t trace_nr;
};
+typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *);
+
struct intel_pt_params {
int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
uint64_t max_insn_cnt, void *data);
bool (*pgd_ip)(uint64_t ip, void *data);
+ int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
void *data;
bool return_compression;
bool branch_enable;
@@ -117,6 +258,8 @@ void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
+
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
unsigned char *buf_b, size_t len_b,
bool have_tsc, bool *consecutive);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 605fce537d80..0ccf10a0bf44 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -62,6 +62,10 @@ static const char * const packet_name[] = {
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
+ [INTEL_PT_BBP] = "BBP",
+ [INTEL_PT_BIP] = "BIP",
+ [INTEL_PT_BEP] = "BEP",
+ [INTEL_PT_BEP_IP] = "BEP",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -280,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
return 7;
}
+static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{ /*
+ * Decode a BBP packet (dispatched from intel_pt_get_ext() for 0x63).
+ * Returns the packet length (3) or INTEL_PT_NEED_MORE_BYTES if fewer than
+ * 3 bytes are available.
+ */
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BBP;
+ packet->count = buf[2] >> 7; /* size flag: non-zero selects 4-byte BIP payloads, zero selects 8-byte */
+ packet->payload = buf[2] & 0x1f; /* low 5 bits; printed as "Type" by intel_pt_pkt_desc() */
+ return 3;
+}
+/*
+ * Decode a BIP packet carrying a 4-byte value. Called by
+ * intel_pt_do_get_packet() when the context is INTEL_PT_BLK_4_CTX.
+ * Returns the packet length (5) or INTEL_PT_NEED_MORE_BYTES.
+ */
+static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3; /* upper 5 bits of the header byte; printed as "ID" by intel_pt_pkt_desc() */
+ memcpy_le64(&packet->payload, buf + 1, 4); /* value follows the header byte */
+ return 5;
+}
+/*
+ * Decode a BIP packet carrying an 8-byte value. Called by
+ * intel_pt_do_get_packet() when the context is INTEL_PT_BLK_8_CTX.
+ * Returns the packet length (9) or INTEL_PT_NEED_MORE_BYTES.
+ */
+static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BIP;
+ packet->count = buf[0] >> 3; /* upper 5 bits of the header byte; printed as "ID" by intel_pt_pkt_desc() */
+ memcpy_le64(&packet->payload, buf + 1, 8); /* value follows the header byte */
+ return 9;
+}
+/*
+ * Decode a BEP packet without IP (dispatched from intel_pt_get_ext() for
+ * 0x33). Only the type is recorded; no payload bytes are read.
+ * Returns the packet length (2) or INTEL_PT_NEED_MORE_BYTES.
+ */
+static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP;
+ return 2;
+}
+/*
+ * Decode a BEP packet with IP (dispatched from intel_pt_get_ext() for 0xb3).
+ * Only the type is recorded; no payload bytes are read.
+ * Returns the packet length (2) or INTEL_PT_NEED_MORE_BYTES.
+ */
+static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_BEP_IP;
+ return 2;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -320,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
+ case 0x63: /* BBP */
+ return intel_pt_get_bbp(buf, len, packet);
+ case 0x33: /* BEP no IP */
+ return intel_pt_get_bep(len, packet);
+ case 0xb3: /* BEP with IP */
+ return intel_pt_get_bep_ip(len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -468,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
}
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx ctx)
{
unsigned int byte;
@@ -478,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
byte = buf[0];
+
+ switch (ctx) {
+ case INTEL_PT_NO_CTX:
+ break;
+ case INTEL_PT_BLK_4_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_4(buf, len, packet);
+ break;
+ case INTEL_PT_BLK_8_CTX:
+ if ((byte & 0x7) == 4)
+ return intel_pt_get_bip_8(buf, len, packet);
+ break;
+ default:
+ break;
+ };
+
if (!(byte & BIT(0))) {
if (byte == 0)
return intel_pt_get_pad(packet);
@@ -516,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
}
}
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx)
+{ /*
+ * Update the packet decoding context '*ctx' from the packet just decoded:
+ * BBP selects 4-byte or 8-byte BIP decoding, a first group of packet types
+ * leaves the context untouched, and a second group resets it to
+ * INTEL_PT_NO_CTX. Unknown types leave the context unchanged.
+ */
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MTC:
+ case INTEL_PT_FUP:
+ case INTEL_PT_CYC:
+ case INTEL_PT_CBR:
+ case INTEL_PT_MNT:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BIP:
+ break; /* these packet types keep the current context */
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ *ctx = INTEL_PT_NO_CTX; /* these packet types end any block context */
+ break;
+ case INTEL_PT_BBP:
+ if (packet->count) /* size flag stored by intel_pt_get_bbp() */
+ *ctx = INTEL_PT_BLK_4_CTX;
+ else
+ *ctx = INTEL_PT_BLK_8_CTX;
+ break;
+ default:
+ break;
+ }
+}
+
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet)
+ struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
{
int ret;
- ret = intel_pt_do_get_packet(buf, len, packet);
+ ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
if (ret > 0) {
while (ret < 8 && len > (size_t)ret && !buf[ret])
ret += 1;
+ intel_pt_upd_pkt_ctx(packet, ctx);
}
return ret;
}
@@ -602,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_BEP:
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_BEP_IP:
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
@@ -621,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_BBP:
+ return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
+ name, packet->count ? "4" : "8", payload);
+ case INTEL_PT_BIP:
+ return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index a7aefaa08588..17ca9b56d72f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -50,6 +50,10 @@ enum intel_pt_pkt_type {
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
+ INTEL_PT_BBP,
+ INTEL_PT_BIP,
+ INTEL_PT_BEP,
+ INTEL_PT_BEP_IP,
};
struct intel_pt_pkt {
@@ -58,10 +62,25 @@ struct intel_pt_pkt {
uint64_t payload;
};
+/*
+ * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
+ * packets only occur in the context of a block (i.e. between BBP and BEP), that
+ * context must be recorded and passed to the packet decoder.
+ *
+ * intel_pt_get_packet() reads the context to decide how to decode the next
+ * packet and updates it afterwards via intel_pt_upd_pkt_ctx(). Callers start
+ * with INTEL_PT_NO_CTX.
+ */
+enum intel_pt_pkt_ctx {
+ INTEL_PT_NO_CTX, /* BIP packets are invalid */
+ INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
+ INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
+};
+
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
int intel_pt_get_packet(const unsigned char *buf, size_t len,
- struct intel_pt_pkt *packet);
+ struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
+
+void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
+ enum intel_pt_pkt_ctx *ctx);
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d6f1b2a03f9b..550db6e77968 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,9 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "time-utils.h"
+
+#include "../arch/x86/include/uapi/asm/perf_regs.h"
#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
@@ -41,6 +44,11 @@
#define MAX_TIMESTAMP (~0ULL)
+struct range { /* one selected time interval, filled in by intel_pt_setup_time_ranges() */
+ u64 start; /* TSC value of the interval start; 0 when no start time was given */
+ u64 end; /* TSC value of the interval end; 0 when no end time was given */
+};
+
struct intel_pt {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -95,6 +103,9 @@ struct intel_pt {
u64 pwrx_id;
u64 cbr_id;
+ bool sample_pebs;
+ struct perf_evsel *pebs_evsel;
+
u64 tsc_bit;
u64 mtc_bit;
u64 mtc_freq_bits;
@@ -109,6 +120,9 @@ struct intel_pt {
char *filter;
struct addr_filters filts;
+
+ struct range *time_ranges;
+ unsigned int range_cnt;
};
enum switch_state {
@@ -145,9 +159,18 @@ struct intel_pt_queue {
bool have_sample;
u64 time;
u64 timestamp;
+ u64 sel_timestamp;
+ bool sel_start;
+ unsigned int sel_idx;
u32 flags;
u16 insn_len;
u64 last_insn_cnt;
+ u64 ipc_insn_cnt;
+ u64 ipc_cyc_cnt;
+ u64 last_in_insn_cnt;
+ u64 last_in_cyc_cnt;
+ u64 last_br_insn_cnt;
+ u64 last_br_cyc_cnt;
char insn[INTEL_PT_INSN_BUF_SZ];
};
@@ -159,13 +182,14 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
int ret, pkt_len, i;
char desc[INTEL_PT_PKT_DESC_MAX];
const char *color = PERF_COLOR_BLUE;
+ enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
color_fprintf(stdout, color,
". ... Intel Processor Trace data: size %zu bytes\n",
len);
while (len) {
- ret = intel_pt_get_packet(buf, len, &packet);
+ ret = intel_pt_get_packet(buf, len, &packet, &ctx);
if (ret > 0)
pkt_len = ret;
else
@@ -224,32 +248,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
return 0;
}
-/* This function assumes data is processed sequentially only */
-static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer *old_buffer,
+ struct intel_pt_buffer *b)
{
- struct intel_pt_queue *ptq = data;
- struct auxtrace_buffer *buffer = ptq->buffer;
- struct auxtrace_buffer *old_buffer = ptq->old_buffer;
- struct auxtrace_queue *queue;
bool might_overlap;
- if (ptq->stop) {
- b->len = 0;
- return 0;
- }
-
- queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
- buffer = auxtrace_buffer__next(queue, buffer);
- if (!buffer) {
- if (old_buffer)
- auxtrace_buffer__drop_data(old_buffer);
- b->len = 0;
- return 0;
- }
-
- ptq->buffer = buffer;
-
if (!buffer->data) {
int fd = perf_data__fd(ptq->pt->session->data);
@@ -279,6 +284,95 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
b->consecutive = true;
}
+ return 0;
+}
+
+/*
+ * Drop the data of a buffer visited by intel_pt_lookahead(). Do not drop
+ * buffers with references - refer intel_pt_get_trace(): the queue's current
+ * and previous buffers are left alone. A NULL buffer is ignored.
+ */
+static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
+ struct auxtrace_buffer *buffer)
+{
+ if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
+ return;
+
+ auxtrace_buffer__drop_data(buffer);
+}
+
+/*
+ * Decoder 'lookahead' callback: walk the buffers that follow the queue's
+ * current buffer and pass each one that yields data (b.len != 0) to 'cb'.
+ * Stops when there are no more buffers, when getting a buffer fails, or when
+ * 'cb' returns non-zero; that value (or 0) is returned.
+ *
+ * Must be serialized with respect to intel_pt_get_trace()
+ */
+static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
+ void *cb_data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer; /* local cursor, starts at the current buffer */
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err = 0;
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ while (1) {
+ struct intel_pt_buffer b = { .len = 0 };
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer)
+ break;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
+ if (err)
+ break;
+
+ if (b.len) {
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer); /* previous buffer is no longer needed */
+ old_buffer = buffer;
+ } else {
+ intel_pt_lookahead_drop_buffer(ptq, buffer); /* nothing to show the callback; try the next buffer */
+ continue;
+ }
+
+ err = cb(&b, cb_data);
+ if (err)
+ break;
+ }
+
+ if (buffer != old_buffer) /* avoid dropping the same buffer twice */
+ intel_pt_lookahead_drop_buffer(ptq, buffer);
+ intel_pt_lookahead_drop_buffer(ptq, old_buffer);
+
+ return err;
+}
+
+/*
+ * This function assumes data is processed sequentially only.
+ * Must be serialized with respect to intel_pt_lookahead()
+ */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
+ if (err)
+ return err;
+
if (ptq->step_through_buffers)
ptq->stop = true;
@@ -798,6 +892,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.get_trace = intel_pt_get_trace;
params.walk_insn = intel_pt_walk_next_insn;
+ params.lookahead = intel_pt_lookahead;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
@@ -921,6 +1016,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
ptq->flags |= PERF_IP_FLAG_TRACE_END;
}
+static void intel_pt_setup_time_range(struct intel_pt *pt,
+ struct intel_pt_queue *ptq)
+{ /*
+ * Initialise the queue's time-filter state (sel_timestamp, sel_start,
+ * sel_idx) from the first selected time range. Does nothing when no time
+ * ranges were set up.
+ */
+ if (!pt->range_cnt)
+ return;
+
+ ptq->sel_timestamp = pt->time_ranges[0].start;
+ ptq->sel_idx = 0;
+
+ if (ptq->sel_timestamp) {
+ ptq->sel_start = true; /* next boundary to look for is a start time */
+ } else {
+ ptq->sel_timestamp = pt->time_ranges[0].end; /* no start time, so go straight to the end time */
+ ptq->sel_start = false;
+ }
+}
+
static int intel_pt_setup_queue(struct intel_pt *pt,
struct auxtrace_queue *queue,
unsigned int queue_nr)
@@ -945,6 +1057,8 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
ptq->step_through_buffers = true;
ptq->sync_switch = pt->sync_switch;
+
+ intel_pt_setup_time_range(pt, ptq);
}
if (!ptq->on_heap &&
@@ -959,6 +1073,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
intel_pt_log("queue %u getting timestamp\n", queue_nr);
intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+
+ if (ptq->sel_start && ptq->sel_timestamp) {
+ ret = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (ret)
+ return ret;
+ }
+
while (1) {
state = intel_pt_decode(ptq->decoder);
if (state->err) {
@@ -978,6 +1100,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
queue_nr, ptq->timestamp);
ptq->state = state;
ptq->have_sample = true;
+ if (ptq->sel_start && ptq->sel_timestamp &&
+ ptq->timestamp < ptq->sel_timestamp)
+ ptq->have_sample = false;
intel_pt_sample_flags(ptq);
ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
if (ret)
@@ -1059,28 +1184,37 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt)
pt->num_events++ < pt->synth_opts.initial_skip;
}
+static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{ /*
+ * Fill in the event header type/size and the sample fields that come
+ * straight from the queue (pid, tid, cpu, instruction bytes). Shared by
+ * intel_pt_prep_b_sample() and intel_pt_synth_pebs_sample().
+ */
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+ sample->cpu = ptq->cpu;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
static void intel_pt_prep_b_sample(struct intel_pt *pt,
struct intel_pt_queue *ptq,
union perf_event *event,
struct perf_sample *sample)
{
+ intel_pt_prep_a_sample(ptq, event, sample);
+
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
sample->ip = ptq->state->from_ip;
sample->cpumode = intel_pt_cpumode(pt, sample->ip);
- sample->pid = ptq->pid;
- sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
sample->period = 1;
- sample->cpu = ptq->cpu;
sample->flags = ptq->flags;
- sample->insn_len = ptq->insn_len;
- memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
- event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = sample->cpumode;
- event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
@@ -1153,6 +1287,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.branch_stack = (struct branch_stack *)&dummy_bs;
}
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
+ ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
return intel_pt_deliver_synth_b_event(pt, event, &sample,
pt->branches_sample_type);
}
@@ -1208,6 +1349,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.stream_id = ptq->pt->instructions_id;
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
+ if (sample.cyc_cnt) {
+ sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
+ ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
+ ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
+ }
+
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
@@ -1401,6 +1549,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
pt->pwr_events_sample_type);
}
+/*
+ * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
+ * intel_pt_add_gp_regs().
+ *
+ * The table is indexed by perf register number (PERF_REG_X86_*). Registers
+ * without an initializer are implicitly 0, i.e. not present.
+ */
+static const int pebs_gp_regs[] = {
+ [PERF_REG_X86_FLAGS] = 1,
+ [PERF_REG_X86_IP] = 2,
+ [PERF_REG_X86_AX] = 3,
+ [PERF_REG_X86_CX] = 4,
+ [PERF_REG_X86_DX] = 5,
+ [PERF_REG_X86_BX] = 6,
+ [PERF_REG_X86_SP] = 7,
+ [PERF_REG_X86_BP] = 8,
+ [PERF_REG_X86_SI] = 9,
+ [PERF_REG_X86_DI] = 10,
+ [PERF_REG_X86_R8] = 11,
+ [PERF_REG_X86_R9] = 12,
+ [PERF_REG_X86_R10] = 13,
+ [PERF_REG_X86_R11] = 14,
+ [PERF_REG_X86_R12] = 15,
+ [PERF_REG_X86_R13] = 16,
+ [PERF_REG_X86_R14] = 17,
+ [PERF_REG_X86_R15] = 18,
+};
+/*
+ * Copy general-purpose register values from 'items' to the array at 'pos',
+ * visiting registers in perf register-number order, and set the matching bits
+ * in intr_regs->mask. Returns the position just past the last value written.
+ * The caller must provide room for every register that may be written.
+ */
+static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
+ u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
+ u32 bit;
+ int i;
+
+ for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
+ /* Get the PEBS gp_regs array index */
+ int n = pebs_gp_regs[i] - 1;
+
+ if (n < 0)
+ continue;
+ /*
+ * Add only registers that were requested (i.e. 'regs_mask') and
+ * that were provided (i.e. 'mask'), and update the resulting
+ * mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ if (mask & 1 << n && regs_mask & bit) {
+ intr_regs->mask |= bit;
+ *pos++ = gp_regs[n];
+ }
+ }
+
+ return pos;
+}
+
+#ifndef PERF_REG_X86_XMM0
+#define PERF_REG_X86_XMM0 32
+#endif
+/*
+ * Append XMM register values from 'items' to the array at 'pos' and set the
+ * matching bits (starting at bit PERF_REG_X86_XMM0) in intr_regs->mask.
+ * The caller must provide room for every value that may be written.
+ */
+static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
+ const struct intel_pt_blk_items *items,
+ u64 regs_mask)
+{
+ u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
+ const u64 *xmm = items->xmm;
+
+ /*
+ * If there are any XMM registers, then there should be all of them.
+ * Nevertheless, follow the logic to add only registers that were
+ * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
+ * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
+ */
+ intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
+
+ for (; mask; mask >>= 1, xmm++) { /* one u64 per mask bit, lowest bit first */
+ if (mask & 1)
+ *pos++ = *xmm;
+ }
+}
+
+#define LBR_INFO_MISPRED (1ULL << 63)
+#define LBR_INFO_IN_TX (1ULL << 62)
+#define LBR_INFO_ABORT (1ULL << 61)
+#define LBR_INFO_CYCLES 0xffff
+
+/*
+ * Convert an LBR info value into the u64 representation of
+ * struct branch_flags, by filling the bit-fields and reading them back
+ * through a union.
+ * Refer kernel's intel_pmu_store_pebs_lbrs()
+ */
+static u64 intel_pt_lbr_flags(u64 info)
+{
+ union {
+ struct branch_flags flags;
+ u64 result;
+ } u = {
+ .flags = {
+ .mispred = !!(info & LBR_INFO_MISPRED),
+ .predicted = !(info & LBR_INFO_MISPRED), /* always the inverse of mispred */
+ .in_tx = !!(info & LBR_INFO_IN_TX),
+ .abort = !!(info & LBR_INFO_ABORT),
+ .cycles = info & LBR_INFO_CYCLES,
+ }
+ };
+
+ return u.result;
+}
+/*
+ * Build 'br_stack' from the three LBR blocks in 'items'. Each branch entry
+ * is made from 3 consecutive items (from, to, info) and is added only when
+ * all 3 are present. The caller must provide room for the entries.
+ */
+static void intel_pt_add_lbrs(struct branch_stack *br_stack,
+ const struct intel_pt_blk_items *items)
+{
+ u64 *to;
+ int i;
+
+ br_stack->nr = 0;
+
+ to = &br_stack->entries[0].from; /* entries are written as consecutive u64 triples */
+
+ for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
+ u32 mask = items->mask[i];
+ const u64 *from = items->val[i];
+
+ for (; mask; mask >>= 3, from += 3) {
+ if ((mask & 7) == 7) { /* all 3 items of this entry are present */
+ *to++ = from[0];
+ *to++ = from[1];
+ *to++ = intel_pt_lbr_flags(from[2]);
+ br_stack->nr += 1;
+ }
+ }
+ }
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
+
+/*
+ * Synthesize a PEBS sample for the PEBS event (pt->pebs_evsel) from the block
+ * items of the current decoder state, filling in only what the event's
+ * sample_type asks for and what the block items provide.
+ *
+ * Returns 0 if the event is skipped, otherwise the result of
+ * intel_pt_deliver_synth_event().
+ */
+static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_blk_items *items = &ptq->state->items;
+	struct perf_sample sample = { .ip = 0, };
+	union perf_event *event = ptq->event_buf;
+	struct intel_pt *pt = ptq->pt;
+	struct perf_evsel *evsel = pt->pebs_evsel;
+	u64 sample_type = evsel->attr.sample_type;
+	u64 id = evsel->id[0];
+	/*
+	 * 'sample' keeps pointers to 'regs' and 'br' until it is delivered at
+	 * the end of this function, so both must have function scope.
+	 * 'regs' needs one slot per bit of intr_regs.mask because each mask
+	 * bit can contribute one u64 value (GP and XMM registers).
+	 */
+	u64 regs[8 * sizeof(sample.intr_regs.mask)];
+	struct {
+		struct branch_stack br_stack;
+		struct branch_entry entries[LBRS_MAX];
+	} br;
+	u8 cpumode;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_a_sample(ptq, event, &sample);
+
+	sample.id = id;
+	sample.stream_id = id;
+
+	if (!evsel->attr.freq)
+		sample.period = evsel->attr.sample_period;
+
+	/* No support for non-zero CS base */
+	if (items->has_ip)
+		sample.ip = items->ip;
+	else if (items->has_rip)
+		sample.ip = items->rip;
+	else
+		sample.ip = ptq->state->from_ip;
+
+	/* No support for guest mode at this time */
+	cpumode = sample.ip < ptq->pt->kernel_start ?
+		  PERF_RECORD_MISC_USER :
+		  PERF_RECORD_MISC_KERNEL;
+
+	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
+
+	sample.cpumode = cpumode;
+
+	if (sample_type & PERF_SAMPLE_TIME) {
+		u64 timestamp = 0;
+
+		/* Prefer the timestamp recorded in the PEBS record itself */
+		if (items->has_timestamp)
+			timestamp = items->timestamp;
+		else if (!pt->timeless_decoding)
+			timestamp = ptq->timestamp;
+		if (timestamp)
+			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
+	}
+
+	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
+	    pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample.ip,
+				     pt->kernel_start);
+		sample.callchain = ptq->chain;
+	}
+
+	/* Emit registers when either GP or XMM values are present */
+	if (sample_type & PERF_SAMPLE_REGS_INTR &&
+	    (items->mask[INTEL_PT_GP_REGS_POS] || items->has_xmm)) {
+		u64 regs_mask = evsel->attr.sample_regs_intr;
+		u64 *pos;
+
+		sample.intr_regs.abi = items->is_32_bit ?
+				       PERF_SAMPLE_REGS_ABI_32 :
+				       PERF_SAMPLE_REGS_ABI_64;
+		sample.intr_regs.regs = regs;
+
+		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
+
+		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
+	}
+
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (items->mask[INTEL_PT_LBR_0_POS] ||
+		    items->mask[INTEL_PT_LBR_1_POS] ||
+		    items->mask[INTEL_PT_LBR_2_POS]) {
+			intel_pt_add_lbrs(&br.br_stack, items);
+			sample.branch_stack = &br.br_stack;
+		} else if (pt->synth_opts.last_branch) {
+			intel_pt_copy_last_branch_rb(ptq);
+			sample.branch_stack = ptq->last_branch;
+		} else {
+			/* Requested but unavailable: report an empty stack */
+			br.br_stack.nr = 0;
+			sample.branch_stack = &br.br_stack;
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
+		sample.addr = items->mem_access_address;
+
+	if (sample_type & PERF_SAMPLE_WEIGHT) {
+		/*
+		 * Refer kernel's setup_pebs_adaptive_sample_data() and
+		 * intel_hsw_weight().
+		 */
+		if (items->has_mem_access_latency)
+			sample.weight = items->mem_access_latency;
+		if (!sample.weight && items->has_tsx_aux_info) {
+			/* Cycles last block */
+			sample.weight = (u32)items->tsx_aux_info;
+		}
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
+		u64 ax = items->has_rax ? items->rax : 0;
+		/* Refer kernel's intel_hsw_transaction() */
+		u64 txn = (u8)(items->tsx_aux_info >> 32);
+
+		/* For RTM XABORTs also log the abort code from AX */
+		if (txn & PERF_TXN_TRANSACTION && ax & 1)
+			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+		sample.transaction = txn;
+	}
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+}
+
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
@@ -1479,6 +1882,25 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
+ if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
+ /*
+ * Cycle count and instruction count only go together to create
+ * a valid IPC ratio when the cycle count changes.
+ */
+ ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
+ ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
+ }
+
+ /*
+ * Do PEBS first to allow for the possibility that the PEBS timestamp
+ * precedes the current timestamp.
+ */
+ if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
+ err = intel_pt_synth_pebs_sample(ptq);
+ if (err)
+ return err;
+ }
+
if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
if (state->type & INTEL_PT_CBR_CHG) {
err = intel_pt_synth_cbr_sample(ptq);
@@ -1641,10 +2063,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
}
}
+/*
+ * To filter against time ranges, it is only necessary to look at the next start
+ * or end time.
+ *
+ * Advance the queue's selection state to the next boundary: from a start time
+ * to the end time of the same range, or from an end time to the start time of
+ * the following range. Returns false, leaving the state unchanged, when there
+ * is no further range.
+ */
+static bool intel_pt_next_time(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ if (ptq->sel_start) {
+ /* Next time is an end time */
+ ptq->sel_start = false;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
+ return true;
+ } else if (ptq->sel_idx + 1 < pt->range_cnt) {
+ /* Next time is a start time */
+ ptq->sel_start = true;
+ ptq->sel_idx += 1;
+ ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
+ return true;
+ }
+
+ /* No next time */
+ return false;
+}
+/*
+ * Check the queue's current timestamp against the selected time ranges.
+ * Returns 0 to carry on decoding (clearing ptq->have_sample when the timestamp
+ * is before the next start time), 1 when the timestamp is past the last range
+ * so decoding should stop, or an error from intel_pt_fast_forward().
+ * '*ff_timestamp' remembers the last timestamp fast-forwarded to, so the same
+ * fast forward is not requested twice.
+ */
+static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
+{
+ int err;
+
+ while (1) {
+ if (ptq->sel_start) {
+ if (ptq->timestamp >= ptq->sel_timestamp) {
+ /* After start time, so consider next time */
+ intel_pt_next_time(ptq);
+ if (!ptq->sel_timestamp) {
+ /* No end time */
+ return 0;
+ }
+ /* Check against end time */
+ continue;
+ }
+ /* Before start time, so fast forward */
+ ptq->have_sample = false;
+ if (ptq->sel_timestamp > *ff_timestamp) {
+ if (ptq->sync_switch) {
+ intel_pt_next_tid(ptq->pt, ptq);
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ }
+ *ff_timestamp = ptq->sel_timestamp;
+ err = intel_pt_fast_forward(ptq->decoder,
+ ptq->sel_timestamp);
+ if (err)
+ return err;
+ }
+ return 0;
+ } else if (ptq->timestamp > ptq->sel_timestamp) {
+ /* After end time, so consider next time */
+ if (!intel_pt_next_time(ptq)) {
+ /* No next time range, so stop decoding */
+ ptq->have_sample = false;
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ return 1;
+ }
+ /* Check against next start time */
+ continue;
+ } else {
+ /* Before end time */
+ return 0;
+ }
+ }
+}
+
static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
const struct intel_pt_state *state = ptq->state;
struct intel_pt *pt = ptq->pt;
+ u64 ff_timestamp = 0;
int err;
if (!pt->kernel_start) {
@@ -1709,6 +2204,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
ptq->timestamp = state->timestamp;
}
+ if (ptq->sel_timestamp) {
+ err = intel_pt_time_filter(ptq, &ff_timestamp);
+ if (err)
+ return err;
+ }
+
if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
*timestamp = ptq->timestamp;
return 0;
@@ -1850,7 +2351,6 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
switch (ptq->switch_state) {
case INTEL_PT_SS_NOT_TRACING:
- ptq->next_tid = -1;
break;
case INTEL_PT_SS_UNKNOWN:
case INTEL_PT_SS_TRACING:
@@ -1870,13 +2370,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
ptq->switch_state = INTEL_PT_SS_TRACING;
break;
case INTEL_PT_SS_EXPECTING_SWITCH_IP:
- ptq->next_tid = tid;
intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
break;
default:
break;
}
+ ptq->next_tid = -1;
+
return 1;
}
@@ -1905,6 +2406,44 @@ static int intel_pt_process_switch(struct intel_pt *pt,
return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
+static int intel_pt_context_switch_in(struct intel_pt *pt,
+ struct perf_sample *sample)
+{ /*
+ * Handle a "switch in" context switch event: clear any pending next_tid on
+ * the CPU's queue, stop waiting for a switch event/ip, and make sure the
+ * machine's current tid for the CPU matches the sample.
+ * Returns 0 or the result of machine__set_current_tid().
+ */
+ pid_t pid = sample->pid;
+ pid_t tid = sample->tid;
+ int cpu = sample->cpu;
+
+ if (pt->sync_switch) {
+ struct intel_pt_queue *ptq;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (ptq && ptq->sync_switch) {
+ ptq->next_tid = -1;
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ ptq->switch_state = INTEL_PT_SS_TRACING; /* the switch has happened, so stop expecting it */
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * If the current tid has not been updated yet, ensure it is now that
+ * a "switch in" event has occurred.
+ */
+ if (machine__get_current_tid(pt->machine, cpu) == tid)
+ return 0;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
struct perf_sample *sample)
{
@@ -1916,7 +2455,7 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
if (pt->have_sched_switch == 3) {
if (!out)
- return 0;
+ return intel_pt_context_switch_in(pt, sample);
if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
pr_err("Expecting CPU-wide context switch event\n");
return -EINVAL;
@@ -2076,6 +2615,7 @@ static void intel_pt_free(struct perf_session *session)
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
}
@@ -2373,6 +2913,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
return 0;
}
+/*
+ * Find least TSC which converts to ns or later.
+ *
+ * Starts from perf_time_to_tsc(ns), steps down until the TSC converts to a
+ * time before 'ns', then steps back up to the first TSC that converts to 'ns'
+ * or later.
+ */
+static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm < ns)
+ break;
+ tsc -= 1; /* NOTE(review): no guard against tsc reaching 0 here - confirm callers never hit it */
+ }
+
+ while (tm < ns)
+ tm = tsc_to_perf_time(++tsc, &pt->tc);
+
+ return tsc;
+}
+
+/*
+ * Find greatest TSC which converts to ns or earlier.
+ *
+ * Starts from perf_time_to_tsc(ns), steps up until the TSC converts to a time
+ * after 'ns', then steps back down to the last TSC that converts to 'ns' or
+ * earlier.
+ */
+static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
+{
+ u64 tsc, tm;
+
+ tsc = perf_time_to_tsc(ns, &pt->tc);
+
+ while (1) {
+ tm = tsc_to_perf_time(tsc, &pt->tc);
+ if (tm > ns)
+ break;
+ tsc += 1;
+ }
+
+ while (tm > ns)
+ tm = tsc_to_perf_time(--tsc, &pt->tc);
+
+ return tsc;
+}
+/*
+ * Convert the perf-time intervals in 'opts' into TSC ranges stored in
+ * pt->time_ranges / pt->range_cnt. Does nothing when there are no intervals or
+ * when decoding is timeless. Returns 0 on success or -ENOMEM.
+ */
+static int intel_pt_setup_time_ranges(struct intel_pt *pt,
+ struct itrace_synth_opts *opts)
+{
+ struct perf_time_interval *p = opts->ptime_range;
+ int n = opts->range_num;
+ int i;
+
+ if (!n || !p || pt->timeless_decoding)
+ return 0;
+
+ pt->time_ranges = calloc(n, sizeof(struct range));
+ if (!pt->time_ranges)
+ return -ENOMEM;
+
+ pt->range_cnt = n;
+
+ intel_pt_log("%s: %u range(s)\n", __func__, n);
+
+ for (i = 0; i < n; i++) {
+ struct range *r = &pt->time_ranges[i];
+ u64 ts = p[i].start;
+ u64 te = p[i].end;
+
+ /*
+ * Take care to ensure the TSC range matches the perf-time range
+ * when converted back to perf-time.
+ */
+ r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; /* 0 is kept as "no start time" */
+ r->end = te ? intel_pt_tsc_end(te, pt) : 0; /* 0 is kept as "no end time" */
+
+ intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
+ i, ts, te);
+ intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
+ i, r->start, r->end);
+ }
+
+ return 0;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2579,7 +3198,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
} else {
itrace_synth_opts__set_default(&pt->synth_opts,
session->itrace_synth_opts->default_no_sample);
- if (use_browser != -1) {
+ if (!session->itrace_synth_opts->default_no_sample &&
+ !session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
}
@@ -2604,6 +3224,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
}
+ if (session->itrace_synth_opts) {
+ err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
+ if (err)
+ goto err_delete_thread;
+ }
+
if (pt->synth_opts.calls)
pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
PERF_IP_FLAG_TRACE_END;
@@ -2644,6 +3270,7 @@ err_free_queues:
err_free:
addr_filters__exit(&pt->filts);
zfree(&pt->filter);
+ zfree(&pt->time_ranges);
free(pt);
return err;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dc7aafe45a2b..17eec39e775e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -704,12 +704,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;
map->start = event->ksymbol_event.addr;
- map->pgoff = map->start;
map->end = map->start + event->ksymbol_event.len;
map_groups__insert(&machine->kmaps, map);
}
- sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ sym = symbol__new(map->map_ip(map, map->start),
+ event->ksymbol_event.len,
0, 0, event->ksymbol_event.name);
if (!sym)
return -ENOMEM;
@@ -1241,9 +1241,9 @@ static char *get_kernel_version(const char *root_dir)
return NULL;
tmp = fgets(version, sizeof(version), file);
- if (!tmp)
- *version = '\0';
fclose(file);
+ if (!tmp)
+ return NULL;
name = strstr(version, prefix);
if (!name)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ee71efb9db62..6fce983c6115 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -405,6 +405,7 @@ size_t map__fprintf(struct map *map, FILE *fp)
size_t map__fprintf_dsoname(struct map *map, FILE *fp)
{
+ char buf[symbol_conf.pad_output_len_dso + 1];
const char *dsoname = "[unknown]";
if (map && map->dso) {
@@ -414,6 +415,11 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
dsoname = map->dso->name;
}
+ if (symbol_conf.pad_output_len_dso) {
+ scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
+ dsoname = buf;
+ }
+
return fprintf(fp, "%s", dsoname);
}
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h
index 4dcda33e0fdf..5f25efa6d6bc 100644
--- a/tools/perf/util/map_groups.h
+++ b/tools/perf/util/map_groups.h
@@ -88,4 +88,6 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE
struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
+int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map);
+
#endif // __PERF_MAP_GROUPS_H
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index cb9c246c8962..47fe34e5f7d5 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -29,12 +29,16 @@ uint64_t arch__user_reg_mask(void);
#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>
+#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
#else
#define PERF_REGS_MASK 0
#define PERF_REGS_MAX 0
+#define DWARF_MINIMAL_REGS PERF_REGS_MASK
+
static inline const char *perf_reg_name(int id __maybe_unused)
{
return NULL;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e0429f4ef335..faa8eb231e1b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -709,9 +709,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
{
int i;
struct pmu_events_map *map;
- struct pmu_event *pe;
const char *name = pmu->name;
- const char *pname;
map = perf_pmu__find_map(pmu);
if (!map)
@@ -722,28 +720,26 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
*/
i = 0;
while (1) {
+ const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
+ struct pmu_event *pe = &map->table[i++];
+ const char *pname = pe->pmu ? pe->pmu : cpu_name;
- pe = &map->table[i++];
if (!pe->name) {
if (pe->metric_group || pe->metric_name)
continue;
break;
}
- if (!is_arm_pmu_core(name)) {
- pname = pe->pmu ? pe->pmu : "cpu";
-
- /*
- * uncore alias may be from different PMU
- * with common prefix
- */
- if (pmu_is_uncore(name) &&
- !strncmp(pname, name, strlen(pname)))
- goto new_alias;
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
- if (strcmp(pname, name))
- continue;
- }
+ if (strcmp(pname, name))
+ continue;
new_alias:
/* need type casts to override 'const' */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index c215704931dc..10d36d9b7909 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -17,8 +17,8 @@
* see Documentation/perf.data-file-format.txt.
* PERF_RECORD_AUXTRACE_INFO:
* Defines a table of contains for PERF_RECORD_AUXTRACE records. This
- * record is generated during 'perf record' command. Each record contains up
- * to 256 entries describing offset and size of the AUXTRACE data in the
+ * record is generated during 'perf record' command. Each record contains
+ * up to 256 entries describing offset and size of the AUXTRACE data in the
* perf.data file.
* PERF_RECORD_AUXTRACE_ERROR:
* Indicates an error during AUXTRACE collection such as buffer overflow.
@@ -237,10 +237,33 @@ static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
return rc;
}
-/* Display s390 CPU measurement facility basic-sampling data entry */
+/* Display s390 CPU measurement facility basic-sampling data entry.
+ * The data is written on s390 in big-endian byte order and contains bit
+ * fields that span byte boundaries.
+ */
static bool s390_cpumsf_basic_show(const char *color, size_t pos,
- struct hws_basic_entry *basic)
+ struct hws_basic_entry *basicp)
{
+ struct hws_basic_entry *basic = basicp;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_basic_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)basicp);
+
+ memset(&local, 0, sizeof(local));
+ local.def = be16toh(basicp->def);
+ local.prim_asn = word & 0xffff;
+ local.CL = word >> 30 & 0x3;
+ local.I = word >> 32 & 0x1;
+ local.AS = word >> 33 & 0x3;
+ local.P = word >> 35 & 0x1;
+ local.W = word >> 36 & 0x1;
+ local.T = word >> 37 & 0x1;
+ local.U = word >> 40 & 0xf;
+ local.ia = be64toh(basicp->ia);
+ local.gpp = be64toh(basicp->gpp);
+ local.hpp = be64toh(basicp->hpp);
+ basic = &local;
+#endif
if (basic->def != 1) {
pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
return false;
@@ -258,10 +281,22 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos,
return true;
}
-/* Display s390 CPU measurement facility diagnostic-sampling data entry */
+/* Display s390 CPU measurement facility diagnostic-sampling data entry.
+ * The data is written on s390 in big-endian byte order and contains bit
+ * fields that span byte boundaries.
+ */
static bool s390_cpumsf_diag_show(const char *color, size_t pos,
- struct hws_diag_entry *diag)
+ struct hws_diag_entry *diagp)
{
+ struct hws_diag_entry *diag = diagp;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_diag_entry local;
+ unsigned long long word = be64toh(*(unsigned long long *)diagp);
+
+ local.def = be16toh(diagp->def);
+ local.I = word >> 32 & 0x1;
+ diag = &local;
+#endif
if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
return false;
@@ -272,35 +307,52 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos,
}
/* Return TOD timestamp contained in an trailer entry */
-static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
+ int idx)
{
/* te->t set: TOD in STCKE format, bytes 8-15
* to->t not set: TOD in STCK format, bytes 0-7
*/
unsigned long long ts;
- memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
- return ts;
+ memcpy(&ts, &te->timestamp[idx], sizeof(ts));
+ return be64toh(ts);
}
/* Display s390 CPU measurement facility trailer entry */
static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
struct hws_trailer_entry *te)
{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ struct hws_trailer_entry local;
+ const unsigned long long flags = be64toh(te->flags);
+
+ memset(&local, 0, sizeof(local));
+ local.f = flags >> 63 & 0x1;
+ local.a = flags >> 62 & 0x1;
+ local.t = flags >> 61 & 0x1;
+ local.bsdes = be16toh((flags >> 16 & 0xffff));
+ local.dsdes = be16toh((flags & 0xffff));
+ memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
+ local.overflow = be64toh(te->overflow);
+ local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
+ local.progusage2 = be64toh(te->progusage2);
+ te = &local;
+#endif
if (te->bsdes != sizeof(struct hws_basic_entry)) {
pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
return false;
}
color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
" dsdes:%d Overflow:%lld Time:%#llx\n"
- "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
+ "\t\tC:%d TOD:%#lx\n",
pos,
te->f ? 'F' : ' ',
te->a ? 'A' : ' ',
te->t ? 'T' : ' ',
te->bsdes, te->dsdes, te->overflow,
- trailer_timestamp(te), te->clock_base, te->progusage2,
- te->progusage[0], te->progusage[1]);
+ trailer_timestamp(te, te->clock_base),
+ te->clock_base, te->progusage2);
return true;
}
@@ -327,13 +379,13 @@ static bool s390_cpumsf_validate(int machine_type,
*dsdes = *bsdes = 0;
if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
return false;
- if (basic->def != 1) /* No basic set entry, must be first */
+ if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
return false;
/* Check for trailer entry at end of SDB */
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
- sizeof(*te));
- *bsdes = te->bsdes;
- *dsdes = te->dsdes;
+ *bsdes = be16toh(te->bsdes);
+ *dsdes = be16toh(te->dsdes);
if (!te->bsdes && !te->dsdes) {
/* Very old hardware, use CPUID */
switch (machine_type) {
@@ -495,19 +547,27 @@ static bool s390_cpumsf_make_event(size_t pos,
static unsigned long long get_trailer_time(const unsigned char *buf)
{
struct hws_trailer_entry *te;
- unsigned long long aux_time;
+ unsigned long long aux_time, progusage2;
+ bool clock_base;
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
- sizeof(*te));
- if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
+ progusage2 = be64toh(te->progusage[1]);
+#else
+ clock_base = te->clock_base;
+ progusage2 = te->progusage2;
+#endif
+ if (!clock_base) /* TOD_CLOCK_BASE value missing */
return 0;
/* Correct calculation to convert time stamp in trailer entry to
* nano seconds (taken from arch/s390 function tod_to_ns()).
* TOD_CLOCK_BASE is stored in trailer entry member progusage2.
*/
- aux_time = trailer_timestamp(te) - te->progusage2;
+ aux_time = trailer_timestamp(te, clock_base) - progusage2;
aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
return aux_time;
}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 22f52b669871..6acb379b53ec 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1111,7 +1111,7 @@ static int python_export_sample(struct db_export *dbe,
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(22);
+ t = tuple_new(24);
tuple_set_u64(t, 0, es->db_id);
tuple_set_u64(t, 1, es->evsel->db_id);
@@ -1135,6 +1135,8 @@ static int python_export_sample(struct db_export *dbe,
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
tuple_set_u64(t, 21, es->call_path_id);
+ tuple_set_u64(t, 22, es->sample->insn_cnt);
+ tuple_set_u64(t, 23, es->sample->cyc_cnt);
call_object(tables->sample_handler, t, "sample_table");
@@ -1173,7 +1175,7 @@ static int python_export_call_return(struct db_export *dbe,
u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
PyObject *t;
- t = tuple_new(12);
+ t = tuple_new(14);
tuple_set_u64(t, 0, cr->db_id);
tuple_set_u64(t, 1, cr->thread->db_id);
@@ -1187,6 +1189,8 @@ static int python_export_call_return(struct db_export *dbe,
tuple_set_u64(t, 9, cr->cp->parent->db_id);
tuple_set_s32(t, 10, cr->flags);
tuple_set_u64(t, 11, cr->parent_db_id);
+ tuple_set_u64(t, 12, cr->insn_count);
+ tuple_set_u64(t, 13, cr->cyc_count);
call_object(tables->call_return_handler, t, "call_return_table");
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 5b5a167b43ce..a1a68a2fa917 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -17,6 +17,8 @@ if cc == "clang":
vars[var] = sub("-fcf-protection", "", vars[var])
if not clang_has_option("-fstack-clash-protection"):
vars[var] = sub("-fstack-clash-protection", "", vars[var])
+ if not clang_has_option("-fstack-protector-strong"):
+ vars[var] = sub("-fstack-protector-strong", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 453f6f6f29f3..3b791ef2cd50 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -23,8 +23,12 @@ int smt_on(void)
char fn[256];
snprintf(fn, sizeof fn,
- "devices/system/cpu/cpu%d/topology/thread_siblings",
- cpu);
+ "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
+ if (access(fn, F_OK) == -1) {
+ snprintf(fn, sizeof fn,
+ "devices/system/cpu/cpu%d/topology/thread_siblings",
+ cpu);
+ }
if (sysfs__read_str(fn, &str, &strlen) < 0)
continue;
/* Entry is hex, but does not have 0x, so need custom parser */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4c53bae5644b..a6b9de3e83fc 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -69,8 +69,9 @@ static void aggr_printout(struct perf_stat_config *config,
{
switch (config->aggr_mode) {
case AGGR_CORE:
- fprintf(config->output, "S%d-C%*d%s%*d%s",
+ fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
cpu_map__id_to_socket(id),
+ cpu_map__id_to_die(id),
config->csv_output ? 0 : -8,
cpu_map__id_to_cpu(id),
config->csv_sep,
@@ -78,6 +79,16 @@ static void aggr_printout(struct perf_stat_config *config,
nr,
config->csv_sep);
break;
+ case AGGR_DIE:
+ fprintf(config->output, "S%d-D%*d%s%*d%s",
+ cpu_map__id_to_socket(id << 16),
+ config->csv_output ? 0 : -8,
+ cpu_map__id_to_die(id << 16),
+ config->csv_sep,
+ config->csv_output ? 0 : 4,
+ nr,
+ config->csv_sep);
+ break;
case AGGR_SOCKET:
fprintf(config->output, "S%*d%s%*d%s",
config->csv_output ? 0 : -5,
@@ -89,8 +100,9 @@ static void aggr_printout(struct perf_stat_config *config,
break;
case AGGR_NONE:
if (evsel->percore) {
- fprintf(config->output, "S%d-C%*d%s",
+ fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
+ cpu_map__id_to_die(id),
config->csv_output ? 0 : -5,
cpu_map__id_to_cpu(id), config->csv_sep);
} else {
@@ -407,6 +419,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
[AGGR_THREAD] = 1,
[AGGR_NONE] = 1,
[AGGR_SOCKET] = 2,
+ [AGGR_DIE] = 2,
[AGGR_CORE] = 2,
};
@@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
}
static int aggr_header_lens[] = {
- [AGGR_CORE] = 18,
+ [AGGR_CORE] = 24,
+ [AGGR_DIE] = 18,
[AGGR_SOCKET] = 12,
[AGGR_NONE] = 6,
[AGGR_THREAD] = 24,
@@ -888,6 +902,7 @@ static int aggr_header_lens[] = {
static const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,cpus,",
+ [AGGR_DIE] = "die,cpus",
[AGGR_SOCKET] = "socket,cpus",
[AGGR_NONE] = "cpu,",
[AGGR_THREAD] = "comm-pid,",
@@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config,
if (!metric_only)
fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
+ case AGGR_DIE:
+ fprintf(output, "# time die cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
case AGGR_CORE:
- fprintf(output, "# time core cpus");
+ fprintf(output, "# time core cpus");
if (!metric_only)
fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
@@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
switch (config->aggr_mode) {
case AGGR_CORE:
+ case AGGR_DIE:
case AGGR_SOCKET:
print_aggr(config, evlist, prefix);
break;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 83d8094be4fe..027b09aaa4cf 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -12,6 +12,7 @@
/*
* AGGR_GLOBAL: Use CPU 0
* AGGR_SOCKET: Use first CPU of socket
+ * AGGR_DIE: Use first CPU of die
* AGGR_CORE: Use first CPU of core
* AGGR_NONE: Use matching CPU
* AGGR_THREAD: Not supported?
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c3115d939b0b..d91fe754b6d2 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
switch (config->aggr_mode) {
case AGGR_THREAD:
case AGGR_CORE:
+ case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NONE:
if (!evsel->snapshot)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2f9c9159a364..7032dd1eeac2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -44,6 +44,7 @@ enum aggr_mode {
AGGR_NONE,
AGGR_GLOBAL,
AGGR_SOCKET,
+ AGGR_DIE,
AGGR_CORE,
AGGR_THREAD,
AGGR_UNSET,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4ad106a5f2c0..fdc5bd7dbb90 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -699,7 +699,6 @@ bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
enum dso_binary_type type)
{
- int err = -1;
GElf_Ehdr ehdr;
Elf *elf;
int fd;
@@ -793,7 +792,7 @@ out_elf_end:
elf_end(elf);
out_close:
close(fd);
- return err;
+ return -1;
}
/**
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5cbad55cd99d..f4540f8bbed1 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1166,6 +1166,85 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
return 0;
}
+/*
+ * Merge @new_map into @kmaps without disturbing the maps already there.
+ *
+ * Every part of @new_map that overlaps an existing map is cut away and only
+ * the remaining pieces are inserted. Where an existing map lies strictly
+ * inside @new_map, the piece in front of it is split off as a clone and
+ * @new_map continues behind it.
+ *
+ * On success the reference passed in for @new_map is dropped here: the map
+ * is put after being inserted, or put outright when an existing map covers
+ * it completely.
+ *
+ * Returns 0 on success or -ENOMEM when a clone cannot be allocated. On
+ * failure nothing is inserted and the clones made so far are released.
+ * NOTE(review): on failure @new_map may already have been trimmed and its
+ * reference is not dropped here - confirm the caller's error path puts it.
+ */
+int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map)
+{
+	struct map *old_map;
+	LIST_HEAD(merged);
+	int err = 0;
+
+	for (old_map = map_groups__first(kmaps); old_map;
+	     old_map = map_groups__next(old_map)) {
+
+		/* no overlap with this one */
+		if (new_map->end < old_map->start ||
+		    new_map->start >= old_map->end)
+			continue;
+
+		if (new_map->start < old_map->start) {
+			/*
+			 * |new......
+			 *       |old....
+			 */
+			if (new_map->end < old_map->end) {
+				/*
+				 * |new......|     -> |new..|
+				 *       |old....| ->       |old....|
+				 */
+				new_map->end = old_map->start;
+			} else {
+				/*
+				 * |new.............| -> |new..|       |new..|
+				 *       |old....|    ->       |old....|
+				 */
+				struct map *m = map__clone(new_map);
+
+				if (!m) {
+					/* clones queued so far are released below */
+					err = -ENOMEM;
+					break;
+				}
+
+				m->end = old_map->start;
+				list_add_tail(&m->node, &merged);
+				new_map->start = old_map->end;
+			}
+		} else {
+			/*
+			 *      |new......
+			 * |old....
+			 */
+			if (new_map->end < old_map->end) {
+				/*
+				 *      |new..|   -> x
+				 * |old.........| -> |old.........|
+				 */
+				map__put(new_map);
+				new_map = NULL;
+				break;
+			} else {
+				/*
+				 *      |new......| ->         |new...|
+				 * |old....|        -> |old....|
+				 */
+				new_map->start = old_map->end;
+			}
+		}
+	}
+
+	/*
+	 * Hand the split-off pieces over to kmaps, or just drop them when
+	 * the merge failed part way through.
+	 */
+	while (!list_empty(&merged)) {
+		old_map = list_entry(merged.next, struct map, node);
+		list_del_init(&old_map->node);
+		if (!err)
+			map_groups__insert(kmaps, old_map);
+		map__put(old_map);
+	}
+
+	if (err)
+		return err;
+
+	if (new_map) {
+		map_groups__insert(kmaps, new_map);
+		map__put(new_map);
+	}
+	return 0;
+}
+
static int dso__load_kcore(struct dso *dso, struct map *map,
const char *kallsyms_filename)
{
@@ -1222,7 +1301,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
while (old_map) {
struct map *next = map_groups__next(old_map);
- if (old_map != map)
+ /*
+ * We need to preserve eBPF maps even if they are
+ * covered by kcore, because we need to access
+ * eBPF dso for source data.
+ */
+ if (old_map != map && !__map__is_bpf_prog(old_map))
map_groups__remove(kmaps, old_map);
old_map = next;
}
@@ -1256,11 +1340,16 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
map_groups__remove(kmaps, map);
map_groups__insert(kmaps, map);
map__put(map);
+ map__put(new_map);
} else {
- map_groups__insert(kmaps, new_map);
+ /*
+ * Merge kcore map into existing maps,
+ * and ensure that current maps (eBPF)
+ * stay intact.
+ */
+ if (map_groups__merge_in(kmaps, new_map))
+ goto out_err;
}
-
- map__put(new_map);
}
if (machine__is(machine, "x86_64")) {
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 6c55fa6fccec..382ba63fc554 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -69,6 +69,7 @@ struct symbol_conf {
*tid_list;
const char *symfs;
int res_sample;
+ int pad_output_len_dso;
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 4ba9e866b076..c485186a8b6d 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -40,6 +40,8 @@ enum retpoline_state_t {
* @timestamp: timestamp (if known)
* @ref: external reference (e.g. db_id of sample)
* @branch_count: the branch count when the entry was created
+ * @insn_count: the instruction count when the entry was created
+ * @cyc_count: the cycle count when the entry was created
* @db_id: id used for db-export
* @cp: call path
* @no_call: a 'call' was not seen
@@ -51,6 +53,8 @@ struct thread_stack_entry {
u64 timestamp;
u64 ref;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 db_id;
struct call_path *cp;
bool no_call;
@@ -66,6 +70,8 @@ struct thread_stack_entry {
* @sz: current maximum stack size
* @trace_nr: current trace number
* @branch_count: running branch count
+ * @insn_count: running instruction count
+ * @cyc_count: running cycle count
* @kernel_start: kernel start address
* @last_time: last timestamp
* @crp: call/return processor
@@ -79,6 +85,8 @@ struct thread_stack {
size_t sz;
u64 trace_nr;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 kernel_start;
u64 last_time;
struct call_return_processor *crp;
@@ -280,6 +288,8 @@ static int thread_stack__call_return(struct thread *thread,
cr.call_time = tse->timestamp;
cr.return_time = timestamp;
cr.branch_count = ts->branch_count - tse->branch_count;
+ cr.insn_count = ts->insn_count - tse->insn_count;
+ cr.cyc_count = ts->cyc_count - tse->cyc_count;
cr.db_id = tse->db_id;
cr.call_ref = tse->ref;
cr.return_ref = ref;
@@ -535,6 +545,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
tse->timestamp = timestamp;
tse->ref = ref;
tse->branch_count = ts->branch_count;
+ tse->insn_count = ts->insn_count;
+ tse->cyc_count = ts->cyc_count;
tse->cp = cp;
tse->no_call = no_call;
tse->trace_end = trace_end;
@@ -865,6 +877,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
}
ts->branch_count += 1;
+ ts->insn_count += sample->insn_cnt;
+ ts->cyc_count += sample->cyc_cnt;
ts->last_time = sample->time;
if (sample->flags & PERF_IP_FLAG_CALL) {
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index 71e15d4ec533..e1ec5a58f1b2 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -43,6 +43,8 @@ enum {
* @call_time: timestamp of call (if known)
* @return_time: timestamp of return (if known)
* @branch_count: number of branches seen between call and return
+ * @insn_count: approx. number of instructions between call and return
+ * @cyc_count: approx. number of cycles between call and return
* @call_ref: external reference to 'call' sample (e.g. db_id)
* @return_ref: external reference to 'return' sample (e.g. db_id)
* @db_id: id used for db-export
@@ -56,6 +58,8 @@ struct call_return {
u64 call_time;
u64 return_time;
u64 branch_count;
+ u64 insn_count;
+ u64 cyc_count;
u64 call_ref;
u64 return_ref;
u64 db_id;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index b413ba5b9835..aab7807d445f 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -141,13 +141,13 @@ static struct namespaces *__thread__namespaces(const struct thread *thread)
return list_first_entry(&thread->namespaces_list, struct namespaces, list);
}
-struct namespaces *thread__namespaces(const struct thread *thread)
+struct namespaces *thread__namespaces(struct thread *thread)
{
struct namespaces *ns;
- down_read((struct rw_semaphore *)&thread->namespaces_lock);
+ down_read(&thread->namespaces_lock);
ns = __thread__namespaces(thread);
- up_read((struct rw_semaphore *)&thread->namespaces_lock);
+ up_read(&thread->namespaces_lock);
return ns;
}
@@ -271,13 +271,13 @@ static const char *__thread__comm_str(const struct thread *thread)
return comm__str(comm);
}
-const char *thread__comm_str(const struct thread *thread)
+const char *thread__comm_str(struct thread *thread)
{
const char *str;
- down_read((struct rw_semaphore *)&thread->comm_lock);
+ down_read(&thread->comm_lock);
str = __thread__comm_str(thread);
- up_read((struct rw_semaphore *)&thread->comm_lock);
+ up_read(&thread->comm_lock);
return str;
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index cf8375c017a0..e97ef6977eb9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -76,7 +76,7 @@ static inline void thread__exited(struct thread *thread)
thread->dead = true;
}
-struct namespaces *thread__namespaces(const struct thread *thread);
+struct namespaces *thread__namespaces(struct thread *thread);
int thread__set_namespaces(struct thread *thread, u64 timestamp,
struct namespaces_event *event);
@@ -93,7 +93,7 @@ int thread__set_comm_from_proc(struct thread *thread);
int thread__comm_len(struct thread *thread);
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
-const char *thread__comm_str(const struct thread *thread);
+const char *thread__comm_str(struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
size_t thread__fprintf(struct thread *thread, FILE *fp);
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 20663a460df3..2b48816a2d2e 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -7,6 +7,7 @@
#include <errno.h>
#include <inttypes.h>
#include <math.h>
+#include <ctype.h>
#include "perf.h"
#include "debug.h"
@@ -116,6 +117,69 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
return rc;
}
+/*
+ * Parse a list of explicit time ranges into @ptime.
+ *
+ * @ostr holds one or more pieces separated by white space, each piece
+ * containing exactly one comma; every piece is handed to
+ * perf_time__parse_str(). The number of commas is taken as the number of
+ * ranges, and @size is the capacity of @ptime.
+ *
+ * Returns the number of ranges parsed, or a negative error:
+ *   -EINVAL  no comma at all, a second comma inside one piece, or ranges
+ *            whose end is not below the start of the following range
+ *   -ENOMEM  the working copy of @ostr could not be allocated
+ *   any negative value returned by perf_time__parse_str()
+ */
+static int perf_time__parse_strs(struct perf_time_interval *ptime,
+				 const char *ostr, int size)
+{
+	const char *cp;
+	char *str, *arg, *p;
+	int i, num = 0, rc = 0;
+
+	/* Count the commas */
+	for (cp = ostr; *cp; cp++)
+		num += !!(*cp == ',');
+
+	if (!num)
+		return -EINVAL;
+
+	BUG_ON(num > size);
+
+	/* Work on a copy, the pieces get NUL-terminated in place */
+	str = strdup(ostr);
+	if (!str)
+		return -ENOMEM;
+
+	/* Split the string and parse each piece, except the last */
+	for (i = 0, p = str; i < num - 1; i++) {
+		arg = p;
+		/* Find next comma, there must be one */
+		p = strchr(p, ',') + 1;
+		/*
+		 * Skip white space. The cast keeps the argument within the
+		 * range isspace() is defined for when char is signed.
+		 */
+		while (isspace((unsigned char)*p))
+			p++;
+		/* Skip the value, must not contain space or comma */
+		while (*p && !isspace((unsigned char)*p)) {
+			if (*p++ == ',') {
+				rc = -EINVAL;
+				goto out;
+			}
+		}
+		/* Split and parse */
+		if (*p)
+			*p++ = 0;
+		rc = perf_time__parse_str(ptime + i, arg);
+		if (rc < 0)
+			goto out;
+	}
+
+	/* Parse the last piece */
+	rc = perf_time__parse_str(ptime + i, p);
+	if (rc < 0)
+		goto out;
+
+	/* Check there is no overlap */
+	for (i = 0; i < num - 1; i++) {
+		if (ptime[i].end >= ptime[i + 1].start) {
+			rc = -EINVAL;
+			goto out;
+		}
+	}
+
+	rc = num;
+out:
+	free(str);
+
+	return rc;
+}
+
static int parse_percent(double *pcnt, char *str)
{
char *c, *endptr;
@@ -135,12 +199,30 @@ static int parse_percent(double *pcnt, char *str)
return 0;
}
+/*
+ * Turn a pair of fractions of the interval [@start, @end] into an absolute
+ * time range stored in @ptime.
+ *
+ * @start_pcnt and @end_pcnt are fractions in the range 0.0 to 1.0 and
+ * @start_pcnt must not exceed @end_pcnt.
+ *
+ * Returns 0 on success, -1 if a fraction is out of range or the fractions
+ * are in the wrong order.
+ */
+static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
+			    double end_pcnt, u64 start, u64 end)
+{
+	u64 total = end - start;
+
+	/*
+	 * Reject an inverted range as well, so that e.g. "20%-10%" is an
+	 * error rather than a range that can never match.
+	 */
+	if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+	    end_pcnt < 0.0 || end_pcnt > 1.0 ||
+	    start_pcnt > end_pcnt) {
+		return -1;
+	}
+
+	ptime->start = start + round(start_pcnt * total);
+	ptime->end = start + round(end_pcnt * total);
+
+	/*
+	 * Pull the end back by one unless it is the very end of the data or
+	 * the range would become empty - presumably so that adjacent percent
+	 * ranges do not share a boundary timestamp.
+	 */
+	if (ptime->end > ptime->start && ptime->end != end)
+		ptime->end -= 1;
+
+	return 0;
+}
+
static int percent_slash_split(char *str, struct perf_time_interval *ptime,
u64 start, u64 end)
{
char *p, *end_str;
double pcnt, start_pcnt, end_pcnt;
- u64 total = end - start;
int i;
/*
@@ -168,15 +250,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime,
start_pcnt = pcnt * (i - 1);
end_pcnt = pcnt * i;
- if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
- end_pcnt < 0.0 || end_pcnt > 1.0) {
- return -1;
- }
-
- ptime->start = start + round(start_pcnt * total);
- ptime->end = start + round(end_pcnt * total);
-
- return 0;
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
}
static int percent_dash_split(char *str, struct perf_time_interval *ptime,
@@ -184,7 +258,6 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
{
char *start_str = NULL, *end_str;
double start_pcnt, end_pcnt;
- u64 total = end - start;
int ret;
/*
@@ -203,16 +276,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
free(start_str);
- if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
- end_pcnt < 0.0 || end_pcnt > 1.0 ||
- start_pcnt > end_pcnt) {
- return -1;
- }
-
- ptime->start = start + round(start_pcnt * total);
- ptime->end = start + round(end_pcnt * total);
-
- return 0;
+ return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
}
typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
@@ -389,13 +453,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
ptime = &ptime_buf[i];
if (timestamp >= ptime->start &&
- ((timestamp < ptime->end && i < num - 1) ||
- (timestamp <= ptime->end && i == num - 1))) {
- break;
+ (timestamp <= ptime->end || !ptime->end)) {
+ return false;
}
}
- return (i == num) ? true : false;
+ return true;
}
int perf_time__parse_for_ranges(const char *time_str,
@@ -403,20 +466,20 @@ int perf_time__parse_for_ranges(const char *time_str,
struct perf_time_interval **ranges,
int *range_size, int *range_num)
{
+ bool has_percent = strchr(time_str, '%');
struct perf_time_interval *ptime_range;
- int size, num, ret;
+ int size, num, ret = -EINVAL;
ptime_range = perf_time__range_alloc(time_str, &size);
if (!ptime_range)
return -ENOMEM;
- if (perf_time__parse_str(ptime_range, time_str) != 0) {
+ if (has_percent) {
if (session->evlist->first_sample_time == 0 &&
session->evlist->last_sample_time == 0) {
pr_err("HINT: no first/last sample time found in perf data.\n"
"Please use latest perf binary to execute 'perf record'\n"
"(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
- ret = -EINVAL;
goto error;
}
@@ -425,21 +488,20 @@ int perf_time__parse_for_ranges(const char *time_str,
time_str,
session->evlist->first_sample_time,
session->evlist->last_sample_time);
-
- if (num < 0) {
- pr_err("Invalid time string\n");
- ret = -EINVAL;
- goto error;
- }
} else {
- num = 1;
+ num = perf_time__parse_strs(ptime_range, time_str, size);
}
+ if (num < 0)
+ goto error_invalid;
+
*range_size = size;
*range_num = num;
*ranges = ptime_range;
return 0;
+error_invalid:
+ pr_err("Invalid time string\n");
error:
free(ptime_range);
return ret;