Skip to content

Commit 92ea788

Browse files
namhyung authored and acmel committed
perf inject: Add --convert-callchain option
There are applications not built with frame pointers, so DWARF is needed to get the stack traces. `perf record --call-graph dwarf` saves the stack and register data for each sample to get the stacktrace offline. But sometimes this data may have sensitive information and we don't want to keep them in the file. This new 'perf inject --convert-callchain' option creates the callchains and discards the stack and register after that. This saves storage space and processing time for the new data file. Of course, users should remove the original data file to not keep sensitive data around. :) The down side is that it cannot handle inlined callchain entries as they all have the same IPs. Maybe we can add an option to 'perf report' to look up inlined functions using DWARF - IIUC it doesn't require stack and register data. This is an example. $ perf record --call-graph dwarf -- perf test -w noploop $ perf report --stdio --no-children --percent-limit=0 > output-prev $ perf inject -i perf.data --convert-callchain -o perf.data.out $ perf report --stdio --no-children --percent-limit=0 -i perf.data.out > output-next $ diff -u output-prev output-next ... 0.23% perf ld-linux-x86-64.so.2 [.] _dl_relocate_object_no_relro | - ---elf_dynamic_do_Rela (inlined) - _dl_relocate_object_no_relro + ---_dl_relocate_object_no_relro _dl_relocate_object dl_main _dl_sysdep_start - _dl_start_final (inlined) _dl_start _start Reviewed-by: Ian Rogers <[email protected]> Signed-off-by: Namhyung Kim <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: James Clark <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Peter Zijlstra <[email protected]> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 28cb835 commit 92ea788

2 files changed

Lines changed: 157 additions & 0 deletions

File tree

tools/perf/Documentation/perf-inject.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ include::itrace.txt[]
109109
should be used, and also --buildid-all and --switch-events may be
110110
useful.
111111

112+
--convert-callchain::
113+
Parse DWARF callchains and convert them to usual callchains. This also
114+
discards stack and register data from the samples. This will lose
115+
inlined callchain entries.
116+
112117
:GMEXAMPLECMD: inject
113118
:GMEXAMPLESUBCMD:
114119
include::guestmount.txt[]

tools/perf/builtin-inject.c

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ struct perf_inject {
122122
bool in_place_update;
123123
bool in_place_update_dry_run;
124124
bool copy_kcore_dir;
125+
bool convert_callchain;
125126
const char *input_name;
126127
struct perf_data output;
127128
u64 bytes_written;
@@ -133,6 +134,7 @@ struct perf_inject {
133134
struct guest_session guest_session;
134135
struct strlist *known_build_ids;
135136
const struct evsel *mmap_evsel;
137+
struct ip_callchain *raw_callchain;
136138
};
137139

138140
struct event_entry {
@@ -383,6 +385,90 @@ static int perf_event__repipe_sample(const struct perf_tool *tool,
383385
return perf_event__repipe_synth(tool, event);
384386
}
385387

388+
static int perf_event__convert_sample_callchain(const struct perf_tool *tool,
389+
union perf_event *event,
390+
struct perf_sample *sample,
391+
struct evsel *evsel,
392+
struct machine *machine)
393+
{
394+
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
395+
struct callchain_cursor *cursor = get_tls_callchain_cursor();
396+
union perf_event *event_copy = (void *)inject->event_copy;
397+
struct callchain_cursor_node *node;
398+
struct thread *thread;
399+
u64 sample_type = evsel->core.attr.sample_type;
400+
u32 sample_size = event->header.size;
401+
u64 i, k;
402+
int ret;
403+
404+
if (event_copy == NULL) {
405+
inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
406+
if (!inject->event_copy)
407+
return -ENOMEM;
408+
409+
event_copy = (void *)inject->event_copy;
410+
}
411+
412+
if (cursor == NULL)
413+
return -ENOMEM;
414+
415+
callchain_cursor_reset(cursor);
416+
417+
thread = machine__find_thread(machine, sample->tid, sample->pid);
418+
if (thread == NULL)
419+
goto out;
420+
421+
/* this will parse DWARF using stack and register data */
422+
ret = thread__resolve_callchain(thread, cursor, evsel, sample,
423+
/*parent=*/NULL, /*root_al=*/NULL,
424+
PERF_MAX_STACK_DEPTH);
425+
thread__put(thread);
426+
if (ret != 0)
427+
goto out;
428+
429+
/* copy kernel callchain and context entries */
430+
for (i = 0; i < sample->callchain->nr; i++) {
431+
inject->raw_callchain->ips[i] = sample->callchain->ips[i];
432+
if (sample->callchain->ips[i] == PERF_CONTEXT_USER) {
433+
i++;
434+
break;
435+
}
436+
}
437+
if (i == 0 || inject->raw_callchain->ips[i - 1] != PERF_CONTEXT_USER)
438+
inject->raw_callchain->ips[i++] = PERF_CONTEXT_USER;
439+
440+
node = cursor->first;
441+
for (k = 0; k < cursor->nr && i < PERF_MAX_STACK_DEPTH; k++) {
442+
if (machine__kernel_ip(machine, node->ip))
443+
/* kernel IPs were added already */;
444+
else if (node->ms.sym && node->ms.sym->inlined)
445+
/* we can't handle inlined callchains */;
446+
else
447+
inject->raw_callchain->ips[i++] = node->ip;
448+
449+
node = node->next;
450+
}
451+
452+
inject->raw_callchain->nr = i;
453+
sample->callchain = inject->raw_callchain;
454+
455+
out:
456+
memcpy(event_copy, event, sizeof(event->header));
457+
458+
/* adjust sample size for stack and regs */
459+
sample_size -= sample->user_stack.size;
460+
sample_size -= (hweight64(evsel->core.attr.sample_regs_user) + 1) * sizeof(u64);
461+
sample_size += (sample->callchain->nr + 1) * sizeof(u64);
462+
event_copy->header.size = sample_size;
463+
464+
/* remove sample_type {STACK,REGS}_USER for synthesize */
465+
sample_type &= ~(PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER);
466+
467+
perf_event__synthesize_sample(event_copy, sample_type,
468+
evsel->core.attr.read_format, sample);
469+
return perf_event__repipe_synth(tool, event_copy);
470+
}
471+
386472
static struct dso *findnew_dso(int pid, int tid, const char *filename,
387473
const struct dso_id *id, struct machine *machine)
388474
{
@@ -2270,6 +2356,15 @@ static int __cmd_inject(struct perf_inject *inject)
22702356
/* Allow space in the header for guest attributes */
22712357
output_data_offset += gs->session->header.data_offset;
22722358
output_data_offset = roundup(output_data_offset, 4096);
2359+
} else if (inject->convert_callchain) {
2360+
inject->tool.sample = perf_event__convert_sample_callchain;
2361+
inject->tool.fork = perf_event__repipe_fork;
2362+
inject->tool.comm = perf_event__repipe_comm;
2363+
inject->tool.exit = perf_event__repipe_exit;
2364+
inject->tool.mmap = perf_event__repipe_mmap;
2365+
inject->tool.mmap2 = perf_event__repipe_mmap2;
2366+
inject->tool.ordered_events = true;
2367+
inject->tool.ordering_requires_timestamps = true;
22732368
}
22742369

22752370
if (!inject->itrace_synth_opts.set)
@@ -2322,6 +2417,23 @@ static int __cmd_inject(struct perf_inject *inject)
23222417
perf_header__set_feat(&session->header,
23232418
HEADER_BRANCH_STACK);
23242419
}
2420+
2421+
/*
2422+
* The converted data file won't have stack and registers.
2423+
* Update the perf_event_attr to remove them before writing.
2424+
*/
2425+
if (inject->convert_callchain) {
2426+
struct evsel *evsel;
2427+
2428+
evlist__for_each_entry(session->evlist, evsel) {
2429+
evsel__reset_sample_bit(evsel, REGS_USER);
2430+
evsel__reset_sample_bit(evsel, STACK_USER);
2431+
evsel->core.attr.sample_regs_user = 0;
2432+
evsel->core.attr.sample_stack_user = 0;
2433+
evsel->core.attr.exclude_callchain_user = 0;
2434+
}
2435+
}
2436+
23252437
session->header.data_offset = output_data_offset;
23262438
session->header.data_size = inject->bytes_written;
23272439
perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
@@ -2346,6 +2458,18 @@ static int __cmd_inject(struct perf_inject *inject)
23462458
return ret;
23472459
}
23482460

2461+
static bool evsel__has_dwarf_callchain(struct evsel *evsel)
2462+
{
2463+
struct perf_event_attr *attr = &evsel->core.attr;
2464+
const u64 dwarf_callchain_flags =
2465+
PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN;
2466+
2467+
if (!attr->exclude_callchain_user)
2468+
return false;
2469+
2470+
return (attr->sample_type & dwarf_callchain_flags) == dwarf_callchain_flags;
2471+
}
2472+
23492473
int cmd_inject(int argc, const char **argv)
23502474
{
23512475
struct perf_inject inject = {
@@ -2414,6 +2538,8 @@ int cmd_inject(int argc, const char **argv)
24142538
OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
24152539
"guest mount directory under which every guest os"
24162540
" instance has a subdir"),
2541+
OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
2542+
"Generate callchains using DWARF and drop register/stack data"),
24172543
OPT_END()
24182544
};
24192545
const char * const inject_usage[] = {
@@ -2429,6 +2555,9 @@ int cmd_inject(int argc, const char **argv)
24292555

24302556
#ifndef HAVE_JITDUMP
24312557
set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2558+
#endif
2559+
#ifndef HAVE_LIBDW_SUPPORT
2560+
set_option_nobuild(options, 0, "convert-callchain", "NO_LIBDW=1", true);
24322561
#endif
24332562
argc = parse_options(argc, argv, options, inject_usage, 0);
24342563

@@ -2588,6 +2717,28 @@ int cmd_inject(int argc, const char **argv)
25882717
}
25892718
}
25902719

2720+
if (inject.convert_callchain) {
2721+
struct evsel *evsel;
2722+
2723+
if (inject.output.is_pipe || inject.session->data->is_pipe) {
2724+
pr_err("--convert-callchain cannot work with pipe\n");
2725+
goto out_delete;
2726+
}
2727+
2728+
evlist__for_each_entry(inject.session->evlist, evsel) {
2729+
if (!evsel__has_dwarf_callchain(evsel)) {
2730+
pr_err("--convert-callchain requires DWARF call graph.\n");
2731+
goto out_delete;
2732+
}
2733+
}
2734+
2735+
inject.raw_callchain = calloc(PERF_MAX_STACK_DEPTH, sizeof(u64));
2736+
if (inject.raw_callchain == NULL) {
2737+
pr_err("callchain allocation failed\n");
2738+
goto out_delete;
2739+
}
2740+
}
2741+
25912742
#ifdef HAVE_JITDUMP
25922743
if (inject.jit_mode) {
25932744
inject.tool.mmap2 = perf_event__repipe_mmap2;
@@ -2618,5 +2769,6 @@ int cmd_inject(int argc, const char **argv)
26182769
free(inject.itrace_synth_opts.vm_tm_corr_args);
26192770
free(inject.event_copy);
26202771
free(inject.guest_session.ev.event_buf);
2772+
free(inject.raw_callchain);
26212773
return ret;
26222774
}

0 commit comments

Comments
 (0)