// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-inject.c
 *
 * Builtin inject command: Examine the live mode (stdin) event stream
 * and repipe it to stdout while optionally injecting additional
 * events into it.
 */
#include "builtin.h"

#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/jit.h"
#include "util/string2.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
#include "util/tsc.h"

#include <internal/lib.h>

#include <linux/err.h>
#include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */

#include <linux/list.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/hash.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>

struct guest_event {
        struct perf_sample              sample;
        union perf_event                *event;
        char                            event_buf[PERF_SAMPLE_MAX_SIZE];
};

struct guest_id {
        /* hlist_node must be first, see free_hlist() */
        struct hlist_node               node;
        u64                             id;
        u64                             host_id;
        u32                             vcpu;
};

struct guest_tid {
        /* hlist_node must be first, see free_hlist() */
        struct hlist_node               node;
        /* Thread ID of QEMU thread */
        u32                             tid;
        u32                             vcpu;
};

struct guest_vcpu {
        /* Current host CPU */
        u32                             cpu;
        /* Thread ID of QEMU thread */
        u32                             tid;
};

struct guest_session {
        char                            *perf_data_file;
        u32                             machine_pid;
        u64                             time_offset;
        double                          time_scale;
        struct perf_tool                tool;
        struct perf_data                data;
        struct perf_session             *session;
        char                            *tmp_file_name;
        int                             tmp_fd;
        struct perf_tsc_conversion      host_tc;
        struct perf_tsc_conversion      guest_tc;
        bool                            copy_kcore_dir;
        bool                            have_tc;
        bool                            fetched;
        bool                            ready;
        u16                             dflt_id_hdr_size;
        u64                             dflt_id;
        u64                             highest_id;
        /* Array of guest_vcpu */
        struct guest_vcpu               *vcpu;
        size_t                          vcpu_cnt;
        /* Hash table for guest_id */
        struct hlist_head               heads[PERF_EVLIST__HLIST_SIZE];
        /* Hash table for guest_tid */
        struct hlist_head               tids[PERF_EVLIST__HLIST_SIZE];
        /* Place to stash next guest event */
        struct guest_event              ev;
};

struct perf_inject {
        struct perf_tool        tool;
        struct perf_session     *session;
        bool                    build_ids;
        bool                    build_id_all;
        bool                    sched_stat;
        bool                    have_auxtrace;
        bool                    strip;
        bool                    jit_mode;
        bool                    in_place_update;
        bool                    in_place_update_dry_run;
        bool                    is_pipe;
        bool                    copy_kcore_dir;
        const char              *input_name;
        struct perf_data        output;
        u64                     bytes_written;
        u64                     aux_id;
        struct list_head        samples;
        struct itrace_synth_opts itrace_synth_opts;
        char                    event_copy[PERF_SAMPLE_MAX_SIZE];
        struct perf_file_section secs[HEADER_FEAT_BITS];
        struct guest_session    guest_session;
        struct strlist          *known_build_ids;
};

struct event_entry {
        struct list_head node;
        u32              tid;
        union perf_event event[];
};

static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
                                struct machine *machine, u8 cpumode, u32 flags);

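/*
 * Everything 'perf inject' emits funnels through output_bytes(), which also
 * keeps a running total in bytes_written so the amount of injected data can
 * be accounted for afterwards.
 */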
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
        ssize_t size;

        size = perf_data__write(&inject->output, buf, sz);
        if (size < 0)
                return -errno;

        inject->bytes_written += size;
        return 0;
}

static int perf_event__repipe_synth(struct perf_tool *tool,
                                    union perf_event *event)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject,
                                                  tool);

        return output_bytes(inject, event, event->header.size);
}

static int perf_event__repipe_oe_synth(struct perf_tool *tool,
                                       union perf_event *event,
                                       struct ordered_events *oe __maybe_unused)
{
        return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_JITDUMP
static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
                               union perf_event *event __maybe_unused,
                               struct ordered_events *oe __maybe_unused)
{
        return 0;
}
#endif

static int perf_event__repipe_op2_synth(struct perf_session *session,
                                        union perf_event *event)
{
        return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_op4_synth(struct perf_session *session,
                                        union perf_event *event,
                                        u64 data __maybe_unused,
                                        const char *str __maybe_unused)
{
        return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_attr(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct evlist **pevlist)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject,
                                                  tool);
        int ret;

        ret = perf_event__process_attr(tool, event, pevlist);
        if (ret)
                return ret;

        if (!inject->is_pipe)
                return 0;

        return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_event_update(struct perf_tool *tool,
                                           union perf_event *event,
                                           struct evlist **pevlist __maybe_unused)
{
        return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_AUXTRACE_SUPPORT

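/*
 * Copy 'size' bytes of AUX data from the input file to the output through a
 * small bounce buffer, for the cases where the input is not mapped in one
 * piece (e.g. piped input).
 */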
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
        char buf[4096];
        ssize_t ssz;
        int ret;

        while (size > 0) {
                ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
                if (ssz < 0)
                        return -errno;
                ret = output_bytes(inject, buf, ssz);
                if (ret)
                        return ret;
                size -= ssz;
        }

        return 0;
}

static s64 perf_event__repipe_auxtrace(struct perf_session *session,
                                       union perf_event *event)
{
        struct perf_tool *tool = session->tool;
        struct perf_inject *inject = container_of(tool, struct perf_inject,
                                                  tool);
        int ret;

        inject->have_auxtrace = true;

        if (!inject->output.is_pipe) {
                off_t offset;

                offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
                if (offset == -1)
                        return -errno;
                ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
                                                     event, offset);
                if (ret < 0)
                        return ret;
        }

        if (perf_data__is_pipe(session->data) || !session->one_mmap) {
                ret = output_bytes(inject, event, event->header.size);
                if (ret < 0)
                        return ret;
                ret = copy_bytes(inject, session->data,
                                 event->auxtrace.size);
        } else {
                ret = output_bytes(inject, event,
                                   event->header.size + event->auxtrace.size);
        }
        if (ret < 0)
                return ret;

        return event->auxtrace.size;
}

#else

static s64
perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
                            union perf_event *event __maybe_unused)
{
        pr_err("AUX area tracing not supported\n");
        return -EINVAL;
}

#endif

static int perf_event__repipe(struct perf_tool *tool,
                              union perf_event *event,
                              struct perf_sample *sample __maybe_unused,
                              struct machine *machine __maybe_unused)
{
        return perf_event__repipe_synth(tool, event);
}

static int perf_event__drop(struct perf_tool *tool __maybe_unused,
                            union perf_event *event __maybe_unused,
                            struct perf_sample *sample __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static int perf_event__drop_aux(struct perf_tool *tool,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample,
                                struct machine *machine __maybe_unused)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

        if (!inject->aux_id)
                inject->aux_id = sample->id;

        return 0;
}

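/*
 * Strip the sampled AUX area data out of a sample event: keep the bytes
 * before the aux data (sz1, ending with the u64 size of the aux data) and
 * the bytes after it (sz2), assemble the result in inject->event_copy, and
 * zero what is now the trailing aux size field. If the computed sizes look
 * inconsistent, the event is returned unmodified.
 */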
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
                                 union perf_event *event,
                                 struct perf_sample *sample)
{
        size_t sz1 = sample->aux_sample.data - (void *)event;
        size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
        union perf_event *ev = (union perf_event *)inject->event_copy;

        if (sz1 > event->header.size || sz2 > event->header.size ||
            sz1 + sz2 > event->header.size ||
            sz1 < sizeof(struct perf_event_header) + sizeof(u64))
                return event;

        memcpy(ev, event, sz1);
        memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
        ev->header.size = sz1 + sz2;
        ((u64 *)((void *)ev + sz1))[-1] = 0;

        return ev;
}

typedef int (*inject_handler)(struct perf_tool *tool,
                              union perf_event *event,
                              struct perf_sample *sample,
                              struct evsel *evsel,
                              struct machine *machine);

static int perf_event__repipe_sample(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample,
                                     struct evsel *evsel,
                                     struct machine *machine)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject,
                                                  tool);

        if (evsel && evsel->handler) {
                inject_handler f = evsel->handler;
                return f(tool, event, sample, evsel, machine);
        }

        build_id__mark_dso_hit(tool, event, sample, evsel, machine);

        if (inject->itrace_synth_opts.set && sample->aux_sample.size)
                event = perf_inject__cut_auxtrace_sample(inject, event, sample);

        return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_mmap(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
                                   struct machine *machine)
{
        int err;

        err = perf_event__process_mmap(tool, event, sample, machine);
        perf_event__repipe(tool, event, sample, machine);

        return err;
}

#ifdef HAVE_JITDUMP
static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
                                       union perf_event *event,
                                       struct perf_sample *sample,
                                       struct machine *machine)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        u64 n = 0;
        int ret;

        /*
         * if jit marker, then inject jit mmaps and generate ELF images
         */
        ret = jit_process(inject->session, &inject->output, machine,
                          event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
        if (ret < 0)
                return ret;
        if (ret) {
                inject->bytes_written += n;
                return 0;
        }
        return perf_event__repipe_mmap(tool, event, sample, machine);
}
#endif

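/*
 * Find or create the dso for a mapped file, honouring the mapping thread's
 * mount namespace. vdso maps are special: they always live on the host, so
 * a copy of the nsinfo with need_setns cleared is used for them.
 */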
static struct dso *findnew_dso(int pid, int tid, const char *filename,
                               struct dso_id *id, struct machine *machine)
{
        struct thread *thread;
        struct nsinfo *nsi = NULL;
        struct nsinfo *nnsi;
        struct dso *dso;
        bool vdso;

        thread = machine__findnew_thread(machine, pid, tid);
        if (thread == NULL) {
                pr_err("cannot find or create a task %d/%d.\n", tid, pid);
                return NULL;
        }

        vdso = is_vdso_map(filename);
        nsi = nsinfo__get(thread->nsinfo);

        if (vdso) {
                /* The vdso maps are always on the host and not the
                 * container.  Ensure that we don't use setns to look
                 * them up.
                 */
                nnsi = nsinfo__copy(nsi);
                if (nnsi) {
                        nsinfo__put(nsi);
                        nsinfo__clear_need_setns(nnsi);
                        nsi = nnsi;
                }
                dso = machine__findnew_vdso(machine, thread);
        } else {
                dso = machine__findnew_dso_id(machine, filename, id);
        }

        if (dso) {
                mutex_lock(&dso->lock);
                nsinfo__put(dso->nsinfo);
                dso->nsinfo = nsi;
                mutex_unlock(&dso->lock);
        } else
                nsinfo__put(nsi);

        thread__put(thread);
        return dso;
}

static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
                                           union perf_event *event,
                                           struct perf_sample *sample,
                                           struct machine *machine)
{
        struct dso *dso;

        dso = findnew_dso(event->mmap.pid, event->mmap.tid,
                          event->mmap.filename, NULL, machine);

        if (dso && !dso->hit) {
                dso->hit = 1;
                dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
        }
        dso__put(dso);

        return perf_event__repipe(tool, event, sample, machine);
}

static int perf_event__repipe_mmap2(struct perf_tool *tool,
                                    union perf_event *event,
                                    struct perf_sample *sample,
                                    struct machine *machine)
{
        int err;

        err = perf_event__process_mmap2(tool, event, sample, machine);
        perf_event__repipe(tool, event, sample, machine);

        if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
                struct dso *dso;

                dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
                                  event->mmap2.filename, NULL, machine);
                if (dso) {
                        /* mark it not to inject build-id */
                        dso->hit = 1;
                }
                dso__put(dso);
        }

        return err;
}

#ifdef HAVE_JITDUMP
static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
                                        union perf_event *event,
                                        struct perf_sample *sample,
                                        struct machine *machine)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        u64 n = 0;
        int ret;

        /*
         * if jit marker, then inject jit mmaps and generate ELF images
         */
        ret = jit_process(inject->session, &inject->output, machine,
                          event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
        if (ret < 0)
                return ret;
        if (ret) {
                inject->bytes_written += n;
                return 0;
        }
        return perf_event__repipe_mmap2(tool, event, sample, machine);
}
#endif

static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
                                            union perf_event *event,
                                            struct perf_sample *sample,
                                            struct machine *machine)
{
        struct dso_id dso_id = {
                .maj = event->mmap2.maj,
                .min = event->mmap2.min,
                .ino = event->mmap2.ino,
                .ino_generation = event->mmap2.ino_generation,
        };
        struct dso *dso;

        if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
                /* cannot use dso_id since it'd have invalid info */
                dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
                                  event->mmap2.filename, NULL, machine);
                if (dso) {
                        /* mark it not to inject build-id */
                        dso->hit = 1;
                }
                dso__put(dso);
                return 0;
        }

        dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
                          event->mmap2.filename, &dso_id, machine);

        if (dso && !dso->hit) {
                dso->hit = 1;
                dso__inject_build_id(dso, tool, machine, sample->cpumode,
                                     event->mmap2.flags);
        }
        dso__put(dso);

        perf_event__repipe(tool, event, sample, machine);

        return 0;
}

static int perf_event__repipe_fork(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
                                   struct machine *machine)
{
        int err;

        err = perf_event__process_fork(tool, event, sample, machine);
        perf_event__repipe(tool, event, sample, machine);

        return err;
}

static int perf_event__repipe_comm(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
                                   struct machine *machine)
{
        int err;

        err = perf_event__process_comm(tool, event, sample, machine);
        perf_event__repipe(tool, event, sample, machine);

        return err;
}

static int perf_event__repipe_namespaces(struct perf_tool *tool,
                                         union perf_event *event,
                                         struct perf_sample *sample,
                                         struct machine *machine)
{
        int err = perf_event__process_namespaces(tool, event, sample, machine);

        perf_event__repipe(tool, event, sample, machine);

        return err;
}

static int perf_event__repipe_exit(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
                                   struct machine *machine)
{
        int err;

        err = perf_event__process_exit(tool, event, sample, machine);
        perf_event__repipe(tool, event, sample, machine);

        return err;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_event__repipe_tracing_data(struct perf_session *session,
                                           union perf_event *event)
{
        perf_event__repipe_synth(session->tool, event);

        return perf_event__process_tracing_data(session, event);
}
#endif

static int dso__read_build_id(struct dso *dso)
{
        struct nscookie nsc;

        if (dso->has_build_id)
                return 0;

        mutex_lock(&dso->lock);
        nsinfo__mountns_enter(dso->nsinfo, &nsc);
        if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
                dso->has_build_id = true;
        else if (dso->nsinfo) {
                char *new_name;

                new_name = filename_with_chroot(dso->nsinfo->pid,
                                                dso->long_name);
                if (new_name && filename__read_build_id(new_name, &dso->bid) > 0)
                        dso->has_build_id = true;
                free(new_name);
        }
        nsinfo__mountns_exit(&nsc);
        mutex_unlock(&dso->lock);

        return dso->has_build_id ? 0 : -1;
}

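/*
 * Parse the known build ids string (from the --known-build-ids option) into
 * a strlist. Each entry is expected to be a "<hex build-id> <dso name>"
 * pair; entries that are malformed (no dso name, odd-length or oversized
 * build-id, or non-hex digits) are dropped.
 */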
static struct strlist *perf_inject__parse_known_build_ids(
        const char *known_build_ids_string)
{
        struct str_node *pos, *tmp;
        struct strlist *known_build_ids;
        int bid_len;

        known_build_ids = strlist__new(known_build_ids_string, NULL);
        if (known_build_ids == NULL)
                return NULL;
        strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
                const char *build_id, *dso_name;

                build_id = skip_spaces(pos->s);
                dso_name = strchr(build_id, ' ');
                if (dso_name == NULL) {
                        strlist__remove(known_build_ids, pos);
                        continue;
                }
                bid_len = dso_name - pos->s;
                dso_name = skip_spaces(dso_name);
                if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
                        strlist__remove(known_build_ids, pos);
                        continue;
                }
                for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
                        if (!isxdigit(build_id[2 * ix]) ||
                            !isxdigit(build_id[2 * ix + 1])) {
                                strlist__remove(known_build_ids, pos);
                                break;
                        }
                }
        }
        return known_build_ids;
}

static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
                                               struct dso *dso)
{
        struct str_node *pos;
        int bid_len;

        strlist__for_each_entry(pos, inject->known_build_ids) {
                const char *build_id, *dso_name;

                build_id = skip_spaces(pos->s);
                dso_name = strchr(build_id, ' ');
                bid_len = dso_name - pos->s;
                dso_name = skip_spaces(dso_name);
                if (strcmp(dso->long_name, dso_name))
                        continue;
                for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
                        dso->bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
                                             hex(build_id[2 * ix + 1]));
                }
                dso->bid.size = bid_len / 2;
                dso->has_build_id = 1;
                return true;
        }
        return false;
}

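/*
 * Synthesize a build id event for the dso, unless it is anonymous or
 * huge-page backed memory with no backing file. A matching entry in the
 * user-supplied known build ids, if any, takes precedence over reading the
 * build id from the file itself.
 */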
static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
                                struct machine *machine, u8 cpumode, u32 flags)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject,
                                                  tool);
        int err;

        if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
                return 0;
        if (is_no_dso_memory(dso->long_name))
                return 0;

        if (inject->known_build_ids != NULL &&
            perf_inject__lookup_known_build_id(inject, dso))
                return 1;

        if (dso__read_build_id(dso) < 0) {
                pr_debug("no build_id found for %s\n", dso->long_name);
                return -1;
        }

        err = perf_event__synthesize_build_id(tool, dso, cpumode,
                                              perf_event__repipe, machine);
        if (err) {
                pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
                return -1;
        }

        return 0;
}

int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
                               struct perf_sample *sample,
                               struct evsel *evsel __maybe_unused,
                               struct machine *machine)
{
        struct addr_location al;
        struct thread *thread;

        thread = machine__findnew_thread(machine, sample->pid, sample->tid);
        if (thread == NULL) {
                pr_err("problem processing %d event, skipping it.\n",
                       event->header.type);
                goto repipe;
        }

        if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
                if (!al.map->dso->hit) {
                        al.map->dso->hit = 1;
                        dso__inject_build_id(al.map->dso, tool, machine,
                                             sample->cpumode, al.map->flags);
                }
        }

        thread__put(thread);
repipe:
        perf_event__repipe(tool, event, sample, machine);
        return 0;
}

static int perf_inject__sched_process_exit(struct perf_tool *tool,
                                           union perf_event *event __maybe_unused,
                                           struct perf_sample *sample,
                                           struct evsel *evsel __maybe_unused,
                                           struct machine *machine __maybe_unused)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        struct event_entry *ent;

        list_for_each_entry(ent, &inject->samples, node) {
                if (sample->tid == ent->tid) {
                        list_del_init(&ent->node);
                        free(ent);
                        break;
                }
        }

        return 0;
}

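/*
 * For the sched_stat mode, stash a copy of the last sched_switch event seen
 * for each thread, so a later sched_stat sample for that thread can be
 * re-emitted with the stashed event's contents, see perf_inject__sched_stat().
 */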
static int perf_inject__sched_switch(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample,
                                     struct evsel *evsel,
                                     struct machine *machine)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        struct event_entry *ent;

        perf_inject__sched_process_exit(tool, event, sample, evsel, machine);

        ent = malloc(event->header.size + sizeof(struct event_entry));
        if (ent == NULL) {
                color_fprintf(stderr, PERF_COLOR_RED,
                             "Not enough memory to process sched switch event!");
                return -1;
        }

        ent->tid = sample->tid;
        memcpy(&ent->event, event, event->header.size);
        list_add(&ent->node, &inject->samples);
        return 0;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_inject__sched_stat(struct perf_tool *tool,
                                   union perf_event *event __maybe_unused,
                                   struct perf_sample *sample,
                                   struct evsel *evsel,
                                   struct machine *machine)
{
        struct event_entry *ent;
        union perf_event *event_sw;
        struct perf_sample sample_sw;
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
        u32 pid = evsel__intval(evsel, sample, "pid");

        list_for_each_entry(ent, &inject->samples, node) {
                if (pid == ent->tid)
                        goto found;
        }

        return 0;
found:
        event_sw = &ent->event[0];
        evsel__parse_sample(evsel, event_sw, &sample_sw);

        sample_sw.period = sample->period;
        sample_sw.time   = sample->time;
        perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
                                      evsel->core.attr.read_format, &sample_sw);
        build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
        return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
        if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
                return NULL;
        return &gs->vcpu[vcpu];
}

static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
        ssize_t ret = writen(gs->tmp_fd, buf, sz);

        return ret < 0 ? ret : 0;
}

static int guest_session__repipe(struct perf_tool *tool,
                                 union perf_event *event,
                                 struct perf_sample *sample __maybe_unused,
                                 struct machine *machine __maybe_unused)
{
        struct guest_session *gs = container_of(tool, struct guest_session, tool);

        return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
        struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
        int hash;

        if (!guest_tid)
                return -ENOMEM;

        guest_tid->tid = tid;
        guest_tid->vcpu = vcpu;
        hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
        hlist_add_head(&guest_tid->node, &gs->tids[hash]);

        return 0;
}

static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
                                 union perf_event *event,
                                 u64 offset __maybe_unused, void *data)
{
        struct guest_session *gs = data;
        unsigned int vcpu;
        struct guest_vcpu *guest_vcpu;
        int ret;

        if (event->header.type != PERF_RECORD_COMM ||
            event->comm.pid != gs->machine_pid)
                return 0;

        /*
         * The QEMU option -name debug-threads=on causes thread names to be
         * formatted as below, although this is not an ABI. Also, libvirt
         * seems to use it by default. Here we rely on it to tell us which
         * thread is which VCPU.
         */
        ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
        if (ret <= 0)
                return ret;
        pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
                 event->comm.tid, event->comm.comm, vcpu);
        if (vcpu > INT_MAX) {
                pr_err("Invalid VCPU %u\n", vcpu);
                return -EINVAL;
        }
        guest_vcpu = guest_session__vcpu(gs, vcpu);
        if (!guest_vcpu)
                return -ENOMEM;
        if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
                pr_err("Fatal error: Two threads found with the same VCPU\n");
                return -EINVAL;
        }
        guest_vcpu->tid = event->comm.tid;

        return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
        return perf_session__peek_events(session, session->header.data_offset,
                                         session->header.data_size,
                                         host_peek_vm_comms_cb, gs);
}

static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
        return evlist__id2sid(evlist, id);
}

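/*
 * Guest sample IDs may collide with host IDs, so allocate replacement IDs by
 * counting up from the highest ID seen, skipping any value the host evlist
 * already uses.
 */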
static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
        do {
                gs->highest_id += 1;
        } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

        return gs->highest_id;
}

static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
        struct guest_id *guest_id = zalloc(sizeof(*guest_id));
        int hash;

        if (!guest_id)
                return -ENOMEM;

        guest_id->id = id;
        guest_id->host_id = host_id;
        guest_id->vcpu = vcpu;
        hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
        hlist_add_head(&guest_id->node, &gs->heads[hash]);

        return 0;
}

static u64 evlist__find_highest_id(struct evlist *evlist)
{
        struct evsel *evsel;
        u64 highest_id = 1;

        evlist__for_each_entry(evlist, evsel) {
                u32 j;

                for (j = 0; j < evsel->core.ids; j++) {
                        u64 id = evsel->core.id[j];

                        if (id > highest_id)
                                highest_id = id;
                }
        }

        return highest_id;
}

static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
        struct evlist *evlist = gs->session->evlist;
        struct evsel *evsel;
        int ret;

        evlist__for_each_entry(evlist, evsel) {
                u32 j;

                for (j = 0; j < evsel->core.ids; j++) {
                        struct perf_sample_id *sid;
                        u64 host_id;
                        u64 id;

                        id = evsel->core.id[j];
                        sid = evlist__id2sid(evlist, id);
                        if (!sid || sid->cpu.cpu == -1)
                                continue;
                        host_id = guest_session__allocate_new_id(gs, host_evlist);
                        ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
        struct hlist_head *head;
        struct guest_id *guest_id;
        int hash;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &gs->heads[hash];

        hlist_for_each_entry(guest_id, head, node)
                if (guest_id->id == id)
                        return guest_id;

        return NULL;
}

static int process_attr(struct perf_tool *tool, union perf_event *event,
                        struct perf_sample *sample __maybe_unused,
                        struct machine *machine __maybe_unused)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

        return perf_event__process_attr(tool, event, &inject->session->evlist);
}

static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
{
        struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
        struct perf_event_attr attr = evsel->core.attr;
        u64 *id_array;
        u32 *vcpu_array;
        int ret = -ENOMEM;
        u32 i;

        id_array = calloc(evsel->core.ids, sizeof(*id_array));
        if (!id_array)
                return -ENOMEM;

        vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
        if (!vcpu_array)
                goto out;

        for (i = 0; i < evsel->core.ids; i++) {
                u64 id = evsel->core.id[i];
                struct guest_id *guest_id = guest_session__lookup_id(gs, id);

                if (!guest_id) {
                        pr_err("Failed to find guest id %"PRIu64"\n", id);
                        ret = -EINVAL;
                        goto out;
                }
                id_array[i] = guest_id->host_id;
                vcpu_array[i] = guest_id->vcpu;
        }

        attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
        attr.exclude_host = 1;
        attr.exclude_guest = 0;

        ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
                                          id_array, process_attr);
        if (ret)
                pr_err("Failed to add guest attr.\n");

        for (i = 0; i < evsel->core.ids; i++) {
                struct perf_sample_id *sid;
                u32 vcpu = vcpu_array[i];

                sid = evlist__id2sid(inject->session->evlist, id_array[i]);
                /* Guest event is per-thread from the host point of view */
                sid->cpu.cpu = -1;
                sid->tid = gs->vcpu[vcpu].tid;
                sid->machine_pid = gs->machine_pid;
                sid->vcpu.cpu = vcpu;
        }
out:
        free(vcpu_array);
        free(id_array);
        return ret;
}

static int guest_session__add_attrs(struct guest_session *gs)
{
        struct evlist *evlist = gs->session->evlist;
        struct evsel *evsel;
        int ret;

        evlist__for_each_entry(evlist, evsel) {
                ret = guest_session__add_attr(gs, evsel);
                if (ret)
                        return ret;
        }

        return 0;
}

static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
        struct perf_session *session = inject->session;
        struct evlist *evlist = session->evlist;
        struct machine *machine = &session->machines.host;
        size_t from = evlist->core.nr_entries - new_cnt;

        return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
                                                 evlist, machine, from);
}

static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
        struct hlist_head *head;
        struct guest_tid *guest_tid;
        int hash;

        hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
        head = &gs->tids[hash];

        hlist_for_each_entry(guest_tid, head, node)
                if (guest_tid->tid == tid)
                        return guest_tid;

        return NULL;
}

static bool dso__is_in_kernel_space(struct dso *dso)
{
        if (dso__is_vdso(dso))
                return false;

        return dso__is_kcore(dso) ||
               dso->kernel ||
               is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

static u64 evlist__first_id(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.ids)
                        return evsel->core.id[0];
        }
        return 0;
}

static int process_build_id(struct perf_tool *tool,
                            union perf_event *event,
                            struct perf_sample *sample __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

        return perf_event__process_build_id(inject->session, event);
}

static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
        struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
        u8 cpumode = dso__is_in_kernel_space(dso) ?
                        PERF_RECORD_MISC_GUEST_KERNEL :
                        PERF_RECORD_MISC_GUEST_USER;

        if (!machine)
                return -ENOMEM;

        dso->hit = 1;

        return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
                                               process_build_id, machine);
}

static int guest_session__add_build_ids(struct guest_session *gs)
{
        struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
        struct machine *machine = &gs->session->machines.host;
        struct dso *dso;
        int ret;

        /* Build IDs will be put in the Build ID feature section */
        perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

        dsos__for_each_with_build_id(dso, &machine->dsos.head) {
                ret = synthesize_build_id(inject, dso, gs->machine_pid);
                if (ret)
                        return ret;
        }

        return 0;
}

static int guest_session__ksymbol_event(struct perf_tool *tool,
                                        union perf_event *event,
                                        struct perf_sample *sample __maybe_unused,
                                        struct machine *machine __maybe_unused)
{
        struct guest_session *gs = container_of(tool, struct guest_session, tool);

        /* Only support out-of-line i.e. no BPF support */
        if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
                return 0;

        return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
        char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
        struct perf_session *session;
        int ret;

        /* Only these events will be injected */
        gs->tool.mmap           = guest_session__repipe;
        gs->tool.mmap2          = guest_session__repipe;
        gs->tool.comm           = guest_session__repipe;
        gs->tool.fork           = guest_session__repipe;
        gs->tool.exit           = guest_session__repipe;
        gs->tool.lost           = guest_session__repipe;
        gs->tool.context_switch = guest_session__repipe;
        gs->tool.ksymbol        = guest_session__ksymbol_event;
        gs->tool.text_poke      = guest_session__repipe;
        /*
         * Processing a build ID creates a struct dso with that build ID. Later,
         * all guest dsos are iterated and the build IDs processed into the host
         * session where they will be output to the Build ID feature section
         * when the perf.data file header is written.
         */
        gs->tool.build_id       = perf_event__process_build_id;
        /* Process the id index to know what VCPU an ID belongs to */
        gs->tool.id_index       = perf_event__process_id_index;

        gs->tool.ordered_events = true;
        gs->tool.ordering_requires_timestamps = true;

        gs->data.path   = name;
        gs->data.force  = force;
        gs->data.mode   = PERF_DATA_MODE_READ;

        session = perf_session__new(&gs->data, &gs->tool);
        if (IS_ERR(session))
                return PTR_ERR(session);
        gs->session = session;

        /*
         * Initial events have zero'd ID samples. Get default ID sample size
         * used for removing them.
         */
        gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
        /* And default ID for adding back a host-compatible ID sample */
        gs->dflt_id = evlist__first_id(session->evlist);
        if (!gs->dflt_id) {
                pr_err("Guest data has no sample IDs\n");
                return -EINVAL;
        }

        /* Temporary file for guest events */
        gs->tmp_file_name = strdup(tmp_file_name);
        if (!gs->tmp_file_name)
                return -ENOMEM;
        gs->tmp_fd = mkstemp(gs->tmp_file_name);
        if (gs->tmp_fd < 0)
                return -errno;

        if (zstd_init(&gs->session->zstd_data, 0) < 0)
                pr_warning("Guest session decompression initialization failed.\n");

        /*
         * perf does not support processing 2 sessions simultaneously, so output
         * guest events to a temporary file.
         */
        ret = perf_session__process_events(gs->session);
        if (ret)
                return ret;

        if (lseek(gs->tmp_fd, 0, SEEK_SET))
                return -errno;

        return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
        struct hlist_node *pos, *n;
        size_t i;

        for (i = 0; i < hlist_sz; ++i) {
                hlist_for_each_safe(pos, n, &heads[i]) {
                        hlist_del(pos);
                        free(pos);
                }
        }
}

static void guest_session__exit(struct guest_session *gs)
{
        if (gs->session) {
                perf_session__delete(gs->session);
                free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
                free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
        }
        if (gs->tmp_file_name) {
                if (gs->tmp_fd >= 0)
                        close(gs->tmp_fd);
                unlink(gs->tmp_file_name);
                free(gs->tmp_file_name);
        }
        free(gs->vcpu);
        free(gs->perf_data_file);
}

static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
        tc->time_shift          = time_conv->time_shift;
        tc->time_mult           = time_conv->time_mult;
        tc->time_zero           = time_conv->time_zero;
        tc->time_cycles         = time_conv->time_cycles;
        tc->time_mask           = time_conv->time_mask;
        tc->cap_user_time_zero  = time_conv->cap_user_time_zero;
        tc->cap_user_time_short = time_conv->cap_user_time_short;
}

static void guest_session__get_tc(struct guest_session *gs)
{
        struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

        get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
        get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}

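/*
 * Convert a guest perf time to host perf time: map the guest time to a TSC
 * value, undo the guest's TSC offset and scaling relative to the host
 * (time_offset, time_scale), then map the resulting host TSC value back to
 * perf time.
 */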
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
        u64 tsc;

        if (!guest_time) {
                *host_time = 0;
                return;
        }

        if (gs->guest_tc.cap_user_time_zero)
                tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
        else
                tsc = guest_time;

        /*
         * This is the correct order of operations for x86 if the TSC Offset and
         * Multiplier values are used.
         */
        tsc -= gs->time_offset;
        tsc /= gs->time_scale;

        if (gs->host_tc.cap_user_time_zero)
                *host_time = tsc_to_perf_time(tsc, &gs->host_tc);
        else
                *host_time = tsc;
}

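/*
 * Read the next event from the temporary guest file into gs->ev, parse its
 * sample and convert its timestamp to host time. EOF is signalled to the
 * caller by a zero-sized event header.
 */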
static int guest_session__fetch(struct guest_session *gs)
{
        void *buf = gs->ev.event_buf;
        struct perf_event_header *hdr = buf;
        size_t hdr_sz = sizeof(*hdr);
        ssize_t ret;

        ret = readn(gs->tmp_fd, buf, hdr_sz);
        if (ret < 0)
                return ret;

        if (!ret) {
                /* Zero size means EOF */
                hdr->size = 0;
                return 0;
        }

        buf += hdr_sz;

        ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
        if (ret < 0)
                return ret;

        gs->ev.event = (union perf_event *)gs->ev.event_buf;
        gs->ev.sample.time = 0;

        if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
                pr_err("Unexpected type fetching guest event\n");
                return 0;
        }

        ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
        if (ret) {
                pr_err("Parse failed fetching guest event\n");
                return ret;
        }

        if (!gs->have_tc) {
                guest_session__get_tc(gs);
                gs->have_tc = true;
        }

        guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

        return 0;
}

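/*
 * Append an ID sample for 'sample' directly after the event payload and grow
 * the event header size accordingly; the appended size must stay 8-byte
 * aligned.
 */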
static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
                                    const struct perf_sample *sample)
{
        struct evsel *evsel;
        void *array;
        int ret;

        evsel = evlist__id2evsel(evlist, sample->id);
        array = ev;

        if (!evsel) {
                pr_err("No evsel for id %"PRIu64"\n", sample->id);
                return -EINVAL;
        }

        array += ev->header.size;
        ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
        if (ret < 0)
                return ret;

        if (ret & 7) {
                pr_err("Bad id sample size %d\n", ret);
                return -EINVAL;
        }

        ev->header.size += ret;

        return 0;
}

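/*
 * Inject stashed guest events with timestamps up to 'timestamp'. Each event
 * has its cpumode switched to the equivalent guest mode, its guest ID sample
 * stripped, its sample ID and VCPU remapped to host values, and a new
 * host-compatible ID sample appended before it is written to the output.
 */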
1440 static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
1441 {
1442         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1443         int ret;
1444
1445         if (!gs->ready)
1446                 return 0;
1447
1448         while (1) {
1449                 struct perf_sample *sample;
1450                 struct guest_id *guest_id;
1451                 union perf_event *ev;
1452                 u16 id_hdr_size;
1453                 u8 cpumode;
1454                 u64 id;
1455
1456                 if (!gs->fetched) {
1457                         ret = guest_session__fetch(gs);
1458                         if (ret)
1459                                 return ret;
1460                         gs->fetched = true;
1461                 }
1462
1463                 ev = gs->ev.event;
1464                 sample = &gs->ev.sample;
1465
1466                 if (!ev->header.size)
1467                         return 0; /* EOF */
1468
1469                 if (sample->time > timestamp)
1470                         return 0;
1471
1472                 /* Change cpumode to guest */
1473                 cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1474                 if (cpumode & PERF_RECORD_MISC_USER)
1475                         cpumode = PERF_RECORD_MISC_GUEST_USER;
1476                 else
1477                         cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
1478                 ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
1479                 ev->header.misc |= cpumode;
1480
1481                 id = sample->id;
1482                 if (!id) {
1483                         id = gs->dflt_id;
1484                         id_hdr_size = gs->dflt_id_hdr_size;
1485                 } else {
1486                         struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);
1487
1488                         id_hdr_size = evsel__id_hdr_size(evsel);
1489                 }
1490
1491                 if (id_hdr_size & 7) {
1492                         pr_err("Bad id_hdr_size %u\n", id_hdr_size);
1493                         return -EINVAL;
1494                 }
1495
1496                 if (ev->header.size & 7) {
1497                         pr_err("Bad event size %u\n", ev->header.size);
1498                         return -EINVAL;
1499                 }
1500
1501                 /* Remove guest id sample */
1502                 ev->header.size -= id_hdr_size;
1503
1504                 if (ev->header.size & 7) {
1505                         pr_err("Bad raw event size %u\n", ev->header.size);
1506                         return -EINVAL;
1507                 }
1508
1509                 guest_id = guest_session__lookup_id(gs, id);
1510                 if (!guest_id) {
1511                         pr_err("Guest event with unknown id %"PRIu64"\n",
1512                                id);
1513                         return -EINVAL;
1514                 }
1515
1516                 /* Change to host ID to avoid conflicting ID values */
1517                 sample->id = guest_id->host_id;
1518                 sample->stream_id = guest_id->host_id;
1519
1520                 if (sample->cpu != (u32)-1) {
1521                         if (sample->cpu >= gs->vcpu_cnt) {
1522                                 pr_err("Guest event with unknown VCPU %u\n",
1523                                        sample->cpu);
1524                                 return -EINVAL;
1525                         }
1526                         /* Change to host CPU instead of guest VCPU */
1527                         sample->cpu = gs->vcpu[sample->cpu].cpu;
1528                 }
1529
1530                 /* New id sample with new ID and CPU */
1531                 ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
1532                 if (ret)
1533                         return ret;
1534
1535                 if (ev->header.size & 7) {
1536                         pr_err("Bad new event size %u\n", ev->header.size);
1537                         return -EINVAL;
1538                 }
1539
1540                 gs->fetched = false;
1541
1542                 ret = output_bytes(inject, ev, ev->header.size);
1543                 if (ret)
1544                         return ret;
1545         }
1546 }
1547
1548 static int guest_session__flush_events(struct guest_session *gs)
1549 {
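             /* A timestamp of (u64)-1 means no limit: inject all remaining events */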
1550         return guest_session__inject_events(gs, -1);
1551 }
1552
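     /*
      * Before repiping a host event, inject any pending guest events that are
      * not later than it, keeping the output stream in rough time order.
      */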
1553 static int host__repipe(struct perf_tool *tool,
1554                         union perf_event *event,
1555                         struct perf_sample *sample,
1556                         struct machine *machine)
1557 {
1558         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1559         int ret;
1560
1561         ret = guest_session__inject_events(&inject->guest_session, sample->time);
1562         if (ret)
1563                 return ret;
1564
1565         return perf_event__repipe(tool, event, sample, machine);
1566 }
1567
1568 static int host__finished_init(struct perf_session *session, union perf_event *event)
1569 {
1570         struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
1571         struct guest_session *gs = &inject->guest_session;
1572         int ret;
1573
1574         /*
1575          * Peek through host COMM events to find QEMU threads and the VCPU they
1576          * are running.
1577          */
1578         ret = host_peek_vm_comms(session, gs);
1579         if (ret)
1580                 return ret;
1581
1582         if (!gs->vcpu_cnt) {
1583                 pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
1584                 return -EINVAL;
1585         }
1586
1587         /*
1588          * Allocate new (unused) host sample IDs and map them to the guest IDs.
1589          */
1590         gs->highest_id = evlist__find_highest_id(session->evlist);
1591         ret = guest_session__map_ids(gs, session->evlist);
1592         if (ret)
1593                 return ret;
1594
1595         ret = guest_session__add_attrs(gs);
1596         if (ret)
1597                 return ret;
1598
1599         ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
1600         if (ret) {
1601                 pr_err("Failed to synthesize id_index\n");
1602                 return ret;
1603         }
1604
1605         ret = guest_session__add_build_ids(gs);
1606         if (ret) {
1607                 pr_err("Failed to add guest build IDs\n");
1608                 return ret;
1609         }
1610
1611         gs->ready = true;
1612
1613         ret = guest_session__inject_events(gs, 0);
1614         if (ret)
1615                 return ret;
1616
1617         return perf_event__repipe_op2_synth(session, event);
1618 }
1619
1620 /*
1621  * Obey finished-round ordering. The FINISHED_ROUND event is processed first,
1622  * which flushes host events to file up until the last flush time. Guest
1623  * events are then injected up to the same time, and finally the
1624  * FINISHED_ROUND event itself is written out.
1625  */
1626 static int host__finished_round(struct perf_tool *tool,
1627                                 union perf_event *event,
1628                                 struct ordered_events *oe)
1629 {
1630         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1631         int ret = perf_event__process_finished_round(tool, event, oe);
1632         u64 timestamp = ordered_events__last_flush_time(oe);
1633
1634         if (ret)
1635                 return ret;
1636
1637         ret = guest_session__inject_events(&inject->guest_session, timestamp);
1638         if (ret)
1639                 return ret;
1640
1641         return perf_event__repipe_oe_synth(tool, event, oe);
1642 }
1643
1644 static int host__context_switch(struct perf_tool *tool,
1645                                 union perf_event *event,
1646                                 struct perf_sample *sample,
1647                                 struct machine *machine)
1648 {
1649         struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1650         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1651         struct guest_session *gs = &inject->guest_session;
1652         u32 pid = event->context_switch.next_prev_pid;
1653         u32 tid = event->context_switch.next_prev_tid;
1654         struct guest_tid *guest_tid;
1655         u32 vcpu;
1656
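             /*
              * Only switch-in events for threads of the VM process (e.g. QEMU)
              * are of interest: they reveal which host CPU a VCPU thread is
              * running on.
              */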
1657         if (out || pid != gs->machine_pid)
1658                 goto out;
1659
1660         guest_tid = guest_session__lookup_tid(gs, tid);
1661         if (!guest_tid)
1662                 goto out;
1663
1664         if (sample->cpu == (u32)-1) {
1665                 pr_err("Switch event does not have CPU\n");
1666                 return -EINVAL;
1667         }
1668
1669         vcpu = guest_tid->vcpu;
1670         if (vcpu >= gs->vcpu_cnt)
1671                 return -EINVAL;
1672
1673         /* Guest is switching in, record which CPU the VCPU is now running on */
1674         gs->vcpu[vcpu].cpu = sample->cpu;
1675 out:
1676         return host__repipe(tool, event, sample, machine);
1677 }
1678
1679 static void sig_handler(int sig __maybe_unused)
1680 {
1681         session_done = 1;
1682 }
1683
1684 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1685 {
1686         struct perf_event_attr *attr = &evsel->core.attr;
1687         const char *name = evsel__name(evsel);
1688
1689         if (!(attr->sample_type & sample_type)) {
1690                 pr_err("Samples for %s event do not have %s attribute set.\n",
1691                         name, sample_msg);
1692                 return -EINVAL;
1693         }
1694
1695         return 0;
1696 }
1697
1698 static int drop_sample(struct perf_tool *tool __maybe_unused,
1699                        union perf_event *event __maybe_unused,
1700                        struct perf_sample *sample __maybe_unused,
1701                        struct evsel *evsel __maybe_unused,
1702                        struct machine *machine __maybe_unused)
1703 {
1704         return 0;
1705 }
1706
1707 static void strip_init(struct perf_inject *inject)
1708 {
1709         struct evlist *evlist = inject->session->evlist;
1710         struct evsel *evsel;
1711
1712         inject->tool.context_switch = perf_event__drop;
1713
1714         evlist__for_each_entry(evlist, evsel)
1715                 evsel->handler = drop_sample;
1716 }
1717
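     /*
      * For example (the option value is illustrative only):
      *
      *   perf inject -i perf.data --vm-time-correlation=dry-run
      *
      * An optional leading "dry-run" only reports what would be changed; any
      * remaining text is saved as vm_tm_corr_args for the trace decoder.
      */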
1718 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
1719 {
1720         struct perf_inject *inject = opt->value;
1721         const char *args;
1722         char *dry_run;
1723
1724         if (unset)
1725                 return 0;
1726
1727         inject->itrace_synth_opts.set = true;
1728         inject->itrace_synth_opts.vm_time_correlation = true;
1729         inject->in_place_update = true;
1730
1731         if (!str)
1732                 return 0;
1733
1734         dry_run = skip_spaces(str);
1735         if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
1736                 inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
1737                 inject->in_place_update_dry_run = true;
1738                 args = dry_run + strlen("dry-run");
1739         } else {
1740                 args = str;
1741         }
1742
1743         inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
1744
1745         return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
1746 }
1747
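     /*
      * For example (file names and PID are illustrative only):
      *
      *   perf inject -i host.data -o out.data --guest-data=guest.data,12345
      *
      * injects events from guest.data, recorded inside the VM whose QEMU
      * process has host PID 12345. The optional third and fourth fields
      * supply a guest timestamp offset and scale factor.
      */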
1748 static int parse_guest_data(const struct option *opt, const char *str, int unset)
1749 {
1750         struct perf_inject *inject = opt->value;
1751         struct guest_session *gs = &inject->guest_session;
1752         char *tok;
1753         char *s;
1754
1755         if (unset)
1756                 return 0;
1757
1758         if (!str)
1759                 goto bad_args;
1760
1761         s = strdup(str);
1762         if (!s)
1763                 return -ENOMEM;
1764
1765         gs->perf_data_file = strsep(&s, ",");
1766         if (!gs->perf_data_file)
1767                 goto bad_args;
1768
1769         gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
1770         if (gs->copy_kcore_dir)
1771                 inject->output.is_dir = true;
1772
1773         tok = strsep(&s, ",");
1774         if (!tok)
1775                 goto bad_args;
1776         gs->machine_pid = strtoul(tok, NULL, 0);
1777         if (!gs->machine_pid)
1778                 goto bad_args;
1779
1780         gs->time_scale = 1;
1781
1782         tok = strsep(&s, ",");
1783         if (!tok)
1784                 goto out;
1785         gs->time_offset = strtoull(tok, NULL, 0);
1786
1787         tok = strsep(&s, ",");
1788         if (!tok)
1789                 goto out;
1790         gs->time_scale = strtod(tok, NULL);
1791         if (!gs->time_scale)
1792                 goto bad_args;
1793 out:
1794         return 0;
1795
1796 bad_args:
1797         pr_err("--guest-data option requires guest perf.data file name, "
1798                "guest machine PID, and optionally guest timestamp offset, "
1799                "and guest timestamp scale factor, separated by commas.\n");
1800         return -1;
1801 }
1802
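     /*
      * Save the offset and size of each feature section of the input header
      * so that unchanged sections can later be copied verbatim (see
      * feat_copy()).
      */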
1803 static int save_section_info_cb(struct perf_file_section *section,
1804                                 struct perf_header *ph __maybe_unused,
1805                                 int feat, int fd __maybe_unused, void *data)
1806 {
1807         struct perf_inject *inject = data;
1808
1809         inject->secs[feat] = *section;
1810         return 0;
1811 }
1812
1813 static int save_section_info(struct perf_inject *inject)
1814 {
1815         struct perf_header *header = &inject->session->header;
1816         int fd = perf_data__fd(inject->session->data);
1817
1818         return perf_header__process_sections(header, fd, inject, save_section_info_cb);
1819 }
1820
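     /*
      * Whether a feature section can be copied unchanged from the input file:
      * sections describing the machine or software are invariant, whereas
      * anything derived from the event data may be changed by injection.
      */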
1821 static bool keep_feat(int feat)
1822 {
1823         switch (feat) {
1824         /* Keep original information that describes the machine or software */
1825         case HEADER_TRACING_DATA:
1826         case HEADER_HOSTNAME:
1827         case HEADER_OSRELEASE:
1828         case HEADER_VERSION:
1829         case HEADER_ARCH:
1830         case HEADER_NRCPUS:
1831         case HEADER_CPUDESC:
1832         case HEADER_CPUID:
1833         case HEADER_TOTAL_MEM:
1834         case HEADER_CPU_TOPOLOGY:
1835         case HEADER_NUMA_TOPOLOGY:
1836         case HEADER_PMU_MAPPINGS:
1837         case HEADER_CACHE:
1838         case HEADER_MEM_TOPOLOGY:
1839         case HEADER_CLOCKID:
1840         case HEADER_BPF_PROG_INFO:
1841         case HEADER_BPF_BTF:
1842         case HEADER_CPU_PMU_CAPS:
1843         case HEADER_CLOCK_DATA:
1844         case HEADER_HYBRID_TOPOLOGY:
1845         case HEADER_PMU_CAPS:
1846                 return true;
1847         /* Information that can be updated */
1848         case HEADER_BUILD_ID:
1849         case HEADER_CMDLINE:
1850         case HEADER_EVENT_DESC:
1851         case HEADER_BRANCH_STACK:
1852         case HEADER_GROUP_DESC:
1853         case HEADER_AUXTRACE:
1854         case HEADER_STAT:
1855         case HEADER_SAMPLE_TIME:
1856         case HEADER_DIR_FORMAT:
1857         case HEADER_COMPRESSED:
1858         default:
1859                 return false;
1860         }
1861 }
1862
1863 static int read_file(int fd, u64 offs, void *buf, size_t sz)
1864 {
1865         ssize_t ret = preadn(fd, buf, sz, offs);
1866
1867         if (ret < 0)
1868                 return -errno;
1869         if ((size_t)ret != sz)
1870                 return -EINVAL;
1871         return 0;
1872 }
1873
1874 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
1875 {
1876         int fd = perf_data__fd(inject->session->data);
1877         u64 offs = inject->secs[feat].offset;
1878         size_t sz = inject->secs[feat].size;
1879         void *buf = malloc(sz);
1880         int ret;
1881
1882         if (!buf)
1883                 return -ENOMEM;
1884
1885         ret = read_file(fd, offs, buf, sz);
1886         if (ret)
1887                 goto out_free;
1888
1889         ret = fw->write(fw, buf, sz);
1890 out_free:
1891         free(buf);
1892         return ret;
1893 }
1894
1895 struct inject_fc {
1896         struct feat_copier fc;
1897         struct perf_inject *inject;
1898 };
1899
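     /*
      * Header-writing callback: copy a feature section verbatim from the
      * input file when it is present there and safe to keep. Returns 1 if
      * the section was copied, 0 if not, so the caller can fall back to
      * regenerating it.
      */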
1900 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
1901 {
1902         struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
1903         struct perf_inject *inject = inj_fc->inject;
1904         int ret;
1905
1906         if (!inject->secs[feat].offset ||
1907             !keep_feat(feat))
1908                 return 0;
1909
1910         ret = feat_copy(inject, feat, fw);
1911         if (ret < 0)
1912                 return ret;
1913
1914         return 1; /* Feature section copied */
1915 }
1916
1917 static int copy_kcore_dir(struct perf_inject *inject)
1918 {
1919         char *cmd;
1920         int ret;
1921
1922         ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
1923                        inject->input_name, inject->output.path);
1924         if (ret < 0)
1925                 return ret;
1926         pr_debug("%s\n", cmd);
1927         ret = system(cmd);
1928         free(cmd);
1929         return ret;
1930 }
1931
1932 static int guest_session__copy_kcore_dir(struct guest_session *gs)
1933 {
1934         struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1935         char *cmd;
1936         int ret;
1937
1938         ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
1939                        gs->perf_data_file, inject->output.path, gs->machine_pid);
1940         if (ret < 0)
1941                 return ret;
1942         pr_debug("%s\n", cmd);
1943         ret = system(cmd);
1944         free(cmd);
1945         return ret;
1946 }
1947
1948 static int output_fd(struct perf_inject *inject)
1949 {
1950         return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
1951 }
1952
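     /*
      * Select tool callbacks according to the requested mode (build ids,
      * sched-stat merging, AUX trace decoding, VM time correlation or guest
      * data injection), process all events and finalize the output header.
      */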
1953 static int __cmd_inject(struct perf_inject *inject)
1954 {
1955         int ret = -EINVAL;
1956         struct guest_session *gs = &inject->guest_session;
1957         struct perf_session *session = inject->session;
1958         int fd = output_fd(inject);
1959         u64 output_data_offset;
1960
1961         signal(SIGINT, sig_handler);
1962
1963         if (inject->build_ids || inject->sched_stat ||
1964             inject->itrace_synth_opts.set || inject->build_id_all) {
1965                 inject->tool.mmap         = perf_event__repipe_mmap;
1966                 inject->tool.mmap2        = perf_event__repipe_mmap2;
1967                 inject->tool.fork         = perf_event__repipe_fork;
1968 #ifdef HAVE_LIBTRACEEVENT
1969                 inject->tool.tracing_data = perf_event__repipe_tracing_data;
1970 #endif
1971         }
1972
1973         output_data_offset = perf_session__data_offset(session->evlist);
1974
1975         if (inject->build_id_all) {
1976                 inject->tool.mmap         = perf_event__repipe_buildid_mmap;
1977                 inject->tool.mmap2        = perf_event__repipe_buildid_mmap2;
1978         } else if (inject->build_ids) {
1979                 inject->tool.sample = perf_event__inject_buildid;
1980         } else if (inject->sched_stat) {
1981                 struct evsel *evsel;
1982
1983                 evlist__for_each_entry(session->evlist, evsel) {
1984                         const char *name = evsel__name(evsel);
1985
1986                         if (!strcmp(name, "sched:sched_switch")) {
1987                                 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
1988                                         return -EINVAL;
1989
1990                                 evsel->handler = perf_inject__sched_switch;
1991                         } else if (!strcmp(name, "sched:sched_process_exit"))
1992                                 evsel->handler = perf_inject__sched_process_exit;
1993 #ifdef HAVE_LIBTRACEEVENT
1994                         else if (!strncmp(name, "sched:sched_stat_", 17))
1995                                 evsel->handler = perf_inject__sched_stat;
1996 #endif
1997                 }
1998         } else if (inject->itrace_synth_opts.vm_time_correlation) {
1999                 session->itrace_synth_opts = &inject->itrace_synth_opts;
2000                 memset(&inject->tool, 0, sizeof(inject->tool));
2001                 inject->tool.id_index       = perf_event__process_id_index;
2002                 inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2003                 inject->tool.auxtrace       = perf_event__process_auxtrace;
2004                 inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
2005                 inject->tool.ordered_events = true;
2006                 inject->tool.ordering_requires_timestamps = true;
2007         } else if (inject->itrace_synth_opts.set) {
2008                 session->itrace_synth_opts = &inject->itrace_synth_opts;
2009                 inject->itrace_synth_opts.inject = true;
2010                 inject->tool.comm           = perf_event__repipe_comm;
2011                 inject->tool.namespaces     = perf_event__repipe_namespaces;
2012                 inject->tool.exit           = perf_event__repipe_exit;
2013                 inject->tool.id_index       = perf_event__process_id_index;
2014                 inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
2015                 inject->tool.auxtrace       = perf_event__process_auxtrace;
2016                 inject->tool.aux            = perf_event__drop_aux;
2017                 inject->tool.itrace_start   = perf_event__drop_aux;
2018                 inject->tool.aux_output_hw_id = perf_event__drop_aux;
2019                 inject->tool.ordered_events = true;
2020                 inject->tool.ordering_requires_timestamps = true;
2021                 /* Allow space in the header for new attributes */
2022                 output_data_offset = roundup(8192 + session->header.data_offset, 4096);
2023                 if (inject->strip)
2024                         strip_init(inject);
2025         } else if (gs->perf_data_file) {
2026                 char *name = gs->perf_data_file;
2027
2028                 /*
2029                  * Not strictly necessary, but keep these events in order wrt
2030                  * guest events.
2031                  */
2032                 inject->tool.mmap               = host__repipe;
2033                 inject->tool.mmap2              = host__repipe;
2034                 inject->tool.comm               = host__repipe;
2035                 inject->tool.fork               = host__repipe;
2036                 inject->tool.exit               = host__repipe;
2037                 inject->tool.lost               = host__repipe;
2038                 inject->tool.context_switch     = host__repipe;
2039                 inject->tool.ksymbol            = host__repipe;
2040                 inject->tool.text_poke          = host__repipe;
2041                 /*
2042                  * Once the host session has initialized, set up sample ID
2043                  * mapping and feed in guest attrs, build IDs and initial
2044                  * events.
2045                  */
2046                 inject->tool.finished_init      = host__finished_init;
2047                 /* Obey finished round ordering */
2048                 inject->tool.finished_round     = host__finished_round;
2049                 /* Keep track of which CPU a VCPU is running on */
2050                 inject->tool.context_switch     = host__context_switch;
2051                 /*
2052                  * Must order events to be able to obey finished round
2053                  * ordering.
2054                  */
2055                 inject->tool.ordered_events     = true;
2056                 inject->tool.ordering_requires_timestamps = true;
2057                 /* Set up a separate session to process guest perf.data file */
2058                 ret = guest_session__start(gs, name, session->data->force);
2059                 if (ret) {
2060                         pr_err("Failed to process %s, error %d\n", name, ret);
2061                         return ret;
2062                 }
2063                 /* Allow space in the header for guest attributes */
2064                 output_data_offset += gs->session->header.data_offset;
2065                 output_data_offset = roundup(output_data_offset, 4096);
2066         }
2067
2068         if (!inject->itrace_synth_opts.set)
2069                 auxtrace_index__free(&session->auxtrace_index);
2070
2071         if (!inject->is_pipe && !inject->in_place_update)
2072                 lseek(fd, output_data_offset, SEEK_SET);
2073
2074         ret = perf_session__process_events(session);
2075         if (ret)
2076                 return ret;
2077
2078         if (gs->session) {
2079                 /*
2080                  * Remaining guest events have later timestamps. Flush them
2081                  * out to file.
2082                  */
2083                 ret = guest_session__flush_events(gs);
2084                 if (ret) {
2085                         pr_err("Failed to flush guest events\n");
2086                         return ret;
2087                 }
2088         }
2089
2090         if (!inject->is_pipe && !inject->in_place_update) {
2091                 struct inject_fc inj_fc = {
2092                         .fc.copy = feat_copy_cb,
2093                         .inject = inject,
2094                 };
2095
2096                 if (inject->build_ids)
2097                         perf_header__set_feat(&session->header,
2098                                               HEADER_BUILD_ID);
2099                 /*
2100                  * Keep all buildids when there is unprocessed AUX data because
2101                  * it is not known which ones the AUX trace hits.
2102                  */
2103                 if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
2104                     inject->have_auxtrace && !inject->itrace_synth_opts.set)
2105                         dsos__hit_all(session);
2106                 /*
2107                  * The AUX areas have been removed and replaced with
2108                  * synthesized hardware events, so clear the feature flag.
2109                  */
2110                 if (inject->itrace_synth_opts.set) {
2111                         perf_header__clear_feat(&session->header,
2112                                                 HEADER_AUXTRACE);
2113                         if (inject->itrace_synth_opts.last_branch ||
2114                             inject->itrace_synth_opts.add_last_branch)
2115                                 perf_header__set_feat(&session->header,
2116                                                       HEADER_BRANCH_STACK);
2117                 }
2118                 session->header.data_offset = output_data_offset;
2119                 session->header.data_size = inject->bytes_written;
2120                 perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);
2121
2122                 if (inject->copy_kcore_dir) {
2123                         ret = copy_kcore_dir(inject);
2124                         if (ret) {
2125                                 pr_err("Failed to copy kcore\n");
2126                                 return ret;
2127                         }
2128                 }
2129                 if (gs->copy_kcore_dir) {
2130                         ret = guest_session__copy_kcore_dir(gs);
2131                         if (ret) {
2132                                 pr_err("Failed to copy guest kcore\n");
2133                                 return ret;
2134                         }
2135                 }
2136         }
2137
2138         return ret;
2139 }
2140
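     /*
      * Typical invocations (paths are illustrative):
      *
      *   perf inject -b -i perf.data -o perf.data.new       # inject build ids
      *   perf inject --jit -i perf.data -o perf.data.jitted # merge jitdump
      *   perf inject --itrace -i perf.data -o perf.data.new # decode AUX traces
      */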
2141 int cmd_inject(int argc, const char **argv)
2142 {
2143         struct perf_inject inject = {
2144                 .tool = {
2145                         .sample         = perf_event__repipe_sample,
2146                         .read           = perf_event__repipe_sample,
2147                         .mmap           = perf_event__repipe,
2148                         .mmap2          = perf_event__repipe,
2149                         .comm           = perf_event__repipe,
2150                         .namespaces     = perf_event__repipe,
2151                         .cgroup         = perf_event__repipe,
2152                         .fork           = perf_event__repipe,
2153                         .exit           = perf_event__repipe,
2154                         .lost           = perf_event__repipe,
2155                         .lost_samples   = perf_event__repipe,
2156                         .aux            = perf_event__repipe,
2157                         .itrace_start   = perf_event__repipe,
2158                         .aux_output_hw_id = perf_event__repipe,
2159                         .context_switch = perf_event__repipe,
2160                         .throttle       = perf_event__repipe,
2161                         .unthrottle     = perf_event__repipe,
2162                         .ksymbol        = perf_event__repipe,
2163                         .bpf            = perf_event__repipe,
2164                         .text_poke      = perf_event__repipe,
2165                         .attr           = perf_event__repipe_attr,
2166                         .event_update   = perf_event__repipe_event_update,
2167                         .tracing_data   = perf_event__repipe_op2_synth,
2168                         .finished_round = perf_event__repipe_oe_synth,
2169                         .build_id       = perf_event__repipe_op2_synth,
2170                         .id_index       = perf_event__repipe_op2_synth,
2171                         .auxtrace_info  = perf_event__repipe_op2_synth,
2172                         .auxtrace_error = perf_event__repipe_op2_synth,
2173                         .time_conv      = perf_event__repipe_op2_synth,
2174                         .thread_map     = perf_event__repipe_op2_synth,
2175                         .cpu_map        = perf_event__repipe_op2_synth,
2176                         .stat_config    = perf_event__repipe_op2_synth,
2177                         .stat           = perf_event__repipe_op2_synth,
2178                         .stat_round     = perf_event__repipe_op2_synth,
2179                         .feature        = perf_event__repipe_op2_synth,
2180                         .finished_init  = perf_event__repipe_op2_synth,
2181                         .compressed     = perf_event__repipe_op4_synth,
2182                         .auxtrace       = perf_event__repipe_auxtrace,
2183                 },
2184                 .input_name  = "-",
2185                 .samples = LIST_HEAD_INIT(inject.samples),
2186                 .output = {
2187                         .path = "-",
2188                         .mode = PERF_DATA_MODE_WRITE,
2189                         .use_stdio = true,
2190                 },
2191         };
2192         struct perf_data data = {
2193                 .mode = PERF_DATA_MODE_READ,
2194                 .use_stdio = true,
2195         };
2196         int ret;
2197         bool repipe = true;
2198         const char *known_build_ids = NULL;
2199
2200         struct option options[] = {
2201                 OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
2202                             "Inject build-ids into the output stream"),
2203                 OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
2204                             "Inject build-ids of all DSOs into the output stream"),
2205                 OPT_STRING(0, "known-build-ids", &known_build_ids,
2206                            "buildid path [,buildid path...]",
2207                            "build-ids to use for given paths"),
2208                 OPT_STRING('i', "input", &inject.input_name, "file",
2209                            "input file name"),
2210                 OPT_STRING('o', "output", &inject.output.path, "file",
2211                            "output file name"),
2212                 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
2213                             "Merge sched-stat and sched-switch for getting events "
2214                             "where and how long tasks slept"),
2215 #ifdef HAVE_JITDUMP
2216                 OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
2217 #endif
2218                 OPT_INCR('v', "verbose", &verbose,
2219                          "be more verbose (show build ids, etc)"),
2220                 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
2221                            "file", "vmlinux pathname"),
2222                 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
2223                             "don't load vmlinux even if found"),
2224                 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
2225                            "kallsyms pathname"),
2226                 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
2227                 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
2228                                     NULL, "opts", "Instruction Tracing options\n"
2229                                     ITRACE_HELP,
2230                                     itrace_parse_synth_opts),
2231                 OPT_BOOLEAN(0, "strip", &inject.strip,
2232                             "strip non-synthesized events (use with --itrace)"),
2233                 OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
2234                                     "correlate time between VM guests and the host",
2235                                     parse_vm_time_correlation),
2236                 OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
2237                                     "inject events from a guest perf.data file",
2238                                     parse_guest_data),
2239                 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
2240                            "guest mount directory under which every guest os"
2241                            " instance has a subdir"),
2242                 OPT_END()
2243         };
2244         const char * const inject_usage[] = {
2245                 "perf inject [<options>]",
2246                 NULL
2247         };
2248 #ifndef HAVE_JITDUMP
2249         set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
2250 #endif
2251         argc = parse_options(argc, argv, options, inject_usage, 0);
2252
2253         /*
2254          * Any (unrecognized) arguments left?
2255          */
2256         if (argc)
2257                 usage_with_options(inject_usage, options);
2258
2259         if (inject.strip && !inject.itrace_synth_opts.set) {
2260                 pr_err("--strip option requires --itrace option\n");
2261                 return -1;
2262         }
2263
2264         if (symbol__validate_sym_arguments())
2265                 return -1;
2266
2267         if (inject.in_place_update) {
2268                 if (!strcmp(inject.input_name, "-")) {
2269                         pr_err("Input file name required for in-place updating\n");
2270                         return -1;
2271                 }
2272                 if (strcmp(inject.output.path, "-")) {
2273                         pr_err("Output file name must not be specified for in-place updating\n");
2274                         return -1;
2275                 }
2276                 if (!data.force && !inject.in_place_update_dry_run) {
2277                         pr_err("The input file would be updated in place; "
2278                                 "the --force option is required.\n");
2279                         return -1;
2280                 }
2281                 if (!inject.in_place_update_dry_run)
2282                         data.in_place_update = true;
2283         } else {
2284                 if (strcmp(inject.output.path, "-") && !inject.strip &&
2285                     has_kcore_dir(inject.input_name)) {
2286                         inject.output.is_dir = true;
2287                         inject.copy_kcore_dir = true;
2288                 }
2289                 if (perf_data__open(&inject.output)) {
2290                         perror("failed to create output file");
2291                         return -1;
2292                 }
2293         }
2294
2295         data.path = inject.input_name;
2296         if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
2297                 inject.is_pipe = true;
2298                 /*
2299                  * Do not repipe the header when the input is a regular
2300                  * file: in that case the header can either be rewritten
2301                  * at the end or a new pipe header can be written.
2302                  */
2303                 if (strcmp(inject.input_name, "-"))
2304                         repipe = false;
2305         }
2306
2307         inject.session = __perf_session__new(&data, repipe,
2308                                              output_fd(&inject),
2309                                              &inject.tool);
2310         if (IS_ERR(inject.session)) {
2311                 ret = PTR_ERR(inject.session);
2312                 goto out_close_output;
2313         }
2314
2315         if (zstd_init(&(inject.session->zstd_data), 0) < 0)
2316                 pr_warning("Decompression initialization failed.\n");
2317
2318         /* Save original section info before feature bits change */
2319         ret = save_section_info(&inject);
2320         if (ret)
2321                 goto out_delete;
2322
2323         if (!data.is_pipe && inject.output.is_pipe) {
2324                 ret = perf_header__write_pipe(perf_data__fd(&inject.output));
2325                 if (ret < 0) {
2326                         pr_err("Couldn't write a new pipe header.\n");
2327                         goto out_delete;
2328                 }
2329
2330                 ret = perf_event__synthesize_for_pipe(&inject.tool,
2331                                                       inject.session,
2332                                                       &inject.output,
2333                                                       perf_event__repipe);
2334                 if (ret < 0)
2335                         goto out_delete;
2336         }
2337
2338         if (inject.build_ids && !inject.build_id_all) {
2339                 /*
2340                  * Order events to make sure the mmap records are correct,
2341                  * especially in the presence of jitted code mmaps. We
2342                  * cannot generate the buildid hit list and inject the jit
2343                  * mmaps at the same time for now.
2344                  */
2345                 inject.tool.ordered_events = true;
2346                 inject.tool.ordering_requires_timestamps = true;
2347                 if (known_build_ids != NULL) {
2348                         inject.known_build_ids =
2349                                 perf_inject__parse_known_build_ids(known_build_ids);
2350
2351                         if (inject.known_build_ids == NULL) {
2352                                 pr_err("Couldn't parse known build ids.\n");
2353                                 goto out_delete;
2354                         }
2355                 }
2356         }
2357
2358         if (inject.sched_stat) {
2359                 inject.tool.ordered_events = true;
2360         }
2361
2362 #ifdef HAVE_JITDUMP
2363         if (inject.jit_mode) {
2364                 inject.tool.mmap2          = perf_event__jit_repipe_mmap2;
2365                 inject.tool.mmap           = perf_event__jit_repipe_mmap;
2366                 inject.tool.ordered_events = true;
2367                 inject.tool.ordering_requires_timestamps = true;
2368                 /*
2369                  * JIT MMAP injection injects all MMAP events in one go, so it
2370                  * does not obey finished_round semantics.
2371                  */
2372                 inject.tool.finished_round = perf_event__drop_oe;
2373         }
2374 #endif
2375         ret = symbol__init(&inject.session->header.env);
2376         if (ret < 0)
2377                 goto out_delete;
2378
2379         ret = __cmd_inject(&inject);
2380
2381         guest_session__exit(&inject.guest_session);
2382
2383 out_delete:
2384         strlist__delete(inject.known_build_ids);
2385         zstd_fini(&(inject.session->zstd_data));
2386         perf_session__delete(inject.session);
2387 out_close_output:
2388         if (!inject.in_place_update)
2389                 perf_data__close(&inject.output);
2390         free(inject.itrace_synth_opts.vm_tm_corr_args);
2391         return ret;
2392 }