tools/testing/selftests/bpf/progs/profiler.inc.h

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2020 Facebook */
   3 #include <vmlinux.h>
   4 #include <bpf/bpf_core_read.h>
   5 #include <bpf/bpf_helpers.h>
   6 #include <bpf/bpf_tracing.h>
   7
   8 #include "profiler.h"
   9 #include "err.h"
  10
  11 #ifndef NULL
  12 #define NULL 0
  13 #endif
  14
  15 #define O_WRONLY 00000001
  16 #define O_RDWR 00000002
  17 #define O_DIRECTORY 00200000
  18 #define __O_TMPFILE 020000000
  19 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
  20 #define S_IFMT 00170000
  21 #define S_IFSOCK 0140000
  22 #define S_IFLNK 0120000
  23 #define S_IFREG 0100000
  24 #define S_IFBLK 0060000
  25 #define S_IFDIR 0040000
  26 #define S_IFCHR 0020000
  27 #define S_IFIFO 0010000
  28 #define S_ISUID 0004000
  29 #define S_ISGID 0002000
  30 #define S_ISVTX 0001000
  31 #define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
  32 #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
  33 #define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
  34 #define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
  35 #define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
  36 #define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
  37
  38 #define KILL_DATA_ARRAY_SIZE 8
  39
  40 struct var_kill_data_arr_t {
  41         struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
  42 };
  43
  44 union any_profiler_data_t {
  45         struct var_exec_data_t var_exec;
  46         struct var_kill_data_t var_kill;
  47         struct var_sysctl_data_t var_sysctl;
  48         struct var_filemod_data_t var_filemod;
  49         struct var_fork_data_t var_fork;
  50         struct var_kill_data_arr_t var_kill_data_arr;
  51 };
  52
  53 volatile struct profiler_config_struct bpf_config = {};
  54
  55 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
  56 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
  57 #define CGROUP_LOGIN_SESSION_INODE \
  58         (bpf_config.cgroup_login_session_inode)
  59 #define KILL_SIGNALS (bpf_config.kill_signals_mask)
  60 #define STALE_INFO (bpf_config.stale_info_secs)
  61 #define INODE_FILTER (bpf_config.inode_filter)
  62 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
  63 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
  64
  65 struct kernfs_iattrs___52 {
  66         struct iattr ia_iattr;
  67 };
  68
  69 struct kernfs_node___52 {
  70         union /* kernfs_node_id */ {
  71                 struct {
  72                         u32 ino;
  73                         u32 generation;
  74                 };
  75                 u64 id;
  76         } id;
  77 };
  78
  79 struct {
  80         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
  81         __uint(max_entries, 1);
  82         __type(key, u32);
  83         __type(value, union any_profiler_data_t);
  84 } data_heap SEC(".maps");
  85
  86 struct {
  87         __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  88         __uint(key_size, sizeof(int));
  89         __uint(value_size, sizeof(int));
  90 } events SEC(".maps");
  91
  92 struct {
  93         __uint(type, BPF_MAP_TYPE_HASH);
  94         __uint(max_entries, KILL_DATA_ARRAY_SIZE);
  95         __type(key, u32);
  96         __type(value, struct var_kill_data_arr_t);
  97 } var_tpid_to_data SEC(".maps");
  98
  99 struct {
 100         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 101         __uint(max_entries, profiler_bpf_max_function_id);
 102         __type(key, u32);
 103         __type(value, struct bpf_func_stats_data);
 104 } bpf_func_stats SEC(".maps");
 105
 106 struct {
 107         __uint(type, BPF_MAP_TYPE_HASH);
 108         __type(key, u32);
 109         __type(value, bool);
 110         __uint(max_entries, 16);
 111 } allowed_devices SEC(".maps");
 112
 113 struct {
 114         __uint(type, BPF_MAP_TYPE_HASH);
 115         __type(key, u64);
 116         __type(value, bool);
 117         __uint(max_entries, 1024);
 118 } allowed_file_inodes SEC(".maps");
 119
 120 struct {
 121         __uint(type, BPF_MAP_TYPE_HASH);
 122         __type(key, u64);
 123         __type(value, bool);
 124         __uint(max_entries, 1024);
 125 } allowed_directory_inodes SEC(".maps");
 126
 127 struct {
 128         __uint(type, BPF_MAP_TYPE_HASH);
 129         __type(key, u32);
 130         __type(value, bool);
 131         __uint(max_entries, 16);
 132 } disallowed_exec_inodes SEC(".maps");
 133
 134 #ifndef ARRAY_SIZE
 135 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 136 #endif
 137
 138 static INLINE bool IS_ERR(const void* ptr)
 139 {
 140         return IS_ERR_VALUE((unsigned long)ptr);
 141 }
 142
 143 static INLINE u32 get_userspace_pid()
 144 {
 145         return bpf_get_current_pid_tgid() >> 32;
 146 }
 147
 148 static INLINE bool is_init_process(u32 tgid)
 149 {
 150         return tgid == 1 || tgid == 0;
 151 }
 152
 153 static INLINE unsigned long
 154 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
 155 {
 156         len = len < max ? len : max;
 157         if (len > 1) {
 158                 if (bpf_probe_read_kernel(dst, len, src))
 159                         return 0;
 160         } else if (len == 1) {
 161                 if (bpf_probe_read_kernel(dst, 1, src))
 162                         return 0;
 163         }
 164         return len;
 165 }
 166
 167 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
 168                                      int spid)
 169 {
 170 #ifdef UNROLL
 171 #pragma unroll
 172 #endif
 173         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
 174                 if (arr_struct->array[i].meta.pid == spid)
 175                         return i;
 176         return -1;
 177 }
 178
 179 static INLINE void populate_ancestors(struct task_struct* task,
 180                                       struct ancestors_data_t* ancestors_data)
 181 {
 182         struct task_struct* parent = task;
 183         u32 num_ancestors, ppid;
 184
 185         ancestors_data->num_ancestors = 0;
 186 #ifdef UNROLL
 187 #pragma unroll
 188 #endif
 189         for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
 190                 parent = BPF_CORE_READ(parent, real_parent);
 191                 if (parent == NULL)
 192                         break;
 193                 ppid = BPF_CORE_READ(parent, tgid);
 194                 if (is_init_process(ppid))
 195                         break;
 196                 ancestors_data->ancestor_pids[num_ancestors] = ppid;
 197                 ancestors_data->ancestor_exec_ids[num_ancestors] =
 198                         BPF_CORE_READ(parent, self_exec_id);
 199                 ancestors_data->ancestor_start_times[num_ancestors] =
 200                         BPF_CORE_READ(parent, start_time);
 201                 ancestors_data->num_ancestors = num_ancestors;
 202         }
 203 }
 204
 205 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
 206                                           struct kernfs_node* cgroup_root_node,
 207                                           void* payload,
 208                                           int* root_pos)
 209 {
 210         void* payload_start = payload;
 211         size_t filepart_length;
 212
 213 #ifdef UNROLL
 214 #pragma unroll
 215 #endif
 216         for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
 217                 filepart_length =
 218                         bpf_probe_read_kernel_str(payload, MAX_PATH,
 219                                                   BPF_CORE_READ(cgroup_node, name));
 220                 if (!cgroup_node)
 221                         return payload;
 222                 if (cgroup_node == cgroup_root_node)
 223                         *root_pos = payload - payload_start;
 224                 if (filepart_length <= MAX_PATH) {
 225                         barrier_var(filepart_length);
 226                         payload += filepart_length;
 227                 }
 228                 cgroup_node = BPF_CORE_READ(cgroup_node, parent);
 229         }
 230         return payload;
 231 }
 232
 233 static ino_t get_inode_from_kernfs(struct kernfs_node* node)
 234 {
 235         struct kernfs_node___52* node52 = (void*)node;
 236
 237         if (bpf_core_field_exists(node52->id.ino)) {
 238                 barrier_var(node52);
 239                 return BPF_CORE_READ(node52, id.ino);
 240         } else {
 241                 barrier_var(node);
 242                 return (u64)BPF_CORE_READ(node, id);
 243         }
 244 }
 245
 246 extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
 247 enum cgroup_subsys_id___local {
 248         pids_cgrp_id___local = 123, /* value doesn't matter */
 249 };
 250
 251 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 252                                          struct task_struct* task,
 253                                          void* payload)
 254 {
 255         struct kernfs_node* root_kernfs =
 256                 BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
 257         struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 258
 259 #if __has_builtin(__builtin_preserve_enum_value)
 260         if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
 261                 int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
 262                                                   pids_cgrp_id___local);
 263 #ifdef UNROLL
 264 #pragma unroll
 265 #endif
 266                 for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 267                         struct cgroup_subsys_state* subsys =
 268                                 BPF_CORE_READ(task, cgroups, subsys[i]);
 269                         if (subsys != NULL) {
 270                                 int subsys_id = BPF_CORE_READ(subsys, ss, id);
 271                                 if (subsys_id == cgrp_id) {
 272                                         proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
 273                                         root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
 274                                         break;
 275                                 }
 276                         }
 277                 }
 278         }
 279 #endif
 280
 281         cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
 282         cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
 283
 284         if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
 285                 cgroup_data->cgroup_root_mtime =
 286                         BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
 287                 cgroup_data->cgroup_proc_mtime =
 288                         BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
 289         } else {
 290                 struct kernfs_iattrs___52* root_iattr =
 291                         (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
 292                 cgroup_data->cgroup_root_mtime =
 293                         BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
 294
 295                 struct kernfs_iattrs___52* proc_iattr =
 296                         (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
 297                 cgroup_data->cgroup_proc_mtime =
 298                         BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
 299         }
 300
 301         cgroup_data->cgroup_root_length = 0;
 302         cgroup_data->cgroup_proc_length = 0;
 303         cgroup_data->cgroup_full_length = 0;
 304
 305         size_t cgroup_root_length =
 306                 bpf_probe_read_kernel_str(payload, MAX_PATH,
 307                                           BPF_CORE_READ(root_kernfs, name));
 308         barrier_var(cgroup_root_length);
 309         if (cgroup_root_length <= MAX_PATH) {
 310                 barrier_var(cgroup_root_length);
 311                 cgroup_data->cgroup_root_length = cgroup_root_length;
 312                 payload += cgroup_root_length;
 313         }
 314
 315         size_t cgroup_proc_length =
 316                 bpf_probe_read_kernel_str(payload, MAX_PATH,
 317                                           BPF_CORE_READ(proc_kernfs, name));
 318         barrier_var(cgroup_proc_length);
 319         if (cgroup_proc_length <= MAX_PATH) {
 320                 barrier_var(cgroup_proc_length);
 321                 cgroup_data->cgroup_proc_length = cgroup_proc_length;
 322                 payload += cgroup_proc_length;
 323         }
 324
 325         if (FETCH_CGROUPS_FROM_BPF) {
 326                 cgroup_data->cgroup_full_path_root_pos = -1;
 327                 void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
 328                                                               &cgroup_data->cgroup_full_path_root_pos);
 329                 cgroup_data->cgroup_full_length = payload_end_pos - payload;
 330                 payload = payload_end_pos;
 331         }
 332
 333         return (void*)payload;
 334 }
 335
 336 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
 337                                           struct task_struct* task,
 338                                           u32 pid, void* payload)
 339 {
 340         u64 uid_gid = bpf_get_current_uid_gid();
 341
 342         metadata->uid = (u32)uid_gid;
 343         metadata->gid = uid_gid >> 32;
 344         metadata->pid = pid;
 345         metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
 346         metadata->start_time = BPF_CORE_READ(task, start_time);
 347         metadata->comm_length = 0;
 348
 349         size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
 350         barrier_var(comm_length);
 351         if (comm_length <= TASK_COMM_LEN) {
 352                 barrier_var(comm_length);
 353                 metadata->comm_length = comm_length;
 354                 payload += comm_length;
 355         }
 356
 357         return (void*)payload;
 358 }
 359
 360 static INLINE struct var_kill_data_t*
 361 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
 362 {
 363         int zero = 0;
 364         struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
 365
 366         if (kill_data == NULL)
 367                 return NULL;
 368         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 369
 370         void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
 371         payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
 372         size_t payload_length = payload - (void*)kill_data->payload;
 373         kill_data->payload_length = payload_length;
 374         populate_ancestors(task, &kill_data->ancestors_info);
 375         kill_data->meta.type = KILL_EVENT;
 376         kill_data->kill_target_pid = tpid;
 377         kill_data->kill_sig = sig;
 378         kill_data->kill_count = 1;
 379         kill_data->last_kill_time = bpf_ktime_get_ns();
 380         return kill_data;
 381 }
 382
 383 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
 384 {
 385         if ((KILL_SIGNALS & (1ULL << sig)) == 0)
 386                 return 0;
 387
 388         u32 spid = get_userspace_pid();
 389         struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
 390
 391         if (arr_struct == NULL) {
 392                 struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
 393                 int zero = 0;
 394
 395                 if (kill_data == NULL)
 396                         return 0;
 397                 arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
 398                 if (arr_struct == NULL)
 399                         return 0;
 400                 bpf_probe_read_kernel(&arr_struct->array[0],
 401                                       sizeof(arr_struct->array[0]), kill_data);
 402         } else {
 403                 int index = get_var_spid_index(arr_struct, spid);
 404
 405                 if (index == -1) {
 406                         struct var_kill_data_t* kill_data =
 407                                 get_var_kill_data(ctx, spid, tpid, sig);
 408                         if (kill_data == NULL)
 409                                 return 0;
 410 #ifdef UNROLL
 411 #pragma unroll
 412 #endif
 413                         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
 414                                 if (arr_struct->array[i].meta.pid == 0) {
 415                                         bpf_probe_read_kernel(&arr_struct->array[i],
 416                                                               sizeof(arr_struct->array[i]),
 417                                                               kill_data);
 418                                         bpf_map_update_elem(&var_tpid_to_data, &tpid,
 419                                                             arr_struct, 0);
 420
 421                                         return 0;
 422                                 }
 423                         return 0;
 424                 }
 425
 426                 struct var_kill_data_t* kill_data = &arr_struct->array[index];
 427
 428                 u64 delta_sec =
 429                         (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
 430
 431                 if (delta_sec < STALE_INFO) {
 432                         kill_data->kill_count++;
 433                         kill_data->last_kill_time = bpf_ktime_get_ns();
 434                         bpf_probe_read_kernel(&arr_struct->array[index],
 435                                               sizeof(arr_struct->array[index]),
 436                                               kill_data);
 437                 } else {
 438                         struct var_kill_data_t* kill_data =
 439                                 get_var_kill_data(ctx, spid, tpid, sig);
 440                         if (kill_data == NULL)
 441                                 return 0;
 442                         bpf_probe_read_kernel(&arr_struct->array[index],
 443                                               sizeof(arr_struct->array[index]),
 444                                               kill_data);
 445                 }
 446         }
 447         bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
 448         return 0;
 449 }
 450
 451 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
 452                                    enum bpf_function_id func_id)
 453 {
 454         int func_id_key = func_id;
 455
 456         bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
 457         bpf_stat_ctx->bpf_func_stats_data_val =
 458                 bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
 459         if (bpf_stat_ctx->bpf_func_stats_data_val)
 460                 bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
 461 }
 462
 463 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
 464 {
 465         if (bpf_stat_ctx->bpf_func_stats_data_val)
 466                 bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
 467                         bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
 468 }
 469
 470 static INLINE void
 471 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
 472                                     struct var_metadata_t* meta)
 473 {
 474         if (bpf_stat_ctx->bpf_func_stats_data_val) {
 475                 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
 476                 meta->bpf_stats_num_perf_events =
 477                         bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
 478         }
 479         meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
 480         meta->cpu_id = bpf_get_smp_processor_id();
 481 }
 482
 483 static INLINE size_t
 484 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
 485 {
 486         size_t length = 0;
 487         size_t filepart_length;
 488         struct dentry* parent_dentry;
 489
 490 #ifdef UNROLL
 491 #pragma unroll
 492 #endif
 493         for (int i = 0; i < MAX_PATH_DEPTH; i++) {
 494                 filepart_length =
 495                         bpf_probe_read_kernel_str(payload, MAX_PATH,
 496                                                   BPF_CORE_READ(filp_dentry, d_name.name));
 497                 barrier_var(filepart_length);
 498                 if (filepart_length > MAX_PATH)
 499                         break;
 500                 barrier_var(filepart_length);
 501                 payload += filepart_length;
 502                 length += filepart_length;
 503
 504                 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
 505                 if (filp_dentry == parent_dentry)
 506                         break;
 507                 filp_dentry = parent_dentry;
 508         }
 509
 510         return length;
 511 }
 512
 513 static INLINE bool
 514 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
 515 {
 516         struct dentry* parent_dentry;
 517 #ifdef UNROLL
 518 #pragma unroll
 519 #endif
 520         for (int i = 0; i < MAX_PATH_DEPTH; i++) {
 521                 u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
 522                 bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
 523
 524                 if (allowed_dir != NULL)
 525                         return true;
 526                 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
 527                 if (filp_dentry == parent_dentry)
 528                         break;
 529                 filp_dentry = parent_dentry;
 530         }
 531         return false;
 532 }
 533
 534 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
 535                                                  u32* device_id,
 536                                                  u64* file_ino)
 537 {
 538         u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
 539         *device_id = dev_id;
 540         bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
 541
 542         if (allowed_device == NULL)
 543                 return false;
 544
 545         u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
 546         *file_ino = ino;
 547         bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
 548
 549         if (allowed_file == NULL)
 550                 if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
 551                         return false;
 552         return true;
 553 }
 554
 555 SEC("kprobe/proc_sys_write")
 556 ssize_t BPF_KPROBE(kprobe__proc_sys_write,
 557                    struct file* filp, const char* buf,
 558                    size_t count, loff_t* ppos)
 559 {
 560         struct bpf_func_stats_ctx stats_ctx;
 561         bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
 562
 563         u32 pid = get_userspace_pid();
 564         int zero = 0;
 565         struct var_sysctl_data_t* sysctl_data =
 566                 bpf_map_lookup_elem(&data_heap, &zero);
 567         if (!sysctl_data)
 568                 goto out;
 569
 570         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 571         sysctl_data->meta.type = SYSCTL_EVENT;
 572         void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
 573         payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
 574
 575         populate_ancestors(task, &sysctl_data->ancestors_info);
 576
 577         sysctl_data->sysctl_val_length = 0;
 578         sysctl_data->sysctl_path_length = 0;
 579
 580         size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
 581                                                              CTL_MAXNAME, buf);
 582         barrier_var(sysctl_val_length);
 583         if (sysctl_val_length <= CTL_MAXNAME) {
 584                 barrier_var(sysctl_val_length);
 585                 sysctl_data->sysctl_val_length = sysctl_val_length;
 586                 payload += sysctl_val_length;
 587         }
 588
 589         size_t sysctl_path_length =
 590                 bpf_probe_read_kernel_str(payload, MAX_PATH,
 591                                           BPF_CORE_READ(filp, f_path.dentry,
 592                                                         d_name.name));
 593         barrier_var(sysctl_path_length);
 594         if (sysctl_path_length <= MAX_PATH) {
 595                 barrier_var(sysctl_path_length);
 596                 sysctl_data->sysctl_path_length = sysctl_path_length;
 597                 payload += sysctl_path_length;
 598         }
 599
 600         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
 601         unsigned long data_len = payload - (void*)sysctl_data;
 602         data_len = data_len > sizeof(struct var_sysctl_data_t)
 603                 ? sizeof(struct var_sysctl_data_t)
 604                 : data_len;
 605         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
 606 out:
 607         bpf_stats_exit(&stats_ctx);
 608         return 0;
 609 }
 610
 611 SEC("tracepoint/syscalls/sys_enter_kill")
 612 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
 613 {
 614         struct bpf_func_stats_ctx stats_ctx;
 615
 616         bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
 617         int pid = ctx->args[0];
 618         int sig = ctx->args[1];
 619         int ret = trace_var_sys_kill(ctx, pid, sig);
 620         bpf_stats_exit(&stats_ctx);
 621         return ret;
 622 };
 623
 624 SEC("raw_tracepoint/sched_process_exit")
 625 int raw_tracepoint__sched_process_exit(void* ctx)
 626 {
 627         int zero = 0;
 628         struct bpf_func_stats_ctx stats_ctx;
 629         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
 630
 631         u32 tpid = get_userspace_pid();
 632
 633         struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
 634         struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
 635
 636         if (arr_struct == NULL || kill_data == NULL)
 637                 goto out;
 638
 639         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 640         struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 641
 642 #ifdef UNROLL
 643 #pragma unroll
 644 #endif
 645         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
 646                 struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
 647
 648                 if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
 649                         bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
 650                                               past_kill_data);
 651                         void* payload = kill_data->payload;
 652                         size_t offset = kill_data->payload_length;
 653                         if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
 654                                 return 0;
 655                         payload += offset;
 656
 657                         kill_data->kill_target_name_length = 0;
 658                         kill_data->kill_target_cgroup_proc_length = 0;
 659
 660                         size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
 661                         barrier_var(comm_length);
 662                         if (comm_length <= TASK_COMM_LEN) {
 663                                 barrier_var(comm_length);
 664                                 kill_data->kill_target_name_length = comm_length;
 665                                 payload += comm_length;
 666                         }
 667
 668                         size_t cgroup_proc_length =
 669                                 bpf_probe_read_kernel_str(payload,
 670                                                           KILL_TARGET_LEN,
 671                                                           BPF_CORE_READ(proc_kernfs, name));
 672                         barrier_var(cgroup_proc_length);
 673                         if (cgroup_proc_length <= KILL_TARGET_LEN) {
 674                                 barrier_var(cgroup_proc_length);
 675                                 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
 676                                 payload += cgroup_proc_length;
 677                         }
 678
 679                         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
 680                         unsigned long data_len = (void*)payload - (void*)kill_data;
 681                         data_len = data_len > sizeof(struct var_kill_data_t)
 682                                 ? sizeof(struct var_kill_data_t)
 683                                 : data_len;
 684                         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
 685                 }
 686         }
 687         bpf_map_delete_elem(&var_tpid_to_data, &tpid);
 688 out:
 689         bpf_stats_exit(&stats_ctx);
 690         return 0;
 691 }
 692
 693 SEC("raw_tracepoint/sched_process_exec")
 694 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
 695 {
 696         struct bpf_func_stats_ctx stats_ctx;
 697         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
 698
 699         struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
 700         u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
 701
 702         bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
 703         if (should_filter_binprm != NULL)
 704                 goto out;
 705
 706         int zero = 0;
 707         struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
 708         if (!proc_exec_data)
 709                 goto out;
 710
 711         if (INODE_FILTER && inode != INODE_FILTER)
 712                 return 0;
 713
 714         u32 pid = get_userspace_pid();
 715         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 716
 717         proc_exec_data->meta.type = EXEC_EVENT;
 718         proc_exec_data->bin_path_length = 0;
 719         proc_exec_data->cmdline_length = 0;
 720         proc_exec_data->environment_length = 0;
 721         void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
 722                                               proc_exec_data->payload);
 723         payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
 724
 725         struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
 726         proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
 727         proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
 728         proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
 729         proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
 730
 731         const char* filename = BPF_CORE_READ(bprm, filename);
 732         size_t bin_path_length =
 733                 bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
 734         barrier_var(bin_path_length);
 735         if (bin_path_length <= MAX_FILENAME_LEN) {
 736                 barrier_var(bin_path_length);
 737                 proc_exec_data->bin_path_length = bin_path_length;
 738                 payload += bin_path_length;
 739         }
 740
 741         void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
 742         void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
 743         unsigned int cmdline_length = probe_read_lim(payload, arg_start,
 744                                                      arg_end - arg_start, MAX_ARGS_LEN);
 745
 746         if (cmdline_length <= MAX_ARGS_LEN) {
 747                 barrier_var(cmdline_length);
 748                 proc_exec_data->cmdline_length = cmdline_length;
 749                 payload += cmdline_length;
 750         }
 751
 752         if (READ_ENVIRON_FROM_EXEC) {
 753                 void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
 754                 void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
 755                 unsigned long env_len = probe_read_lim(payload, env_start,
 756                                                        env_end - env_start, MAX_ENVIRON_LEN);
 757                 if (cmdline_length <= MAX_ENVIRON_LEN) {
 758                         proc_exec_data->environment_length = env_len;
 759                         payload += env_len;
 760                 }
 761         }
 762
 763         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
 764         unsigned long data_len = payload - (void*)proc_exec_data;
 765         data_len = data_len > sizeof(struct var_exec_data_t)
 766                 ? sizeof(struct var_exec_data_t)
 767                 : data_len;
 768         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
 769 out:
 770         bpf_stats_exit(&stats_ctx);
 771         return 0;
 772 }
 773
 774 SEC("kretprobe/do_filp_open")
 775 int kprobe_ret__do_filp_open(struct pt_regs* ctx)
 776 {
 777         struct bpf_func_stats_ctx stats_ctx;
 778         bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
 779
 780         struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
 781
 782         if (filp == NULL || IS_ERR(filp))
 783                 goto out;
 784         unsigned int flags = BPF_CORE_READ(filp, f_flags);
 785         if ((flags & (O_RDWR | O_WRONLY)) == 0)
 786                 goto out;
 787         if ((flags & O_TMPFILE) > 0)
 788                 goto out;
 789         struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
 790         umode_t mode = BPF_CORE_READ(file_inode, i_mode);
 791         if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
 792             S_ISSOCK(mode))
 793                 goto out;
 794
 795         struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
 796         u32 device_id = 0;
 797         u64 file_ino = 0;
 798         if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
 799                 goto out;
 800
 801         int zero = 0;
 802         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 803         if (!filemod_data)
 804                 goto out;
 805
 806         u32 pid = get_userspace_pid();
 807         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 808
 809         filemod_data->meta.type = FILEMOD_EVENT;
 810         filemod_data->fmod_type = FMOD_OPEN;
 811         filemod_data->dst_flags = flags;
 812         filemod_data->src_inode = 0;
 813         filemod_data->dst_inode = file_ino;
 814         filemod_data->src_device_id = 0;
 815         filemod_data->dst_device_id = device_id;
 816         filemod_data->src_filepath_length = 0;
 817         filemod_data->dst_filepath_length = 0;
 818
 819         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 820                                               filemod_data->payload);
 821         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 822
 823         size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
 824         barrier_var(len);
 825         if (len <= MAX_FILEPATH_LENGTH) {
 826                 barrier_var(len);
 827                 payload += len;
 828                 filemod_data->dst_filepath_length = len;
 829         }
 830         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 831         unsigned long data_len = payload - (void*)filemod_data;
 832         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 833         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 834 out:
 835         bpf_stats_exit(&stats_ctx);
 836         return 0;
 837 }
 838
 839 SEC("kprobe/vfs_link")
 840 int BPF_KPROBE(kprobe__vfs_link,
 841                struct dentry* old_dentry, struct mnt_idmap *idmap,
 842                struct inode* dir, struct dentry* new_dentry,
 843                struct inode** delegated_inode)
 844 {
 845         struct bpf_func_stats_ctx stats_ctx;
 846         bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
 847
 848         u32 src_device_id = 0;
 849         u64 src_file_ino = 0;
 850         u32 dst_device_id = 0;
 851         u64 dst_file_ino = 0;
 852         if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
 853             !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
 854                 goto out;
 855
 856         int zero = 0;
 857         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 858         if (!filemod_data)
 859                 goto out;
 860
 861         u32 pid = get_userspace_pid();
 862         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 863
 864         filemod_data->meta.type = FILEMOD_EVENT;
 865         filemod_data->fmod_type = FMOD_LINK;
 866         filemod_data->dst_flags = 0;
 867         filemod_data->src_inode = src_file_ino;
 868         filemod_data->dst_inode = dst_file_ino;
 869         filemod_data->src_device_id = src_device_id;
 870         filemod_data->dst_device_id = dst_device_id;
 871         filemod_data->src_filepath_length = 0;
 872         filemod_data->dst_filepath_length = 0;
 873
 874         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 875                                               filemod_data->payload);
 876         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 877
 878         size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
 879         barrier_var(len);
 880         if (len <= MAX_FILEPATH_LENGTH) {
 881                 barrier_var(len);
 882                 payload += len;
 883                 filemod_data->src_filepath_length = len;
 884         }
 885
 886         len = read_absolute_file_path_from_dentry(new_dentry, payload);
 887         barrier_var(len);
 888         if (len <= MAX_FILEPATH_LENGTH) {
 889                 barrier_var(len);
 890                 payload += len;
 891                 filemod_data->dst_filepath_length = len;
 892         }
 893
 894         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 895         unsigned long data_len = payload - (void*)filemod_data;
 896         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 897         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 898 out:
 899         bpf_stats_exit(&stats_ctx);
 900         return 0;
 901 }
 902
 903 SEC("kprobe/vfs_symlink")
 904 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
 905                const char* oldname)
 906 {
 907         struct bpf_func_stats_ctx stats_ctx;
 908         bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
 909
 910         u32 dst_device_id = 0;
 911         u64 dst_file_ino = 0;
 912         if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
 913                 goto out;
 914
 915         int zero = 0;
 916         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 917         if (!filemod_data)
 918                 goto out;
 919
 920         u32 pid = get_userspace_pid();
 921         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 922
 923         filemod_data->meta.type = FILEMOD_EVENT;
 924         filemod_data->fmod_type = FMOD_SYMLINK;
 925         filemod_data->dst_flags = 0;
 926         filemod_data->src_inode = 0;
 927         filemod_data->dst_inode = dst_file_ino;
 928         filemod_data->src_device_id = 0;
 929         filemod_data->dst_device_id = dst_device_id;
 930         filemod_data->src_filepath_length = 0;
 931         filemod_data->dst_filepath_length = 0;
 932
 933         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 934                                               filemod_data->payload);
 935         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 936
 937         size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
 938                                                oldname);
 939         barrier_var(len);
 940         if (len <= MAX_FILEPATH_LENGTH) {
 941                 barrier_var(len);
 942                 payload += len;
 943                 filemod_data->src_filepath_length = len;
 944         }
 945         len = read_absolute_file_path_from_dentry(dentry, payload);
 946         barrier_var(len);
 947         if (len <= MAX_FILEPATH_LENGTH) {
 948                 barrier_var(len);
 949                 payload += len;
 950                 filemod_data->dst_filepath_length = len;
 951         }
 952         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 953         unsigned long data_len = payload - (void*)filemod_data;
 954         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 955         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 956 out:
 957         bpf_stats_exit(&stats_ctx);
 958         return 0;
 959 }
 960
 961 SEC("raw_tracepoint/sched_process_fork")
 962 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
 963 {
 964         struct bpf_func_stats_ctx stats_ctx;
 965         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
 966
 967         int zero = 0;
 968         struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
 969         if (!fork_data)
 970                 goto out;
 971
 972         struct task_struct* parent = (struct task_struct*)ctx->args[0];
 973         struct task_struct* child = (struct task_struct*)ctx->args[1];
 974         fork_data->meta.type = FORK_EVENT;
 975
 976         void* payload = populate_var_metadata(&fork_data->meta, child,
 977                                               BPF_CORE_READ(child, pid), fork_data->payload);
 978         fork_data->parent_pid = BPF_CORE_READ(parent, pid);
 979         fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
 980         fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
 981         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
 982
 983         unsigned long data_len = payload - (void*)fork_data;
 984         data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
 985         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
 986 out:
 987         bpf_stats_exit(&stats_ctx);
 988         return 0;
 989 }
 990 char _license[] SEC("license") = "GPL"; /* License string for this BPF object, placed in the "license" section via SEC(). */