/* linux/tools/testing/selftests/bpf/progs/profiler.inc.h */
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2020 Facebook */
   3#include <vmlinux.h>
   4#include <bpf/bpf_core_read.h>
   5#include <bpf/bpf_helpers.h>
   6#include <bpf/bpf_tracing.h>
   7
   8#include "profiler.h"
   9
  10#ifndef NULL
  11#define NULL 0
  12#endif
  13
  14#define O_WRONLY 00000001
  15#define O_RDWR 00000002
  16#define O_DIRECTORY 00200000
  17#define __O_TMPFILE 020000000
  18#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
  19#define MAX_ERRNO 4095
  20#define S_IFMT 00170000
  21#define S_IFSOCK 0140000
  22#define S_IFLNK 0120000
  23#define S_IFREG 0100000
  24#define S_IFBLK 0060000
  25#define S_IFDIR 0040000
  26#define S_IFCHR 0020000
  27#define S_IFIFO 0010000
  28#define S_ISUID 0004000
  29#define S_ISGID 0002000
  30#define S_ISVTX 0001000
  31#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
  32#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
  33#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
  34#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
  35#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
  36#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
  37#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
  38
  39#define KILL_DATA_ARRAY_SIZE 8
  40
/* Fixed-size array of kill-event records; one such array is kept per
 * target pid in the var_tpid_to_data map below.
 */
struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

/* Scratch union sized to the largest per-event record type; a single
 * per-CPU data_heap slot is reused as the build buffer for every event.
 */
union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};
  53
/* Runtime configuration block — presumably written by the userspace
 * loader before attach (only read in this file); volatile so clang
 * re-reads each field on every access instead of caching it.
 */
volatile struct profiler_config_struct bpf_config = {};

/* Shorthand accessors for individual configuration fields. */
#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
  65
/* CO-RE compatibility shapes: the ___52 suffix names an alternate,
 * older kernfs layout (presumably the v5.2-era one — the suffix is
 * ignored by libbpf relocation matching).  Used with
 * bpf_core_field_exists() to handle both old and new kernels.
 */
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};
  79
/* Per-CPU scratch buffer big enough for any single event record;
 * sidesteps the BPF stack-size limit for the large var_*_data_t types.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

/* Perf event array used to ship finished records to userspace. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

/* Aggregated kill events, keyed by the kill target pid. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

/* Per-program-function execution/time counters, indexed by
 * enum bpf_function_id.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

/* Device-id allowlist for file-modification tracking; only read here,
 * presumably populated by userspace.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

/* File-inode allowlist for file-modification tracking. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

/* Directory-inode allowlist; a file also passes if any ancestor
 * directory's inode is present here (see is_ancestor_in_allowed_inodes).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

/* Executable inodes whose exec events are suppressed entirely. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
 134
 135#ifndef ARRAY_SIZE
 136#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 137#endif
 138
 139static INLINE bool IS_ERR(const void* ptr)
 140{
 141        return IS_ERR_VALUE((unsigned long)ptr);
 142}
 143
 144static INLINE u32 get_userspace_pid()
 145{
 146        return bpf_get_current_pid_tgid() >> 32;
 147}
 148
 149static INLINE bool is_init_process(u32 tgid)
 150{
 151        return tgid == 1 || tgid == 0;
 152}
 153
/* Copy min(len, max) bytes from src to dst via bpf_probe_read().
 *
 * Returns the clamped length on success, 0 if the probe read fails or
 * len is 0.  The separate len > 1 / len == 1 branches look redundant in
 * plain C but are presumably shaped this way for the BPF verifier's
 * bounds tracking — confirm before collapsing them.
 */
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read(dst, 1, src))
			return 0;
	}
	return len;
}
 167
/* Return the index of the slot whose sender pid (meta.pid) equals spid,
 * or -1 if no slot matches.  Loop is unrolled when UNROLL is defined so
 * older kernels without bounded-loop support can verify it.
 */
static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}
 179
 180static INLINE void populate_ancestors(struct task_struct* task,
 181                                      struct ancestors_data_t* ancestors_data)
 182{
 183        struct task_struct* parent = task;
 184        u32 num_ancestors, ppid;
 185
 186        ancestors_data->num_ancestors = 0;
 187#ifdef UNROLL
 188#pragma unroll
 189#endif
 190        for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
 191                parent = BPF_CORE_READ(parent, real_parent);
 192                if (parent == NULL)
 193                        break;
 194                ppid = BPF_CORE_READ(parent, tgid);
 195                if (is_init_process(ppid))
 196                        break;
 197                ancestors_data->ancestor_pids[num_ancestors] = ppid;
 198                ancestors_data->ancestor_exec_ids[num_ancestors] =
 199                        BPF_CORE_READ(parent, self_exec_id);
 200                ancestors_data->ancestor_start_times[num_ancestors] =
 201                        BPF_CORE_READ(parent, start_time);
 202                ancestors_data->num_ancestors = num_ancestors;
 203        }
 204}
 205
 206static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
 207                                          struct kernfs_node* cgroup_root_node,
 208                                          void* payload,
 209                                          int* root_pos)
 210{
 211        void* payload_start = payload;
 212        size_t filepart_length;
 213
 214#ifdef UNROLL
 215#pragma unroll
 216#endif
 217        for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
 218                filepart_length =
 219                        bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
 220                if (!cgroup_node)
 221                        return payload;
 222                if (cgroup_node == cgroup_root_node)
 223                        *root_pos = payload - payload_start;
 224                if (filepart_length <= MAX_PATH) {
 225                        barrier_var(filepart_length);
 226                        payload += filepart_length;
 227                }
 228                cgroup_node = BPF_CORE_READ(cgroup_node, parent);
 229        }
 230        return payload;
 231}
 232
/* Read the inode number from a kernfs_node, transparently handling the
 * older layout where the id is a split ino/generation union
 * (kernfs_node___52) — detected at load time via CO-RE field-existence.
 */
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		/* barrier_var prevents clang from folding the two branches. */
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}
 245
/* Kernel config knob resolved at load time; __weak so loading still
 * succeeds when the kconfig symbol is absent.
 */
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
/* Local stand-in for the kernel's enum cgroup_subsys_id; the value is
 * relocated against the running kernel via bpf_core_enum_value(), so
 * the literal here is irrelevant.
 */
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};
 250
/* Fill cgroup_data with the current task's cgroup root/proc inodes,
 * mtimes, lengths, and names, appending the variable-length strings to
 * payload.  Returns the advanced payload pointer.
 */
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	/* Default to the cgroup-v2 (default hierarchy) kernfs nodes. */
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	/* Optional cgroup-v1 resolution: locate the pids subsystem by
	 * relocating pids_cgrp_id___local against the kernel's real enum,
	 * then substitute that hierarchy's kernfs nodes.
	 */
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	/* kernfs_node::iattr layout differs across kernel versions; fall
	 * back to the kernfs_iattrs___52 shape when ia_mtime is absent
	 * (CO-RE field-existence check).
	 */
	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	/* Append the root cgroup name; the barrier_var/bound-check pairs
	 * keep the length in a range the BPF verifier can prove.
	 */
	size_t cgroup_root_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	/* Append the task's own cgroup name. */
	size_t cgroup_proc_length =
		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	/* Optionally append the full proc-to-root path as well. */
	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}
 333
/* Fill the common event metadata (uid/gid, pid, exec_id, start_time)
 * and append the task's comm string to payload.  Returns the advanced
 * payload pointer.
 */
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	/* uid is in the low 32 bits, gid in the high 32. */
	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}
 357
/* Build a fresh var_kill_data_t for a kill of tpid by spid with signal
 * sig, using the per-CPU data_heap slot as storage.  Returns NULL if
 * the heap lookup fails.  NOTE: the returned pointer aliases the shared
 * per-CPU scratch slot — callers must consume it before reusing the heap.
 */
static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}
 380
/* Record a kill() of tpid with signal sig.  Repeated kills from the
 * same sender within STALE_INFO seconds are aggregated into one array
 * slot of the per-tpid var_tpid_to_data entry.
 */
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	/* Only trace signals selected by the KILL_SIGNALS bitmask. */
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		/* First kill seen for this target: build a one-entry array.
		 * Note kill_data and arr_struct both come from the same
		 * per-CPU data_heap slot, hence the probe_read copy.
		 */
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			/* Unknown sender: claim the first free slot
			 * (meta.pid == 0) and persist immediately.
			 */
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read(&arr_struct->array[i],
						       sizeof(arr_struct->array[i]), kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			/* Array full: drop the event. */
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			/* Recent entry: bump the count in place.  NOTE(review):
			 * kill_data already points at array[index], so this
			 * probe_read copies the element onto itself —
			 * presumably a leftover; confirm intent before removing.
			 */
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		} else {
			/* Stale entry: rebuild it from scratch. */
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read(&arr_struct->array[index],
				       sizeof(arr_struct->array[index]),
				       kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}
 446
 447static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
 448                                   enum bpf_function_id func_id)
 449{
 450        int func_id_key = func_id;
 451
 452        bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
 453        bpf_stat_ctx->bpf_func_stats_data_val =
 454                bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
 455        if (bpf_stat_ctx->bpf_func_stats_data_val)
 456                bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
 457}
 458
 459static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
 460{
 461        if (bpf_stat_ctx->bpf_func_stats_data_val)
 462                bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
 463                        bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
 464}
 465
 466static INLINE void
 467bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
 468                                    struct var_metadata_t* meta)
 469{
 470        if (bpf_stat_ctx->bpf_func_stats_data_val) {
 471                bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
 472                meta->bpf_stats_num_perf_events =
 473                        bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
 474        }
 475        meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
 476        meta->cpu_id = bpf_get_smp_processor_id();
 477}
 478
/* Concatenate dentry name components from filp_dentry up toward the
 * filesystem root into payload (leaf first, each NUL-terminated,
 * bounded by MAX_PATH_DEPTH components).  The walk stops when a dentry
 * is its own parent (the root).  Returns the total bytes written.
 */
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
						     BPF_CORE_READ(filp_dentry, d_name.name));
		/* barrier_var keeps the verifier's view of the bound intact. */
		barrier_var(filepart_length);
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}
 507
/* True if any ancestor directory of filp_dentry (walking d_parent up to
 * MAX_PATH_DEPTH levels, stopping at the self-parented root) has its
 * inode listed in the allowed_directory_inodes map.
 */
static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}
 528
/* Decide whether a file-modification event for file_dentry should be
 * reported: its device must be in allowed_devices, and either its inode
 * is in allowed_file_inodes or some ancestor directory is allowed.
 * Always writes the observed device id and inode through device_id /
 * file_ino (even when returning false).
 */
static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	/* Not directly allowed: fall back to the ancestor-directory check,
	 * starting from the parent.
	 */
	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}
 549
/* Emit a SYSCTL_EVENT whenever a process writes to a /proc/sys file:
 * captures task metadata, cgroup info, ancestry, the written value
 * (from the user buffer) and the sysctl file name.
 */
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	/* Read the value being written from the caller's buffer.
	 * NOTE(review): the bound is CTL_MAXNAME (a name-component limit),
	 * not a value-size limit — presumably intentional truncation; verify.
	 */
	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
	barrier_var(sysctl_val_length);
	if (sysctl_val_length <= CTL_MAXNAME) {
		barrier_var(sysctl_val_length);
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	/* Append the sysctl file's dentry name (leaf only, not full path). */
	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
	barrier_var(sysctl_path_length);
	if (sysctl_path_length <= MAX_PATH) {
		barrier_var(sysctl_path_length);
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	/* Clamp the submitted size to the record struct size. */
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
 602
 603SEC("tracepoint/syscalls/sys_enter_kill")
 604int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
 605{
 606        struct bpf_func_stats_ctx stats_ctx;
 607
 608        bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
 609        int pid = ctx->args[0];
 610        int sig = ctx->args[1];
 611        int ret = trace_var_sys_kill(ctx, pid, sig);
 612        bpf_stats_exit(&stats_ctx);
 613        return ret;
 614};
 615
 616SEC("raw_tracepoint/sched_process_exit")
 617int raw_tracepoint__sched_process_exit(void* ctx)
 618{
 619        int zero = 0;
 620        struct bpf_func_stats_ctx stats_ctx;
 621        bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
 622
 623        u32 tpid = get_userspace_pid();
 624
 625        struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
 626        struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
 627
 628        if (arr_struct == NULL || kill_data == NULL)
 629                goto out;
 630
 631        struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 632        struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 633
 634#ifdef UNROLL
 635#pragma unroll
 636#endif
 637        for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
 638                struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
 639
 640                if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
 641                        bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
 642                        void* payload = kill_data->payload;
 643                        size_t offset = kill_data->payload_length;
 644                        if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
 645                                return 0;
 646                        payload += offset;
 647
 648                        kill_data->kill_target_name_length = 0;
 649                        kill_data->kill_target_cgroup_proc_length = 0;
 650
 651                        size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
 652                        barrier_var(comm_length);
 653                        if (comm_length <= TASK_COMM_LEN) {
 654                                barrier_var(comm_length);
 655                                kill_data->kill_target_name_length = comm_length;
 656                                payload += comm_length;
 657                        }
 658
 659                        size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
 660                                                                       BPF_CORE_READ(proc_kernfs, name));
 661                        barrier_var(cgroup_proc_length);
 662                        if (cgroup_proc_length <= KILL_TARGET_LEN) {
 663                                barrier_var(cgroup_proc_length);
 664                                kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
 665                                payload += cgroup_proc_length;
 666                        }
 667
 668                        bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
 669                        unsigned long data_len = (void*)payload - (void*)kill_data;
 670                        data_len = data_len > sizeof(struct var_kill_data_t)
 671                                ? sizeof(struct var_kill_data_t)
 672                                : data_len;
 673                        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
 674                }
 675        }
 676        bpf_map_delete_elem(&var_tpid_to_data, &tpid);
 677out:
 678        bpf_stats_exit(&stats_ctx);
 679        return 0;
 680}
 681
 682SEC("raw_tracepoint/sched_process_exec")
 683int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
 684{
 685        struct bpf_func_stats_ctx stats_ctx;
 686        bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
 687
 688        struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
 689        u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
 690
 691        bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
 692        if (should_filter_binprm != NULL)
 693                goto out;
 694
 695        int zero = 0;
 696        struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
 697        if (!proc_exec_data)
 698                goto out;
 699
 700        if (INODE_FILTER && inode != INODE_FILTER)
 701                return 0;
 702
 703        u32 pid = get_userspace_pid();
 704        struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 705
 706        proc_exec_data->meta.type = EXEC_EVENT;
 707        proc_exec_data->bin_path_length = 0;
 708        proc_exec_data->cmdline_length = 0;
 709        proc_exec_data->environment_length = 0;
 710        void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
 711                                              proc_exec_data->payload);
 712        payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
 713
 714        struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
 715        proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
 716        proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
 717        proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
 718        proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
 719
 720        const char* filename = BPF_CORE_READ(bprm, filename);
 721        size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
 722        barrier_var(bin_path_length);
 723        if (bin_path_length <= MAX_FILENAME_LEN) {
 724                barrier_var(bin_path_length);
 725                proc_exec_data->bin_path_length = bin_path_length;
 726                payload += bin_path_length;
 727        }
 728
 729        void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
 730        void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
 731        unsigned int cmdline_length = probe_read_lim(payload, arg_start,
 732                                                     arg_end - arg_start, MAX_ARGS_LEN);
 733
 734        if (cmdline_length <= MAX_ARGS_LEN) {
 735                barrier_var(cmdline_length);
 736                proc_exec_data->cmdline_length = cmdline_length;
 737                payload += cmdline_length;
 738        }
 739
 740        if (READ_ENVIRON_FROM_EXEC) {
 741                void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
 742                void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
 743                unsigned long env_len = probe_read_lim(payload, env_start,
 744                                                       env_end - env_start, MAX_ENVIRON_LEN);
 745                if (cmdline_length <= MAX_ENVIRON_LEN) {
 746                        proc_exec_data->environment_length = env_len;
 747                        payload += env_len;
 748                }
 749        }
 750
 751        bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
 752        unsigned long data_len = payload - (void*)proc_exec_data;
 753        data_len = data_len > sizeof(struct var_exec_data_t)
 754                ? sizeof(struct var_exec_data_t)
 755                : data_len;
 756        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
 757out:
 758        bpf_stats_exit(&stats_ctx);
 759        return 0;
 760}
 761
/*
 * kretprobe on do_filp_open(): emit a FILEMOD_EVENT (FMOD_OPEN) when a file
 * is opened for writing and its dentry passes the filemod allowlist.
 */
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	/* do_filp_open() returns the opened struct file*, or an ERR_PTR. */
	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	/* Only writable opens can modify a file. */
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	/*
	 * Skip O_TMPFILE opens. Note O_TMPFILE is defined above as
	 * (__O_TMPFILE | O_DIRECTORY), so this test also matches plain
	 * O_DIRECTORY opens; directories are filtered out below as well.
	 */
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	/* Ignore directories and special files (char/block/fifo/socket). */
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	/* On success the allowlist helper fills in device_id and file_ino. */
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	/* Scratch slot from the data_heap map (keeps the large event struct
	 * off the BPF stack). */
	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	/* FMOD_OPEN events carry only a destination; zero the source fields. */
	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	/* Append the opened file's path; advance payload only when the
	 * returned length is within bounds. */
	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	/*
	 * barrier_var() is a compiler barrier: it prevents the optimizer from
	 * eliding/reordering the bounds check on len, which the BPF verifier
	 * needs in order to prove the payload accesses are safe.
	 */
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Emit only the bytes actually used, clamped to the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
 826
/*
 * kprobe on vfs_link(): emit a FILEMOD_EVENT (FMOD_LINK) when a hard link
 * is created and at least one endpoint passes the filemod allowlist.
 * The payload carries the source path followed by the destination path.
 */
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct inode* dir,
	       struct dentry* new_dentry, struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	/*
	 * Note the &&: the event is reported if EITHER endpoint is allowed;
	 * we bail out only when both are denied. Because of short-circuit
	 * evaluation, when old_dentry is allowed the new_dentry check is
	 * skipped and dst_device_id/dst_file_ino stay 0.
	 */
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	/* Scratch slot from the data_heap map (keeps the large event struct
	 * off the BPF stack). */
	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	/* Append source path, then destination path, back to back. The
	 * barrier_var() calls keep each bounds check intact for the BPF
	 * verifier (compiler barrier). */
	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Emit only the bytes actually used, clamped to the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
 889
/*
 * kprobe on vfs_symlink(): emit a FILEMOD_EVENT (FMOD_SYMLINK) when a
 * symlink is created on an allowed dentry. The payload carries the link
 * target string (oldname) as "source", then the new symlink's own path
 * as "destination".
 */
SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	/* On success the allowlist helper fills in dst_device_id/dst_file_ino. */
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	/* Scratch slot from the data_heap map (keeps the large event struct
	 * off the BPF stack). */
	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	/* Copy the link-target string (NUL-terminated, truncated to
	 * MAX_FILEPATH_LENGTH). barrier_var() keeps the bounds check intact
	 * for the BPF verifier (compiler barrier). */
	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	/* Then append the new symlink's absolute path. */
	len = read_absolute_file_path_from_dentry(dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Emit only the bytes actually used, clamped to the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
 946
 947SEC("raw_tracepoint/sched_process_fork")
 948int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
 949{
 950        struct bpf_func_stats_ctx stats_ctx;
 951        bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
 952
 953        int zero = 0;
 954        struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
 955        if (!fork_data)
 956                goto out;
 957
 958        struct task_struct* parent = (struct task_struct*)ctx->args[0];
 959        struct task_struct* child = (struct task_struct*)ctx->args[1];
 960        fork_data->meta.type = FORK_EVENT;
 961
 962        void* payload = populate_var_metadata(&fork_data->meta, child,
 963                                              BPF_CORE_READ(child, pid), fork_data->payload);
 964        fork_data->parent_pid = BPF_CORE_READ(parent, pid);
 965        fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
 966        fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
 967        bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
 968
 969        unsigned long data_len = payload - (void*)fork_data;
 970        data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
 971        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
 972out:
 973        bpf_stats_exit(&stats_ctx);
 974        return 0;
 975}
/* Loader-visible license declaration; "GPL" permits GPL-only BPF helpers. */
char _license[] SEC("license") = "GPL";
 977