/* linux/samples/bpf/offwaketime_kern.c */
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
   7#include <uapi/linux/bpf.h>
   8#include <bpf/bpf_helpers.h>
   9#include <bpf/bpf_tracing.h>
  10#include <uapi/linux/ptrace.h>
  11#include <uapi/linux/perf_event.h>
  12#include <linux/version.h>
  13#include <linux/sched.h>
  14
  15#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
  16
  17#define MINBLOCK_US     1
  18
  19struct key_t {
  20        char waker[TASK_COMM_LEN];
  21        char target[TASK_COMM_LEN];
  22        u32 wret;
  23        u32 tret;
  24};
  25
  26struct bpf_map_def SEC("maps") counts = {
  27        .type = BPF_MAP_TYPE_HASH,
  28        .key_size = sizeof(struct key_t),
  29        .value_size = sizeof(u64),
  30        .max_entries = 10000,
  31};
  32
  33struct bpf_map_def SEC("maps") start = {
  34        .type = BPF_MAP_TYPE_HASH,
  35        .key_size = sizeof(u32),
  36        .value_size = sizeof(u64),
  37        .max_entries = 10000,
  38};
  39
  40struct wokeby_t {
  41        char name[TASK_COMM_LEN];
  42        u32 ret;
  43};
  44
  45struct bpf_map_def SEC("maps") wokeby = {
  46        .type = BPF_MAP_TYPE_HASH,
  47        .key_size = sizeof(u32),
  48        .value_size = sizeof(struct wokeby_t),
  49        .max_entries = 10000,
  50};
  51
  52struct bpf_map_def SEC("maps") stackmap = {
  53        .type = BPF_MAP_TYPE_STACK_TRACE,
  54        .key_size = sizeof(u32),
  55        .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
  56        .max_entries = 10000,
  57};
  58
  59#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
  60
  61SEC("kprobe/try_to_wake_up")
  62int waker(struct pt_regs *ctx)
  63{
  64        struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
  65        struct wokeby_t woke;
  66        u32 pid;
  67
  68        pid = _(p->pid);
  69
  70        bpf_get_current_comm(&woke.name, sizeof(woke.name));
  71        woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
  72
  73        bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
  74        return 0;
  75}
  76
  77static inline int update_counts(void *ctx, u32 pid, u64 delta)
  78{
  79        struct wokeby_t *woke;
  80        u64 zero = 0, *val;
  81        struct key_t key;
  82
  83        __builtin_memset(&key.waker, 0, sizeof(key.waker));
  84        bpf_get_current_comm(&key.target, sizeof(key.target));
  85        key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
  86        key.wret = 0;
  87
  88        woke = bpf_map_lookup_elem(&wokeby, &pid);
  89        if (woke) {
  90                key.wret = woke->ret;
  91                __builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
  92                bpf_map_delete_elem(&wokeby, &pid);
  93        }
  94
  95        val = bpf_map_lookup_elem(&counts, &key);
  96        if (!val) {
  97                bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
  98                val = bpf_map_lookup_elem(&counts, &key);
  99                if (!val)
 100                        return 0;
 101        }
 102        (*val) += delta;
 103        return 0;
 104}
 105
 106#if 1
 107/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 108struct sched_switch_args {
 109        unsigned long long pad;
 110        char prev_comm[16];
 111        int prev_pid;
 112        int prev_prio;
 113        long long prev_state;
 114        char next_comm[16];
 115        int next_pid;
 116        int next_prio;
 117};
 118SEC("tracepoint/sched/sched_switch")
 119int oncpu(struct sched_switch_args *ctx)
 120{
 121        /* record previous thread sleep time */
 122        u32 pid = ctx->prev_pid;
 123#else
 124SEC("kprobe/finish_task_switch")
 125int oncpu(struct pt_regs *ctx)
 126{
 127        struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
 128        /* record previous thread sleep time */
 129        u32 pid = _(p->pid);
 130#endif
 131        u64 delta, ts, *tsp;
 132
 133        ts = bpf_ktime_get_ns();
 134        bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
 135
 136        /* calculate current thread's delta time */
 137        pid = bpf_get_current_pid_tgid();
 138        tsp = bpf_map_lookup_elem(&start, &pid);
 139        if (!tsp)
 140                /* missed start or filtered */
 141                return 0;
 142
 143        delta = bpf_ktime_get_ns() - *tsp;
 144        bpf_map_delete_elem(&start, &pid);
 145        delta = delta / 1000;
 146        if (delta < MINBLOCK_US)
 147                return 0;
 148
 149        return update_counts(ctx, pid, delta);
 150}
 151char _license[] SEC("license") = "GPL";
 152u32 _version SEC("version") = LINUX_VERSION_CODE;
 153