linux/samples/bpf/offwaketime_kern.c
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

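/*
 * Off-CPU + wakeup ("off-wake") time profiling: each interval a task spends
 * blocked is charged to a key combining the blocked task's comm and kernel
 * stack with the comm and kernel stack of the task that woke it up.  The
 * in-tree loader, samples/bpf/offwaketime_user.c, attaches these programs
 * and reads the "counts" map.
 */

/* Read one field of a kernel struct safely via bpf_probe_read_kernel(). */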
#define _(P)                                                                   \
        ({                                                                     \
                typeof(P) val;                                                 \
                bpf_probe_read_kernel(&val, sizeof(val), &(P));                \
                val;                                                           \
        })

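/* Ignore off-CPU intervals shorter than this many microseconds. */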
#define MINBLOCK_US     1

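/*
 * Aggregation key: comm and stack id of the waking task ("waker"/"wret")
 * and of the task that was blocked ("target"/"tret").
 */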
struct key_t {
        char waker[TASK_COMM_LEN];
        char target[TASK_COMM_LEN];
        u32 wret;
        u32 tret;
};

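/* Total blocked time in microseconds, accumulated per struct key_t. */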
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, struct key_t);
        __type(value, u64);
        __uint(max_entries, 10000);
} counts SEC(".maps");

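/* Timestamp (ns) taken when a pid is switched off the CPU, keyed by pid. */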
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, u32);
        __type(value, u64);
        __uint(max_entries, 10000);
} start SEC(".maps");

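/*
 * What the waker recorded: its comm and the stack id of the wakeup call
 * site.  Stored in the "wokeby" hash below, indexed by the pid of the task
 * being woken, and consumed later in update_counts().
 */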
struct wokeby_t {
        char name[TASK_COMM_LEN];
        u32 ret;
};

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, u32);
        __type(value, struct wokeby_t);
        __uint(max_entries, 10000);
} wokeby SEC(".maps");

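/* Stack trace storage; bpf_get_stackid() returns an index into this map. */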
struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
        __uint(key_size, sizeof(u32));
        __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
        __uint(max_entries, 10000);
} stackmap SEC(".maps");

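/* BPF_F_FAST_STACK_CMP: deduplicate stack traces by hash comparison only. */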
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

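/*
 * Fires on every try_to_wake_up(); PT_REGS_PARM1 is the task being woken.
 * Remember who issued the wakeup and from which kernel stack.
 */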
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
        struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
        struct wokeby_t woke;
        u32 pid;

        pid = _(p->pid);

        bpf_get_current_comm(&woke.name, sizeof(woke.name));
        woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

        bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
        return 0;
}

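/*
 * Charge "delta" microseconds of blocked time to the (waker, target) stack
 * pair, merging in any pending wakeup info recorded by waker() for this pid.
 */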
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
        struct wokeby_t *woke;
        u64 zero = 0, *val;
        struct key_t key;

        __builtin_memset(&key.waker, 0, sizeof(key.waker));
        bpf_get_current_comm(&key.target, sizeof(key.target));
        key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
        key.wret = 0;

        woke = bpf_map_lookup_elem(&wokeby, &pid);
        if (woke) {
                key.wret = woke->ret;
                __builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
                bpf_map_delete_elem(&wokeby, &pid);
        }

        val = bpf_map_lookup_elem(&counts, &key);
        if (!val) {
                bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
                val = bpf_map_lookup_elem(&counts, &key);
                if (!val)
                        return 0;
        }
        (*val) += delta;
        return 0;
}

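/*
 * Two alternative hooks for the context switch: the sched_switch tracepoint
 * (selected by "#if 1") or a kprobe on finish_task_switch().  Either way,
 * record when the previous thread left the CPU, then compute how long the
 * current thread had been blocked and hand that to update_counts().
 */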
#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
        unsigned long long pad;
        char prev_comm[16];
        int prev_pid;
        int prev_prio;
        long long prev_state;
        char next_comm[16];
        int next_pid;
        int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
        /* record previous thread sleep time */
        u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
        struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
        /* record previous thread sleep time */
        u32 pid = _(p->pid);
#endif
        u64 delta, ts, *tsp;

        ts = bpf_ktime_get_ns();
        bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

        /* calculate current thread's delta time */
        pid = bpf_get_current_pid_tgid();
        tsp = bpf_map_lookup_elem(&start, &pid);
        if (!tsp)
                /* missed start or filtered */
                return 0;

        delta = bpf_ktime_get_ns() - *tsp;
        bpf_map_delete_elem(&start, &pid);
        delta = delta / 1000;
        if (delta < MINBLOCK_US)
                return 0;

        return update_counts(ctx, pid, delta);
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
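/*
 * Loading sketch (not part of the original sample; the real loader is
 * samples/bpf/offwaketime_user.c).  Assuming this file is compiled to
 * "offwaketime_kern.o", a libbpf-based loader could look roughly like:
 *
 *      struct bpf_object *obj = bpf_object__open_file("offwaketime_kern.o", NULL);
 *      struct bpf_program *prog;
 *
 *      if (!obj || bpf_object__load(obj))
 *              return 1;
 *      bpf_object__for_each_program(prog, obj)
 *              if (!bpf_program__attach(prog))
 *                      return 1;
 *
 * then walk the "counts" map with bpf_map_get_next_key()/bpf_map_lookup_elem()
 * and resolve the wret/tret stack ids through "stackmap" to print the stacks.
 */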