/* linux/include/linux/psi_types.h */
   1#ifndef _LINUX_PSI_TYPES_H
   2#define _LINUX_PSI_TYPES_H
   3
   4#include <linux/kthread.h>
   5#include <linux/seqlock.h>
   6#include <linux/types.h>
   7#include <linux/kref.h>
   8#include <linux/wait.h>
   9
  10#ifdef CONFIG_PSI
  11
  12/* Tracked task states */
  13enum psi_task_count {
  14        NR_IOWAIT,
  15        NR_MEMSTALL,
  16        NR_RUNNING,
  17        /*
  18         * This can't have values other than 0 or 1 and could be
  19         * implemented as a bit flag. But for now we still have room
  20         * in the first cacheline of psi_group_cpu, and this way we
  21         * don't have to special case any state tracking for it.
  22         */
  23        NR_ONCPU,
  24        NR_PSI_TASK_COUNTS = 4,
  25};
  26
  27/* Task state bitmasks */
  28#define TSK_IOWAIT      (1 << NR_IOWAIT)
  29#define TSK_MEMSTALL    (1 << NR_MEMSTALL)
  30#define TSK_RUNNING     (1 << NR_RUNNING)
  31#define TSK_ONCPU       (1 << NR_ONCPU)
  32
  33/* Resources that workloads could be stalled on */
  34enum psi_res {
  35        PSI_IO,
  36        PSI_MEM,
  37        PSI_CPU,
  38        NR_PSI_RESOURCES = 3,
  39};
  40
  41/*
  42 * Pressure states for each resource:
  43 *
  44 * SOME: Stalled tasks & working tasks
  45 * FULL: Stalled tasks & no working tasks
  46 */
  47enum psi_states {
  48        PSI_IO_SOME,
  49        PSI_IO_FULL,
  50        PSI_MEM_SOME,
  51        PSI_MEM_FULL,
  52        PSI_CPU_SOME,
  53        PSI_CPU_FULL,
  54        /* Only per-CPU, to weigh the CPU in the global average: */
  55        PSI_NONIDLE,
  56        NR_PSI_STATES = 7,
  57};
  58
  59enum psi_aggregators {
  60        PSI_AVGS = 0,
  61        PSI_POLL,
  62        NR_PSI_AGGREGATORS,
  63};
  64
  65struct psi_group_cpu {
  66        /* 1st cacheline updated by the scheduler */
  67
  68        /* Aggregator needs to know of concurrent changes */
  69        seqcount_t seq ____cacheline_aligned_in_smp;
  70
  71        /* States of the tasks belonging to this group */
  72        unsigned int tasks[NR_PSI_TASK_COUNTS];
  73
  74        /* Aggregate pressure state derived from the tasks */
  75        u32 state_mask;
  76
  77        /* Period time sampling buckets for each state of interest (ns) */
  78        u32 times[NR_PSI_STATES];
  79
  80        /* Time of last task change in this group (rq_clock) */
  81        u64 state_start;
  82
  83        /* 2nd cacheline updated by the aggregator */
  84
  85        /* Delta detection against the sampling buckets */
  86        u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
  87                        ____cacheline_aligned_in_smp;
  88};
  89
  90/* PSI growth tracking window */
  91struct psi_window {
  92        /* Window size in ns */
  93        u64 size;
  94
  95        /* Start time of the current window in ns */
  96        u64 start_time;
  97
  98        /* Value at the start of the window */
  99        u64 start_value;
 100
 101        /* Value growth in the previous window */
 102        u64 prev_growth;
 103};
 104
 105struct psi_trigger {
 106        /* PSI state being monitored by the trigger */
 107        enum psi_states state;
 108
 109        /* User-spacified threshold in ns */
 110        u64 threshold;
 111
 112        /* List node inside triggers list */
 113        struct list_head node;
 114
 115        /* Backpointer needed during trigger destruction */
 116        struct psi_group *group;
 117
 118        /* Wait queue for polling */
 119        wait_queue_head_t event_wait;
 120
 121        /* Pending event flag */
 122        int event;
 123
 124        /* Tracking window */
 125        struct psi_window win;
 126
 127        /*
 128         * Time last event was generated. Used for rate-limiting
 129         * events to one per window
 130         */
 131        u64 last_event_time;
 132
 133        /* Refcounting to prevent premature destruction */
 134        struct kref refcount;
 135};
 136
 137struct psi_group {
 138        /* Protects data used by the aggregator */
 139        struct mutex avgs_lock;
 140
 141        /* Per-cpu task state & time tracking */
 142        struct psi_group_cpu __percpu *pcpu;
 143
 144        /* Running pressure averages */
 145        u64 avg_total[NR_PSI_STATES - 1];
 146        u64 avg_last_update;
 147        u64 avg_next_update;
 148
 149        /* Aggregator work control */
 150        struct delayed_work avgs_work;
 151
 152        /* Total stall times and sampled pressure averages */
 153        u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
 154        unsigned long avg[NR_PSI_STATES - 1][3];
 155
 156        /* Monitor work control */
 157        struct task_struct __rcu *poll_task;
 158        struct timer_list poll_timer;
 159        wait_queue_head_t poll_wait;
 160        atomic_t poll_wakeup;
 161
 162        /* Protects data used by the monitor */
 163        struct mutex trigger_lock;
 164
 165        /* Configured polling triggers */
 166        struct list_head triggers;
 167        u32 nr_triggers[NR_PSI_STATES - 1];
 168        u32 poll_states;
 169        u64 poll_min_period;
 170
 171        /* Total stall times at the start of monitor activation */
 172        u64 polling_total[NR_PSI_STATES - 1];
 173        u64 polling_next_update;
 174        u64 polling_until;
 175};
 176
 177#else /* CONFIG_PSI */
 178
 179struct psi_group { };
 180
 181#endif /* CONFIG_PSI */
 182
 183#endif /* _LINUX_PSI_TYPES_H */
 184