linux/kernel/irq/timings.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/static_key.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
#include <linux/irq.h>
#include <linux/math64.h>

#include <trace/events/irq.h>

#include "internals.h"

DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);

DEFINE_PER_CPU(struct irq_timings, irq_timings);

struct irqt_stat {
        u64     next_evt;       /* anticipated timestamp of the next interrupt */
        u64     last_ts;        /* timestamp of the last recorded interrupt */
        u64     variance;       /* sum of squared deviations; >> IRQ_TIMINGS_SHIFT gives the variance */
        u32     avg;            /* online average of the intervals */
        u32     nr_samples;     /* number of intervals in the current sequence */
        int     anomalies;      /* consecutive intervals outside 3 x stddev */
        int     valid;          /* stable enough to predict the next event */
};

static DEFINE_IDR(irqt_stats);

void irq_timings_enable(void)
{
        static_branch_enable(&irq_timing_enabled);
}

void irq_timings_disable(void)
{
        static_branch_disable(&irq_timing_enabled);
}
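
#if 0   /* Illustrative sketch, not compiled: how the static key above is
         * meant to be consumed on the interrupt hot path. Modeled on
         * record_irq_time() in internals.h; treat the exact helper names
         * as assumptions of this sketch rather than a reference. While
         * the key is disabled, static_branch_likely() is patched down to
         * a no-op, so the cost in the handler is negligible.
         */
static __always_inline void record_irq_time_sketch(struct irq_desc *desc)
{
        if (!static_branch_likely(&irq_timing_enabled))
                return;

        if (desc->istate & IRQS_TIMINGS)
                irq_timings_push(local_clock(), irq_desc_get_irq(desc));
}
#endif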

/**
 * irqs_update - update the irq timing statistics with a new timestamp
 *
 * @irqs: an irqt_stat struct pointer
 * @ts: the new timestamp
 *
 * The statistics are computed online, in other words, the code is
 * designed to compute the statistics on a stream of values rather
 * than doing multiple passes on the values to compute the average,
 * then the variance. The integer division introduces a loss of
 * precision, but with an acceptable error margin compared to the
 * results we would get with double floating point precision: we are
 * dealing with nanoseconds, so big numbers, consequently the mantissa
 * is negligible, especially when converting the time to microseconds
 * afterwards.
 *
 * The computation happens at idle time. When the CPU is not idle, the
 * interrupts' timestamps are stored in the circular buffer; when the
 * CPU goes idle and this routine is called, all the buffer's values
 * are injected into the statistical model, continuing to extend the
 * statistics from the previous busy-idle cycle.
 *
 * Observation showed that a device will trigger a burst of periodic
 * interrupts followed by one or two peaks of longer time, for
 * instance when an SD card device flushes its cache, then the
 * periodic intervals occur again. A one second inactivity period
 * resets the stats, which gives us the certainty that the statistical
 * values won't exceed 1x10^9, thus the computation won't overflow.
 *
 * Basically, the purpose of the algorithm is to watch the periodic
 * interrupts and eliminate the peaks.
 *
 * An interrupt is considered periodically stable if the intervals of
 * its occurrences follow the normal distribution, thus the values
 * comply with:
 *
 *      avg - 3 x stddev < value < avg + 3 x stddev
 *
 * Which can be simplified to:
 *
 *      -3 x stddev < value - avg < 3 x stddev
 *
 *      abs(value - avg) < 3 x stddev
 *
 * In order to save a costly square root computation, we use the
 * variance. For the record, stddev = sqrt(variance). The equation
 * above becomes:
 *
 *      abs(value - avg) < 3 x sqrt(variance)
 *
 * And finally we square it:
 *
 *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
 *
 *      (value - avg) x (value - avg) < 9 x variance
 *
 * Statistically speaking, any value outside this interval is
 * considered an anomaly and is discarded. However, a normal
 * distribution only appears once there are at least 30 samples (the
 * usual rule of thumb in statistics). When there are three
 * consecutive anomalies, the statistics are reset.
 *
 */
static void irqs_update(struct irqt_stat *irqs, u64 ts)
{
        u64 old_ts = irqs->last_ts;
        u64 variance = 0;
        u64 interval;
        s64 diff;

        /*
         * The timestamps are absolute time values; we need to compute
         * the timing interval between two interrupts.
         */
        irqs->last_ts = ts;

        /*
         * The interval type is u64 in order to deal with the same
         * type in our computation, which prevents subtle issues with
         * overflow, sign and division.
         */
        interval = ts - old_ts;

        /*
         * The interrupt triggered more than one second after the
         * previous one, which ends the sequence considered predictable
         * for our purpose. In this case, assume we have the beginning
         * of a new sequence and the timestamp is its first value. As
         * it is impossible to predict anything at this point, return.
         *
         * Note the first timestamp of a sequence will always fall
         * into this test because old_ts is zero. That is what we
         * want as we need another timestamp to compute an interval.
         */
        if (interval >= NSEC_PER_SEC) {
                memset(irqs, 0, sizeof(*irqs));
                irqs->last_ts = ts;
                return;
        }

        /*
         * Pre-compute the delta with the average as the result is
         * used several times in this function.
         */
        diff = interval - irqs->avg;

        /*
         * Increment the number of samples.
         */
        irqs->nr_samples++;

        /*
         * Online variance divided by the number of elements if there
         * is more than one sample. Normally the formula is division
         * by nr_samples - 1, but we assume the number of elements
         * will be more than 32, so dividing by 32 instead of 31 is
         * precise enough.
         */
        if (likely(irqs->nr_samples > 1))
                variance = irqs->variance >> IRQ_TIMINGS_SHIFT;

        /*
         * The rule of thumb in statistics for the normal distribution
         * is having at least 30 samples in order for the model to
         * apply. Values outside the interval are considered an
         * anomaly.
         */
        if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
                /*
                 * Once three consecutive anomalies have been seen,
                 * the next one resets the stats as the interrupt is
                 * no longer stable enough.
                 */
                if (irqs->anomalies++ >= 3) {
                        memset(irqs, 0, sizeof(*irqs));
                        irqs->last_ts = ts;
                        return;
                }
        } else {
                /*
                 * The anomalies must be consecutive, so at this
                 * point, we reset the anomalies counter.
                 */
                irqs->anomalies = 0;
        }

        /*
         * The interrupt is considered stable enough to try to predict
         * the next event on it.
         */
        irqs->valid = 1;

        /*
         * Online average algorithm:
         *
         *  new_average = average + ((value - average) / count)
         *
         * The variance computation depends on the new average
         * to be computed here first.
         *
         */
        irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);

        /*
         * Online variance algorithm:
         *
         *  new_variance = variance + (value - average) x (value - new_average)
         *
         * Warning: irqs->avg is updated with the line above, hence
         * 'interval - irqs->avg' is no longer equal to 'diff'
         */
        irqs->variance = irqs->variance + (diff * (interval - irqs->avg));

        /*
         * Update the next event
         */
        irqs->next_evt = ts + irqs->avg;
}
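
#if 0   /* Illustrative sketch, not compiled: the online average/variance
         * update of irqs_update() in isolation, runnable in userspace.
         * It assumes IRQ_TIMINGS_SHIFT == 5 (divide by 32), consistent
         * with the comments above; the names and the input stream are
         * made up for the example. Feeding a periodic ~1000ns stream
         * with one 100000ns peak shows the squared 3-sigma test
         * (diff^2 > 9 x variance) flagging the peak once 30 samples
         * are in, while the model keeps running.
         */
#include <stdio.h>
#include <stdint.h>

#define SHIFT 5 /* stands in for IRQ_TIMINGS_SHIFT */

int main(void)
{
        uint64_t m2 = 0;        /* sum of squared deviations, irqs->variance */
        uint32_t avg = 0, nr_samples = 0;
        int anomalies = 0, i;

        for (i = 0; i < 64; i++) {
                /* periodic ~1000ns stream with one late peak at i == 40 */
                uint64_t interval = (i == 40) ? 100000 : 1000 + (i & 1);
                int64_t diff = interval - avg;

                nr_samples++;

                if (nr_samples >= 30 && diff * diff > 9 * (m2 >> SHIFT)) {
                        printf("sample %d: %llu flagged as anomaly\n", i,
                               (unsigned long long)interval);
                        if (anomalies++ >= 3) { /* reset, as in the code */
                                avg = 0; m2 = 0;
                                nr_samples = 0; anomalies = 0;
                                continue;
                        }
                } else {
                        anomalies = 0;
                }

                avg += diff >> SHIFT;           /* online average */
                m2 += diff * (interval - avg);  /* online variance sum */
        }

        printf("avg=%u variance=%llu\n", avg,
               (unsigned long long)(m2 >> SHIFT));
        return 0;
}
#endif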

/**
 * irq_timings_next_event - Return when the next event is supposed to arrive
 *
 * During the last busy cycle, the number of interrupts is incremented
 * and stored in the irq_timings structure. This information is
 * necessary to:
 *
 * - know if the index in the table wrapped around:
 *
 *      If more interrupts than the array size happened during the
 *      last busy/idle cycle, the index wrapped around and we have to
 *      begin with the next element in the array, which is the oldest
 *      one in the sequence; otherwise we begin at index 0.
 *
 * - have an indication of the interrupt activity on this CPU
 *   (eg. irq/sec)
 *
 * The values are 'consumed' after being inserted in the statistical
 * model, thus the count is reinitialized.
 *
 * The array of values **must** be browsed in the time direction, the
 * timestamp must increase between an element and the next one.
 *
 * Returns a nanosecond time based estimate of the earliest interrupt,
 * U64_MAX otherwise.
 */
u64 irq_timings_next_event(u64 now)
{
        struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
        struct irqt_stat *irqs;
        struct irqt_stat __percpu *s;
        u64 ts, next_evt = U64_MAX;
        int i, irq = 0;

        /*
         * This function must be called with local irqs disabled in
         * order to prevent the timings circular buffer from being
         * updated while we are reading it.
         */
        lockdep_assert_irqs_disabled();

        /*
         * Number of elements in the circular buffer: if it happens it
         * was flushed before, then the number of elements could be
         * smaller than IRQ_TIMINGS_SIZE, so the count is used;
         * otherwise the array size is used, as we wrapped. The index
         * begins from zero when we did not wrap. That could be done
         * in a nicer way with a proper circular array structure
         * type, but at the cost of extra computation in the
         * interrupt handler hot path. We choose efficiency.
         *
         * Inject the measured irq/timestamp pairs into the
         * statistical model while decrementing the counter because we
         * consume the data from our circular buffer.
         */
        for (i = irqts->count & IRQ_TIMINGS_MASK,
                     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
             irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {

                irq = irq_timing_decode(irqts->values[i], &ts);

                s = idr_find(&irqt_stats, irq);
                if (s) {
                        irqs = this_cpu_ptr(s);
                        irqs_update(irqs, ts);
                }
        }

        /*
         * Look in the list of interrupts' statistics for the earliest
         * next event.
         */
        idr_for_each_entry(&irqt_stats, s, i) {

                irqs = this_cpu_ptr(s);

                if (!irqs->valid)
                        continue;

                if (irqs->next_evt <= now) {
                        irq = i;
                        next_evt = now;

                        /*
                         * This interrupt mustn't be used in the
                         * future until new events occur and update
                         * the statistics.
                         */
                        irqs->valid = 0;
                        break;
                }

                if (irqs->next_evt < next_evt) {
                        irq = i;
                        next_evt = irqs->next_evt;
                }
        }

        return next_evt;
}
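
#if 0   /* Illustrative sketch, not compiled: the circular buffer walk
         * above, runnable in userspace. With a power-of-two buffer,
         * "count & MASK" is both the next write slot and, once the
         * buffer wrapped, the oldest element, so the walk starts
         * there. The packing (timestamp in the upper bits, irq number
         * in the low 16 bits) mirrors what irq_timing_encode()/
         * irq_timing_decode() in internals.h are expected to do; the
         * field widths here are assumptions of this sketch.
         */
#include <stdio.h>
#include <stdint.h>

#define SIZE 32                 /* IRQ_TIMINGS_SIZE */
#define MASK (SIZE - 1)         /* IRQ_TIMINGS_MASK */

int main(void)
{
        uint64_t values[SIZE];
        int count = 0, i, n;

        /* producer side: 40 events overflow the 32-slot ring */
        for (i = 0; i < 40; i++)
                values[count++ & MASK] = ((uint64_t)(1000 + i) << 16) | 11;

        /* consumer side: oldest element first, newest last */
        for (i = count & MASK, n = count > SIZE ? SIZE : count;
             n > 0; n--, i = (i + 1) & MASK) {
                uint64_t ts = values[i] >> 16;  /* decode timestamp */
                int irq = values[i] & 0xffff;   /* decode irq number */
                printf("irq %d at ts %llu\n", irq, (unsigned long long)ts);
        }
        return 0;
}
#endif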

void irq_timings_free(int irq)
{
        struct irqt_stat __percpu *s;

        s = idr_find(&irqt_stats, irq);
        if (s) {
                free_percpu(s);
                idr_remove(&irqt_stats, irq);
        }
}

int irq_timings_alloc(int irq)
{
        struct irqt_stat __percpu *s;
        int id;

        /*
         * Some platforms can have the same private interrupt per cpu,
         * so this function may be called several times with the
         * same interrupt number. Just bail out in case the per cpu
         * stat structure is already allocated.
         */
        s = idr_find(&irqt_stats, irq);
        if (s)
                return 0;

        s = alloc_percpu(*s);
        if (!s)
                return -ENOMEM;

        idr_preload(GFP_KERNEL);
        id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
        idr_preload_end();

        if (id < 0) {
                free_percpu(s);
                return id;
        }

        return 0;
}
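
#if 0   /* Illustrative sketch, not compiled: the idr_preload()/
         * idr_alloc() pairing used above is the usual way to back an
         * insertion that must not sleep with a GFP_KERNEL allocation:
         * idr_preload() pre-allocates internal nodes (and disables
         * preemption) so the GFP_NOWAIT idr_alloc() is unlikely to
         * fail for lack of memory. A minimal stand-alone rendition;
         * the function name is made up for the example.
         */
static int example_idr_store(struct idr *idr, int id, void *ptr)
{
        int ret;

        idr_preload(GFP_KERNEL);        /* may sleep, fills the cache */
        ret = idr_alloc(idr, ptr, id, id + 1, GFP_NOWAIT);
        idr_preload_end();              /* re-enables preemption */

        return ret < 0 ? ret : 0;
}
#endif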