linux/lib/proportions.c
/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the
 *   wrap-around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
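
/*
 * Illustrative sketch only (not part of this file's API; all names below are
 * made up): with a toy local counter, the lazy catch-up described above is
 * just a right shift by the number of missed periods, c - c_{j}.
 *
 *   struct toy_local {
 *           unsigned long events;           // x_{j}
 *           unsigned long period;           // last seen global period boundary
 *   };
 *
 *   // t is the global event count; period/2 == 1UL << (shift - 1)
 *   static void toy_catch_up(struct toy_local *pl, unsigned long t, int shift)
 *   {
 *           unsigned long half = 1UL << (shift - 1);
 *           unsigned long global_period = t & ~(half - 1);
 *           unsigned long missed = (global_period - pl->period) >> (shift - 1);
 *
 *           if (missed < BITS_PER_LONG)
 *                   pl->events >>= missed;  // x_{j} /= 2^(c - c_{j})
 *           else
 *                   pl->events = 0;
 *           pl->period = global_period;     // c_{j} = c
 *   }
 */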

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL);
        if (err)
                goto out;

        err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the wmb from prop_change_shift()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
        rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}

/*
 * PERCPU
 */

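/*
 * Folding batch for the per-cpu event counters; it grows with the log of the
 * number of possible CPUs so that larger machines fold into the shared count
 * less often, at the cost of a larger worst-case drift (nr_cpu_ids *
 * PROP_BATCH) in the cheap percpu_counter_read() value.
 */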
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init(&pl->events, 0, GFP_KERNEL);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

        /*
         * For each missed period, we halve the local counter.
         * Basically:
         *   pl->events >>= (global_period - pl->period) >> (pg->shift - 1);
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (period < BITS_PER_LONG) {
                s64 val = percpu_counter_read(&pl->events);

                if (val < (nr_cpu_ids * PROP_BATCH))
                        val = percpu_counter_sum(&pl->events);

                __percpu_counter_add(&pl->events, -val + (val >> period),
                                        PROP_BATCH);
        } else
                percpu_counter_set(&pl->events, 0);

        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        __percpu_counter_add(&pl->events, 1, PROP_BATCH);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 * E.g. frac = PROP_FRAC_BASE/4 stops counting local events once this pl
 * accounts for more than a quarter of the recent global events.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
                           struct prop_local_percpu *pl, long frac)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);

        if (unlikely(frac != PROP_FRAC_BASE)) {
                unsigned long period_2 = 1UL << (pg->shift - 1);
                unsigned long counter_mask = period_2 - 1;
                unsigned long global_count;
                long numerator, denominator;

                numerator = percpu_counter_read_positive(&pl->events);
                global_count = percpu_counter_read(&pg->events);
                denominator = period_2 + (global_count & counter_mask);

                if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
                        goto out_put;
        }

        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);

out_put:
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
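
/*
 * Hypothetical usage sketch (the names below are made up, not an in-tree
 * caller; error unwinding and interrupt context are ignored): one descriptor
 * tracks the global event stream, every participant owns a prop_local_percpu,
 * and prop_fraction_percpu() yields that participant's recent share as
 * numerator/denominator.
 *
 *   static struct prop_descriptor example_prop;
 *
 *   static int example_init(struct prop_local_percpu *me)
 *   {
 *           int err = prop_descriptor_init(&example_prop, 10); // period 2^10
 *
 *           if (err)
 *                   return err;
 *           return prop_local_init_percpu(me);
 *   }
 *
 *   static void example_event(struct prop_local_percpu *me)
 *   {
 *           __prop_inc_percpu(&example_prop, me);       // ++x_{j}, ++t
 *   }
 *
 *   static void example_share(struct prop_local_percpu *me,
 *                             long *num, long *den)
 *   {
 *           prop_fraction_percpu(&example_prop, me, num, den);
 *   }
 */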

/*
 * SINGLE
 */
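
/*
 * Same algorithm as the percpu flavour above, but the local event count is a
 * plain unsigned long instead of a percpu_counter, so no per-object percpu
 * allocation is needed.
 */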

int prop_local_init_single(struct prop_local_single *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
