qemu/util/qdist.c
<<
>>
Prefs
   1/*
   2 * qdist.c - QEMU helpers for handling frequency distributions of data.
   3 *
   4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
   5 *
   6 * License: GNU GPL, version 2 or later.
   7 *   See the COPYING file in the top-level directory.
   8 */
   9#include "qemu/osdep.h"
  10#include "qemu/qdist.h"
  11
  12#include <math.h>
  13#ifndef NAN
  14#define NAN (0.0 / 0.0)
  15#endif
  16
  17#define QDIST_EMPTY_STR "(empty)"
  18
  19void qdist_init(struct qdist *dist)
  20{
  21    dist->entries = g_new(struct qdist_entry, 1);
  22    dist->size = 1;
  23    dist->n = 0;
  24}
  25
  26void qdist_destroy(struct qdist *dist)
  27{
  28    g_free(dist->entries);
  29}
  30
  31static inline int qdist_cmp_double(double a, double b)
  32{
  33    if (a > b) {
  34        return 1;
  35    } else if (a < b) {
  36        return -1;
  37    }
  38    return 0;
  39}
  40
  41static int qdist_cmp(const void *ap, const void *bp)
  42{
  43    const struct qdist_entry *a = ap;
  44    const struct qdist_entry *b = bp;
  45
  46    return qdist_cmp_double(a->x, b->x);
  47}
  48
  49void qdist_add(struct qdist *dist, double x, long count)
  50{
  51    struct qdist_entry *entry = NULL;
  52
  53    if (dist->n) {
  54        struct qdist_entry e;
  55
  56        e.x = x;
  57        entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
  58    }
  59
  60    if (entry) {
  61        entry->count += count;
  62        return;
  63    }
  64
  65    if (unlikely(dist->n == dist->size)) {
  66        dist->size *= 2;
  67        dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
  68    }
  69    dist->n++;
  70    entry = &dist->entries[dist->n - 1];
  71    entry->x = x;
  72    entry->count = count;
  73    qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
  74}
  75
  76void qdist_inc(struct qdist *dist, double x)
  77{
  78    qdist_add(dist, x, 1);
  79}
  80
  81/*
  82 * Unicode for block elements. See:
  83 *   https://en.wikipedia.org/wiki/Block_Elements
  84 */
  85static const gunichar qdist_blocks[] = {
  86    0x2581,
  87    0x2582,
  88    0x2583,
  89    0x2584,
  90    0x2585,
  91    0x2586,
  92    0x2587,
  93    0x2588
  94};
  95
  96#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
  97
  98/*
  99 * Print a distribution into a string.
 100 *
 101 * This function assumes that appropriate binning has been done on the input;
 102 * see qdist_bin__internal() and qdist_pr_plain().
 103 *
 104 * Callers must free the returned string with g_free().
 105 */
 106static char *qdist_pr_internal(const struct qdist *dist)
 107{
 108    double min, max;
 109    GString *s = g_string_new("");
 110    size_t i;
 111
 112    /* if only one entry, its printout will be either full or empty */
 113    if (dist->n == 1) {
 114        if (dist->entries[0].count) {
 115            g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
 116        } else {
 117            g_string_append_c(s, ' ');
 118        }
 119        goto out;
 120    }
 121
 122    /* get min and max counts */
 123    min = dist->entries[0].count;
 124    max = min;
 125    for (i = 0; i < dist->n; i++) {
 126        struct qdist_entry *e = &dist->entries[i];
 127
 128        if (e->count < min) {
 129            min = e->count;
 130        }
 131        if (e->count > max) {
 132            max = e->count;
 133        }
 134    }
 135
 136    for (i = 0; i < dist->n; i++) {
 137        struct qdist_entry *e = &dist->entries[i];
 138        int index;
 139
 140        /* make an exception with 0; instead of using block[0], print a space */
 141        if (e->count) {
 142            /* divide first to avoid loss of precision when e->count == max */
 143            index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
 144            g_string_append_unichar(s, qdist_blocks[index]);
 145        } else {
 146            g_string_append_c(s, ' ');
 147        }
 148    }
 149 out:
 150    return g_string_free(s, FALSE);
 151}
 152
 153/*
 154 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
 155 * intervals, copying the result to @to.
 156 *
 157 * This function is internal to qdist: only this file and test code should
 158 * ever call it.
 159 *
 160 * Note: calling this function on an already-binned qdist is a bug.
 161 *
 162 * If @n == 0 or @from->n == 1, use @from->n.
 163 */
 164void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
 165{
 166    double xmin, xmax;
 167    double step;
 168    size_t i, j;
 169
 170    qdist_init(to);
 171
 172    if (from->n == 0) {
 173        return;
 174    }
 175    if (n == 0 || from->n == 1) {
 176        n = from->n;
 177    }
 178
 179    /* set equally-sized bins between @from's left and right */
 180    xmin = qdist_xmin(from);
 181    xmax = qdist_xmax(from);
 182    step = (xmax - xmin) / n;
 183
 184    if (n == from->n) {
 185        /* if @from's entries are equally spaced, no need to re-bin */
 186        for (i = 0; i < from->n; i++) {
 187            if (from->entries[i].x != xmin + i * step) {
 188                goto rebin;
 189            }
 190        }
 191        /* they're equally spaced, so copy the dist and bail out */
 192        to->entries = g_renew(struct qdist_entry, to->entries, n);
 193        to->n = from->n;
 194        memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
 195        return;
 196    }
 197
 198 rebin:
 199    j = 0;
 200    for (i = 0; i < n; i++) {
 201        double x;
 202        double left, right;
 203
 204        left = xmin + i * step;
 205        right = xmin + (i + 1) * step;
 206
 207        /* Add x, even if it might not get any counts later */
 208        x = left;
 209        qdist_add(to, x, 0);
 210
 211        /*
 212         * To avoid double-counting we capture [left, right) ranges, except for
 213         * the righmost bin, which captures a [left, right] range.
 214         */
 215        while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
 216            struct qdist_entry *o = &from->entries[j];
 217
 218            qdist_add(to, x, o->count);
 219            j++;
 220        }
 221    }
 222}
 223
 224/*
 225 * Print @dist into a string, after re-binning it into @n bins of consecutive,
 226 * non-overlapping intervals.
 227 *
 228 * If @n == 0, use @orig->n.
 229 *
 230 * Callers must free the returned string with g_free().
 231 */
 232char *qdist_pr_plain(const struct qdist *dist, size_t n)
 233{
 234    struct qdist binned;
 235    char *ret;
 236
 237    if (dist->n == 0) {
 238        return g_strdup(QDIST_EMPTY_STR);
 239    }
 240    qdist_bin__internal(&binned, dist, n);
 241    ret = qdist_pr_internal(&binned);
 242    qdist_destroy(&binned);
 243    return ret;
 244}
 245
 246static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
 247                            uint32_t opt, bool is_left)
 248{
 249    const char *percent;
 250    const char *lparen;
 251    const char *rparen;
 252    GString *s;
 253    double x1, x2, step;
 254    double x;
 255    double n;
 256    int dec;
 257
 258    s = g_string_new("");
 259    if (!(opt & QDIST_PR_LABELS)) {
 260        goto out;
 261    }
 262
 263    dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
 264    percent = opt & QDIST_PR_PERCENT ? "%" : "";
 265
 266    n = n_bins ? n_bins : dist->n;
 267    x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
 268    step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
 269
 270    if (opt & QDIST_PR_100X) {
 271        x *= 100.0;
 272        step *= 100.0;
 273    }
 274    if (opt & QDIST_PR_NOBINRANGE) {
 275        lparen = rparen = "";
 276        x1 = x;
 277        x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
 278    } else {
 279        lparen = "[";
 280        rparen = is_left ? ")" : "]";
 281        if (is_left) {
 282            x1 = x;
 283            x2 = x + step;
 284        } else {
 285            x1 = x - step;
 286            x2 = x;
 287        }
 288    }
 289    g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
 290    if (!(opt & QDIST_PR_NOBINRANGE)) {
 291        g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
 292    }
 293    g_string_append(s, percent);
 294 out:
 295    return g_string_free(s, FALSE);
 296}
 297
 298/*
 299 * Print the distribution's histogram into a string.
 300 *
 301 * See also: qdist_pr_plain().
 302 *
 303 * Callers must free the returned string with g_free().
 304 */
 305char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
 306{
 307    const char *border = opt & QDIST_PR_BORDER ? "|" : "";
 308    char *llabel, *rlabel;
 309    char *hgram;
 310    GString *s;
 311
 312    if (dist->n == 0) {
 313        return g_strdup(QDIST_EMPTY_STR);
 314    }
 315
 316    s = g_string_new("");
 317
 318    llabel = qdist_pr_label(dist, n_bins, opt, true);
 319    rlabel = qdist_pr_label(dist, n_bins, opt, false);
 320    hgram = qdist_pr_plain(dist, n_bins);
 321    g_string_append_printf(s, "%s%s%s%s%s",
 322                           llabel, border, hgram, border, rlabel);
 323    g_free(llabel);
 324    g_free(rlabel);
 325    g_free(hgram);
 326
 327    return g_string_free(s, FALSE);
 328}
 329
 330static inline double qdist_x(const struct qdist *dist, int index)
 331{
 332    if (dist->n == 0) {
 333        return NAN;
 334    }
 335    return dist->entries[index].x;
 336}
 337
 338double qdist_xmin(const struct qdist *dist)
 339{
 340    return qdist_x(dist, 0);
 341}
 342
 343double qdist_xmax(const struct qdist *dist)
 344{
 345    return qdist_x(dist, dist->n - 1);
 346}
 347
 348size_t qdist_unique_entries(const struct qdist *dist)
 349{
 350    return dist->n;
 351}
 352
 353unsigned long qdist_sample_count(const struct qdist *dist)
 354{
 355    unsigned long count = 0;
 356    size_t i;
 357
 358    for (i = 0; i < dist->n; i++) {
 359        struct qdist_entry *e = &dist->entries[i];
 360
 361        count += e->count;
 362    }
 363    return count;
 364}
 365
 366static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
 367                                 size_t n, unsigned long count)
 368{
 369    /* amortize the recursion by using a base case > 2 */
 370    if (n <= 8) {
 371        size_t i;
 372        double ret = 0;
 373
 374        for (i = 0; i < n; i++) {
 375            struct qdist_entry *e = &dist->entries[index + i];
 376
 377            ret += e->x * e->count / count;
 378        }
 379        return ret;
 380    } else {
 381        size_t n2 = n / 2;
 382
 383        return qdist_pairwise_avg(dist, index, n2, count) +
 384               qdist_pairwise_avg(dist, index + n2, n - n2, count);
 385    }
 386}
 387
 388double qdist_avg(const struct qdist *dist)
 389{
 390    unsigned long count;
 391
 392    count = qdist_sample_count(dist);
 393    if (!count) {
 394        return NAN;
 395    }
 396    return qdist_pairwise_avg(dist, 0, dist->n, count);
 397}
 398