linux/mm/damon/core.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/slab.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif

/* Get a random number in [l, r) */
#define damon_rand(l, r) ((l) + prandom_u32_max((r) - (l)))

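/*
 * Usage note: the macro expands its arguments textually ('l' twice), so
 * callers should pass simple, side-effect-free expressions.  For example,
 * damon_rand(1, 10) yields a pseudo-random value in [1, 10), which
 * damon_split_regions_of() below uses to pick a split point between 10%
 * and 90% of a region.
 */
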
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;

/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	return region;
}

/*
 * Add a region between two other regions
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}

static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}

/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}

void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}

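/*
 * Example (illustrative sketch, not part of the API contract): a monitoring
 * primitive would typically build a target and hand it an initial region
 * using the constructors above, roughly as below.  The id value and the
 * address range are arbitrary placeholders.
 *
 *	struct damon_target *t = damon_new_target(42);
 *	struct damon_region *r;
 *
 *	if (t) {
 *		r = damon_new_region(0x7f0000000000, 0x7f0000100000);
 *		if (r)
 *			damon_add_region(r, t);
 *	}
 *
 * After this, damon_nr_regions(t) is 1, and damon_destroy_target(t) would
 * later free both the target and all of its regions.
 */
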
struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);

	return ctx;
}

static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	damon_destroy_targets(ctx);
	kfree(ctx);
}

/**
 * damon_set_targets() - Set monitoring targets.
 * @ctx:	monitoring context
 * @ids:	array of target ids
 * @nr_ids:	number of entries in @ids
 *
 * This function should not be called while the kdamond is running.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_targets(struct damon_ctx *ctx,
		      unsigned long *ids, ssize_t nr_ids)
{
	ssize_t i;
	struct damon_target *t, *next;

	damon_destroy_targets(ctx);

	for (i = 0; i < nr_ids; i++) {
		t = damon_new_target(ids[i]);
		if (!t) {
			pr_err("Failed to alloc damon_target\n");
			/* The caller should do cleanup of the ids itself */
			damon_for_each_target_safe(t, next, ctx)
				damon_destroy_target(t);
			return -ENOMEM;
		}
		damon_add_target(ctx, t);
	}

	return 0;
}

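/*
 * Example (illustrative sketch): the meaning of a target id is defined by
 * the monitoring primitive in use, so the values below are placeholders
 * only.  A caller holding an already configured context could register two
 * targets like this:
 *
 *	unsigned long ids[] = {1, 2};
 *	int err = damon_set_targets(ctx, ids, ARRAY_SIZE(ids));
 *
 *	if (err)
 *		pr_err("damon_set_targets() failed: %d\n", err);
 *
 * Note that any targets previously set on @ctx are destroyed first.
 */
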
/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimum number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in microseconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long primitive_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3) {
		pr_err("min_nr_regions (%lu) must be at least 3\n",
				min_nr_reg);
		return -EINVAL;
	}
	if (min_nr_reg > max_nr_reg) {
		pr_err("invalid nr_regions.  min (%lu) > max (%lu)\n",
				min_nr_reg, max_nr_reg);
		return -EINVAL;
	}

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}

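/*
 * Example (illustrative sketch): all intervals are in microseconds, so a
 * 5 ms sampling interval, a 100 ms aggregation interval, and a 1 s
 * primitive update interval with 10 to 1000 regions would be set up as:
 *
 *	int err = damon_set_attrs(ctx, 5000, 100000, 1000000, 10, 1000);
 *
 *	if (err)
 *		pr_err("damon_set_attrs() failed: %d\n", err);
 *
 * Passing min_nr_reg below 3, or min_nr_reg larger than max_nr_reg,
 * returns -EINVAL without touching @ctx.
 */
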
/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
{
	int nr_ctxs;

	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);

	return nr_ctxs;
}

/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}

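/*
 * Worked example (illustrative): if the targets cover 1 GiB in total and
 * min_nr_regions is the default 10, the limit is 1073741824 / 10 =
 * 107374182 bytes (~102 MiB).  kdamond_merge_regions() uses this as the
 * upper bound for a merged region, so merging alone cannot collapse the
 * monitored space into much fewer than min_nr_regions regions.
 */
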
static bool damon_kdamond_running(struct damon_ctx *ctx)
{
	bool running;

	mutex_lock(&ctx->kdamond_lock);
	running = ctx->kdamond != NULL;
	mutex_unlock(&ctx->kdamond_lock);

	return running;
}

static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond_stop = false;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}

/**
 * damon_start() - Starts monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If a
 * group of threads created by another 'damon_start()' call is currently
 * running, this function does nothing and returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}

/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	if (ctx->kdamond) {
		ctx->kdamond_stop = true;
		mutex_unlock(&ctx->kdamond_lock);
		while (damon_kdamond_running(ctx))
			usleep_range(ctx->sample_interval,
					ctx->sample_interval * 2);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}

/**
 * damon_stop() - Stops monitoring for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}

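/*
 * Example (illustrative sketch): a caller such as an in-kernel user or a
 * debugfs-style interface typically drives the lifecycle with one context
 * per kdamond.  Error handling is abbreviated:
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	int err;
 *
 *	... set the monitoring primitive, attributes and targets here ...
 *
 *	err = damon_start(&ctx, 1);
 *	if (!err)
 *		err = damon_stop(&ctx, 1);
 *	damon_destroy_ctx(ctx);
 *
 * damon_start() refuses to run (-EBUSY) while any other context group is
 * already running, and damon_stop() returns -EPERM for a context whose
 * kdamond is not running.
 */
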
/*
 * damon_check_reset_time_interval() - Check if a time interval has elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time.  If so, it also updates the baseline to current time for next check.
 *
 * Return:	true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}

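/*
 * Worked example (illustrative): with @interval = 100000 (the default
 * aggregation interval, 100 ms), the comparison is against 100000 * 1000 =
 * 100,000,000 ns.  The first call at least 100 ms after @baseline returns
 * true and moves @baseline forward, so the caller performs its periodic
 * work roughly once per interval.
 */
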
/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(t, r, damon_nr_regions(t));
			r->nr_accesses = 0;
		}
	}
}

#define sz_damon_region(r) ((r)->ar.end - (r)->ar.start)

/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}

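/*
 * Worked example (illustrative): merging a 4096-byte region with
 * nr_accesses == 10 into an adjacent 12288-byte region with nr_accesses
 * == 2 gives (10 * 4096 + 2 * 12288) / (4096 + 12288) = 65536 / 16384 = 4,
 * i.e. the size-weighted average access frequency of the combined region.
 */
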
#define diff_of(a, b) ((a) > (b) ? (a) - (b) : (b) - (a))

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (prev && prev->ar.end == r->ar.start &&
		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}

/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and whose
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}

/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	damon_insert_region(new, r, damon_next_region(r), t);
}

/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				     struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10% and at most 90% of the original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}

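/*
 * Worked example (illustrative, assuming DAMON_MIN_REGION == PAGE_SIZE ==
 * 4096): for a 40960-byte region, damon_rand(1, 10) returning 3 gives
 * sz_sub = ALIGN_DOWN(3 * 40960 / 10, 4096) = ALIGN_DOWN(12288, 4096) =
 * 12288, so the region is split into [start, start + 12288) and
 * [start + 12288, end).  When nr_subs is 3, the loop then continues on the
 * left (12288-byte) piece for a possible further split.
 */
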
/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal to or smaller than half of
 * the user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}

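/*
 * Worked example (illustrative): with the default max_nr_regions of 1000,
 * splitting happens only while the total number of regions is at most 500.
 * If an aggregation pass ends with the same region count as the previous
 * one and that count is below 333 (1000 / 3), each region is split three
 * ways instead of two, to probe whether the middle of a region has a
 * different access frequency.
 */
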
/*
 * Check whether it is time to update the monitoring primitive
 * (e.g., re-check the target monitoring regions)
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}

/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring should be stopped.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;
	bool stop;

	mutex_lock(&ctx->kdamond_lock);
	stop = ctx->kdamond_stop;
	mutex_unlock(&ctx->kdamond_lock);
	if (stop)
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}

static void set_kdamond_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond_stop = true;
	mutex_unlock(&ctx->kdamond_lock);
}

/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	mutex_lock(&ctx->kdamond_lock);
	pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
	mutex_unlock(&ctx->kdamond_lock);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		set_kdamond_stop(ctx);

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			set_kdamond_stop(ctx);

		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				set_kdamond_stop(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate &&
			ctx->callback.before_terminate(ctx))
		set_kdamond_stop(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	do_exit(0);
}

#include "core-test.h"
