linux/include/linux/damon.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * DAMON api
   4 *
   5 * Author: SeongJae Park <sjpark@amazon.de>
   6 */
   7
   8#ifndef _DAMON_H_
   9#define _DAMON_H_
  10
  11#include <linux/mutex.h>
  12#include <linux/time64.h>
  13#include <linux/types.h>
  14#include <linux/random.h>
  15
  16/* Minimal region size.  Every damon_region is aligned by this. */
  17#define DAMON_MIN_REGION        PAGE_SIZE
  18/* Max priority score for DAMON-based operation schemes */
  19#define DAMOS_MAX_SCORE         (99)
  20
  21/* Get a random number in [l, r) */
  22static inline unsigned long damon_rand(unsigned long l, unsigned long r)
  23{
  24        return l + prandom_u32_max(r - l);
  25}
  26
  27/**
  28 * struct damon_addr_range - Represents an address region of [@start, @end).
  29 * @start:      Start address of the region (inclusive).
  30 * @end:        End address of the region (exclusive).
  31 */
  32struct damon_addr_range {
  33        unsigned long start;
  34        unsigned long end;
  35};
  36
  37/**
  38 * struct damon_region - Represents a monitoring target region.
  39 * @ar:                 The address range of the region.
  40 * @sampling_addr:      Address of the sample for the next access check.
  41 * @nr_accesses:        Access frequency of this region.
  42 * @list:               List head for siblings.
  43 * @age:                Age of this region.
  44 *
  45 * @age is initially zero, increased for each aggregation interval, and reset
  46 * to zero again if the access frequency is significantly changed.  If two
  47 * regions are merged into a new region, both @nr_accesses and @age of the new
  48 * region are set as region size-weighted average of those of the two regions.
  49 */
  50struct damon_region {
  51        struct damon_addr_range ar;
  52        unsigned long sampling_addr;
  53        unsigned int nr_accesses;
  54        struct list_head list;
  55
  56        unsigned int age;
  57/* private: Internal value for age calculation. */
  58        unsigned int last_nr_accesses;
  59};
  60
  61/**
  62 * struct damon_target - Represents a monitoring target.
  63 * @id:                 Unique identifier for this target.
  64 * @nr_regions:         Number of monitoring target regions of this target.
  65 * @regions_list:       Head of the monitoring target regions of this target.
  66 * @list:               List head for siblings.
  67 *
  68 * Each monitoring context could have multiple targets.  For example, a context
  69 * for virtual memory address spaces could have multiple target processes.  The
  70 * @id of each target should be unique among the targets of the context.  For
  71 * example, in the virtual address monitoring context, it could be a pidfd or
  72 * an address of an mm_struct.
  73 */
  74struct damon_target {
  75        unsigned long id;
  76        unsigned int nr_regions;
  77        struct list_head regions_list;
  78        struct list_head list;
  79};
  80
  81/**
  82 * enum damos_action - Represents an action of a Data Access Monitoring-based
  83 * Operation Scheme.
  84 *
  85 * @DAMOS_WILLNEED:     Call ``madvise()`` for the region with MADV_WILLNEED.
  86 * @DAMOS_COLD:         Call ``madvise()`` for the region with MADV_COLD.
  87 * @DAMOS_PAGEOUT:      Call ``madvise()`` for the region with MADV_PAGEOUT.
  88 * @DAMOS_HUGEPAGE:     Call ``madvise()`` for the region with MADV_HUGEPAGE.
  89 * @DAMOS_NOHUGEPAGE:   Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  90 * @DAMOS_STAT:         Do nothing but count the stat.
  91 */
  92enum damos_action {
  93        DAMOS_WILLNEED,
  94        DAMOS_COLD,
  95        DAMOS_PAGEOUT,
  96        DAMOS_HUGEPAGE,
  97        DAMOS_NOHUGEPAGE,
  98        DAMOS_STAT,             /* Do nothing but only record the stat */
  99};
 100
 101/**
 102 * struct damos_quota - Controls the aggressiveness of the given scheme.
 103 * @ms:                 Maximum milliseconds that the scheme can use.
 104 * @sz:                 Maximum bytes of memory that the action can be applied.
 105 * @reset_interval:     Charge reset interval in milliseconds.
 106 *
 107 * @weight_sz:          Weight of the region's size for prioritization.
 108 * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization.
 109 * @weight_age:         Weight of the region's age for prioritization.
 110 *
 111 * To avoid consuming too much CPU time or IO resources for applying the
 112 * &struct damos->action to large memory, DAMON allows users to set time and/or
 113 * size quotas.  The quotas can be set by writing non-zero values to &ms and
 114 * &sz, respectively.  If the time quota is set, DAMON tries to use only up to
 115 * &ms milliseconds within &reset_interval for applying the action.  If the
 116 * size quota is set, DAMON tries to apply the action only up to &sz bytes
 117 * within &reset_interval.
 118 *
 119 * Internally, the time quota is transformed to a size quota using estimated
 120 * throughput of the scheme's action.  DAMON then compares it against &sz and
 121 * uses smaller one as the effective quota.
 122 *
 123 * For selecting regions within the quota, DAMON prioritizes current scheme's
 124 * target memory regions using the &struct damon_primitive->get_scheme_score.
 125 * You could customize the prioritization logic by setting &weight_sz,
 126 * &weight_nr_accesses, and &weight_age, because monitoring primitives are
 127 * encouraged to respect those.
 128 */
 129struct damos_quota {
 130        unsigned long ms;
 131        unsigned long sz;
 132        unsigned long reset_interval;
 133
 134        unsigned int weight_sz;
 135        unsigned int weight_nr_accesses;
 136        unsigned int weight_age;
 137
 138/* private: */
 139        /* For throughput estimation */
 140        unsigned long total_charged_sz;
 141        unsigned long total_charged_ns;
 142
 143        unsigned long esz;      /* Effective size quota in bytes */
 144
 145        /* For charging the quota */
 146        unsigned long charged_sz;
 147        unsigned long charged_from;
 148        struct damon_target *charge_target_from;
 149        unsigned long charge_addr_from;
 150
 151        /* For prioritization */
 152        unsigned long histogram[DAMOS_MAX_SCORE + 1];
 153        unsigned int min_score;
 154};
 155
 156/**
 157 * enum damos_wmark_metric - Represents the watermark metric.
 158 *
 159 * @DAMOS_WMARK_NONE:           Ignore the watermarks of the given scheme.
 160 * @DAMOS_WMARK_FREE_MEM_RATE:  Free memory rate of the system in [0,1000].
 161 */
 162enum damos_wmark_metric {
 163        DAMOS_WMARK_NONE,
 164        DAMOS_WMARK_FREE_MEM_RATE,
 165};
 166
 167/**
 168 * struct damos_watermarks - Controls when a given scheme should be activated.
 169 * @metric:     Metric for the watermarks.
 170 * @interval:   Watermarks check time interval in microseconds.
 171 * @high:       High watermark.
 172 * @mid:        Middle watermark.
 173 * @low:        Low watermark.
 174 *
 175 * If &metric is &DAMOS_WMARK_NONE, the scheme is always active.  Being active
 176 * means DAMON does monitoring and applying the action of the scheme to
 177 * appropriate memory regions.  Else, DAMON checks &metric of the system for at
 178 * least every &interval microseconds and works as below.
 179 *
 180 * If &metric is higher than &high, the scheme is inactivated.  If &metric is
 181 * between &mid and &low, the scheme is activated.  If &metric is lower than
 182 * &low, the scheme is inactivated.
 183 */
 184struct damos_watermarks {
 185        enum damos_wmark_metric metric;
 186        unsigned long interval;
 187        unsigned long high;
 188        unsigned long mid;
 189        unsigned long low;
 190
 191/* private: */
 192        bool activated;
 193};
 194
 195/**
 196 * struct damos_stat - Statistics on a given scheme.
 197 * @nr_tried:   Total number of regions that the scheme is tried to be applied.
 198 * @sz_tried:   Total size of regions that the scheme is tried to be applied.
 199 * @nr_applied: Total number of regions that the scheme is applied.
 200 * @sz_applied: Total size of regions that the scheme is applied.
 201 * @qt_exceeds: Total number of times the quota of the scheme has exceeded.
 202 */
 203struct damos_stat {
 204        unsigned long nr_tried;
 205        unsigned long sz_tried;
 206        unsigned long nr_applied;
 207        unsigned long sz_applied;
 208        unsigned long qt_exceeds;
 209};
 210
 211/**
 212 * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
 213 * @min_sz_region:      Minimum size of target regions.
 214 * @max_sz_region:      Maximum size of target regions.
 215 * @min_nr_accesses:    Minimum ``->nr_accesses`` of target regions.
 216 * @max_nr_accesses:    Maximum ``->nr_accesses`` of target regions.
 217 * @min_age_region:     Minimum age of target regions.
 218 * @max_age_region:     Maximum age of target regions.
 219 * @action:             &damo_action to be applied to the target regions.
 220 * @quota:              Control the aggressiveness of this scheme.
 221 * @wmarks:             Watermarks for automated (in)activation of this scheme.
 222 * @stat:               Statistics of this scheme.
 223 * @list:               List head for siblings.
 224 *
 225 * For each aggregation interval, DAMON finds regions which fit in the
 226 * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
 227 * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
 228 * those.  To avoid consuming too much CPU time or IO resources for the
 229 * &action, &quota is used.
 230 *
 231 * To do the work only when needed, schemes can be activated for specific
 232 * system situations using &wmarks.  If all schemes that registered to the
 233 * monitoring context are inactive, DAMON stops monitoring either, and just
 234 * repeatedly checks the watermarks.
 235 *
 236 * If all schemes that registered to a &struct damon_ctx are inactive, DAMON
 237 * stops monitoring and just repeatedly checks the watermarks.
 238 *
 239 * After applying the &action to each region, &stat_count and &stat_sz is
 240 * updated to reflect the number of regions and total size of regions that the
 241 * &action is applied.
 242 */
 243struct damos {
 244        unsigned long min_sz_region;
 245        unsigned long max_sz_region;
 246        unsigned int min_nr_accesses;
 247        unsigned int max_nr_accesses;
 248        unsigned int min_age_region;
 249        unsigned int max_age_region;
 250        enum damos_action action;
 251        struct damos_quota quota;
 252        struct damos_watermarks wmarks;
 253        struct damos_stat stat;
 254        struct list_head list;
 255};
 256
 257struct damon_ctx;
 258
 259/**
 260 * struct damon_primitive - Monitoring primitives for given use cases.
 261 *
 262 * @init:                       Initialize primitive-internal data structures.
 263 * @update:                     Update primitive-internal data structures.
 264 * @prepare_access_checks:      Prepare next access check of target regions.
 265 * @check_accesses:             Check the accesses to target regions.
 266 * @reset_aggregated:           Reset aggregated accesses monitoring results.
 267 * @get_scheme_score:           Get the score of a region for a scheme.
 268 * @apply_scheme:               Apply a DAMON-based operation scheme.
 269 * @target_valid:               Determine if the target is valid.
 270 * @cleanup:                    Clean up the context.
 271 *
 272 * DAMON can be extended for various address spaces and usages.  For this,
 273 * users should register the low level primitives for their target address
 274 * space and usecase via the &damon_ctx.primitive.  Then, the monitoring thread
 275 * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
 276 * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
 277 * @check_accesses, @target_valid and @prepare_access_checks after each
 278 * &damon_ctx.sample_interval.  Finally, @reset_aggregated is called after each
 279 * &damon_ctx.aggr_interval.
 280 *
 281 * @init should initialize primitive-internal data structures.  For example,
 282 * this could be used to construct proper monitoring target regions and link
 283 * those to @damon_ctx.adaptive_targets.
 284 * @update should update the primitive-internal data structures.  For example,
 285 * this could be used to update monitoring target regions for current status.
 286 * @prepare_access_checks should manipulate the monitoring regions to be
 287 * prepared for the next access check.
 288 * @check_accesses should check the accesses to each region that made after the
 289 * last preparation and update the number of observed accesses of each region.
 290 * It should also return max number of observed accesses that made as a result
 291 * of its update.  The value will be used for regions adjustment threshold.
 292 * @reset_aggregated should reset the access monitoring results that aggregated
 293 * by @check_accesses.
 294 * @get_scheme_score should return the priority score of a region for a scheme
 295 * as an integer in [0, &DAMOS_MAX_SCORE].
 296 * @apply_scheme is called from @kdamond when a region for user provided
 297 * DAMON-based operation scheme is found.  It should apply the scheme's action
 298 * to the region and return bytes of the region that the action is successfully
 299 * applied.
 300 * @target_valid should check whether the target is still valid for the
 301 * monitoring.
 302 * @cleanup is called from @kdamond just before its termination.
 303 */
 304struct damon_primitive {
 305        void (*init)(struct damon_ctx *context);
 306        void (*update)(struct damon_ctx *context);
 307        void (*prepare_access_checks)(struct damon_ctx *context);
 308        unsigned int (*check_accesses)(struct damon_ctx *context);
 309        void (*reset_aggregated)(struct damon_ctx *context);
 310        int (*get_scheme_score)(struct damon_ctx *context,
 311                        struct damon_target *t, struct damon_region *r,
 312                        struct damos *scheme);
 313        unsigned long (*apply_scheme)(struct damon_ctx *context,
 314                        struct damon_target *t, struct damon_region *r,
 315                        struct damos *scheme);
 316        bool (*target_valid)(void *target);
 317        void (*cleanup)(struct damon_ctx *context);
 318};
 319
 320/**
 321 * struct damon_callback - Monitoring events notification callbacks.
 322 *
 323 * @before_start:       Called before starting the monitoring.
 324 * @after_sampling:     Called after each sampling.
 325 * @after_aggregation:  Called after each aggregation.
 326 * @before_terminate:   Called before terminating the monitoring.
 327 * @private:            User private data.
 328 *
 329 * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
 330 * @before_terminate just before starting and finishing the monitoring,
 331 * respectively.  Therefore, those are good places for installing and cleaning
 332 * @private.
 333 *
 334 * The monitoring thread calls @after_sampling and @after_aggregation for each
 335 * of the sampling intervals and aggregation intervals, respectively.
 336 * Therefore, users can safely access the monitoring results without additional
 337 * protection.  For the reason, users are recommended to use these callback for
 338 * the accesses to the results.
 339 *
 340 * If any callback returns non-zero, monitoring stops.
 341 */
 342struct damon_callback {
 343        void *private;
 344
 345        int (*before_start)(struct damon_ctx *context);
 346        int (*after_sampling)(struct damon_ctx *context);
 347        int (*after_aggregation)(struct damon_ctx *context);
 348        void (*before_terminate)(struct damon_ctx *context);
 349};
 350
 351/**
 352 * struct damon_ctx - Represents a context for each monitoring.  This is the
 353 * main interface that allows users to set the attributes and get the results
 354 * of the monitoring.
 355 *
 356 * @sample_interval:            The time between access samplings.
 357 * @aggr_interval:              The time between monitor results aggregations.
 358 * @primitive_update_interval:  The time between monitoring primitive updates.
 359 *
 360 * For each @sample_interval, DAMON checks whether each region is accessed or
 361 * not.  It aggregates and keeps the access information (number of accesses to
 362 * each region) for @aggr_interval time.  DAMON also checks whether the target
 363 * memory regions need update (e.g., by ``mmap()`` calls from the application,
 364 * in case of virtual memory monitoring) and applies the changes for each
 365 * @primitive_update_interval.  All time intervals are in micro-seconds.
 366 * Please refer to &struct damon_primitive and &struct damon_callback for more
 367 * detail.
 368 *
 369 * @kdamond:            Kernel thread who does the monitoring.
 370 * @kdamond_stop:       Notifies whether kdamond should stop.
 371 * @kdamond_lock:       Mutex for the synchronizations with @kdamond.
 372 *
 373 * For each monitoring context, one kernel thread for the monitoring is
 374 * created.  The pointer to the thread is stored in @kdamond.
 375 *
 376 * Once started, the monitoring thread runs until explicitly required to be
 377 * terminated or every monitoring target is invalid.  The validity of the
 378 * targets is checked via the &damon_primitive.target_valid of @primitive.  The
 379 * termination can also be explicitly requested by writing non-zero to
 380 * @kdamond_stop.  The thread sets @kdamond to NULL when it terminates.
 381 * Therefore, users can know whether the monitoring is ongoing or terminated by
 382 * reading @kdamond.  Reads and writes to @kdamond and @kdamond_stop from
 383 * outside of the monitoring thread must be protected by @kdamond_lock.
 384 *
 385 * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
 386 * @kdamond_lock.  Accesses to other fields must be protected by themselves.
 387 *
 388 * @primitive:  Set of monitoring primitives for given use cases.
 389 * @callback:   Set of callbacks for monitoring events notifications.
 390 *
 391 * @min_nr_regions:     The minimum number of adaptive monitoring regions.
 392 * @max_nr_regions:     The maximum number of adaptive monitoring regions.
 393 * @adaptive_targets:   Head of monitoring targets (&damon_target) list.
 394 * @schemes:            Head of schemes (&damos) list.
 395 */
 396struct damon_ctx {
 397        unsigned long sample_interval;
 398        unsigned long aggr_interval;
 399        unsigned long primitive_update_interval;
 400
 401/* private: internal use only */
 402        struct timespec64 last_aggregation;
 403        struct timespec64 last_primitive_update;
 404
 405/* public: */
 406        struct task_struct *kdamond;
 407        struct mutex kdamond_lock;
 408
 409        struct damon_primitive primitive;
 410        struct damon_callback callback;
 411
 412        unsigned long min_nr_regions;
 413        unsigned long max_nr_regions;
 414        struct list_head adaptive_targets;
 415        struct list_head schemes;
 416};
 417
 418static inline struct damon_region *damon_next_region(struct damon_region *r)
 419{
 420        return container_of(r->list.next, struct damon_region, list);
 421}
 422
 423static inline struct damon_region *damon_prev_region(struct damon_region *r)
 424{
 425        return container_of(r->list.prev, struct damon_region, list);
 426}
 427
 428static inline struct damon_region *damon_last_region(struct damon_target *t)
 429{
 430        return list_last_entry(&t->regions_list, struct damon_region, list);
 431}
 432
 433#define damon_for_each_region(r, t) \
 434        list_for_each_entry(r, &t->regions_list, list)
 435
 436#define damon_for_each_region_safe(r, next, t) \
 437        list_for_each_entry_safe(r, next, &t->regions_list, list)
 438
 439#define damon_for_each_target(t, ctx) \
 440        list_for_each_entry(t, &(ctx)->adaptive_targets, list)
 441
 442#define damon_for_each_target_safe(t, next, ctx)        \
 443        list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)
 444
 445#define damon_for_each_scheme(s, ctx) \
 446        list_for_each_entry(s, &(ctx)->schemes, list)
 447
 448#define damon_for_each_scheme_safe(s, next, ctx) \
 449        list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
 450
 451#ifdef CONFIG_DAMON
 452
 453struct damon_region *damon_new_region(unsigned long start, unsigned long end);
 454
 455/*
 456 * Add a region between two other regions
 457 */
 458static inline void damon_insert_region(struct damon_region *r,
 459                struct damon_region *prev, struct damon_region *next,
 460                struct damon_target *t)
 461{
 462        __list_add(&r->list, &prev->list, &next->list);
 463        t->nr_regions++;
 464}
 465
 466void damon_add_region(struct damon_region *r, struct damon_target *t);
 467void damon_destroy_region(struct damon_region *r, struct damon_target *t);
 468
 469struct damos *damon_new_scheme(
 470                unsigned long min_sz_region, unsigned long max_sz_region,
 471                unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 472                unsigned int min_age_region, unsigned int max_age_region,
 473                enum damos_action action, struct damos_quota *quota,
 474                struct damos_watermarks *wmarks);
 475void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 476void damon_destroy_scheme(struct damos *s);
 477
 478struct damon_target *damon_new_target(unsigned long id);
 479void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
 480bool damon_targets_empty(struct damon_ctx *ctx);
 481void damon_free_target(struct damon_target *t);
 482void damon_destroy_target(struct damon_target *t);
 483unsigned int damon_nr_regions(struct damon_target *t);
 484
 485struct damon_ctx *damon_new_ctx(void);
 486void damon_destroy_ctx(struct damon_ctx *ctx);
 487int damon_set_targets(struct damon_ctx *ctx,
 488                unsigned long *ids, ssize_t nr_ids);
 489int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 490                unsigned long aggr_int, unsigned long primitive_upd_int,
 491                unsigned long min_nr_reg, unsigned long max_nr_reg);
 492int damon_set_schemes(struct damon_ctx *ctx,
 493                        struct damos **schemes, ssize_t nr_schemes);
 494int damon_nr_running_ctxs(void);
 495
 496int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
 497int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
 498
 499#endif  /* CONFIG_DAMON */
 500
 501#ifdef CONFIG_DAMON_VADDR
 502bool damon_va_target_valid(void *t);
 503void damon_va_set_primitives(struct damon_ctx *ctx);
 504#endif  /* CONFIG_DAMON_VADDR */
 505
 506#ifdef CONFIG_DAMON_PADDR
 507bool damon_pa_target_valid(void *t);
 508void damon_pa_set_primitives(struct damon_ctx *ctx);
 509#endif  /* CONFIG_DAMON_PADDR */
 510
 511#endif  /* _DAMON_H_ */
 512