linux/mm/page_counter.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

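/*
 * propagate_protected_usage - propagate protected usage to the parent
 * @c: child counter whose usage changed
 * @usage: the new usage of @c, in pages
 *
 * Record how much of @c's usage currently falls under its min and low
 * protections, and add any change to the parent's children_min_usage
 * and children_low_usage aggregates.
 */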
static void propagate_protected_usage(struct page_counter *c,
                                      unsigned long usage)
{
        unsigned long protected, old_protected;
        long delta;

        if (!c->parent)
                return;

        if (c->min || atomic_long_read(&c->min_usage)) {
                if (usage <= c->min)
                        protected = usage;
                else
                        protected = 0;

                old_protected = atomic_long_xchg(&c->min_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_min_usage);
        }

        if (c->low || atomic_long_read(&c->low_usage)) {
                if (usage <= c->low)
                        protected = usage;
                else
                        protected = 0;

                old_protected = atomic_long_xchg(&c->low_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_low_usage);
        }
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        propagate_protected_usage(counter, new);
        /* More uncharges than charges? */
        WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_protected_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
}

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points at the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS.  If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit.  When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_protected_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt.
                         */
                        c->failcnt++;
                        *fail = c;
                        goto failed;
                }
                propagate_protected_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}
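
/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * page_counter_try_charge().  The name example_charge() is hypothetical;
 * real users such as the memory controller typically try to reclaim from
 * the subtree rooted at @failed and retry instead of failing outright.
 */
#if 0
static int example_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *failed;

        if (!page_counter_try_charge(counter, nr_pages, &failed))
                return -ENOMEM; /* @failed is the counter that hit its limit */

        /* ... use the charged pages ... */

        page_counter_uncharge(counter, nr_pages);
        return 0;
}
#endif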

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = atomic_long_read(&counter->usage);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (atomic_long_read(&counter->usage) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}

/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->min = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->low = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}
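
/*
 * Illustrative sketch, not part of the original file: how a limit string
 * written by userspace might be parsed and applied.  The name
 * example_write_max() is hypothetical; the memory controller's cgroup
 * files follow a similar parse-then-set pattern.
 */
#if 0
static int example_write_max(struct page_counter *counter, const char *buf)
{
        unsigned long nr_pages;
        int err;

        /* "max" (the caller-chosen magic string) means no limit */
        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;

        /* Fails with -EBUSY if usage already exceeds the new limit */
        return page_counter_set_max(counter, nr_pages);
}
#endif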