linux/fs/ceph/quota.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * quota.c - CephFS quota
   4 *
   5 * Copyright (C) 2017-2018 SUSE
   6 */
   7
   8#include <linux/statfs.h>
   9
  10#include "super.h"
  11#include "mds_client.h"
  12
  13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14{
  15        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  16        if (inc)
  17                atomic64_inc(&mdsc->quotarealms_count);
  18        else
  19                atomic64_dec(&mdsc->quotarealms_count);
  20}
  21
  22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23{
  24        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  25        struct super_block *sb = mdsc->fsc->sb;
  26
  27        if (atomic64_read(&mdsc->quotarealms_count) > 0)
  28                return true;
  29        /* if root is the real CephFS root, we don't have quota realms */
  30        if (sb->s_root->d_inode &&
  31            (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
  32                return false;
  33        /* otherwise, we can't know for sure */
  34        return true;
  35}
  36
  37void ceph_handle_quota(struct ceph_mds_client *mdsc,
  38                       struct ceph_mds_session *session,
  39                       struct ceph_msg *msg)
  40{
  41        struct super_block *sb = mdsc->fsc->sb;
  42        struct ceph_mds_quota *h = msg->front.iov_base;
  43        struct ceph_vino vino;
  44        struct inode *inode;
  45        struct ceph_inode_info *ci;
  46
  47        if (msg->front.iov_len < sizeof(*h)) {
  48                pr_err("%s corrupt message mds%d len %d\n", __func__,
  49                       session->s_mds, (int)msg->front.iov_len);
  50                ceph_msg_dump(msg);
  51                return;
  52        }
  53
  54        /* increment msg sequence number */
  55        mutex_lock(&session->s_mutex);
  56        session->s_seq++;
  57        mutex_unlock(&session->s_mutex);
  58
  59        /* lookup inode */
  60        vino.ino = le64_to_cpu(h->ino);
  61        vino.snap = CEPH_NOSNAP;
  62        inode = ceph_find_inode(sb, vino);
  63        if (!inode) {
  64                pr_warn("Failed to find inode %llu\n", vino.ino);
  65                return;
  66        }
  67        ci = ceph_inode(inode);
  68
  69        spin_lock(&ci->i_ceph_lock);
  70        ci->i_rbytes = le64_to_cpu(h->rbytes);
  71        ci->i_rfiles = le64_to_cpu(h->rfiles);
  72        ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  73        __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  74                            le64_to_cpu(h->max_files));
  75        spin_unlock(&ci->i_ceph_lock);
  76
  77        /* avoid calling iput_final() in dispatch thread */
  78        ceph_async_iput(inode);
  79}
  80
  81static struct ceph_quotarealm_inode *
  82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  83{
  84        struct ceph_quotarealm_inode *qri = NULL;
  85        struct rb_node **node, *parent = NULL;
  86
  87        mutex_lock(&mdsc->quotarealms_inodes_mutex);
  88        node = &(mdsc->quotarealms_inodes.rb_node);
  89        while (*node) {
  90                parent = *node;
  91                qri = container_of(*node, struct ceph_quotarealm_inode, node);
  92
  93                if (ino < qri->ino)
  94                        node = &((*node)->rb_left);
  95                else if (ino > qri->ino)
  96                        node = &((*node)->rb_right);
  97                else
  98                        break;
  99        }
 100        if (!qri || (qri->ino != ino)) {
 101                /* Not found, create a new one and insert it */
 102                qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 103                if (qri) {
 104                        qri->ino = ino;
 105                        qri->inode = NULL;
 106                        qri->timeout = 0;
 107                        mutex_init(&qri->mutex);
 108                        rb_link_node(&qri->node, parent, node);
 109                        rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 110                } else
 111                        pr_warn("Failed to alloc quotarealms_inode\n");
 112        }
 113        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 114
 115        return qri;
 116}
 117
 118/*
 119 * This function will try to lookup a realm inode which isn't visible in the
 120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 121 * maintained in the mdsc and freed only when the filesystem is umounted.
 122 *
 123 * Note that these inodes are kept in this list even if the lookup fails, which
 124 * allows to prevent useless lookup requests.
 125 */
 126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 127                                             struct super_block *sb,
 128                                             struct ceph_snap_realm *realm)
 129{
 130        struct ceph_quotarealm_inode *qri;
 131        struct inode *in;
 132
 133        qri = find_quotarealm_inode(mdsc, realm->ino);
 134        if (!qri)
 135                return NULL;
 136
 137        mutex_lock(&qri->mutex);
 138        if (qri->inode) {
 139                /* A request has already returned the inode */
 140                mutex_unlock(&qri->mutex);
 141                return qri->inode;
 142        }
 143        /* Check if this inode lookup has failed recently */
 144        if (qri->timeout &&
 145            time_before_eq(jiffies, qri->timeout)) {
 146                mutex_unlock(&qri->mutex);
 147                return NULL;
 148        }
 149        in = ceph_lookup_inode(sb, realm->ino);
 150        if (IS_ERR(in)) {
 151                pr_warn("Can't lookup inode %llx (err: %ld)\n",
 152                        realm->ino, PTR_ERR(in));
 153                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 154        } else {
 155                qri->timeout = 0;
 156                qri->inode = in;
 157        }
 158        mutex_unlock(&qri->mutex);
 159
 160        return in;
 161}
 162
 163void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 164{
 165        struct ceph_quotarealm_inode *qri;
 166        struct rb_node *node;
 167
 168        /*
 169         * It should now be safe to clean quotarealms_inode tree without holding
 170         * mdsc->quotarealms_inodes_mutex...
 171         */
 172        mutex_lock(&mdsc->quotarealms_inodes_mutex);
 173        while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 174                node = rb_first(&mdsc->quotarealms_inodes);
 175                qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 176                rb_erase(node, &mdsc->quotarealms_inodes);
 177                iput(qri->inode);
 178                kfree(qri);
 179        }
 180        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 181}
 182
 183/*
 184 * This function walks through the snaprealm for an inode and returns the
 185 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 186 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 187 * instead.
 188 *
 189 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 190 * returned realm.
 191 *
 192 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 193 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 194 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 195 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 196 * will be restarted.
 197 */
 198static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 199                                               struct inode *inode, bool retry)
 200{
 201        struct ceph_inode_info *ci = NULL;
 202        struct ceph_snap_realm *realm, *next;
 203        struct inode *in;
 204        bool has_quota;
 205
 206        if (ceph_snap(inode) != CEPH_NOSNAP)
 207                return NULL;
 208
 209restart:
 210        realm = ceph_inode(inode)->i_snap_realm;
 211        if (realm)
 212                ceph_get_snap_realm(mdsc, realm);
 213        else
 214                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
 215                                   "null i_snap_realm\n", ceph_vinop(inode));
 216        while (realm) {
 217                bool has_inode;
 218
 219                spin_lock(&realm->inodes_with_caps_lock);
 220                has_inode = realm->inode;
 221                in = has_inode ? igrab(realm->inode) : NULL;
 222                spin_unlock(&realm->inodes_with_caps_lock);
 223                if (has_inode && !in)
 224                        break;
 225                if (!in) {
 226                        up_read(&mdsc->snap_rwsem);
 227                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 228                        down_read(&mdsc->snap_rwsem);
 229                        if (IS_ERR_OR_NULL(in))
 230                                break;
 231                        ceph_put_snap_realm(mdsc, realm);
 232                        if (!retry)
 233                                return ERR_PTR(-EAGAIN);
 234                        goto restart;
 235                }
 236
 237                ci = ceph_inode(in);
 238                has_quota = __ceph_has_any_quota(ci);
 239                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 240                ceph_async_iput(in);
 241
 242                next = realm->parent;
 243                if (has_quota || !next)
 244                       return realm;
 245
 246                ceph_get_snap_realm(mdsc, next);
 247                ceph_put_snap_realm(mdsc, realm);
 248                realm = next;
 249        }
 250        if (realm)
 251                ceph_put_snap_realm(mdsc, realm);
 252
 253        return NULL;
 254}
 255
 256bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 257{
 258        struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
 259        struct ceph_snap_realm *old_realm, *new_realm;
 260        bool is_same;
 261
 262restart:
 263        /*
 264         * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 265         * However, get_quota_realm may drop it temporarily.  By setting the
 266         * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 267         * dropped and we can then restart the whole operation.
 268         */
 269        down_read(&mdsc->snap_rwsem);
 270        old_realm = get_quota_realm(mdsc, old, true);
 271        new_realm = get_quota_realm(mdsc, new, false);
 272        if (PTR_ERR(new_realm) == -EAGAIN) {
 273                up_read(&mdsc->snap_rwsem);
 274                if (old_realm)
 275                        ceph_put_snap_realm(mdsc, old_realm);
 276                goto restart;
 277        }
 278        is_same = (old_realm == new_realm);
 279        up_read(&mdsc->snap_rwsem);
 280
 281        if (old_realm)
 282                ceph_put_snap_realm(mdsc, old_realm);
 283        if (new_realm)
 284                ceph_put_snap_realm(mdsc, new_realm);
 285
 286        return is_same;
 287}
 288
 /* Kind of check performed by check_quota_exceeded() on each realm. */
 enum quota_check_op {
         /* check quota max_files limit */
         QUOTA_CHECK_MAX_FILES_OP,
         /* check quota max_bytes limit */
         QUOTA_CHECK_MAX_BYTES_OP,
         /* check if quota max_bytes limit is approaching */
         QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 };
 295
 296/*
 297 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 298 * realm, it will execute quota check operation defined by the 'op' parameter.
 299 * The snaprealm walk is interrupted if the quota check detects that the quota
 300 * is exceeded or if the root inode is reached.
 301 */
 302static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 303                                 loff_t delta)
 304{
 305        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 306        struct ceph_inode_info *ci;
 307        struct ceph_snap_realm *realm, *next;
 308        struct inode *in;
 309        u64 max, rvalue;
 310        bool exceeded = false;
 311
 312        if (ceph_snap(inode) != CEPH_NOSNAP)
 313                return false;
 314
 315        down_read(&mdsc->snap_rwsem);
 316restart:
 317        realm = ceph_inode(inode)->i_snap_realm;
 318        if (realm)
 319                ceph_get_snap_realm(mdsc, realm);
 320        else
 321                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
 322                                   "null i_snap_realm\n", ceph_vinop(inode));
 323        while (realm) {
 324                bool has_inode;
 325
 326                spin_lock(&realm->inodes_with_caps_lock);
 327                has_inode = realm->inode;
 328                in = has_inode ? igrab(realm->inode) : NULL;
 329                spin_unlock(&realm->inodes_with_caps_lock);
 330                if (has_inode && !in)
 331                        break;
 332                if (!in) {
 333                        up_read(&mdsc->snap_rwsem);
 334                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 335                        down_read(&mdsc->snap_rwsem);
 336                        if (IS_ERR_OR_NULL(in))
 337                                break;
 338                        ceph_put_snap_realm(mdsc, realm);
 339                        goto restart;
 340                }
 341                ci = ceph_inode(in);
 342                spin_lock(&ci->i_ceph_lock);
 343                if (op == QUOTA_CHECK_MAX_FILES_OP) {
 344                        max = ci->i_max_files;
 345                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
 346                } else {
 347                        max = ci->i_max_bytes;
 348                        rvalue = ci->i_rbytes;
 349                }
 350                spin_unlock(&ci->i_ceph_lock);
 351                switch (op) {
 352                case QUOTA_CHECK_MAX_FILES_OP:
 353                        exceeded = (max && (rvalue >= max));
 354                        break;
 355                case QUOTA_CHECK_MAX_BYTES_OP:
 356                        exceeded = (max && (rvalue + delta > max));
 357                        break;
 358                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
 359                        if (max) {
 360                                if (rvalue >= max)
 361                                        exceeded = true;
 362                                else {
 363                                        /*
 364                                         * when we're writing more that 1/16th
 365                                         * of the available space
 366                                         */
 367                                        exceeded =
 368                                                (((max - rvalue) >> 4) < delta);
 369                                }
 370                        }
 371                        break;
 372                default:
 373                        /* Shouldn't happen */
 374                        pr_warn("Invalid quota check op (%d)\n", op);
 375                        exceeded = true; /* Just break the loop */
 376                }
 377                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 378                ceph_async_iput(in);
 379
 380                next = realm->parent;
 381                if (exceeded || !next)
 382                        break;
 383                ceph_get_snap_realm(mdsc, next);
 384                ceph_put_snap_realm(mdsc, realm);
 385                realm = next;
 386        }
 387        if (realm)
 388                ceph_put_snap_realm(mdsc, realm);
 389        up_read(&mdsc->snap_rwsem);
 390
 391        return exceeded;
 392}
 393
 394/*
 395 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 396 * @inode:      directory where a new file is being created
 397 *
 398 * This functions returns true is max_files quota allows a new file to be
 399 * created.  It is necessary to walk through the snaprealm hierarchy (until the
 400 * FS root) to check all realms with quotas set.
 401 */
 402bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 403{
 404        if (!ceph_has_realms_with_quotas(inode))
 405                return false;
 406
 407        WARN_ON(!S_ISDIR(inode->i_mode));
 408
 409        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
 410}
 411
 412/*
 413 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 414 * @inode:      inode being written
 415 * @newsize:    new size if write succeeds
 416 *
 417 * This functions returns true is max_bytes quota allows a file size to reach
 418 * @newsize; it returns false otherwise.
 419 */
 420bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 421{
 422        loff_t size = i_size_read(inode);
 423
 424        if (!ceph_has_realms_with_quotas(inode))
 425                return false;
 426
 427        /* return immediately if we're decreasing file size */
 428        if (newsize <= size)
 429                return false;
 430
 431        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 432}
 433
 434/*
 435 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 436 * @inode:      inode being written
 437 * @newsize:    new size if write succeeds
 438 *
 439 * This function returns true if the new file size @newsize will be consuming
 440 * more than 1/16th of the available quota space; it returns false otherwise.
 441 */
 442bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 443{
 444        loff_t size = ceph_inode(inode)->i_reported_size;
 445
 446        if (!ceph_has_realms_with_quotas(inode))
 447                return false;
 448
 449        /* return immediately if we're decreasing file size */
 450        if (newsize <= size)
 451                return false;
 452
 453        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 454                                    (newsize - size));
 455}
 456
 457/*
 458 * ceph_quota_update_statfs - if root has quota update statfs with quota status
 459 * @fsc:        filesystem client instance
 460 * @buf:        statfs to update
 461 *
 462 * If the mounted filesystem root has max_bytes quota set, update the filesystem
 463 * statistics with the quota status.
 464 *
 465 * This function returns true if the stats have been updated, false otherwise.
 466 */
 467bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 468{
 469        struct ceph_mds_client *mdsc = fsc->mdsc;
 470        struct ceph_inode_info *ci;
 471        struct ceph_snap_realm *realm;
 472        struct inode *in;
 473        u64 total = 0, used, free;
 474        bool is_updated = false;
 475
 476        down_read(&mdsc->snap_rwsem);
 477        realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 478        up_read(&mdsc->snap_rwsem);
 479        if (!realm)
 480                return false;
 481
 482        spin_lock(&realm->inodes_with_caps_lock);
 483        in = realm->inode ? igrab(realm->inode) : NULL;
 484        spin_unlock(&realm->inodes_with_caps_lock);
 485        if (in) {
 486                ci = ceph_inode(in);
 487                spin_lock(&ci->i_ceph_lock);
 488                if (ci->i_max_bytes) {
 489                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 490                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 491                        /* It is possible for a quota to be exceeded.
 492                         * Report 'zero' in that case
 493                         */
 494                        free = total > used ? total - used : 0;
 495                }
 496                spin_unlock(&ci->i_ceph_lock);
 497                if (total) {
 498                        buf->f_blocks = total;
 499                        buf->f_bfree = free;
 500                        buf->f_bavail = free;
 501                        is_updated = true;
 502                }
 503                iput(in);
 504        }
 505        ceph_put_snap_realm(mdsc, realm);
 506
 507        return is_updated;
 508}
 509
 510