linux/fs/ceph/quota.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * quota.c - CephFS quota
   4 *
   5 * Copyright (C) 2017-2018 SUSE
   6 */
   7
   8#include <linux/statfs.h>
   9
  10#include "super.h"
  11#include "mds_client.h"
  12
  13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14{
  15        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  16        if (inc)
  17                atomic64_inc(&mdsc->quotarealms_count);
  18        else
  19                atomic64_dec(&mdsc->quotarealms_count);
  20}
  21
  22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23{
  24        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  25        struct super_block *sb = mdsc->fsc->sb;
  26        struct inode *root = d_inode(sb->s_root);
  27
  28        if (atomic64_read(&mdsc->quotarealms_count) > 0)
  29                return true;
  30        /* if root is the real CephFS root, we don't have quota realms */
  31        if (root && ceph_ino(root) == CEPH_INO_ROOT)
  32                return false;
  33        /* otherwise, we can't know for sure */
  34        return true;
  35}
  36
  37void ceph_handle_quota(struct ceph_mds_client *mdsc,
  38                       struct ceph_mds_session *session,
  39                       struct ceph_msg *msg)
  40{
  41        struct super_block *sb = mdsc->fsc->sb;
  42        struct ceph_mds_quota *h = msg->front.iov_base;
  43        struct ceph_vino vino;
  44        struct inode *inode;
  45        struct ceph_inode_info *ci;
  46
  47        if (msg->front.iov_len < sizeof(*h)) {
  48                pr_err("%s corrupt message mds%d len %d\n", __func__,
  49                       session->s_mds, (int)msg->front.iov_len);
  50                ceph_msg_dump(msg);
  51                return;
  52        }
  53
  54        /* increment msg sequence number */
  55        mutex_lock(&session->s_mutex);
  56        session->s_seq++;
  57        mutex_unlock(&session->s_mutex);
  58
  59        /* lookup inode */
  60        vino.ino = le64_to_cpu(h->ino);
  61        vino.snap = CEPH_NOSNAP;
  62        inode = ceph_find_inode(sb, vino);
  63        if (!inode) {
  64                pr_warn("Failed to find inode %llu\n", vino.ino);
  65                return;
  66        }
  67        ci = ceph_inode(inode);
  68
  69        spin_lock(&ci->i_ceph_lock);
  70        ci->i_rbytes = le64_to_cpu(h->rbytes);
  71        ci->i_rfiles = le64_to_cpu(h->rfiles);
  72        ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  73        __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  74                            le64_to_cpu(h->max_files));
  75        spin_unlock(&ci->i_ceph_lock);
  76
  77        /* avoid calling iput_final() in dispatch thread */
  78        ceph_async_iput(inode);
  79}
  80
  81static struct ceph_quotarealm_inode *
  82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  83{
  84        struct ceph_quotarealm_inode *qri = NULL;
  85        struct rb_node **node, *parent = NULL;
  86
  87        mutex_lock(&mdsc->quotarealms_inodes_mutex);
  88        node = &(mdsc->quotarealms_inodes.rb_node);
  89        while (*node) {
  90                parent = *node;
  91                qri = container_of(*node, struct ceph_quotarealm_inode, node);
  92
  93                if (ino < qri->ino)
  94                        node = &((*node)->rb_left);
  95                else if (ino > qri->ino)
  96                        node = &((*node)->rb_right);
  97                else
  98                        break;
  99        }
 100        if (!qri || (qri->ino != ino)) {
 101                /* Not found, create a new one and insert it */
 102                qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 103                if (qri) {
 104                        qri->ino = ino;
 105                        qri->inode = NULL;
 106                        qri->timeout = 0;
 107                        mutex_init(&qri->mutex);
 108                        rb_link_node(&qri->node, parent, node);
 109                        rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 110                } else
 111                        pr_warn("Failed to alloc quotarealms_inode\n");
 112        }
 113        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 114
 115        return qri;
 116}
 117
 118/*
 119 * This function will try to lookup a realm inode which isn't visible in the
 120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 121 * maintained in the mdsc and freed only when the filesystem is umounted.
 122 *
 123 * Note that these inodes are kept in this list even if the lookup fails, which
 124 * allows to prevent useless lookup requests.
 125 */
 126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 127                                             struct super_block *sb,
 128                                             struct ceph_snap_realm *realm)
 129{
 130        struct ceph_quotarealm_inode *qri;
 131        struct inode *in;
 132
 133        qri = find_quotarealm_inode(mdsc, realm->ino);
 134        if (!qri)
 135                return NULL;
 136
 137        mutex_lock(&qri->mutex);
 138        if (qri->inode && ceph_is_any_caps(qri->inode)) {
 139                /* A request has already returned the inode */
 140                mutex_unlock(&qri->mutex);
 141                return qri->inode;
 142        }
 143        /* Check if this inode lookup has failed recently */
 144        if (qri->timeout &&
 145            time_before_eq(jiffies, qri->timeout)) {
 146                mutex_unlock(&qri->mutex);
 147                return NULL;
 148        }
 149        if (qri->inode) {
 150                /* get caps */
 151                int ret = __ceph_do_getattr(qri->inode, NULL,
 152                                            CEPH_STAT_CAP_INODE, true);
 153                if (ret >= 0)
 154                        in = qri->inode;
 155                else
 156                        in = ERR_PTR(ret);
 157        }  else {
 158                in = ceph_lookup_inode(sb, realm->ino);
 159        }
 160
 161        if (IS_ERR(in)) {
 162                dout("Can't lookup inode %llx (err: %ld)\n",
 163                     realm->ino, PTR_ERR(in));
 164                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 165        } else {
 166                qri->timeout = 0;
 167                qri->inode = in;
 168        }
 169        mutex_unlock(&qri->mutex);
 170
 171        return in;
 172}
 173
 174void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 175{
 176        struct ceph_quotarealm_inode *qri;
 177        struct rb_node *node;
 178
 179        /*
 180         * It should now be safe to clean quotarealms_inode tree without holding
 181         * mdsc->quotarealms_inodes_mutex...
 182         */
 183        mutex_lock(&mdsc->quotarealms_inodes_mutex);
 184        while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 185                node = rb_first(&mdsc->quotarealms_inodes);
 186                qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 187                rb_erase(node, &mdsc->quotarealms_inodes);
 188                iput(qri->inode);
 189                kfree(qri);
 190        }
 191        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 192}
 193
 194/*
 195 * This function walks through the snaprealm for an inode and returns the
 196 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 197 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 198 * instead.
 199 *
 200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 201 * returned realm.
 202 *
 203 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 204 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 205 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 206 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 207 * will be restarted.
 208 */
 209static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 210                                               struct inode *inode, bool retry)
 211{
 212        struct ceph_inode_info *ci = NULL;
 213        struct ceph_snap_realm *realm, *next;
 214        struct inode *in;
 215        bool has_quota;
 216
 217        if (ceph_snap(inode) != CEPH_NOSNAP)
 218                return NULL;
 219
 220restart:
 221        realm = ceph_inode(inode)->i_snap_realm;
 222        if (realm)
 223                ceph_get_snap_realm(mdsc, realm);
 224        else
 225                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
 226                                   "null i_snap_realm\n", ceph_vinop(inode));
 227        while (realm) {
 228                bool has_inode;
 229
 230                spin_lock(&realm->inodes_with_caps_lock);
 231                has_inode = realm->inode;
 232                in = has_inode ? igrab(realm->inode) : NULL;
 233                spin_unlock(&realm->inodes_with_caps_lock);
 234                if (has_inode && !in)
 235                        break;
 236                if (!in) {
 237                        up_read(&mdsc->snap_rwsem);
 238                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 239                        down_read(&mdsc->snap_rwsem);
 240                        if (IS_ERR_OR_NULL(in))
 241                                break;
 242                        ceph_put_snap_realm(mdsc, realm);
 243                        if (!retry)
 244                                return ERR_PTR(-EAGAIN);
 245                        goto restart;
 246                }
 247
 248                ci = ceph_inode(in);
 249                has_quota = __ceph_has_any_quota(ci);
 250                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 251                ceph_async_iput(in);
 252
 253                next = realm->parent;
 254                if (has_quota || !next)
 255                       return realm;
 256
 257                ceph_get_snap_realm(mdsc, next);
 258                ceph_put_snap_realm(mdsc, realm);
 259                realm = next;
 260        }
 261        if (realm)
 262                ceph_put_snap_realm(mdsc, realm);
 263
 264        return NULL;
 265}
 266
 267static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 268{
 269        struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
 270        struct ceph_snap_realm *old_realm, *new_realm;
 271        bool is_same;
 272
 273restart:
 274        /*
 275         * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 276         * However, get_quota_realm may drop it temporarily.  By setting the
 277         * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 278         * dropped and we can then restart the whole operation.
 279         */
 280        down_read(&mdsc->snap_rwsem);
 281        old_realm = get_quota_realm(mdsc, old, true);
 282        new_realm = get_quota_realm(mdsc, new, false);
 283        if (PTR_ERR(new_realm) == -EAGAIN) {
 284                up_read(&mdsc->snap_rwsem);
 285                if (old_realm)
 286                        ceph_put_snap_realm(mdsc, old_realm);
 287                goto restart;
 288        }
 289        is_same = (old_realm == new_realm);
 290        up_read(&mdsc->snap_rwsem);
 291
 292        if (old_realm)
 293                ceph_put_snap_realm(mdsc, old_realm);
 294        if (new_realm)
 295                ceph_put_snap_realm(mdsc, new_realm);
 296
 297        return is_same;
 298}
 299
 300enum quota_check_op {
 301        QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
 302        QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_files limit */
 303        QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_files
 304                                                   limit is approaching */
 305};
 306
 307/*
 308 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 309 * realm, it will execute quota check operation defined by the 'op' parameter.
 310 * The snaprealm walk is interrupted if the quota check detects that the quota
 311 * is exceeded or if the root inode is reached.
 312 */
 313static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 314                                 loff_t delta)
 315{
 316        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 317        struct ceph_inode_info *ci;
 318        struct ceph_snap_realm *realm, *next;
 319        struct inode *in;
 320        u64 max, rvalue;
 321        bool exceeded = false;
 322
 323        if (ceph_snap(inode) != CEPH_NOSNAP)
 324                return false;
 325
 326        down_read(&mdsc->snap_rwsem);
 327restart:
 328        realm = ceph_inode(inode)->i_snap_realm;
 329        if (realm)
 330                ceph_get_snap_realm(mdsc, realm);
 331        else
 332                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
 333                                   "null i_snap_realm\n", ceph_vinop(inode));
 334        while (realm) {
 335                bool has_inode;
 336
 337                spin_lock(&realm->inodes_with_caps_lock);
 338                has_inode = realm->inode;
 339                in = has_inode ? igrab(realm->inode) : NULL;
 340                spin_unlock(&realm->inodes_with_caps_lock);
 341                if (has_inode && !in)
 342                        break;
 343                if (!in) {
 344                        up_read(&mdsc->snap_rwsem);
 345                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 346                        down_read(&mdsc->snap_rwsem);
 347                        if (IS_ERR_OR_NULL(in))
 348                                break;
 349                        ceph_put_snap_realm(mdsc, realm);
 350                        goto restart;
 351                }
 352                ci = ceph_inode(in);
 353                spin_lock(&ci->i_ceph_lock);
 354                if (op == QUOTA_CHECK_MAX_FILES_OP) {
 355                        max = ci->i_max_files;
 356                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
 357                } else {
 358                        max = ci->i_max_bytes;
 359                        rvalue = ci->i_rbytes;
 360                }
 361                spin_unlock(&ci->i_ceph_lock);
 362                switch (op) {
 363                case QUOTA_CHECK_MAX_FILES_OP:
 364                case QUOTA_CHECK_MAX_BYTES_OP:
 365                        exceeded = (max && (rvalue + delta > max));
 366                        break;
 367                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
 368                        if (max) {
 369                                if (rvalue >= max)
 370                                        exceeded = true;
 371                                else {
 372                                        /*
 373                                         * when we're writing more that 1/16th
 374                                         * of the available space
 375                                         */
 376                                        exceeded =
 377                                                (((max - rvalue) >> 4) < delta);
 378                                }
 379                        }
 380                        break;
 381                default:
 382                        /* Shouldn't happen */
 383                        pr_warn("Invalid quota check op (%d)\n", op);
 384                        exceeded = true; /* Just break the loop */
 385                }
 386                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 387                ceph_async_iput(in);
 388
 389                next = realm->parent;
 390                if (exceeded || !next)
 391                        break;
 392                ceph_get_snap_realm(mdsc, next);
 393                ceph_put_snap_realm(mdsc, realm);
 394                realm = next;
 395        }
 396        if (realm)
 397                ceph_put_snap_realm(mdsc, realm);
 398        up_read(&mdsc->snap_rwsem);
 399
 400        return exceeded;
 401}
 402
 403/*
 404 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 405 * @inode:      directory where a new file is being created
 406 *
 407 * This functions returns true is max_files quota allows a new file to be
 408 * created.  It is necessary to walk through the snaprealm hierarchy (until the
 409 * FS root) to check all realms with quotas set.
 410 */
 411bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 412{
 413        if (!ceph_has_realms_with_quotas(inode))
 414                return false;
 415
 416        WARN_ON(!S_ISDIR(inode->i_mode));
 417
 418        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
 419}
 420
 421/*
 422 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 423 * @inode:      inode being written
 424 * @newsize:    new size if write succeeds
 425 *
 426 * This functions returns true is max_bytes quota allows a file size to reach
 427 * @newsize; it returns false otherwise.
 428 */
 429bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 430{
 431        loff_t size = i_size_read(inode);
 432
 433        if (!ceph_has_realms_with_quotas(inode))
 434                return false;
 435
 436        /* return immediately if we're decreasing file size */
 437        if (newsize <= size)
 438                return false;
 439
 440        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 441}
 442
 443/*
 444 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 445 * @inode:      inode being written
 446 * @newsize:    new size if write succeeds
 447 *
 448 * This function returns true if the new file size @newsize will be consuming
 449 * more than 1/16th of the available quota space; it returns false otherwise.
 450 */
 451bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 452{
 453        loff_t size = ceph_inode(inode)->i_reported_size;
 454
 455        if (!ceph_has_realms_with_quotas(inode))
 456                return false;
 457
 458        /* return immediately if we're decreasing file size */
 459        if (newsize <= size)
 460                return false;
 461
 462        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 463                                    (newsize - size));
 464}
 465
 466/*
 467 * ceph_quota_update_statfs - if root has quota update statfs with quota status
 468 * @fsc:        filesystem client instance
 469 * @buf:        statfs to update
 470 *
 471 * If the mounted filesystem root has max_bytes quota set, update the filesystem
 472 * statistics with the quota status.
 473 *
 474 * This function returns true if the stats have been updated, false otherwise.
 475 */
 476bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 477{
 478        struct ceph_mds_client *mdsc = fsc->mdsc;
 479        struct ceph_inode_info *ci;
 480        struct ceph_snap_realm *realm;
 481        struct inode *in;
 482        u64 total = 0, used, free;
 483        bool is_updated = false;
 484
 485        down_read(&mdsc->snap_rwsem);
 486        realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 487        up_read(&mdsc->snap_rwsem);
 488        if (!realm)
 489                return false;
 490
 491        spin_lock(&realm->inodes_with_caps_lock);
 492        in = realm->inode ? igrab(realm->inode) : NULL;
 493        spin_unlock(&realm->inodes_with_caps_lock);
 494        if (in) {
 495                ci = ceph_inode(in);
 496                spin_lock(&ci->i_ceph_lock);
 497                if (ci->i_max_bytes) {
 498                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 499                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 500                        /* It is possible for a quota to be exceeded.
 501                         * Report 'zero' in that case
 502                         */
 503                        free = total > used ? total - used : 0;
 504                }
 505                spin_unlock(&ci->i_ceph_lock);
 506                if (total) {
 507                        buf->f_blocks = total;
 508                        buf->f_bfree = free;
 509                        buf->f_bavail = free;
 510                        is_updated = true;
 511                }
 512                iput(in);
 513        }
 514        ceph_put_snap_realm(mdsc, realm);
 515
 516        return is_updated;
 517}
 518
 519/*
 520 * ceph_quota_check_rename - check if a rename can be executed
 521 * @mdsc:       MDS client instance
 522 * @old:        inode to be copied
 523 * @new:        destination inode (directory)
 524 *
 525 * This function verifies if a rename (e.g. moving a file or directory) can be
 526 * executed.  It forces an rstat update in the @new target directory (and in the
 527 * source @old as well, if it's a directory).  The actual check is done both for
 528 * max_files and max_bytes.
 529 *
 530 * This function returns 0 if it's OK to do the rename, or, if quotas are
 531 * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
 532 */
 533int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
 534                            struct inode *old, struct inode *new)
 535{
 536        struct ceph_inode_info *ci_old = ceph_inode(old);
 537        int ret = 0;
 538
 539        if (ceph_quota_is_same_realm(old, new))
 540                return 0;
 541
 542        /*
 543         * Get the latest rstat for target directory (and for source, if a
 544         * directory)
 545         */
 546        ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
 547        if (ret)
 548                return ret;
 549
 550        if (S_ISDIR(old->i_mode)) {
 551                ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
 552                if (ret)
 553                        return ret;
 554                ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
 555                                           ci_old->i_rbytes);
 556                if (!ret)
 557                        ret = check_quota_exceeded(new,
 558                                                   QUOTA_CHECK_MAX_FILES_OP,
 559                                                   ci_old->i_rfiles +
 560                                                   ci_old->i_rsubdirs);
 561                if (ret)
 562                        ret = -EXDEV;
 563        } else {
 564                ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
 565                                           i_size_read(old));
 566                if (!ret)
 567                        ret = check_quota_exceeded(new,
 568                                                   QUOTA_CHECK_MAX_FILES_OP, 1);
 569                if (ret)
 570                        ret = -EDQUOT;
 571        }
 572
 573        return ret;
 574}
 575