linux/fs/ceph/quota.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * quota.c - CephFS quota
   4 *
   5 * Copyright (C) 2017-2018 SUSE
   6 */
   7
   8#include <linux/statfs.h>
   9
  10#include "super.h"
  11#include "mds_client.h"
  12
  13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14{
  15        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
  16        if (inc)
  17                atomic64_inc(&mdsc->quotarealms_count);
  18        else
  19                atomic64_dec(&mdsc->quotarealms_count);
  20}
  21
  22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23{
  24        struct super_block *sb = inode->i_sb;
  25        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
  26        struct inode *root = d_inode(sb->s_root);
  27
  28        if (atomic64_read(&mdsc->quotarealms_count) > 0)
  29                return true;
  30        /* if root is the real CephFS root, we don't have quota realms */
  31        if (root && ceph_ino(root) == CEPH_INO_ROOT)
  32                return false;
  33        /* MDS stray dirs have no quota realms */
  34        if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
  35                return false;
  36        /* otherwise, we can't know for sure */
  37        return true;
  38}
  39
  40void ceph_handle_quota(struct ceph_mds_client *mdsc,
  41                       struct ceph_mds_session *session,
  42                       struct ceph_msg *msg)
  43{
  44        struct super_block *sb = mdsc->fsc->sb;
  45        struct ceph_mds_quota *h = msg->front.iov_base;
  46        struct ceph_vino vino;
  47        struct inode *inode;
  48        struct ceph_inode_info *ci;
  49
  50        if (msg->front.iov_len < sizeof(*h)) {
  51                pr_err("%s corrupt message mds%d len %d\n", __func__,
  52                       session->s_mds, (int)msg->front.iov_len);
  53                ceph_msg_dump(msg);
  54                return;
  55        }
  56
  57        /* increment msg sequence number */
  58        mutex_lock(&session->s_mutex);
  59        inc_session_sequence(session);
  60        mutex_unlock(&session->s_mutex);
  61
  62        /* lookup inode */
  63        vino.ino = le64_to_cpu(h->ino);
  64        vino.snap = CEPH_NOSNAP;
  65        inode = ceph_find_inode(sb, vino);
  66        if (!inode) {
  67                pr_warn("Failed to find inode %llu\n", vino.ino);
  68                return;
  69        }
  70        ci = ceph_inode(inode);
  71
  72        spin_lock(&ci->i_ceph_lock);
  73        ci->i_rbytes = le64_to_cpu(h->rbytes);
  74        ci->i_rfiles = le64_to_cpu(h->rfiles);
  75        ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  76        __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  77                            le64_to_cpu(h->max_files));
  78        spin_unlock(&ci->i_ceph_lock);
  79
  80        iput(inode);
  81}
  82
  83static struct ceph_quotarealm_inode *
  84find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  85{
  86        struct ceph_quotarealm_inode *qri = NULL;
  87        struct rb_node **node, *parent = NULL;
  88
  89        mutex_lock(&mdsc->quotarealms_inodes_mutex);
  90        node = &(mdsc->quotarealms_inodes.rb_node);
  91        while (*node) {
  92                parent = *node;
  93                qri = container_of(*node, struct ceph_quotarealm_inode, node);
  94
  95                if (ino < qri->ino)
  96                        node = &((*node)->rb_left);
  97                else if (ino > qri->ino)
  98                        node = &((*node)->rb_right);
  99                else
 100                        break;
 101        }
 102        if (!qri || (qri->ino != ino)) {
 103                /* Not found, create a new one and insert it */
 104                qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 105                if (qri) {
 106                        qri->ino = ino;
 107                        qri->inode = NULL;
 108                        qri->timeout = 0;
 109                        mutex_init(&qri->mutex);
 110                        rb_link_node(&qri->node, parent, node);
 111                        rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 112                } else
 113                        pr_warn("Failed to alloc quotarealms_inode\n");
 114        }
 115        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 116
 117        return qri;
 118}
 119
 120/*
 121 * This function will try to lookup a realm inode which isn't visible in the
 122 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 123 * maintained in the mdsc and freed only when the filesystem is umounted.
 124 *
 125 * Note that these inodes are kept in this list even if the lookup fails, which
 126 * allows to prevent useless lookup requests.
 127 */
 128static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 129                                             struct super_block *sb,
 130                                             struct ceph_snap_realm *realm)
 131{
 132        struct ceph_quotarealm_inode *qri;
 133        struct inode *in;
 134
 135        qri = find_quotarealm_inode(mdsc, realm->ino);
 136        if (!qri)
 137                return NULL;
 138
 139        mutex_lock(&qri->mutex);
 140        if (qri->inode && ceph_is_any_caps(qri->inode)) {
 141                /* A request has already returned the inode */
 142                mutex_unlock(&qri->mutex);
 143                return qri->inode;
 144        }
 145        /* Check if this inode lookup has failed recently */
 146        if (qri->timeout &&
 147            time_before_eq(jiffies, qri->timeout)) {
 148                mutex_unlock(&qri->mutex);
 149                return NULL;
 150        }
 151        if (qri->inode) {
 152                /* get caps */
 153                int ret = __ceph_do_getattr(qri->inode, NULL,
 154                                            CEPH_STAT_CAP_INODE, true);
 155                if (ret >= 0)
 156                        in = qri->inode;
 157                else
 158                        in = ERR_PTR(ret);
 159        }  else {
 160                in = ceph_lookup_inode(sb, realm->ino);
 161        }
 162
 163        if (IS_ERR(in)) {
 164                dout("Can't lookup inode %llx (err: %ld)\n",
 165                     realm->ino, PTR_ERR(in));
 166                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 167        } else {
 168                qri->timeout = 0;
 169                qri->inode = in;
 170        }
 171        mutex_unlock(&qri->mutex);
 172
 173        return in;
 174}
 175
 176void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 177{
 178        struct ceph_quotarealm_inode *qri;
 179        struct rb_node *node;
 180
 181        /*
 182         * It should now be safe to clean quotarealms_inode tree without holding
 183         * mdsc->quotarealms_inodes_mutex...
 184         */
 185        mutex_lock(&mdsc->quotarealms_inodes_mutex);
 186        while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 187                node = rb_first(&mdsc->quotarealms_inodes);
 188                qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 189                rb_erase(node, &mdsc->quotarealms_inodes);
 190                iput(qri->inode);
 191                kfree(qri);
 192        }
 193        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 194}
 195
 196/*
 197 * This function walks through the snaprealm for an inode and returns the
 198 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 199 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 200 * instead.
 201 *
 202 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 203 * returned realm.
 204 *
 205 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 206 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 207 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 208 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 209 * will be restarted.
 210 */
 211static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 212                                               struct inode *inode, bool retry)
 213{
 214        struct ceph_inode_info *ci = NULL;
 215        struct ceph_snap_realm *realm, *next;
 216        struct inode *in;
 217        bool has_quota;
 218
 219        if (ceph_snap(inode) != CEPH_NOSNAP)
 220                return NULL;
 221
 222restart:
 223        realm = ceph_inode(inode)->i_snap_realm;
 224        if (realm)
 225                ceph_get_snap_realm(mdsc, realm);
 226        else
 227                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
 228                                   "null i_snap_realm\n", ceph_vinop(inode));
 229        while (realm) {
 230                bool has_inode;
 231
 232                spin_lock(&realm->inodes_with_caps_lock);
 233                has_inode = realm->inode;
 234                in = has_inode ? igrab(realm->inode) : NULL;
 235                spin_unlock(&realm->inodes_with_caps_lock);
 236                if (has_inode && !in)
 237                        break;
 238                if (!in) {
 239                        up_read(&mdsc->snap_rwsem);
 240                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 241                        down_read(&mdsc->snap_rwsem);
 242                        if (IS_ERR_OR_NULL(in))
 243                                break;
 244                        ceph_put_snap_realm(mdsc, realm);
 245                        if (!retry)
 246                                return ERR_PTR(-EAGAIN);
 247                        goto restart;
 248                }
 249
 250                ci = ceph_inode(in);
 251                has_quota = __ceph_has_any_quota(ci);
 252                iput(in);
 253
 254                next = realm->parent;
 255                if (has_quota || !next)
 256                       return realm;
 257
 258                ceph_get_snap_realm(mdsc, next);
 259                ceph_put_snap_realm(mdsc, realm);
 260                realm = next;
 261        }
 262        if (realm)
 263                ceph_put_snap_realm(mdsc, realm);
 264
 265        return NULL;
 266}
 267
 268bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 269{
 270        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
 271        struct ceph_snap_realm *old_realm, *new_realm;
 272        bool is_same;
 273
 274restart:
 275        /*
 276         * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 277         * However, get_quota_realm may drop it temporarily.  By setting the
 278         * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 279         * dropped and we can then restart the whole operation.
 280         */
 281        down_read(&mdsc->snap_rwsem);
 282        old_realm = get_quota_realm(mdsc, old, true);
 283        new_realm = get_quota_realm(mdsc, new, false);
 284        if (PTR_ERR(new_realm) == -EAGAIN) {
 285                up_read(&mdsc->snap_rwsem);
 286                if (old_realm)
 287                        ceph_put_snap_realm(mdsc, old_realm);
 288                goto restart;
 289        }
 290        is_same = (old_realm == new_realm);
 291        up_read(&mdsc->snap_rwsem);
 292
 293        if (old_realm)
 294                ceph_put_snap_realm(mdsc, old_realm);
 295        if (new_realm)
 296                ceph_put_snap_realm(mdsc, new_realm);
 297
 298        return is_same;
 299}
 300
 301enum quota_check_op {
 302        QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
 303        QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_files limit */
 304        QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_files
 305                                                   limit is approaching */
 306};
 307
 308/*
 309 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 310 * realm, it will execute quota check operation defined by the 'op' parameter.
 311 * The snaprealm walk is interrupted if the quota check detects that the quota
 312 * is exceeded or if the root inode is reached.
 313 */
 314static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 315                                 loff_t delta)
 316{
 317        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 318        struct ceph_inode_info *ci;
 319        struct ceph_snap_realm *realm, *next;
 320        struct inode *in;
 321        u64 max, rvalue;
 322        bool exceeded = false;
 323
 324        if (ceph_snap(inode) != CEPH_NOSNAP)
 325                return false;
 326
 327        down_read(&mdsc->snap_rwsem);
 328restart:
 329        realm = ceph_inode(inode)->i_snap_realm;
 330        if (realm)
 331                ceph_get_snap_realm(mdsc, realm);
 332        else
 333                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
 334                                   "null i_snap_realm\n", ceph_vinop(inode));
 335        while (realm) {
 336                bool has_inode;
 337
 338                spin_lock(&realm->inodes_with_caps_lock);
 339                has_inode = realm->inode;
 340                in = has_inode ? igrab(realm->inode) : NULL;
 341                spin_unlock(&realm->inodes_with_caps_lock);
 342                if (has_inode && !in)
 343                        break;
 344                if (!in) {
 345                        up_read(&mdsc->snap_rwsem);
 346                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 347                        down_read(&mdsc->snap_rwsem);
 348                        if (IS_ERR_OR_NULL(in))
 349                                break;
 350                        ceph_put_snap_realm(mdsc, realm);
 351                        goto restart;
 352                }
 353                ci = ceph_inode(in);
 354                spin_lock(&ci->i_ceph_lock);
 355                if (op == QUOTA_CHECK_MAX_FILES_OP) {
 356                        max = ci->i_max_files;
 357                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
 358                } else {
 359                        max = ci->i_max_bytes;
 360                        rvalue = ci->i_rbytes;
 361                }
 362                spin_unlock(&ci->i_ceph_lock);
 363                switch (op) {
 364                case QUOTA_CHECK_MAX_FILES_OP:
 365                case QUOTA_CHECK_MAX_BYTES_OP:
 366                        exceeded = (max && (rvalue + delta > max));
 367                        break;
 368                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
 369                        if (max) {
 370                                if (rvalue >= max)
 371                                        exceeded = true;
 372                                else {
 373                                        /*
 374                                         * when we're writing more that 1/16th
 375                                         * of the available space
 376                                         */
 377                                        exceeded =
 378                                                (((max - rvalue) >> 4) < delta);
 379                                }
 380                        }
 381                        break;
 382                default:
 383                        /* Shouldn't happen */
 384                        pr_warn("Invalid quota check op (%d)\n", op);
 385                        exceeded = true; /* Just break the loop */
 386                }
 387                iput(in);
 388
 389                next = realm->parent;
 390                if (exceeded || !next)
 391                        break;
 392                ceph_get_snap_realm(mdsc, next);
 393                ceph_put_snap_realm(mdsc, realm);
 394                realm = next;
 395        }
 396        if (realm)
 397                ceph_put_snap_realm(mdsc, realm);
 398        up_read(&mdsc->snap_rwsem);
 399
 400        return exceeded;
 401}
 402
 403/*
 404 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 405 * @inode:      directory where a new file is being created
 406 *
 407 * This functions returns true is max_files quota allows a new file to be
 408 * created.  It is necessary to walk through the snaprealm hierarchy (until the
 409 * FS root) to check all realms with quotas set.
 410 */
 411bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 412{
 413        if (!ceph_has_realms_with_quotas(inode))
 414                return false;
 415
 416        WARN_ON(!S_ISDIR(inode->i_mode));
 417
 418        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
 419}
 420
 421/*
 422 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 423 * @inode:      inode being written
 424 * @newsize:    new size if write succeeds
 425 *
 426 * This functions returns true is max_bytes quota allows a file size to reach
 427 * @newsize; it returns false otherwise.
 428 */
 429bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 430{
 431        loff_t size = i_size_read(inode);
 432
 433        if (!ceph_has_realms_with_quotas(inode))
 434                return false;
 435
 436        /* return immediately if we're decreasing file size */
 437        if (newsize <= size)
 438                return false;
 439
 440        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 441}
 442
 443/*
 444 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 445 * @inode:      inode being written
 446 * @newsize:    new size if write succeeds
 447 *
 448 * This function returns true if the new file size @newsize will be consuming
 449 * more than 1/16th of the available quota space; it returns false otherwise.
 450 */
 451bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 452{
 453        loff_t size = ceph_inode(inode)->i_reported_size;
 454
 455        if (!ceph_has_realms_with_quotas(inode))
 456                return false;
 457
 458        /* return immediately if we're decreasing file size */
 459        if (newsize <= size)
 460                return false;
 461
 462        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 463                                    (newsize - size));
 464}
 465
 466/*
 467 * ceph_quota_update_statfs - if root has quota update statfs with quota status
 468 * @fsc:        filesystem client instance
 469 * @buf:        statfs to update
 470 *
 471 * If the mounted filesystem root has max_bytes quota set, update the filesystem
 472 * statistics with the quota status.
 473 *
 474 * This function returns true if the stats have been updated, false otherwise.
 475 */
 476bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 477{
 478        struct ceph_mds_client *mdsc = fsc->mdsc;
 479        struct ceph_inode_info *ci;
 480        struct ceph_snap_realm *realm;
 481        struct inode *in;
 482        u64 total = 0, used, free;
 483        bool is_updated = false;
 484
 485        down_read(&mdsc->snap_rwsem);
 486        realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 487        up_read(&mdsc->snap_rwsem);
 488        if (!realm)
 489                return false;
 490
 491        spin_lock(&realm->inodes_with_caps_lock);
 492        in = realm->inode ? igrab(realm->inode) : NULL;
 493        spin_unlock(&realm->inodes_with_caps_lock);
 494        if (in) {
 495                ci = ceph_inode(in);
 496                spin_lock(&ci->i_ceph_lock);
 497                if (ci->i_max_bytes) {
 498                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 499                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 500                        /* For quota size less than 4MB, use 4KB block size */
 501                        if (!total) {
 502                                total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
 503                                used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
 504                                buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
 505                        }
 506                        /* It is possible for a quota to be exceeded.
 507                         * Report 'zero' in that case
 508                         */
 509                        free = total > used ? total - used : 0;
 510                        /* For quota size less than 4KB, report the
 511                         * total=used=4KB,free=0 when quota is full
 512                         * and total=free=4KB, used=0 otherwise */
 513                        if (!total) {
 514                                total = 1;
 515                                free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
 516                                buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
 517                        }
 518                }
 519                spin_unlock(&ci->i_ceph_lock);
 520                if (total) {
 521                        buf->f_blocks = total;
 522                        buf->f_bfree = free;
 523                        buf->f_bavail = free;
 524                        is_updated = true;
 525                }
 526                iput(in);
 527        }
 528        ceph_put_snap_realm(mdsc, realm);
 529
 530        return is_updated;
 531}
 532
 533