linux/fs/ceph/quota.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * quota.c - CephFS quota
   4 *
   5 * Copyright (C) 2017-2018 SUSE
   6 */
   7
   8#include <linux/statfs.h>
   9
  10#include "super.h"
  11#include "mds_client.h"
  12
  13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14{
  15        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  16        if (inc)
  17                atomic64_inc(&mdsc->quotarealms_count);
  18        else
  19                atomic64_dec(&mdsc->quotarealms_count);
  20}
  21
  22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23{
  24        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
  25        struct super_block *sb = mdsc->fsc->sb;
  26
  27        if (atomic64_read(&mdsc->quotarealms_count) > 0)
  28                return true;
  29        /* if root is the real CephFS root, we don't have quota realms */
  30        if (sb->s_root->d_inode &&
  31            (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
  32                return false;
  33        /* otherwise, we can't know for sure */
  34        return true;
  35}
  36
  37void ceph_handle_quota(struct ceph_mds_client *mdsc,
  38                       struct ceph_mds_session *session,
  39                       struct ceph_msg *msg)
  40{
  41        struct super_block *sb = mdsc->fsc->sb;
  42        struct ceph_mds_quota *h = msg->front.iov_base;
  43        struct ceph_vino vino;
  44        struct inode *inode;
  45        struct ceph_inode_info *ci;
  46
  47        if (msg->front.iov_len < sizeof(*h)) {
  48                pr_err("%s corrupt message mds%d len %d\n", __func__,
  49                       session->s_mds, (int)msg->front.iov_len);
  50                ceph_msg_dump(msg);
  51                return;
  52        }
  53
  54        /* increment msg sequence number */
  55        mutex_lock(&session->s_mutex);
  56        session->s_seq++;
  57        mutex_unlock(&session->s_mutex);
  58
  59        /* lookup inode */
  60        vino.ino = le64_to_cpu(h->ino);
  61        vino.snap = CEPH_NOSNAP;
  62        inode = ceph_find_inode(sb, vino);
  63        if (!inode) {
  64                pr_warn("Failed to find inode %llu\n", vino.ino);
  65                return;
  66        }
  67        ci = ceph_inode(inode);
  68
  69        spin_lock(&ci->i_ceph_lock);
  70        ci->i_rbytes = le64_to_cpu(h->rbytes);
  71        ci->i_rfiles = le64_to_cpu(h->rfiles);
  72        ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  73        __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  74                            le64_to_cpu(h->max_files));
  75        spin_unlock(&ci->i_ceph_lock);
  76
  77        /* avoid calling iput_final() in dispatch thread */
  78        ceph_async_iput(inode);
  79}
  80
  81static struct ceph_quotarealm_inode *
  82find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  83{
  84        struct ceph_quotarealm_inode *qri = NULL;
  85        struct rb_node **node, *parent = NULL;
  86
  87        mutex_lock(&mdsc->quotarealms_inodes_mutex);
  88        node = &(mdsc->quotarealms_inodes.rb_node);
  89        while (*node) {
  90                parent = *node;
  91                qri = container_of(*node, struct ceph_quotarealm_inode, node);
  92
  93                if (ino < qri->ino)
  94                        node = &((*node)->rb_left);
  95                else if (ino > qri->ino)
  96                        node = &((*node)->rb_right);
  97                else
  98                        break;
  99        }
 100        if (!qri || (qri->ino != ino)) {
 101                /* Not found, create a new one and insert it */
 102                qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 103                if (qri) {
 104                        qri->ino = ino;
 105                        qri->inode = NULL;
 106                        qri->timeout = 0;
 107                        mutex_init(&qri->mutex);
 108                        rb_link_node(&qri->node, parent, node);
 109                        rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 110                } else
 111                        pr_warn("Failed to alloc quotarealms_inode\n");
 112        }
 113        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 114
 115        return qri;
 116}
 117
 118/*
 119 * This function will try to lookup a realm inode which isn't visible in the
 120 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 121 * maintained in the mdsc and freed only when the filesystem is umounted.
 122 *
 123 * Note that these inodes are kept in this list even if the lookup fails, which
 124 * allows to prevent useless lookup requests.
 125 */
 126static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 127                                             struct super_block *sb,
 128                                             struct ceph_snap_realm *realm)
 129{
 130        struct ceph_quotarealm_inode *qri;
 131        struct inode *in;
 132
 133        qri = find_quotarealm_inode(mdsc, realm->ino);
 134        if (!qri)
 135                return NULL;
 136
 137        mutex_lock(&qri->mutex);
 138        if (qri->inode && ceph_is_any_caps(qri->inode)) {
 139                /* A request has already returned the inode */
 140                mutex_unlock(&qri->mutex);
 141                return qri->inode;
 142        }
 143        /* Check if this inode lookup has failed recently */
 144        if (qri->timeout &&
 145            time_before_eq(jiffies, qri->timeout)) {
 146                mutex_unlock(&qri->mutex);
 147                return NULL;
 148        }
 149        if (qri->inode) {
 150                /* get caps */
 151                int ret = __ceph_do_getattr(qri->inode, NULL,
 152                                            CEPH_STAT_CAP_INODE, true);
 153                if (ret >= 0)
 154                        in = qri->inode;
 155                else
 156                        in = ERR_PTR(ret);
 157        }  else {
 158                in = ceph_lookup_inode(sb, realm->ino);
 159        }
 160
 161        if (IS_ERR(in)) {
 162                dout("Can't lookup inode %llx (err: %ld)\n",
 163                     realm->ino, PTR_ERR(in));
 164                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 165        } else {
 166                qri->timeout = 0;
 167                qri->inode = in;
 168        }
 169        mutex_unlock(&qri->mutex);
 170
 171        return in;
 172}
 173
 174void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 175{
 176        struct ceph_quotarealm_inode *qri;
 177        struct rb_node *node;
 178
 179        /*
 180         * It should now be safe to clean quotarealms_inode tree without holding
 181         * mdsc->quotarealms_inodes_mutex...
 182         */
 183        mutex_lock(&mdsc->quotarealms_inodes_mutex);
 184        while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 185                node = rb_first(&mdsc->quotarealms_inodes);
 186                qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 187                rb_erase(node, &mdsc->quotarealms_inodes);
 188                iput(qri->inode);
 189                kfree(qri);
 190        }
 191        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 192}
 193
 194/*
 195 * This function walks through the snaprealm for an inode and returns the
 196 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 197 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 198 * instead.
 199 *
 200 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 201 * returned realm.
 202 *
 203 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 204 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 205 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 206 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 207 * will be restarted.
 208 */
 209static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 210                                               struct inode *inode, bool retry)
 211{
 212        struct ceph_inode_info *ci = NULL;
 213        struct ceph_snap_realm *realm, *next;
 214        struct inode *in;
 215        bool has_quota;
 216
 217        if (ceph_snap(inode) != CEPH_NOSNAP)
 218                return NULL;
 219
 220restart:
 221        realm = ceph_inode(inode)->i_snap_realm;
 222        if (realm)
 223                ceph_get_snap_realm(mdsc, realm);
 224        else
 225                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
 226                                   "null i_snap_realm\n", ceph_vinop(inode));
 227        while (realm) {
 228                bool has_inode;
 229
 230                spin_lock(&realm->inodes_with_caps_lock);
 231                has_inode = realm->inode;
 232                in = has_inode ? igrab(realm->inode) : NULL;
 233                spin_unlock(&realm->inodes_with_caps_lock);
 234                if (has_inode && !in)
 235                        break;
 236                if (!in) {
 237                        up_read(&mdsc->snap_rwsem);
 238                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 239                        down_read(&mdsc->snap_rwsem);
 240                        if (IS_ERR_OR_NULL(in))
 241                                break;
 242                        ceph_put_snap_realm(mdsc, realm);
 243                        if (!retry)
 244                                return ERR_PTR(-EAGAIN);
 245                        goto restart;
 246                }
 247
 248                ci = ceph_inode(in);
 249                has_quota = __ceph_has_any_quota(ci);
 250                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 251                ceph_async_iput(in);
 252
 253                next = realm->parent;
 254                if (has_quota || !next)
 255                       return realm;
 256
 257                ceph_get_snap_realm(mdsc, next);
 258                ceph_put_snap_realm(mdsc, realm);
 259                realm = next;
 260        }
 261        if (realm)
 262                ceph_put_snap_realm(mdsc, realm);
 263
 264        return NULL;
 265}
 266
 267bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 268{
 269        struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
 270        struct ceph_snap_realm *old_realm, *new_realm;
 271        bool is_same;
 272
 273restart:
 274        /*
 275         * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 276         * However, get_quota_realm may drop it temporarily.  By setting the
 277         * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 278         * dropped and we can then restart the whole operation.
 279         */
 280        down_read(&mdsc->snap_rwsem);
 281        old_realm = get_quota_realm(mdsc, old, true);
 282        new_realm = get_quota_realm(mdsc, new, false);
 283        if (PTR_ERR(new_realm) == -EAGAIN) {
 284                up_read(&mdsc->snap_rwsem);
 285                if (old_realm)
 286                        ceph_put_snap_realm(mdsc, old_realm);
 287                goto restart;
 288        }
 289        is_same = (old_realm == new_realm);
 290        up_read(&mdsc->snap_rwsem);
 291
 292        if (old_realm)
 293                ceph_put_snap_realm(mdsc, old_realm);
 294        if (new_realm)
 295                ceph_put_snap_realm(mdsc, new_realm);
 296
 297        return is_same;
 298}
 299
 /* Selects which quota test check_quota_exceeded() applies to each realm. */
 300enum quota_check_op {
 301        QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
 302        QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_bytes limit */
 303        QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_bytes
 304                                                   limit is approaching */
 305};
 306
 307/*
 308 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 309 * realm, it will execute quota check operation defined by the 'op' parameter.
 310 * The snaprealm walk is interrupted if the quota check detects that the quota
 311 * is exceeded or if the root inode is reached.
 312 */
 313static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 314                                 loff_t delta)
 315{
 316        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 317        struct ceph_inode_info *ci;
 318        struct ceph_snap_realm *realm, *next;
 319        struct inode *in;
 320        u64 max, rvalue;
 321        bool exceeded = false;
 322
 323        if (ceph_snap(inode) != CEPH_NOSNAP)
 324                return false;
 325
 326        down_read(&mdsc->snap_rwsem);
 327restart:
 328        realm = ceph_inode(inode)->i_snap_realm;
 329        if (realm)
 330                ceph_get_snap_realm(mdsc, realm);
 331        else
 332                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
 333                                   "null i_snap_realm\n", ceph_vinop(inode));
 334        while (realm) {
 335                bool has_inode;
 336
 337                spin_lock(&realm->inodes_with_caps_lock);
 338                has_inode = realm->inode;
 339                in = has_inode ? igrab(realm->inode) : NULL;
 340                spin_unlock(&realm->inodes_with_caps_lock);
 341                if (has_inode && !in)
 342                        break;
 343                if (!in) {
 344                        up_read(&mdsc->snap_rwsem);
 345                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 346                        down_read(&mdsc->snap_rwsem);
 347                        if (IS_ERR_OR_NULL(in))
 348                                break;
 349                        ceph_put_snap_realm(mdsc, realm);
 350                        goto restart;
 351                }
 352                ci = ceph_inode(in);
 353                spin_lock(&ci->i_ceph_lock);
 354                if (op == QUOTA_CHECK_MAX_FILES_OP) {
 355                        max = ci->i_max_files;
 356                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
 357                } else {
 358                        max = ci->i_max_bytes;
 359                        rvalue = ci->i_rbytes;
 360                }
 361                spin_unlock(&ci->i_ceph_lock);
 362                switch (op) {
 363                case QUOTA_CHECK_MAX_FILES_OP:
 364                        exceeded = (max && (rvalue >= max));
 365                        break;
 366                case QUOTA_CHECK_MAX_BYTES_OP:
 367                        exceeded = (max && (rvalue + delta > max));
 368                        break;
 369                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
 370                        if (max) {
 371                                if (rvalue >= max)
 372                                        exceeded = true;
 373                                else {
 374                                        /*
 375                                         * when we're writing more that 1/16th
 376                                         * of the available space
 377                                         */
 378                                        exceeded =
 379                                                (((max - rvalue) >> 4) < delta);
 380                                }
 381                        }
 382                        break;
 383                default:
 384                        /* Shouldn't happen */
 385                        pr_warn("Invalid quota check op (%d)\n", op);
 386                        exceeded = true; /* Just break the loop */
 387                }
 388                /* avoid calling iput_final() while holding mdsc->snap_rwsem */
 389                ceph_async_iput(in);
 390
 391                next = realm->parent;
 392                if (exceeded || !next)
 393                        break;
 394                ceph_get_snap_realm(mdsc, next);
 395                ceph_put_snap_realm(mdsc, realm);
 396                realm = next;
 397        }
 398        if (realm)
 399                ceph_put_snap_realm(mdsc, realm);
 400        up_read(&mdsc->snap_rwsem);
 401
 402        return exceeded;
 403}
 404
 405/*
 406 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 407 * @inode:      directory where a new file is being created
 408 *
 409 * This functions returns true is max_files quota allows a new file to be
 410 * created.  It is necessary to walk through the snaprealm hierarchy (until the
 411 * FS root) to check all realms with quotas set.
 412 */
 413bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 414{
 415        if (!ceph_has_realms_with_quotas(inode))
 416                return false;
 417
 418        WARN_ON(!S_ISDIR(inode->i_mode));
 419
 420        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
 421}
 422
 423/*
 424 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 425 * @inode:      inode being written
 426 * @newsize:    new size if write succeeds
 427 *
 428 * This functions returns true is max_bytes quota allows a file size to reach
 429 * @newsize; it returns false otherwise.
 430 */
 431bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 432{
 433        loff_t size = i_size_read(inode);
 434
 435        if (!ceph_has_realms_with_quotas(inode))
 436                return false;
 437
 438        /* return immediately if we're decreasing file size */
 439        if (newsize <= size)
 440                return false;
 441
 442        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 443}
 444
 445/*
 446 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 447 * @inode:      inode being written
 448 * @newsize:    new size if write succeeds
 449 *
 450 * This function returns true if the new file size @newsize will be consuming
 451 * more than 1/16th of the available quota space; it returns false otherwise.
 452 */
 453bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 454{
 455        loff_t size = ceph_inode(inode)->i_reported_size;
 456
 457        if (!ceph_has_realms_with_quotas(inode))
 458                return false;
 459
 460        /* return immediately if we're decreasing file size */
 461        if (newsize <= size)
 462                return false;
 463
 464        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 465                                    (newsize - size));
 466}
 467
 468/*
 469 * ceph_quota_update_statfs - if root has quota update statfs with quota status
 470 * @fsc:        filesystem client instance
 471 * @buf:        statfs to update
 472 *
 473 * If the mounted filesystem root has max_bytes quota set, update the filesystem
 474 * statistics with the quota status.
 475 *
 476 * This function returns true if the stats have been updated, false otherwise.
 477 */
 478bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 479{
 480        struct ceph_mds_client *mdsc = fsc->mdsc;
 481        struct ceph_inode_info *ci;
 482        struct ceph_snap_realm *realm;
 483        struct inode *in;
 484        u64 total = 0, used, free;
 485        bool is_updated = false;
 486
 487        down_read(&mdsc->snap_rwsem);
 488        realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 489        up_read(&mdsc->snap_rwsem);
 490        if (!realm)
 491                return false;
 492
 493        spin_lock(&realm->inodes_with_caps_lock);
 494        in = realm->inode ? igrab(realm->inode) : NULL;
 495        spin_unlock(&realm->inodes_with_caps_lock);
 496        if (in) {
 497                ci = ceph_inode(in);
 498                spin_lock(&ci->i_ceph_lock);
 499                if (ci->i_max_bytes) {
 500                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 501                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 502                        /* It is possible for a quota to be exceeded.
 503                         * Report 'zero' in that case
 504                         */
 505                        free = total > used ? total - used : 0;
 506                }
 507                spin_unlock(&ci->i_ceph_lock);
 508                if (total) {
 509                        buf->f_blocks = total;
 510                        buf->f_bfree = free;
 511                        buf->f_bavail = free;
 512                        is_updated = true;
 513                }
 514                iput(in);
 515        }
 516        ceph_put_snap_realm(mdsc, realm);
 517
 518        return is_updated;
 519}
 520
 521