linux/fs/ceph/quota.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * quota.c - CephFS quota
   4 *
   5 * Copyright (C) 2017-2018 SUSE
   6 */
   7
   8#include <linux/statfs.h>
   9
  10#include "super.h"
  11#include "mds_client.h"
  12
  13void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
  14{
  15        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
  16        if (inc)
  17                atomic64_inc(&mdsc->quotarealms_count);
  18        else
  19                atomic64_dec(&mdsc->quotarealms_count);
  20}
  21
  22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
  23{
  24        struct super_block *sb = inode->i_sb;
  25        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
  26        struct inode *root = d_inode(sb->s_root);
  27
  28        if (atomic64_read(&mdsc->quotarealms_count) > 0)
  29                return true;
  30        /* if root is the real CephFS root, we don't have quota realms */
  31        if (root && ceph_ino(root) == CEPH_INO_ROOT)
  32                return false;
  33        /* otherwise, we can't know for sure */
  34        return true;
  35}
  36
  37void ceph_handle_quota(struct ceph_mds_client *mdsc,
  38                       struct ceph_mds_session *session,
  39                       struct ceph_msg *msg)
  40{
  41        struct super_block *sb = mdsc->fsc->sb;
  42        struct ceph_mds_quota *h = msg->front.iov_base;
  43        struct ceph_vino vino;
  44        struct inode *inode;
  45        struct ceph_inode_info *ci;
  46
  47        if (msg->front.iov_len < sizeof(*h)) {
  48                pr_err("%s corrupt message mds%d len %d\n", __func__,
  49                       session->s_mds, (int)msg->front.iov_len);
  50                ceph_msg_dump(msg);
  51                return;
  52        }
  53
  54        /* increment msg sequence number */
  55        mutex_lock(&session->s_mutex);
  56        inc_session_sequence(session);
  57        mutex_unlock(&session->s_mutex);
  58
  59        /* lookup inode */
  60        vino.ino = le64_to_cpu(h->ino);
  61        vino.snap = CEPH_NOSNAP;
  62        inode = ceph_find_inode(sb, vino);
  63        if (!inode) {
  64                pr_warn("Failed to find inode %llu\n", vino.ino);
  65                return;
  66        }
  67        ci = ceph_inode(inode);
  68
  69        spin_lock(&ci->i_ceph_lock);
  70        ci->i_rbytes = le64_to_cpu(h->rbytes);
  71        ci->i_rfiles = le64_to_cpu(h->rfiles);
  72        ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
  73        __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
  74                            le64_to_cpu(h->max_files));
  75        spin_unlock(&ci->i_ceph_lock);
  76
  77        iput(inode);
  78}
  79
  80static struct ceph_quotarealm_inode *
  81find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
  82{
  83        struct ceph_quotarealm_inode *qri = NULL;
  84        struct rb_node **node, *parent = NULL;
  85
  86        mutex_lock(&mdsc->quotarealms_inodes_mutex);
  87        node = &(mdsc->quotarealms_inodes.rb_node);
  88        while (*node) {
  89                parent = *node;
  90                qri = container_of(*node, struct ceph_quotarealm_inode, node);
  91
  92                if (ino < qri->ino)
  93                        node = &((*node)->rb_left);
  94                else if (ino > qri->ino)
  95                        node = &((*node)->rb_right);
  96                else
  97                        break;
  98        }
  99        if (!qri || (qri->ino != ino)) {
 100                /* Not found, create a new one and insert it */
 101                qri = kmalloc(sizeof(*qri), GFP_KERNEL);
 102                if (qri) {
 103                        qri->ino = ino;
 104                        qri->inode = NULL;
 105                        qri->timeout = 0;
 106                        mutex_init(&qri->mutex);
 107                        rb_link_node(&qri->node, parent, node);
 108                        rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
 109                } else
 110                        pr_warn("Failed to alloc quotarealms_inode\n");
 111        }
 112        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 113
 114        return qri;
 115}
 116
 117/*
 118 * This function will try to lookup a realm inode which isn't visible in the
 119 * filesystem mountpoint.  A list of these kind of inodes (not visible) is
 120 * maintained in the mdsc and freed only when the filesystem is umounted.
 121 *
 122 * Note that these inodes are kept in this list even if the lookup fails, which
 123 * allows to prevent useless lookup requests.
 124 */
 125static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
 126                                             struct super_block *sb,
 127                                             struct ceph_snap_realm *realm)
 128{
 129        struct ceph_quotarealm_inode *qri;
 130        struct inode *in;
 131
 132        qri = find_quotarealm_inode(mdsc, realm->ino);
 133        if (!qri)
 134                return NULL;
 135
 136        mutex_lock(&qri->mutex);
 137        if (qri->inode && ceph_is_any_caps(qri->inode)) {
 138                /* A request has already returned the inode */
 139                mutex_unlock(&qri->mutex);
 140                return qri->inode;
 141        }
 142        /* Check if this inode lookup has failed recently */
 143        if (qri->timeout &&
 144            time_before_eq(jiffies, qri->timeout)) {
 145                mutex_unlock(&qri->mutex);
 146                return NULL;
 147        }
 148        if (qri->inode) {
 149                /* get caps */
 150                int ret = __ceph_do_getattr(qri->inode, NULL,
 151                                            CEPH_STAT_CAP_INODE, true);
 152                if (ret >= 0)
 153                        in = qri->inode;
 154                else
 155                        in = ERR_PTR(ret);
 156        }  else {
 157                in = ceph_lookup_inode(sb, realm->ino);
 158        }
 159
 160        if (IS_ERR(in)) {
 161                dout("Can't lookup inode %llx (err: %ld)\n",
 162                     realm->ino, PTR_ERR(in));
 163                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
 164        } else {
 165                qri->timeout = 0;
 166                qri->inode = in;
 167        }
 168        mutex_unlock(&qri->mutex);
 169
 170        return in;
 171}
 172
 173void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 174{
 175        struct ceph_quotarealm_inode *qri;
 176        struct rb_node *node;
 177
 178        /*
 179         * It should now be safe to clean quotarealms_inode tree without holding
 180         * mdsc->quotarealms_inodes_mutex...
 181         */
 182        mutex_lock(&mdsc->quotarealms_inodes_mutex);
 183        while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
 184                node = rb_first(&mdsc->quotarealms_inodes);
 185                qri = rb_entry(node, struct ceph_quotarealm_inode, node);
 186                rb_erase(node, &mdsc->quotarealms_inodes);
 187                iput(qri->inode);
 188                kfree(qri);
 189        }
 190        mutex_unlock(&mdsc->quotarealms_inodes_mutex);
 191}
 192
 193/*
 194 * This function walks through the snaprealm for an inode and returns the
 195 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
 196 * or max_bytes).  If the root is reached, return the root ceph_snap_realm
 197 * instead.
 198 *
 199 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
 200 * returned realm.
 201 *
 202 * Callers of this function need to hold mdsc->snap_rwsem.  However, if there's
 203 * a need to do an inode lookup, this rwsem will be temporarily dropped.  Hence
 204 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
 205 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
 206 * will be restarted.
 207 */
 208static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 209                                               struct inode *inode, bool retry)
 210{
 211        struct ceph_inode_info *ci = NULL;
 212        struct ceph_snap_realm *realm, *next;
 213        struct inode *in;
 214        bool has_quota;
 215
 216        if (ceph_snap(inode) != CEPH_NOSNAP)
 217                return NULL;
 218
 219restart:
 220        realm = ceph_inode(inode)->i_snap_realm;
 221        if (realm)
 222                ceph_get_snap_realm(mdsc, realm);
 223        else
 224                pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
 225                                   "null i_snap_realm\n", ceph_vinop(inode));
 226        while (realm) {
 227                bool has_inode;
 228
 229                spin_lock(&realm->inodes_with_caps_lock);
 230                has_inode = realm->inode;
 231                in = has_inode ? igrab(realm->inode) : NULL;
 232                spin_unlock(&realm->inodes_with_caps_lock);
 233                if (has_inode && !in)
 234                        break;
 235                if (!in) {
 236                        up_read(&mdsc->snap_rwsem);
 237                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 238                        down_read(&mdsc->snap_rwsem);
 239                        if (IS_ERR_OR_NULL(in))
 240                                break;
 241                        ceph_put_snap_realm(mdsc, realm);
 242                        if (!retry)
 243                                return ERR_PTR(-EAGAIN);
 244                        goto restart;
 245                }
 246
 247                ci = ceph_inode(in);
 248                has_quota = __ceph_has_any_quota(ci);
 249                iput(in);
 250
 251                next = realm->parent;
 252                if (has_quota || !next)
 253                       return realm;
 254
 255                ceph_get_snap_realm(mdsc, next);
 256                ceph_put_snap_realm(mdsc, realm);
 257                realm = next;
 258        }
 259        if (realm)
 260                ceph_put_snap_realm(mdsc, realm);
 261
 262        return NULL;
 263}
 264
 265bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 266{
 267        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
 268        struct ceph_snap_realm *old_realm, *new_realm;
 269        bool is_same;
 270
 271restart:
 272        /*
 273         * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
 274         * However, get_quota_realm may drop it temporarily.  By setting the
 275         * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
 276         * dropped and we can then restart the whole operation.
 277         */
 278        down_read(&mdsc->snap_rwsem);
 279        old_realm = get_quota_realm(mdsc, old, true);
 280        new_realm = get_quota_realm(mdsc, new, false);
 281        if (PTR_ERR(new_realm) == -EAGAIN) {
 282                up_read(&mdsc->snap_rwsem);
 283                if (old_realm)
 284                        ceph_put_snap_realm(mdsc, old_realm);
 285                goto restart;
 286        }
 287        is_same = (old_realm == new_realm);
 288        up_read(&mdsc->snap_rwsem);
 289
 290        if (old_realm)
 291                ceph_put_snap_realm(mdsc, old_realm);
 292        if (new_realm)
 293                ceph_put_snap_realm(mdsc, new_realm);
 294
 295        return is_same;
 296}
 297
 298enum quota_check_op {
 299        QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
 300        QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_files limit */
 301        QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_files
 302                                                   limit is approaching */
 303};
 304
 305/*
 306 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
 307 * realm, it will execute quota check operation defined by the 'op' parameter.
 308 * The snaprealm walk is interrupted if the quota check detects that the quota
 309 * is exceeded or if the root inode is reached.
 310 */
 311static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 312                                 loff_t delta)
 313{
 314        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 315        struct ceph_inode_info *ci;
 316        struct ceph_snap_realm *realm, *next;
 317        struct inode *in;
 318        u64 max, rvalue;
 319        bool exceeded = false;
 320
 321        if (ceph_snap(inode) != CEPH_NOSNAP)
 322                return false;
 323
 324        down_read(&mdsc->snap_rwsem);
 325restart:
 326        realm = ceph_inode(inode)->i_snap_realm;
 327        if (realm)
 328                ceph_get_snap_realm(mdsc, realm);
 329        else
 330                pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
 331                                   "null i_snap_realm\n", ceph_vinop(inode));
 332        while (realm) {
 333                bool has_inode;
 334
 335                spin_lock(&realm->inodes_with_caps_lock);
 336                has_inode = realm->inode;
 337                in = has_inode ? igrab(realm->inode) : NULL;
 338                spin_unlock(&realm->inodes_with_caps_lock);
 339                if (has_inode && !in)
 340                        break;
 341                if (!in) {
 342                        up_read(&mdsc->snap_rwsem);
 343                        in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
 344                        down_read(&mdsc->snap_rwsem);
 345                        if (IS_ERR_OR_NULL(in))
 346                                break;
 347                        ceph_put_snap_realm(mdsc, realm);
 348                        goto restart;
 349                }
 350                ci = ceph_inode(in);
 351                spin_lock(&ci->i_ceph_lock);
 352                if (op == QUOTA_CHECK_MAX_FILES_OP) {
 353                        max = ci->i_max_files;
 354                        rvalue = ci->i_rfiles + ci->i_rsubdirs;
 355                } else {
 356                        max = ci->i_max_bytes;
 357                        rvalue = ci->i_rbytes;
 358                }
 359                spin_unlock(&ci->i_ceph_lock);
 360                switch (op) {
 361                case QUOTA_CHECK_MAX_FILES_OP:
 362                case QUOTA_CHECK_MAX_BYTES_OP:
 363                        exceeded = (max && (rvalue + delta > max));
 364                        break;
 365                case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
 366                        if (max) {
 367                                if (rvalue >= max)
 368                                        exceeded = true;
 369                                else {
 370                                        /*
 371                                         * when we're writing more that 1/16th
 372                                         * of the available space
 373                                         */
 374                                        exceeded =
 375                                                (((max - rvalue) >> 4) < delta);
 376                                }
 377                        }
 378                        break;
 379                default:
 380                        /* Shouldn't happen */
 381                        pr_warn("Invalid quota check op (%d)\n", op);
 382                        exceeded = true; /* Just break the loop */
 383                }
 384                iput(in);
 385
 386                next = realm->parent;
 387                if (exceeded || !next)
 388                        break;
 389                ceph_get_snap_realm(mdsc, next);
 390                ceph_put_snap_realm(mdsc, realm);
 391                realm = next;
 392        }
 393        if (realm)
 394                ceph_put_snap_realm(mdsc, realm);
 395        up_read(&mdsc->snap_rwsem);
 396
 397        return exceeded;
 398}
 399
 400/*
 401 * ceph_quota_is_max_files_exceeded - check if we can create a new file
 402 * @inode:      directory where a new file is being created
 403 *
 404 * This functions returns true is max_files quota allows a new file to be
 405 * created.  It is necessary to walk through the snaprealm hierarchy (until the
 406 * FS root) to check all realms with quotas set.
 407 */
 408bool ceph_quota_is_max_files_exceeded(struct inode *inode)
 409{
 410        if (!ceph_has_realms_with_quotas(inode))
 411                return false;
 412
 413        WARN_ON(!S_ISDIR(inode->i_mode));
 414
 415        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
 416}
 417
 418/*
 419 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
 420 * @inode:      inode being written
 421 * @newsize:    new size if write succeeds
 422 *
 423 * This functions returns true is max_bytes quota allows a file size to reach
 424 * @newsize; it returns false otherwise.
 425 */
 426bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
 427{
 428        loff_t size = i_size_read(inode);
 429
 430        if (!ceph_has_realms_with_quotas(inode))
 431                return false;
 432
 433        /* return immediately if we're decreasing file size */
 434        if (newsize <= size)
 435                return false;
 436
 437        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
 438}
 439
 440/*
 441 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
 442 * @inode:      inode being written
 443 * @newsize:    new size if write succeeds
 444 *
 445 * This function returns true if the new file size @newsize will be consuming
 446 * more than 1/16th of the available quota space; it returns false otherwise.
 447 */
 448bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
 449{
 450        loff_t size = ceph_inode(inode)->i_reported_size;
 451
 452        if (!ceph_has_realms_with_quotas(inode))
 453                return false;
 454
 455        /* return immediately if we're decreasing file size */
 456        if (newsize <= size)
 457                return false;
 458
 459        return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
 460                                    (newsize - size));
 461}
 462
 463/*
 464 * ceph_quota_update_statfs - if root has quota update statfs with quota status
 465 * @fsc:        filesystem client instance
 466 * @buf:        statfs to update
 467 *
 468 * If the mounted filesystem root has max_bytes quota set, update the filesystem
 469 * statistics with the quota status.
 470 *
 471 * This function returns true if the stats have been updated, false otherwise.
 472 */
 473bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 474{
 475        struct ceph_mds_client *mdsc = fsc->mdsc;
 476        struct ceph_inode_info *ci;
 477        struct ceph_snap_realm *realm;
 478        struct inode *in;
 479        u64 total = 0, used, free;
 480        bool is_updated = false;
 481
 482        down_read(&mdsc->snap_rwsem);
 483        realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
 484        up_read(&mdsc->snap_rwsem);
 485        if (!realm)
 486                return false;
 487
 488        spin_lock(&realm->inodes_with_caps_lock);
 489        in = realm->inode ? igrab(realm->inode) : NULL;
 490        spin_unlock(&realm->inodes_with_caps_lock);
 491        if (in) {
 492                ci = ceph_inode(in);
 493                spin_lock(&ci->i_ceph_lock);
 494                if (ci->i_max_bytes) {
 495                        total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
 496                        used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
 497                        /* It is possible for a quota to be exceeded.
 498                         * Report 'zero' in that case
 499                         */
 500                        free = total > used ? total - used : 0;
 501                }
 502                spin_unlock(&ci->i_ceph_lock);
 503                if (total) {
 504                        buf->f_blocks = total;
 505                        buf->f_bfree = free;
 506                        buf->f_bavail = free;
 507                        is_updated = true;
 508                }
 509                iput(in);
 510        }
 511        ceph_put_snap_realm(mdsc, realm);
 512
 513        return is_updated;
 514}
 515
 516