linux/fs/xfs/xfs_dquot.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *       qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first, see xfs_dqlock2.
 */

struct kmem_zone                *xfs_qm_dqtrxzone;
static struct kmem_zone         *xfs_qm_dqzone;

static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
        xfs_dquot_t     *dqp)
{
        ASSERT(list_empty(&dqp->q_lru));

        kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
        mutex_destroy(&dqp->q_qlock);

        XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
        kmem_zone_free(xfs_qm_dqzone, dqp);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
        struct xfs_mount        *mp,
        struct xfs_dquot        *dq)
{
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_disk_dquot   *d = &dq->q_core;
        struct xfs_def_quota    *defq;
        int                     prealloc = 0;

        ASSERT(d->d_id);
        defq = xfs_get_defquota(dq, q);

        if (defq->bsoftlimit && !d->d_blk_softlimit) {
                d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
                prealloc = 1;
        }
        if (defq->bhardlimit && !d->d_blk_hardlimit) {
                d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
                prealloc = 1;
        }
        if (defq->isoftlimit && !d->d_ino_softlimit)
                d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
        if (defq->ihardlimit && !d->d_ino_hardlimit)
                d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
        if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
                d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
        if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
                d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);

        if (prealloc)
                xfs_dquot_set_prealloc_limits(dq);
}

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated: we simply don't reject any quota
 * reservations in that case.
 * We also return 0 for the timer values in Q_GETQUOTA calls when
 * enforcement is off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
void
xfs_qm_adjust_dqtimers(
        xfs_mount_t             *mp,
        xfs_disk_dquot_t        *d)
{
        ASSERT(d->d_id);

#ifdef DEBUG
        if (d->d_blk_hardlimit)
                ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
                       be64_to_cpu(d->d_blk_hardlimit));
        if (d->d_ino_hardlimit)
                ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
                       be64_to_cpu(d->d_ino_hardlimit));
        if (d->d_rtb_hardlimit)
                ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
                       be64_to_cpu(d->d_rtb_hardlimit));
#endif

        if (!d->d_btimer) {
                if ((d->d_blk_softlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_softlimit))) ||
                    (d->d_blk_hardlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_btimelimit);
                } else {
                        d->d_bwarns = 0;
                }
        } else {
                if ((!d->d_blk_softlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_softlimit))) &&
                    (!d->d_blk_hardlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = 0;
                }
        }

        if (!d->d_itimer) {
                if ((d->d_ino_softlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_softlimit))) ||
                    (d->d_ino_hardlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_itimelimit);
                } else {
                        d->d_iwarns = 0;
                }
        } else {
                if ((!d->d_ino_softlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_softlimit))) &&
                    (!d->d_ino_hardlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = 0;
                }
        }

        if (!d->d_rtbtimer) {
                if ((d->d_rtb_softlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_softlimit))) ||
                    (d->d_rtb_hardlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_rtbtimelimit);
                } else {
                        d->d_rtbwarns = 0;
                }
        } else {
                if ((!d->d_rtb_softlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_softlimit))) &&
                    (!d->d_rtb_hardlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = 0;
                }
        }
}
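
/*
 * The three stanzas above repeat the same start/clear decision once per
 * resource (blocks, inodes, realtime blocks).  The sketch below shows how
 * that pattern could be factored into a single helper; the helper name and
 * signature are illustrative only and not part of this file.
 */
static void __maybe_unused
xfs_qm_adjust_res_timer_sketch(
        __be32          *timer,
        __be16          *warns,
        __be64          count,
        __be64          softlimit,
        __be64          hardlimit,
        time_t          limit)
{
        uint64_t        soft = be64_to_cpu(softlimit);
        uint64_t        hard = be64_to_cpu(hardlimit);
        uint64_t        res = be64_to_cpu(count);

        if (!*timer) {
                /* Start the timer when either limit is first exceeded. */
                if ((soft && res > soft) || (hard && res > hard))
                        *timer = cpu_to_be32(get_seconds() + limit);
                else
                        *warns = 0;
        } else {
                /* Clear the timer once usage is back under both limits. */
                if ((!soft || res <= soft) && (!hard || res <= hard))
                        *timer = 0;
        }
}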

/*
 * initialize a buffer full of dquots and log the whole thing
 */
STATIC void
xfs_qm_init_dquot_blk(
        xfs_trans_t     *tp,
        xfs_mount_t     *mp,
        xfs_dqid_t      id,
        uint            type,
        xfs_buf_t       *bp)
{
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_dqblk_t     *d;
        xfs_dqid_t      curid;
        int             i;

        ASSERT(tp);
        ASSERT(xfs_buf_islocked(bp));

        d = bp->b_addr;

        /*
         * ID of the first dquot in the block - ids are zero based.
         */
        curid = id - (id % q->qi_dqperchunk);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
                d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
                d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
                        xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
                                         XFS_DQUOT_CRC_OFF);
                }
        }

        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
                             XFS_BLF_GDQUOT_BUF)));
        xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
 * watermarks correspond to the soft and hard limits by default. If a soft limit
 * is not specified, we use 95% of the hard limit.
 */
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
        uint64_t space;

        dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
        dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
        if (!dqp->q_prealloc_lo_wmark) {
                dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
                do_div(dqp->q_prealloc_lo_wmark, 100);
                dqp->q_prealloc_lo_wmark *= 95;
        }

        space = dqp->q_prealloc_hi_wmark;

        do_div(space, 100);
        dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
        dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
        dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
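
/*
 * Worked example of the watermark math above, assuming an illustrative
 * dquot with a 1000 block hard limit and no soft limit:
 *
 *      q_prealloc_hi_wmark = 1000
 *      q_prealloc_lo_wmark = 1000 / 100 * 95 = 950  (95% of the hard limit)
 *      q_low_space[XFS_QLOWSP_1_PCNT] = 10
 *      q_low_space[XFS_QLOWSP_3_PCNT] = 30
 *      q_low_space[XFS_QLOWSP_5_PCNT] = 50
 *
 * do_div() is used instead of a plain '/' because these are 64-bit values
 * that must also divide correctly on 32-bit platforms.
 */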

/*
 * Ensure that the given in-core dquot has a buffer on disk backing it, and
 * return the buffer. This is called when the bmapi finds a hole.
 */
STATIC int
xfs_dquot_disk_alloc(
        struct xfs_trans        **tpp,
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_bmbt_irec    map;
        struct xfs_trans        *tp = *tpp;
        struct xfs_mount        *mp = tp->t_mountp;
        struct xfs_buf          *bp;
        struct xfs_inode        *quotip = xfs_quota_inode(mp, dqp->dq_flags);
        int                     nmaps = 1;
        int                     error;

        trace_xfs_dqalloc(dqp);

        xfs_ilock(quotip, XFS_ILOCK_EXCL);
        if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                /*
                 * Return if this type of quota was turned off while we
                 * didn't have the inode lock.
                 */
                xfs_iunlock(quotip, XFS_ILOCK_EXCL);
                return -ESRCH;
        }

        /* Create the block mapping. */
        xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
        error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
                        XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
                        XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps);
        if (error)
                return error;
        ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
        ASSERT(nmaps == 1);
        ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
               (map.br_startblock != HOLESTARTBLOCK));

        /*
         * Keep track of the blkno to save a lookup later
         */
        dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

        /* now we can just get the buffer (there's nothing to read yet) */
        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
                        mp->m_quotainfo->qi_dqchunklen, 0);
        if (!bp)
                return -ENOMEM;
        bp->b_ops = &xfs_dquot_buf_ops;

        /*
         * Make a chunk of dquots out of this buffer and log
         * the entire thing.
         */
        xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
                              dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
        xfs_buf_set_ref(bp, XFS_DQUOT_REF);

        /*
         * Hold the buffer and join it to the dfops so that we'll still own
         * the buffer when we return to the caller.  The buffer disposal on
         * error must be paid attention to very carefully, as it has been
         * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota
         * code when allocating a new dquot record" in 2005, and the later
         * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep
         * the buffer locked across the _defer_finish call.  We can now do
         * this correctly with xfs_defer_bjoin.
         *
         * Above, we allocated a disk block for the dquot information and used
         * get_buf to initialize the dquot. If the _defer_finish fails, the old
         * transaction is gone but the new buffer is not joined or held to any
         * transaction, so we must _buf_relse it.
         *
         * If everything succeeds, the caller of this function is returned a
         * buffer that is locked and held to the transaction.  The caller
         * is responsible for unlocking any buffer passed back, either
         * manually or by committing the transaction.
         */
        xfs_trans_bhold(tp, bp);
        error = xfs_defer_finish(tpp);
        tp = *tpp;
        if (error) {
                xfs_buf_relse(bp);
                return error;
        }
        *bpp = bp;
        return 0;
}

/*
 * Read in the in-core dquot's on-disk metadata and return the buffer.
 * Returns ENOENT to signal a hole.
 */
STATIC int
xfs_dquot_disk_read(
        struct xfs_mount        *mp,
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_bmbt_irec    map;
        struct xfs_buf          *bp;
        struct xfs_inode        *quotip = xfs_quota_inode(mp, dqp->dq_flags);
        uint                    lock_mode;
        int                     nmaps = 1;
        int                     error;

        lock_mode = xfs_ilock_data_map_shared(quotip);
        if (!xfs_this_quota_on(mp, dqp->dq_flags)) {
                /*
                 * Return if this type of quota was turned off while we
                 * didn't have the quota inode lock.
                 */
                xfs_iunlock(quotip, lock_mode);
                return -ESRCH;
        }

        /*
         * Find the block map; no allocations yet
         */
        error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
                        XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
        xfs_iunlock(quotip, lock_mode);
        if (error)
                return error;

        ASSERT(nmaps == 1);
        ASSERT(map.br_blockcount >= 1);
        ASSERT(map.br_startblock != DELAYSTARTBLOCK);
        if (map.br_startblock == HOLESTARTBLOCK)
                return -ENOENT;

        trace_xfs_dqtobp_read(dqp);

        /*
         * store the blkno etc so that we don't have to do the
         * mapping all the time
         */
        dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
                        mp->m_quotainfo->qi_dqchunklen, 0, &bp,
                        &xfs_dquot_buf_ops);
        if (error) {
                ASSERT(bp == NULL);
                return error;
        }

        ASSERT(xfs_buf_islocked(bp));
        xfs_buf_set_ref(bp, XFS_DQUOT_REF);
        *bpp = bp;

        return 0;
}

/* Allocate and initialize everything we need for an incore dquot. */
STATIC struct xfs_dquot *
xfs_dquot_alloc(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type)
{
        struct xfs_dquot        *dqp;

        dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

        dqp->dq_flags = type;
        dqp->q_core.d_id = cpu_to_be32(id);
        dqp->q_mount = mp;
        INIT_LIST_HEAD(&dqp->q_lru);
        mutex_init(&dqp->q_qlock);
        init_waitqueue_head(&dqp->q_pinwait);
        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
        /*
         * Offset of dquot in the (fixed sized) dquot chunk.
         */
        dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
                        sizeof(xfs_dqblk_t);

        /*
         * Because we want to use a counting completion, complete
         * the flush completion once to allow a single access to
         * the flush completion without blocking.
         */
        init_completion(&dqp->q_flush);
        complete(&dqp->q_flush);

        /*
         * Make sure group quotas have a different lock class than user
         * quotas.
         */
        switch (type) {
        case XFS_DQ_USER:
                /* uses the default lock class */
                break;
        case XFS_DQ_GROUP:
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
                break;
        case XFS_DQ_PROJ:
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
                break;
        default:
                ASSERT(0);
                break;
        }

        xfs_qm_dquot_logitem_init(dqp);

        XFS_STATS_INC(mp, xs_qm_dquot);
        return dqp;
}

/* Copy the in-core quota fields in from the on-disk buffer. */
STATIC void
xfs_dquot_from_disk(
        struct xfs_dquot        *dqp,
        struct xfs_buf          *bp)
{
        struct xfs_disk_dquot   *ddqp = bp->b_addr + dqp->q_bufoffset;

        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));

        /*
         * Reservation counters are defined as reservation plus current usage
         * to avoid having to add every time.
         */
        dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
        dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
        dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

        /* initialize the dquot speculative prealloc thresholds */
        xfs_dquot_set_prealloc_limits(dqp);
}

/* Allocate and initialize the dquot buffer for this in-core dquot. */
static int
xfs_qm_dqread_alloc(
        struct xfs_mount        *mp,
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_trans        *tp;
        struct xfs_buf          *bp;
        int                     error;

        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
                        XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
        if (error)
                goto err;

        error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
        if (error)
                goto err_cancel;

        error = xfs_trans_commit(tp);
        if (error) {
                /*
                 * Buffer was held to the transaction, so we have to unlock it
                 * manually here because we're not passing it back.
                 */
                xfs_buf_relse(bp);
                goto err;
        }
        *bpp = bp;
        return 0;

err_cancel:
        xfs_trans_cancel(tp);
err:
        return error;
}

/*
 * Read in the on-disk dquot's metadata via xfs_dquot_disk_read(), copy it
 * to an incore version, and release the buffer immediately.  If @can_alloc
 * is true, fill any holes in the on-disk metadata.
 */
static int
xfs_qm_dqread(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        bool                    can_alloc,
        struct xfs_dquot        **dqpp)
{
        struct xfs_dquot        *dqp;
        struct xfs_buf          *bp;
        int                     error;

        dqp = xfs_dquot_alloc(mp, id, type);
        trace_xfs_dqread(dqp);

        /* Try to read the buffer, allocating if necessary. */
        error = xfs_dquot_disk_read(mp, dqp, &bp);
        if (error == -ENOENT && can_alloc)
                error = xfs_qm_dqread_alloc(mp, dqp, &bp);
        if (error)
                goto err;

        /*
         * At this point we should have a clean locked buffer.  Copy the data
         * to the incore dquot and release the buffer since the incore dquot
         * has its own locking protocol so we needn't tie up the buffer any
         * further.
         */
        ASSERT(xfs_buf_islocked(bp));
        xfs_dquot_from_disk(dqp, bp);

        xfs_buf_relse(bp);
        *dqpp = dqp;
        return error;

err:
        trace_xfs_dqread_fail(dqp);
        xfs_qm_dqdestroy(dqp);
        *dqpp = NULL;
        return error;
}

/*
 * Advance to the next id in the current chunk, or if at the
 * end of the chunk, skip ahead to first id in next allocated chunk
 * by looking up the quota inode's extent list.
 */
static int
xfs_dq_get_next_id(
        struct xfs_mount        *mp,
        uint                    type,
        xfs_dqid_t              *id)
{
        struct xfs_inode        *quotip = xfs_quota_inode(mp, type);
        xfs_dqid_t              next_id = *id + 1; /* simple advance */
        uint                    lock_flags;
        struct xfs_bmbt_irec    got;
        struct xfs_iext_cursor  cur;
        xfs_fsblock_t           start;
        int                     error = 0;

        /* If we'd wrap past the max ID, stop */
        if (next_id < *id)
                return -ENOENT;

        /* If new ID is within the current chunk, advancing it sufficed */
        if (next_id % mp->m_quotainfo->qi_dqperchunk) {
                *id = next_id;
                return 0;
        }

        /* Nope, next_id is now past the current chunk, so find the next one */
        start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;

        lock_flags = xfs_ilock_data_map_shared(quotip);
        if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
                error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
                if (error) {
                        /* Don't leak the inode lock on error. */
                        xfs_iunlock(quotip, lock_flags);
                        return error;
                }
        }

        if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
                /* contiguous chunk, bump startoff for the id calculation */
                if (got.br_startoff < start)
                        got.br_startoff = start;
                *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
        } else {
                error = -ENOENT;
        }

        xfs_iunlock(quotip, lock_flags);

        return error;
}

/*
 * Look up the dquot in the in-core cache.  If found, the dquot is returned
 * locked and ready to go.
 */
static struct xfs_dquot *
xfs_qm_dqget_cache_lookup(
        struct xfs_mount        *mp,
        struct xfs_quotainfo    *qi,
        struct radix_tree_root  *tree,
        xfs_dqid_t              id)
{
        struct xfs_dquot        *dqp;

restart:
        mutex_lock(&qi->qi_tree_lock);
        dqp = radix_tree_lookup(tree, id);
        if (!dqp) {
                mutex_unlock(&qi->qi_tree_lock);
                XFS_STATS_INC(mp, xs_qm_dqcachemisses);
                return NULL;
        }

        xfs_dqlock(dqp);
        if (dqp->dq_flags & XFS_DQ_FREEING) {
                xfs_dqunlock(dqp);
                mutex_unlock(&qi->qi_tree_lock);
                trace_xfs_dqget_freeing(dqp);
                delay(1);
                goto restart;
        }

        dqp->q_nrefs++;
        mutex_unlock(&qi->qi_tree_lock);

        trace_xfs_dqget_hit(dqp);
        XFS_STATS_INC(mp, xs_qm_dqcachehits);
        return dqp;
}

/*
 * Try to insert a new dquot into the in-core cache.  If an error occurs the
 * caller should throw away the dquot and start over.  Otherwise, the dquot
 * is returned locked (and held by the cache) as if there had been a cache
 * hit.
 */
static int
xfs_qm_dqget_cache_insert(
        struct xfs_mount        *mp,
        struct xfs_quotainfo    *qi,
        struct radix_tree_root  *tree,
        xfs_dqid_t              id,
        struct xfs_dquot        *dqp)
{
        int                     error;

        mutex_lock(&qi->qi_tree_lock);
        error = radix_tree_insert(tree, id, dqp);
        if (unlikely(error)) {
                /* Duplicate found!  Caller must try again. */
                WARN_ON(error != -EEXIST);
                mutex_unlock(&qi->qi_tree_lock);
                trace_xfs_dqget_dup(dqp);
                return error;
        }

        /* Return a locked dquot to the caller, with a reference taken. */
        xfs_dqlock(dqp);
        dqp->q_nrefs = 1;

        qi->qi_dquots++;
        mutex_unlock(&qi->qi_tree_lock);

        return 0;
}

/* Check our input parameters. */
static int
xfs_qm_dqget_checks(
        struct xfs_mount        *mp,
        uint                    type)
{
        if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp)))
                return -ESRCH;

        switch (type) {
        case XFS_DQ_USER:
                if (!XFS_IS_UQUOTA_ON(mp))
                        return -ESRCH;
                return 0;
        case XFS_DQ_GROUP:
                if (!XFS_IS_GQUOTA_ON(mp))
                        return -ESRCH;
                return 0;
        case XFS_DQ_PROJ:
                if (!XFS_IS_PQUOTA_ON(mp))
                        return -ESRCH;
                return 0;
        default:
                WARN_ON_ONCE(0);
                return -EINVAL;
        }
}

/*
 * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
 * dquot, doing an allocation (if requested) as needed.
 */
int
xfs_qm_dqget(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        bool                    can_alloc,
        struct xfs_dquot        **O_dqpp)
{
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
        struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
        struct xfs_dquot        *dqp;
        int                     error;

        error = xfs_qm_dqget_checks(mp, type);
        if (error)
                return error;

restart:
        dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
        if (dqp) {
                *O_dqpp = dqp;
                return 0;
        }

        error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
        if (error)
                return error;

        error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
        if (error) {
                /*
                 * Duplicate found. Just throw away the new dquot and start
                 * over.
                 */
                xfs_qm_dqdestroy(dqp);
                XFS_STATS_INC(mp, xs_qm_dquot_dups);
                goto restart;
        }

        trace_xfs_dqget_miss(dqp);
        *O_dqpp = dqp;
        return 0;
}
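
/*
 * Minimal sketch of the lookup protocol from a caller's point of view.
 * The helper below is illustrative only (the id is made up and the
 * function is not part of this file); it shows the get/put pairing.
 */
static int __maybe_unused
xfs_qm_dqget_usage_sketch(
        struct xfs_mount        *mp)
{
        struct xfs_dquot        *dqp;
        int                     error;

        /* Look up the user dquot for id 500, allocating it if missing. */
        error = xfs_qm_dqget(mp, 500, XFS_DQ_USER, true, &dqp);
        if (error)
                return error;

        /* dqp is returned locked with one reference held ... use it ... */

        /* Drop the reference and unlock in one call. */
        xfs_qm_dqput(dqp);
        return 0;
}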

/*
 * Given a dquot id and type, read and initialize a dquot from the on-disk
 * metadata.  This function is only for use during quota initialization so
 * it ignores the dquot cache assuming that the dquot shrinker isn't set up.
 * The caller is responsible for _qm_dqdestroy'ing the returned dquot.
 */
int
xfs_qm_dqget_uncached(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        struct xfs_dquot        **dqpp)
{
        int                     error;

        error = xfs_qm_dqget_checks(mp, type);
        if (error)
                return error;

        return xfs_qm_dqread(mp, id, type, false, dqpp);
}

/* Return the quota id for a given inode and type. */
xfs_dqid_t
xfs_qm_id_for_quotatype(
        struct xfs_inode        *ip,
        uint                    type)
{
        switch (type) {
        case XFS_DQ_USER:
                return ip->i_d.di_uid;
        case XFS_DQ_GROUP:
                return ip->i_d.di_gid;
        case XFS_DQ_PROJ:
                return xfs_get_projid(ip);
        }
        ASSERT(0);
        return 0;
}

/*
 * Return the dquot for a given inode and type.  If @can_alloc is true, then
 * allocate blocks if needed.  The inode's ILOCK must be held and it must not
 * already have a dquot of this type attached.
 */
int
xfs_qm_dqget_inode(
        struct xfs_inode        *ip,
        uint                    type,
        bool                    can_alloc,
        struct xfs_dquot        **O_dqpp)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
        struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
        struct xfs_dquot        *dqp;
        xfs_dqid_t              id;
        int                     error;

        error = xfs_qm_dqget_checks(mp, type);
        if (error)
                return error;

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(xfs_inode_dquot(ip, type) == NULL);

        id = xfs_qm_id_for_quotatype(ip, type);

restart:
        dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
        if (dqp) {
                *O_dqpp = dqp;
                return 0;
        }

        /*
         * Dquot cache miss. We don't want to keep the inode lock across
         * a (potential) disk read. Also we don't want to deal with the lock
         * ordering between quotainode and this inode. OTOH, dropping the inode
         * lock here means dealing with a chown that can happen before
         * we re-acquire the lock.
         */
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        if (error)
                return error;

        /*
         * A dquot could be attached to this inode by now, since we had
         * dropped the ilock.
         */
        if (xfs_this_quota_on(mp, type)) {
                struct xfs_dquot        *dqp1;

                dqp1 = xfs_inode_dquot(ip, type);
                if (dqp1) {
                        xfs_qm_dqdestroy(dqp);
                        dqp = dqp1;
                        xfs_dqlock(dqp);
                        goto dqret;
                }
        } else {
                /* inode stays locked on return */
                xfs_qm_dqdestroy(dqp);
                return -ESRCH;
        }

        error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
        if (error) {
                /*
                 * Duplicate found. Just throw away the new dquot and start
                 * over.
                 */
                xfs_qm_dqdestroy(dqp);
                XFS_STATS_INC(mp, xs_qm_dquot_dups);
                goto restart;
        }

dqret:
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        trace_xfs_dqget_miss(dqp);
        *O_dqpp = dqp;
        return 0;
}

/*
 * Starting at @id and progressing upwards, look for an initialized incore
 * dquot, lock it, and return it.
 */
int
xfs_qm_dqget_next(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        struct xfs_dquot        **dqpp)
{
        struct xfs_dquot        *dqp;
        int                     error = 0;

        *dqpp = NULL;
        for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) {
                error = xfs_qm_dqget(mp, id, type, false, &dqp);
                if (error == -ENOENT)
                        continue;
                else if (error != 0)
                        break;

                if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
                        *dqpp = dqp;
                        return 0;
                }

                xfs_qm_dqput(dqp);
        }

        return error;
}

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If the reference count drops to zero, put the dquot on the LRU list
 * so that it can be reclaimed later.
 */
void
xfs_qm_dqput(
        struct xfs_dquot        *dqp)
{
        ASSERT(dqp->q_nrefs > 0);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));

        trace_xfs_dqput(dqp);

        if (--dqp->q_nrefs == 0) {
                struct xfs_quotainfo    *qi = dqp->q_mount->m_quotainfo;
                trace_xfs_dqput_free(dqp);

                if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
                        XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
        }
        xfs_dqunlock(dqp);
}

/*
 * Release a dquot.  We deliberately do not flush it even if dirty;
 * we just dqput() it.  The dquot must not be locked.
 */
void
xfs_qm_dqrele(
        xfs_dquot_t     *dqp)
{
        if (!dqp)
                return;

        trace_xfs_dqrele(dqp);

        xfs_dqlock(dqp);
        /*
         * We don't care to flush it if the dquot is dirty here.
         * That would create stutters that we want to avoid.
         * Instead we do a delayed write when we try to reclaim
         * a dirty dquot.
         */
        xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock. This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
        struct xfs_buf          *bp,
        struct xfs_log_item     *lip)
{
        xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
        xfs_dquot_t             *dqp = qip->qli_dquot;
        struct xfs_ail          *ailp = lip->li_ailp;

        /*
         * We only want to pull the item from the AIL if its
         * location in the log has not changed since we started the flush.
         * Thus, we only bother if the dquot's lsn has
         * not changed. First we check the lsn outside the lock
         * since it's cheaper, and then we recheck while
         * holding the lock before removing the dquot from the AIL.
         */
        if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
            ((lip->li_lsn == qip->qli_flush_lsn) ||
             test_bit(XFS_LI_FAILED, &lip->li_flags))) {

                /* xfs_trans_ail_delete() drops the AIL lock. */
                spin_lock(&ailp->ail_lock);
                if (lip->li_lsn == qip->qli_flush_lsn) {
                        xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
                } else {
                        /*
                         * Clear the failed state since we are about to drop
                         * the flush lock.
                         */
                        xfs_clear_li_failed(lip);
                        spin_unlock(&ailp->ail_lock);
                }
        }

        /*
         * Release the dq's flush lock since we're done with it.
         */
        xfs_dqfunlock(dqp);
}

/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock must also be held by the
 * caller.  The flush lock will not be unlocked until the dquot reaches
 * the disk, but the dquot is free to be unlocked and modified by the
 * caller in the interim. Dquot is still locked on return. This behavior
 * is identical to that of inodes.
 */
int
xfs_qm_dqflush(
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_buf          *bp;
        struct xfs_dqblk        *dqb;
        struct xfs_disk_dquot   *ddqp;
        xfs_failaddr_t          fa;
        int                     error;

        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        ASSERT(!completion_done(&dqp->q_flush));

        trace_xfs_dqflush(dqp);

        *bpp = NULL;

        xfs_qm_dqunpin_wait(dqp);

        /*
         * This may have been unpinned because the filesystem is shutting
         * down forcibly. If that's the case we must not write this dquot
         * to disk, because the log record didn't make it to disk.
         *
         * We also have to remove the log item from the AIL in this case,
         * as we wait for an empty AIL as part of the unmount process.
         */
        if (XFS_FORCED_SHUTDOWN(mp)) {
                struct xfs_log_item     *lip = &dqp->q_logitem.qli_item;
                dqp->dq_flags &= ~XFS_DQ_DIRTY;

                xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);

                error = -EIO;
                goto out_unlock;
        }

        /*
         * Get the buffer containing the on-disk dquot
         */
        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
                                   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
                                   &xfs_dquot_buf_ops);
        if (error)
                goto out_unlock;

        /*
         * Calculate the location of the dquot inside the buffer.
         */
        dqb = bp->b_addr + dqp->q_bufoffset;
        ddqp = &dqb->dd_diskdq;

        /*
         * A simple sanity check in case we got a corrupted dquot.
         */
        fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0);
        if (fa) {
                xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
                                be32_to_cpu(ddqp->d_id), fa);
                xfs_buf_relse(bp);
                xfs_dqfunlock(dqp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                return -EIO;
        }

        /* This is the only portion of data that needs to persist */
        memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

        /*
         * Clear the dirty field and remember the flush lsn for later use.
         */
        dqp->dq_flags &= ~XFS_DQ_DIRTY;

        xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
                                        &dqp->q_logitem.qli_item.li_lsn);

        /*
         * copy the lsn into the on-disk dquot now while we have the in memory
         * dquot here. This can't be done later in the write verifier as we
         * can't get access to the log item at that point in time.
         *
         * We also calculate the CRC here so that the on-disk dquot in the
         * buffer always has a valid CRC. This ensures there is no possibility
         * of a dquot without an up-to-date CRC getting to disk.
         */
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
                xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF);
        }

        /*
         * Attach an iodone routine so that we can remove this dquot from the
         * AIL and release the flush lock once the dquot is synced to disk.
         */
        xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
                                  &dqp->q_logitem.qli_item);

        /*
         * If the buffer is pinned then push on the log so we won't
         * get stuck waiting in the write for too long.
         */
        if (xfs_buf_ispinned(bp)) {
                trace_xfs_dqflush_force(dqp);
                xfs_log_force(mp, 0);
        }

        trace_xfs_dqflush_done(dqp);
        *bpp = bp;
        return 0;

out_unlock:
        xfs_dqfunlock(dqp);
        return error;
}
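
/*
 * Sketch of how a writeback path might drive xfs_qm_dqflush().  This
 * mirrors the flush-lock/delwri pattern the quota manager uses elsewhere,
 * but the helper itself is illustrative and not part of this file.
 */
static int __maybe_unused
xfs_qm_dqflush_usage_sketch(
        struct xfs_dquot        *dqp,
        struct list_head        *buffer_list)
{
        struct xfs_buf          *bp;
        int                     error;

        xfs_dqlock(dqp);
        if (!xfs_dqflock_nowait(dqp)) {
                /* Someone else holds the flush lock; retry later. */
                xfs_dqunlock(dqp);
                return -EAGAIN;
        }

        error = xfs_qm_dqflush(dqp, &bp);
        if (!error) {
                /* Queue the buffer for delayed write, then release it. */
                xfs_buf_delwri_queue(bp, buffer_list);
                xfs_buf_relse(bp);
        }
        xfs_dqunlock(dqp);
        return error;
}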

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
        xfs_dquot_t     *d1,
        xfs_dquot_t     *d2)
{
        if (d1 && d2) {
                ASSERT(d1 != d2);
                if (be32_to_cpu(d1->q_core.d_id) >
                    be32_to_cpu(d2->q_core.d_id)) {
                        mutex_lock(&d2->q_qlock);
                        mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
                } else {
                        mutex_lock(&d1->q_qlock);
                        mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
                }
        } else if (d1) {
                mutex_lock(&d1->q_qlock);
        } else if (d2) {
                mutex_lock(&d2->q_qlock);
        }
}
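
/*
 * Example of the two-dquot ordering in practice.  The helper is
 * illustrative only; either dquot pointer may be NULL, which
 * xfs_dqlock2() handles itself.
 */
static void __maybe_unused
xfs_dqlock2_usage_sketch(
        struct xfs_dquot        *udqp,
        struct xfs_dquot        *gdqp)
{
        /* Locks are taken in deadlock-safe (lower id first) order. */
        xfs_dqlock2(udqp, gdqp);

        /* ... modify both dquots under their locks ... */

        /* Unlock each dquot individually; unlock order doesn't matter. */
        if (udqp)
                xfs_dqunlock(udqp);
        if (gdqp)
                xfs_dqunlock(gdqp);
}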

int __init
xfs_qm_init(void)
{
        xfs_qm_dqzone =
                kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
        if (!xfs_qm_dqzone)
                goto out;

        xfs_qm_dqtrxzone =
                kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
        if (!xfs_qm_dqtrxzone)
                goto out_free_dqzone;

        return 0;

out_free_dqzone:
        kmem_zone_destroy(xfs_qm_dqzone);
out:
        return -ENOMEM;
}

void
xfs_qm_exit(void)
{
        kmem_zone_destroy(xfs_qm_dqtrxzone);
        kmem_zone_destroy(xfs_qm_dqzone);
}

/*
 * Iterate every dquot of a particular type.  The caller must ensure that the
 * particular quota type is active.  iter_fn can return negative error codes,
 * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
 */
int
xfs_qm_dqiterate(
        struct xfs_mount        *mp,
        uint                    dqtype,
        xfs_qm_dqiterate_fn     iter_fn,
        void                    *priv)
{
        struct xfs_dquot        *dq;
        xfs_dqid_t              id = 0;
        int                     error;

        do {
                error = xfs_qm_dqget_next(mp, id, dqtype, &dq);
                if (error == -ENOENT)
                        return 0;
                if (error)
                        return error;

                error = iter_fn(dq, dqtype, priv);
                id = be32_to_cpu(dq->q_core.d_id);
                xfs_qm_dqput(dq);
                id++;
        } while (error == 0 && id != 0);

        return error;
}
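
/*
 * A minimal sketch of an xfs_qm_dqiterate() callback; the function name
 * and the logged fields are illustrative only.
 */
static int __maybe_unused
xfs_qm_dqiterate_fn_sketch(
        struct xfs_dquot        *dq,
        uint                    dqtype,
        void                    *priv)
{
        /* The dquot is locked and referenced while the callback runs. */
        xfs_notice(dq->q_mount, "dquot id %u bcount %llu",
                        be32_to_cpu(dq->q_core.d_id),
                        (unsigned long long)be64_to_cpu(dq->q_core.d_bcount));

        /*
         * Return a negative errno to stop with an error, or
         * XFS_BTREE_QUERY_RANGE_ABORT to stop iterating early.
         */
        return 0;
}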