linux/fs/xfs/xfs_dquot.c
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *       qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first, see xfs_dqlock2.
 */
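
/*
 * Editorial sketch (not part of the original file, never called; the helper
 * name is hypothetical): a minimal illustration of the nesting documented
 * above, assuming a quotainfo and a dquot are already in hand. The locking
 * primitives themselves are the real ones used throughout this file.
 */
static inline void
xfs_dquot_lock_order_sketch(
        struct xfs_quotainfo    *qi,
        struct xfs_dquot        *dqp)
{
        mutex_lock(&qi->qi_tree_lock);          /* tree lock first */
        xfs_dqlock(dqp);                        /* q_qlock nests inside it */
        mutex_unlock(&qi->qi_tree_lock);

        xfs_dqflock(dqp);                       /* flush lock nests inside q_qlock */
        xfs_dqfunlock(dqp);
        xfs_dqunlock(dqp);
}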

struct kmem_zone                *xfs_qm_dqtrxzone;
static struct kmem_zone         *xfs_qm_dqzone;

static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
        xfs_dquot_t     *dqp)
{
        ASSERT(list_empty(&dqp->q_lru));

        kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
        mutex_destroy(&dqp->q_qlock);

        XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
        kmem_zone_free(xfs_qm_dqzone, dqp);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
        struct xfs_mount        *mp,
        struct xfs_dquot        *dq)
{
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_disk_dquot   *d = &dq->q_core;
        struct xfs_def_quota    *defq;
        int                     prealloc = 0;

        ASSERT(d->d_id);
        defq = xfs_get_defquota(dq, q);

        if (defq->bsoftlimit && !d->d_blk_softlimit) {
                d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
                prealloc = 1;
        }
        if (defq->bhardlimit && !d->d_blk_hardlimit) {
                d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
                prealloc = 1;
        }
        if (defq->isoftlimit && !d->d_ino_softlimit)
                d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
        if (defq->ihardlimit && !d->d_ino_hardlimit)
                d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
        if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
                d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
        if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
                d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);

        if (prealloc)
                xfs_dquot_set_prealloc_limits(dq);
}

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated: we just don't reject any quota
 * reservations in that case, and Q_GETQUOTA calls report the timer
 * values as zero.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
void
xfs_qm_adjust_dqtimers(
        xfs_mount_t             *mp,
        xfs_disk_dquot_t        *d)
{
        ASSERT(d->d_id);

#ifdef DEBUG
        if (d->d_blk_hardlimit)
                ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
                       be64_to_cpu(d->d_blk_hardlimit));
        if (d->d_ino_hardlimit)
                ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
                       be64_to_cpu(d->d_ino_hardlimit));
        if (d->d_rtb_hardlimit)
                ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
                       be64_to_cpu(d->d_rtb_hardlimit));
#endif

        if (!d->d_btimer) {
                if ((d->d_blk_softlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_softlimit))) ||
                    (d->d_blk_hardlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_btimelimit);
                } else {
                        d->d_bwarns = 0;
                }
        } else {
                if ((!d->d_blk_softlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_softlimit))) &&
                    (!d->d_blk_hardlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = 0;
                }
        }

        if (!d->d_itimer) {
                if ((d->d_ino_softlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_softlimit))) ||
                    (d->d_ino_hardlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_itimelimit);
                } else {
                        d->d_iwarns = 0;
                }
        } else {
                if ((!d->d_ino_softlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_softlimit))) &&
                    (!d->d_ino_hardlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = 0;
                }
        }

        if (!d->d_rtbtimer) {
                if ((d->d_rtb_softlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_softlimit))) ||
                    (d->d_rtb_hardlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_rtbtimelimit);
                } else {
                        d->d_rtbwarns = 0;
                }
        } else {
                if ((!d->d_rtb_softlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_softlimit))) &&
                    (!d->d_rtb_hardlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = 0;
                }
        }
}
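
/*
 * Worked example (editorial note, hypothetical numbers): with a block soft
 * limit of 100, no hard limit, and d_bcount of 150, the code above starts
 * d_btimer at get_seconds() + qi_btimelimit. Once d_bcount drops back to
 * 100 or below, a later call clears the timer again. Note that this
 * function only ever clears the warning counts; it never increments them.
 */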

/*
 * initialize a buffer full of dquots and log the whole thing
 */
STATIC void
xfs_qm_init_dquot_blk(
        xfs_trans_t     *tp,
        xfs_mount_t     *mp,
        xfs_dqid_t      id,
        uint            type,
        xfs_buf_t       *bp)
{
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_dqblk_t     *d;
        xfs_dqid_t      curid;
        int             i;

        ASSERT(tp);
        ASSERT(xfs_buf_islocked(bp));

        d = bp->b_addr;

        /*
         * ID of the first dquot in the block - IDs are zero-based.
         */
        curid = id - (id % q->qi_dqperchunk);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
                d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
                d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
                        xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
                                         XFS_DQUOT_CRC_OFF);
                }
        }

        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
                             XFS_BLF_GDQUOT_BUF)));
        xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
 * watermarks correspond to the soft and hard limits by default. If a soft
 * limit is not specified, we use 95% of the hard limit.
 */
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
        uint64_t space;

        dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
        dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
        if (!dqp->q_prealloc_lo_wmark) {
                dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
                do_div(dqp->q_prealloc_lo_wmark, 100);
                dqp->q_prealloc_lo_wmark *= 95;
        }

        space = dqp->q_prealloc_hi_wmark;

        do_div(space, 100);
        dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
        dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
        dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
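
/*
 * Worked example (editorial note, hypothetical numbers): with a hard limit
 * of 1000 blocks and no soft limit, q_prealloc_lo_wmark ends up at
 * 1000 / 100 * 95 = 950 blocks, and q_low_space holds {10, 30, 50}, i.e.
 * 1%, 3% and 5% of the hard limit. Since do_div() truncates, a hard limit
 * below 100 blocks produces a zero lo watermark.
 */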

/*
 * Allocate a block and fill it with dquots.
 * This is called when the bmapi finds a hole.
 */
STATIC int
xfs_qm_dqalloc(
        xfs_trans_t     **tpp,
        xfs_mount_t     *mp,
        xfs_dquot_t     *dqp,
        xfs_inode_t     *quotip,
        xfs_fileoff_t   offset_fsb,
        xfs_buf_t       **O_bpp)
{
        xfs_fsblock_t   firstblock;
        struct xfs_defer_ops dfops;
        xfs_bmbt_irec_t map;
        int             nmaps, error;
        xfs_buf_t       *bp;
        xfs_trans_t     *tp = *tpp;

        ASSERT(tp != NULL);

        trace_xfs_dqalloc(dqp);

        /*
         * Initialize the bmap freelist prior to calling bmapi code.
         */
        xfs_defer_init(&dfops, &firstblock);
        xfs_ilock(quotip, XFS_ILOCK_EXCL);
        /*
         * Return if this type of quota was turned off while we didn't
         * have the inode lock.
         */
        if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                xfs_iunlock(quotip, XFS_ILOCK_EXCL);
                return -ESRCH;
        }

        xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
        nmaps = 1;
        error = xfs_bmapi_write(tp, quotip, offset_fsb,
                                XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
                                &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
                                &map, &nmaps, &dfops);
        if (error)
                goto error0;
        ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
        ASSERT(nmaps == 1);
        ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
               (map.br_startblock != HOLESTARTBLOCK));

        /*
         * Keep track of the blkno to save a lookup later
         */
        dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

        /* now we can just get the buffer (there's nothing to read yet) */
        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
                               dqp->q_blkno,
                               mp->m_quotainfo->qi_dqchunklen,
                               0);
        if (!bp) {
                error = -ENOMEM;
                goto error1;
        }
        bp->b_ops = &xfs_dquot_buf_ops;

        /*
         * Make a chunk of dquots out of this buffer and log
         * the entire thing.
         */
        xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
                              dqp->dq_flags & XFS_DQ_ALLTYPES, bp);

        /*
         * xfs_defer_finish() may commit the current transaction and
         * start a second transaction if the freelist is not empty.
         *
         * Since we still want to modify this buffer, we need to
         * ensure that the buffer is not released on commit of
         * the first transaction and ensure the buffer is added to the
         * second transaction.
         *
         * If there is only one transaction then don't stop the buffer
         * from being released when it commits later on.
         */
        xfs_trans_bhold(tp, bp);

        error = xfs_defer_finish(tpp, &dfops);
        if (error)
                goto error1;

        /* Transaction was committed? */
        if (*tpp != tp) {
                tp = *tpp;
                xfs_trans_bjoin(tp, bp);
        } else {
                xfs_trans_bhold_release(tp, bp);
        }

        *O_bpp = bp;
        return 0;

error1:
        xfs_defer_cancel(&dfops);
error0:
        xfs_iunlock(quotip, XFS_ILOCK_EXCL);

        return error;
}

STATIC int
xfs_qm_dqrepair(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
        struct xfs_dquot        *dqp,
        xfs_dqid_t              firstid,
        struct xfs_buf          **bpp)
{
        int                     error;
        struct xfs_disk_dquot   *ddq;
        struct xfs_dqblk        *d;
        int                     i;

        /*
         * Read the buffer without verification so we get the corrupted
         * buffer returned to us. Make sure we verify it on write, though.
         */
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
                                   mp->m_quotainfo->qi_dqchunklen,
                                   0, bpp, NULL);

        if (error) {
                ASSERT(*bpp == NULL);
                return error;
        }
        (*bpp)->b_ops = &xfs_dquot_buf_ops;

        ASSERT(xfs_buf_islocked(*bpp));
        d = (struct xfs_dqblk *)(*bpp)->b_addr;

        /* Do the actual repair of dquots in this buffer */
        for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
                ddq = &d[i].dd_diskdq;
                error = xfs_dqcheck(mp, ddq, firstid + i,
                                       dqp->dq_flags & XFS_DQ_ALLTYPES,
                                       XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
                if (error) {
                        /* repair failed, we're screwed */
                        xfs_trans_brelse(tp, *bpp);
                        return -EIO;
                }
        }

        return 0;
}

/*
 * Maps a dquot to the buffer containing its on-disk version.
 * This returns a pointer to the buffer containing the on-disk dquot
 * in the bpp param, and a pointer to the on-disk dquot within that buffer.
 */
STATIC int
xfs_qm_dqtobp(
        xfs_trans_t             **tpp,
        xfs_dquot_t             *dqp,
        xfs_disk_dquot_t        **O_ddpp,
        xfs_buf_t               **O_bpp,
        uint                    flags)
{
        struct xfs_bmbt_irec    map;
        int                     nmaps = 1, error;
        struct xfs_buf          *bp;
        struct xfs_inode        *quotip;
        struct xfs_mount        *mp = dqp->q_mount;
        xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
        struct xfs_trans        *tp = (tpp ? *tpp : NULL);
        uint                    lock_mode;

        quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;

        lock_mode = xfs_ilock_data_map_shared(quotip);
        if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                /*
                 * Return if this type of quota was turned off while we
                 * didn't have the quota inode lock.
                 */
                xfs_iunlock(quotip, lock_mode);
                return -ESRCH;
        }

        /*
         * Find the block map; no allocations yet
         */
        error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
                               XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);

        xfs_iunlock(quotip, lock_mode);
        if (error)
                return error;

        ASSERT(nmaps == 1);
        ASSERT(map.br_blockcount == 1);

        /*
         * Offset of dquot in the (fixed size) dquot chunk.
         */
        dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
                sizeof(xfs_dqblk_t);

        ASSERT(map.br_startblock != DELAYSTARTBLOCK);
        if (map.br_startblock == HOLESTARTBLOCK) {
                /*
                 * We don't allocate unless we're asked to
                 */
                if (!(flags & XFS_QMOPT_DQALLOC))
                        return -ENOENT;

                ASSERT(tp);
                error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
                                        dqp->q_fileoffset, &bp);
                if (error)
                        return error;
                tp = *tpp;
        } else {
                trace_xfs_dqtobp_read(dqp);

                /*
                 * store the blkno etc so that we don't have to do the
                 * mapping all the time
                 */
                dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

                error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                           dqp->q_blkno,
                                           mp->m_quotainfo->qi_dqchunklen,
                                           0, &bp, &xfs_dquot_buf_ops);

                if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
                        xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
                                                mp->m_quotainfo->qi_dqperchunk;
                        ASSERT(bp == NULL);
                        error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
                }

                if (error) {
                        ASSERT(bp == NULL);
                        return error;
                }
        }

        ASSERT(xfs_buf_islocked(bp));
        *O_bpp = bp;
        *O_ddpp = bp->b_addr + dqp->q_bufoffset;

        return 0;
}


/*
 * Read in the on-disk dquot using dqtobp(), copy it into the incore version,
 * and release the buffer immediately.
 *
 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.
 */
int
xfs_qm_dqread(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        uint                    flags,
        struct xfs_dquot        **O_dqpp)
{
        struct xfs_dquot        *dqp;
        struct xfs_disk_dquot   *ddqp;
        struct xfs_buf          *bp;
        struct xfs_trans        *tp = NULL;
        int                     error;

        dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

        dqp->dq_flags = type;
        dqp->q_core.d_id = cpu_to_be32(id);
        dqp->q_mount = mp;
        INIT_LIST_HEAD(&dqp->q_lru);
        mutex_init(&dqp->q_qlock);
        init_waitqueue_head(&dqp->q_pinwait);

        /*
         * Because we want to use a counting completion, complete
         * the flush completion once to allow a single access to
         * the flush completion without blocking.
         */
        init_completion(&dqp->q_flush);
        complete(&dqp->q_flush);

        /*
         * Make sure group quotas have a different lock class than user
         * quotas.
         */
        switch (type) {
        case XFS_DQ_USER:
                /* uses the default lock class */
                break;
        case XFS_DQ_GROUP:
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
                break;
        case XFS_DQ_PROJ:
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
                break;
        default:
                ASSERT(0);
                break;
        }

        XFS_STATS_INC(mp, xs_qm_dquot);

        trace_xfs_dqread(dqp);

        if (flags & XFS_QMOPT_DQALLOC) {
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
                                XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
                if (error)
                        goto error0;
        }

        /*
         * Get a pointer to the on-disk dquot and the buffer containing it;
         * dqp already knows its own type (GROUP/USER).
         */
        error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
        if (error) {
                /*
                 * This can happen if quotas got turned off (ESRCH),
                 * or if the dquot didn't exist on disk and we ask to
                 * allocate (ENOENT).
                 */
                trace_xfs_dqread_fail(dqp);
                goto error1;
        }

        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
        xfs_qm_dquot_logitem_init(dqp);

        /*
         * Reservation counters are defined as reservation plus current usage
         * to avoid having to add every time.
         */
        dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
        dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
        dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

        /* initialize the dquot speculative prealloc thresholds */
        xfs_dquot_set_prealloc_limits(dqp);

        /* Mark the buf so that this will stay incore a little longer */
        xfs_buf_set_ref(bp, XFS_DQUOT_REF);

        /*
         * We got the buffer with xfs_trans_read_buf() (in dqtobp()),
         * so we need to release it with xfs_trans_brelse().
         * The strategy here is identical to that of inodes; we lock
         * the dquot in xfs_qm_dqget() before making it accessible to
         * others. This is because dquots, like inodes, need a good level of
         * concurrency, and we don't want to take locks on the entire buffers
         * for dquot accesses.
         * Note also that the dquot buffer may even be dirty at this point, if
         * this particular dquot was repaired. We still aren't afraid to
         * brelse it because we have the changes incore.
         */
        ASSERT(xfs_buf_islocked(bp));
        xfs_trans_brelse(tp, bp);

        if (tp) {
                error = xfs_trans_commit(tp);
                if (error)
                        goto error0;
        }

        *O_dqpp = dqp;
        return error;

error1:
        if (tp)
                xfs_trans_cancel(tp);
error0:
        xfs_qm_dqdestroy(dqp);
        *O_dqpp = NULL;
        return error;
}

/*
 * Advance to the next id in the current chunk, or if at the
 * end of the chunk, skip ahead to the first id in the next allocated
 * chunk by looking up the quota inode's in-core extent map.
 */
static int
xfs_dq_get_next_id(
        struct xfs_mount        *mp,
        uint                    type,
        xfs_dqid_t              *id)
{
        struct xfs_inode        *quotip = xfs_quota_inode(mp, type);
        xfs_dqid_t              next_id = *id + 1; /* simple advance */
        uint                    lock_flags;
        struct xfs_bmbt_irec    got;
        struct xfs_iext_cursor  cur;
        xfs_fsblock_t           start;
        int                     error = 0;

        /* If we'd wrap past the max ID, stop */
        if (next_id < *id)
                return -ENOENT;

        /* If new ID is within the current chunk, advancing it sufficed */
        if (next_id % mp->m_quotainfo->qi_dqperchunk) {
                *id = next_id;
                return 0;
        }

        /* Nope, next_id is now past the current chunk, so find the next one */
        start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;

        lock_flags = xfs_ilock_data_map_shared(quotip);
        if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
                error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
                if (error) {
                        /* don't leak the ilock on the error path */
                        xfs_iunlock(quotip, lock_flags);
                        return error;
                }
        }

        if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
                /* contiguous chunk, bump startoff for the id calculation */
                if (got.br_startoff < start)
                        got.br_startoff = start;
                *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
        } else {
                error = -ENOENT;
        }

        xfs_iunlock(quotip, lock_flags);

        return error;
}
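
/*
 * Worked example (editorial note, hypothetical numbers): with qi_dqperchunk
 * == 32, advancing *id from 30 to 31 stays inside chunk 0 and returns early.
 * Advancing from 31 to 32 crosses a chunk boundary, so we look up the extent
 * covering file block 32 / 32 == 1; if the next allocated chunk starts at
 * file block 5, we resume with *id == 5 * 32 == 160.
 */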

/*
 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
 * a locked dquot, doing an allocation (if requested) as needed.
 * When both an inode and an id are given, the inode's id takes precedence.
 * That is, if the id changes while we don't hold the ilock inside this
 * function, the new dquot is returned, not necessarily the one requested
 * in the id argument.
 */
int
xfs_qm_dqget(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,      /* locked inode (optional) */
        xfs_dqid_t      id,       /* uid/projid/gid depending on type */
        uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
        uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
        xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
{
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
        struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
        struct xfs_dquot        *dqp;
        int                     error;

        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
        if ((!XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
            (!XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
            (!XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
                return -ESRCH;
        }

        ASSERT(type == XFS_DQ_USER ||
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
        if (ip) {
                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
                ASSERT(xfs_inode_dquot(ip, type) == NULL);
        }

restart:
        mutex_lock(&qi->qi_tree_lock);
        dqp = radix_tree_lookup(tree, id);
        if (dqp) {
                xfs_dqlock(dqp);
                if (dqp->dq_flags & XFS_DQ_FREEING) {
                        xfs_dqunlock(dqp);
                        mutex_unlock(&qi->qi_tree_lock);
                        trace_xfs_dqget_freeing(dqp);
                        delay(1);
                        goto restart;
                }

                /* uninit / unused quota found in radix tree, keep looking */
                if (flags & XFS_QMOPT_DQNEXT) {
                        if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
                                xfs_dqunlock(dqp);
                                mutex_unlock(&qi->qi_tree_lock);
                                error = xfs_dq_get_next_id(mp, type, &id);
                                if (error)
                                        return error;
                                goto restart;
                        }
                }

                dqp->q_nrefs++;
                mutex_unlock(&qi->qi_tree_lock);

                trace_xfs_dqget_hit(dqp);
                XFS_STATS_INC(mp, xs_qm_dqcachehits);
                *O_dqpp = dqp;
                return 0;
        }
        mutex_unlock(&qi->qi_tree_lock);
        XFS_STATS_INC(mp, xs_qm_dqcachemisses);

        /*
         * Dquot cache miss. We don't want to keep the inode lock across
         * a (potential) disk read. Also we don't want to deal with the lock
         * ordering between quotainode and this inode. OTOH, dropping the inode
         * lock here means dealing with a chown that can happen before
         * we re-acquire the lock.
         */
        if (ip)
                xfs_iunlock(ip, XFS_ILOCK_EXCL);

        error = xfs_qm_dqread(mp, id, type, flags, &dqp);

        if (ip)
                xfs_ilock(ip, XFS_ILOCK_EXCL);

        /* If we are asked to find next active id, keep looking */
        if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
                error = xfs_dq_get_next_id(mp, type, &id);
                if (!error)
                        goto restart;
        }

        if (error)
                return error;

        if (ip) {
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
                 */
                if (xfs_this_quota_on(mp, type)) {
                        struct xfs_dquot        *dqp1;

                        dqp1 = xfs_inode_dquot(ip, type);
                        if (dqp1) {
                                xfs_qm_dqdestroy(dqp);
                                dqp = dqp1;
                                xfs_dqlock(dqp);
                                goto dqret;
                        }
                } else {
                        /* inode stays locked on return */
                        xfs_qm_dqdestroy(dqp);
                        return -ESRCH;
                }
        }

        mutex_lock(&qi->qi_tree_lock);
        error = radix_tree_insert(tree, id, dqp);
        if (unlikely(error)) {
                WARN_ON(error != -EEXIST);

                /*
                 * Duplicate found. Just throw away the new dquot and start
                 * over.
                 */
                mutex_unlock(&qi->qi_tree_lock);
                trace_xfs_dqget_dup(dqp);
                xfs_qm_dqdestroy(dqp);
                XFS_STATS_INC(mp, xs_qm_dquot_dups);
                goto restart;
        }

        /*
         * We return a locked dquot to the caller, with a reference taken
         */
        xfs_dqlock(dqp);
        dqp->q_nrefs = 1;

        qi->qi_dquots++;
        mutex_unlock(&qi->qi_tree_lock);

        /* If we are asked to find next active id, keep looking */
        if (flags & XFS_QMOPT_DQNEXT) {
                if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
                        xfs_qm_dqput(dqp);
                        error = xfs_dq_get_next_id(mp, type, &id);
                        if (error)
                                return error;
                        goto restart;
                }
        }

 dqret:
        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
        trace_xfs_dqget_miss(dqp);
        *O_dqpp = dqp;
        return 0;
}
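
/*
 * Editorial sketch (not part of the original file, never called; the helper
 * name is hypothetical): the usual lookup/release pairing for a dquot that
 * is not attached to an inode. xfs_qm_dqget() hands back the dquot locked
 * with a reference held; xfs_qm_dqput() drops the reference and unlocks.
 */
static int __maybe_unused
xfs_qm_dqget_example(
        struct xfs_mount        *mp,
        xfs_dqid_t              id)
{
        struct xfs_dquot        *dqp;
        int                     error;

        /* look up the user dquot for @id, allocating it on disk if absent */
        error = xfs_qm_dqget(mp, NULL, id, XFS_DQ_USER,
                             XFS_QMOPT_DQALLOC, &dqp);
        if (error)
                return error;   /* e.g. -ESRCH if user quota is not on */

        /* dqp is locked here; its counters may be inspected safely */

        xfs_qm_dqput(dqp);      /* drop the reference and unlock */
        return 0;
}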

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If there is a group quota attached to this dquot, carefully release that
 * too without tripping over deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
        struct xfs_dquot        *dqp)
{
        ASSERT(dqp->q_nrefs > 0);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));

        trace_xfs_dqput(dqp);

        if (--dqp->q_nrefs == 0) {
                struct xfs_quotainfo    *qi = dqp->q_mount->m_quotainfo;
                trace_xfs_dqput_free(dqp);

                if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
                        XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
        }
        xfs_dqunlock(dqp);
}

/*
 * Release a dquot. Flush it if dirty, then dqput() it.
 * dquot must not be locked.
 */
void
xfs_qm_dqrele(
        xfs_dquot_t     *dqp)
{
        if (!dqp)
                return;

        trace_xfs_dqrele(dqp);

        xfs_dqlock(dqp);
        /*
         * We don't care to flush it if the dquot is dirty here.
         * That will create stutters that we want to avoid.
         * Instead we do a delayed write when we try to reclaim
         * a dirty dquot. Also xfs_sync will take part of the burden...
         */
        xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock. This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
        struct xfs_buf          *bp,
        struct xfs_log_item     *lip)
{
        xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
        xfs_dquot_t             *dqp = qip->qli_dquot;
        struct xfs_ail          *ailp = lip->li_ailp;

        /*
         * We only want to pull the item from the AIL if its
         * location in the log has not changed since we started the flush.
         * Thus, we only bother if the dquot's lsn has
         * not changed. First we check the lsn outside the lock
         * since it's cheaper, and then we recheck while
         * holding the lock before removing the dquot from the AIL.
         */
        if ((lip->li_flags & XFS_LI_IN_AIL) &&
            ((lip->li_lsn == qip->qli_flush_lsn) ||
             (lip->li_flags & XFS_LI_FAILED))) {

                /* xfs_trans_ail_delete() drops the AIL lock. */
                spin_lock(&ailp->xa_lock);
                if (lip->li_lsn == qip->qli_flush_lsn) {
                        xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
                } else {
                        /*
                         * Clear the failed state since we are about to drop
                         * the flush lock.
                         */
                        if (lip->li_flags & XFS_LI_FAILED)
                                xfs_clear_li_failed(lip);
                        spin_unlock(&ailp->xa_lock);
                }
        }

        /*
         * Release the dq's flush lock since we're done with it.
         */
        xfs_dqfunlock(dqp);
}

/*
 * Write a modified dquot to disk.
 * The caller must hold the dquot lock and have taken the flush lock.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim. The dquot is still locked on return. This behavior is
 * identical to that of inodes.
 */
int
xfs_qm_dqflush(
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_buf          *bp;
        struct xfs_disk_dquot   *ddqp;
        int                     error;

        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        ASSERT(!completion_done(&dqp->q_flush));

        trace_xfs_dqflush(dqp);

        *bpp = NULL;

        xfs_qm_dqunpin_wait(dqp);

        /*
         * This may have been unpinned because the filesystem is shutting
         * down forcibly. If that's the case we must not write this dquot
         * to disk, because the log record didn't make it to disk.
         *
         * We also have to remove the log item from the AIL in this case,
         * as we wait for an empty AIL as part of the unmount process.
         */
        if (XFS_FORCED_SHUTDOWN(mp)) {
                struct xfs_log_item     *lip = &dqp->q_logitem.qli_item;
                dqp->dq_flags &= ~XFS_DQ_DIRTY;

                xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);

                error = -EIO;
                goto out_unlock;
        }

        /*
         * Get the buffer containing the on-disk dquot
         */
        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
                                   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
                                   &xfs_dquot_buf_ops);
        if (error)
                goto out_unlock;

        /*
         * Calculate the location of the dquot inside the buffer.
         */
        ddqp = bp->b_addr + dqp->q_bufoffset;

        /*
         * A simple sanity check in case we got a corrupted dquot.
         */
        error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
                           XFS_QMOPT_DOWARN, "dqflush (incore copy)");
        if (error) {
                xfs_buf_relse(bp);
                xfs_dqfunlock(dqp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                return -EIO;
        }

        /* This is the only portion of data that needs to persist */
        memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

        /*
         * Clear the dirty field and remember the flush lsn for later use.
         */
        dqp->dq_flags &= ~XFS_DQ_DIRTY;

        xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
                                        &dqp->q_logitem.qli_item.li_lsn);

        /*
         * copy the lsn into the on-disk dquot now while we have the in memory
         * dquot here. This can't be done later in the write verifier as we
         * can't get access to the log item at that point in time.
         *
         * We also calculate the CRC here so that the on-disk dquot in the
         * buffer always has a valid CRC. This ensures there is no possibility
         * of a dquot without an up-to-date CRC getting to disk.
         */
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;

                dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
                xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF);
        }

        /*
         * Attach an iodone routine so that we can remove this dquot from the
         * AIL and release the flush lock once the dquot is synced to disk.
         */
        xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
                                  &dqp->q_logitem.qli_item);

        /*
         * If the buffer is pinned then push on the log so we won't
         * get stuck waiting in the write for too long.
         */
        if (xfs_buf_ispinned(bp)) {
                trace_xfs_dqflush_force(dqp);
                xfs_log_force(mp, 0);
        }

        trace_xfs_dqflush_done(dqp);
        *bpp = bp;
        return 0;

out_unlock:
        xfs_dqfunlock(dqp);
        return -EIO;
}
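
/*
 * Editorial sketch (not part of the original file, never called; the helper
 * name and the caller-supplied @buffer_list are hypothetical): the flush
 * protocol around xfs_qm_dqflush(). The flush lock is taken before the call
 * and released by the iodone handler once the buffer reaches disk; on
 * success the caller owns the buffer and queues it for delayed write.
 */
static int __maybe_unused
xfs_qm_dqflush_example(
        struct xfs_dquot        *dqp,
        struct list_head        *buffer_list)
{
        struct xfs_buf          *bp;
        int                     error;

        xfs_dqlock(dqp);
        if (!xfs_dqflock_nowait(dqp)) {
                /* someone else holds the flush lock; try again later */
                xfs_dqunlock(dqp);
                return -EAGAIN;
        }

        error = xfs_qm_dqflush(dqp, &bp);
        if (!error) {
                xfs_buf_delwri_queue(bp, buffer_list);
                xfs_buf_relse(bp);
        }
        /* on error, xfs_qm_dqflush() has already dropped the flush lock */
        xfs_dqunlock(dqp);
        return error;
}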

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
        xfs_dquot_t     *d1,
        xfs_dquot_t     *d2)
{
        if (d1 && d2) {
                ASSERT(d1 != d2);
                if (be32_to_cpu(d1->q_core.d_id) >
                    be32_to_cpu(d2->q_core.d_id)) {
                        mutex_lock(&d2->q_qlock);
                        mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
                } else {
                        mutex_lock(&d1->q_qlock);
                        mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
                }
        } else if (d1) {
                mutex_lock(&d1->q_qlock);
        } else if (d2) {
                mutex_lock(&d2->q_qlock);
        }
}
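
/*
 * Editorial sketch (not part of the original file, never called; the helper
 * name is hypothetical): locking a dquot pair, e.g. for a transfer. Either
 * pointer may be NULL; xfs_dqlock2() orders the locks by id, and the unlock
 * order does not matter.
 */
static void __maybe_unused
xfs_dqlock2_example(
        struct xfs_dquot        *udqp,
        struct xfs_dquot        *gdqp)
{
        xfs_dqlock2(udqp, gdqp);

        /* both dquots (when non-NULL) are locked here */

        if (gdqp)
                xfs_dqunlock(gdqp);
        if (udqp)
                xfs_dqunlock(udqp);
}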

int __init
xfs_qm_init(void)
{
        xfs_qm_dqzone =
                kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
        if (!xfs_qm_dqzone)
                goto out;

        xfs_qm_dqtrxzone =
                kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
        if (!xfs_qm_dqtrxzone)
                goto out_free_dqzone;

        return 0;

out_free_dqzone:
        kmem_zone_destroy(xfs_qm_dqzone);
out:
        return -ENOMEM;
}

void
xfs_qm_exit(void)
{
        kmem_zone_destroy(xfs_qm_dqtrxzone);
        kmem_zone_destroy(xfs_qm_dqzone);
}