linux/fs/xfs/xfs_dquot.c
/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_trace.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *       qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first; see xfs_dqlock2.
 */
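
/*
 * For example (an illustrative sketch, not a quote of any one call path):
 * attaching a dquot to an inode nests the locks like
 *
 *      xfs_ilock(ip, XFS_ILOCK_EXCL);
 *              mutex_lock(&qi->qi_tree_lock);
 *                      xfs_dqlock(dqp);
 *
 * See xfs_qm_dqget() below for the real code.
 */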

#ifdef DEBUG
xfs_buftarg_t *xfs_dqerror_target;
int xfs_do_dqerror;
int xfs_dqreq_num;
int xfs_dqerror_mod = 33;
#endif

struct kmem_zone                *xfs_qm_dqtrxzone;
static struct kmem_zone         *xfs_qm_dqzone;

static struct lock_class_key xfs_dquot_other_class;

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
        xfs_dquot_t     *dqp)
{
        ASSERT(list_empty(&dqp->q_lru));

        mutex_destroy(&dqp->q_qlock);
        kmem_zone_free(xfs_qm_dqzone, dqp);

        XFS_STATS_DEC(xs_qm_dquot);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
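 *
 * (The defaults in qi_bsoftlimit and friends are themselves read from
 * the id-0 dquot when the quota subsystem is initialized, in
 * xfs_qm_init_quotainfo(), which is why the root dquot must never be
 * overwritten here.)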
 */
void
xfs_qm_adjust_dqlimits(
        xfs_mount_t             *mp,
        xfs_disk_dquot_t        *d)
{
        xfs_quotainfo_t         *q = mp->m_quotainfo;

        ASSERT(d->d_id);

        if (q->qi_bsoftlimit && !d->d_blk_softlimit)
                d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
        if (q->qi_bhardlimit && !d->d_blk_hardlimit)
                d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
        if (q->qi_isoftlimit && !d->d_ino_softlimit)
                d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
        if (q->qi_ihardlimit && !d->d_ino_hardlimit)
                d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
        if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
                d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
        if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
                d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
}

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated: we just don't reject any quota
 * reservations in that case.
 * We also return 0 as the values of the timers in Q_GETQUOTA calls when
 * enforcement is off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
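/*
 * Worked example (illustrative values): suppose d_bcount has just gone
 * over d_blk_softlimit and d_btimer is still zero.  The code below arms
 * d_btimer to "now + qi_btimelimit", where qi_btimelimit is the soft
 * limit grace period (one week by default).  If a later call finds the
 * count back at or under the soft limit, the timer is cleared again.
 */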
void
xfs_qm_adjust_dqtimers(
        xfs_mount_t             *mp,
        xfs_disk_dquot_t        *d)
{
        ASSERT(d->d_id);

#ifdef DEBUG
        if (d->d_blk_hardlimit)
                ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
                       be64_to_cpu(d->d_blk_hardlimit));
        if (d->d_ino_hardlimit)
                ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
                       be64_to_cpu(d->d_ino_hardlimit));
        if (d->d_rtb_hardlimit)
                ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
                       be64_to_cpu(d->d_rtb_hardlimit));
#endif

        if (!d->d_btimer) {
                if ((d->d_blk_softlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_softlimit))) ||
                    (d->d_blk_hardlimit &&
                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_btimelimit);
                } else {
                        d->d_bwarns = 0;
                }
        } else {
                if ((!d->d_blk_softlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_softlimit))) &&
                    (!d->d_blk_hardlimit ||
                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = 0;
                }
        }

        if (!d->d_itimer) {
                if ((d->d_ino_softlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_softlimit))) ||
                    (d->d_ino_hardlimit &&
                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_itimelimit);
                } else {
                        d->d_iwarns = 0;
                }
        } else {
                if ((!d->d_ino_softlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_softlimit))) &&
                    (!d->d_ino_hardlimit ||
                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = 0;
                }
        }

        if (!d->d_rtbtimer) {
                if ((d->d_rtb_softlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_softlimit))) ||
                    (d->d_rtb_hardlimit &&
                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_rtbtimelimit);
                } else {
                        d->d_rtbwarns = 0;
                }
        } else {
                if ((!d->d_rtb_softlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_softlimit))) &&
                    (!d->d_rtb_hardlimit ||
                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = 0;
                }
        }
}

/*
 * Initialize a buffer full of dquots and log the whole thing.
 */
STATIC void
xfs_qm_init_dquot_blk(
        xfs_trans_t     *tp,
        xfs_mount_t     *mp,
        xfs_dqid_t      id,
        uint            type,
        xfs_buf_t       *bp)
{
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_dqblk_t     *d;
        int             curid, i;

        ASSERT(tp);
        ASSERT(xfs_buf_islocked(bp));

        d = bp->b_addr;

        /*
         * ID of the first dquot in the block; ids are zero based.
         */
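        /*
         * Illustrative arithmetic (example values, not constants from this
         * file): with 30 dquots per chunk, id 137 rounds down to
         * curid = 137 - (137 % 30) = 120, the id in the chunk's first slot.
         */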
        curid = id - (id % q->qi_dqperchunk);
        ASSERT(curid >= 0);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
                d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
                d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
        }

        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
                             XFS_BLF_GDQUOT_BUF)));
        xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Allocate a block and fill it with dquots.
 * This is called when the bmapi finds a hole.
 */
STATIC int
xfs_qm_dqalloc(
        xfs_trans_t     **tpp,
        xfs_mount_t     *mp,
        xfs_dquot_t     *dqp,
        xfs_inode_t     *quotip,
        xfs_fileoff_t   offset_fsb,
        xfs_buf_t       **O_bpp)
{
        xfs_fsblock_t   firstblock;
        xfs_bmap_free_t flist;
        xfs_bmbt_irec_t map;
        int             nmaps, error, committed;
        xfs_buf_t       *bp;
        xfs_trans_t     *tp = *tpp;

        ASSERT(tp != NULL);

        trace_xfs_dqalloc(dqp);

        /*
         * Initialize the bmap freelist prior to calling bmapi code.
         */
        xfs_bmap_init(&flist, &firstblock);
        xfs_ilock(quotip, XFS_ILOCK_EXCL);
        /*
         * Return if this type of quota was turned off while we didn't
         * have the inode lock.
         */
        if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                xfs_iunlock(quotip, XFS_ILOCK_EXCL);
                return ESRCH;
        }

        xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
        nmaps = 1;
        error = xfs_bmapi_write(tp, quotip, offset_fsb,
                                XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
                                &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
                                &map, &nmaps, &flist);
        if (error)
                goto error0;
        ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
        ASSERT(nmaps == 1);
        ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
               (map.br_startblock != HOLESTARTBLOCK));

        /*
         * Keep track of the blkno to save a lookup later.
         */
        dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

        /* now we can just get the buffer (there's nothing to read yet) */
        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
                               dqp->q_blkno,
                               mp->m_quotainfo->qi_dqchunklen,
                               0);

        error = xfs_buf_geterror(bp);
        if (error)
                goto error1;

        /*
         * Make a chunk of dquots out of this buffer and log
         * the entire thing.
         */
        xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
                              dqp->dq_flags & XFS_DQ_ALLTYPES, bp);

        /*
         * xfs_bmap_finish() may commit the current transaction and
         * start a second transaction if the freelist is not empty.
         *
         * Since we still want to modify this buffer, we need to
         * ensure that the buffer is not released on commit of
         * the first transaction and ensure the buffer is added to the
         * second transaction.
         *
         * If there is only one transaction then don't stop the buffer
         * from being released when it commits later on.
         */
        xfs_trans_bhold(tp, bp);

        error = xfs_bmap_finish(tpp, &flist, &committed);
        if (error)
                goto error1;

        if (committed) {
                tp = *tpp;
                xfs_trans_bjoin(tp, bp);
        } else {
                xfs_trans_bhold_release(tp, bp);
        }

        *O_bpp = bp;
        return 0;

error1:
        xfs_bmap_cancel(&flist);
error0:
        xfs_iunlock(quotip, XFS_ILOCK_EXCL);

        return error;
}

/*
 * Maps a dquot to the buffer containing its on-disk version.
 * This returns a pointer to the buffer containing the on-disk dquot
 * in the O_bpp param, and a pointer to the on-disk dquot within that
 * buffer in O_ddpp.
 */
STATIC int
xfs_qm_dqtobp(
        xfs_trans_t             **tpp,
        xfs_dquot_t             *dqp,
        xfs_disk_dquot_t        **O_ddpp,
        xfs_buf_t               **O_bpp,
        uint                    flags)
{
        xfs_bmbt_irec_t map;
        int             nmaps = 1, error;
        xfs_buf_t       *bp;
        xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
        xfs_mount_t     *mp = dqp->q_mount;
        xfs_disk_dquot_t *ddq;
        xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
        xfs_trans_t     *tp = (tpp ? *tpp : NULL);

        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;

        xfs_ilock(quotip, XFS_ILOCK_SHARED);
        if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                /*
                 * Return if this type of quota was turned off while we
                 * didn't have the quota inode lock.
                 */
                xfs_iunlock(quotip, XFS_ILOCK_SHARED);
                return ESRCH;
        }

        /*
         * Find the block map; no allocations yet.
         */
        error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
                               XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);

        xfs_iunlock(quotip, XFS_ILOCK_SHARED);
        if (error)
                return error;

        ASSERT(nmaps == 1);
        ASSERT(map.br_blockcount == 1);

        /*
         * Offset of the dquot in the (fixed size) dquot chunk.
         */
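        /*
         * (Example values: with 30 dquots per chunk, id 137 lands in slot
         * 137 % 30 = 17, i.e. a byte offset of 17 * sizeof(xfs_dqblk_t)
         * into the chunk buffer.)
         */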
        dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
                sizeof(xfs_dqblk_t);

        ASSERT(map.br_startblock != DELAYSTARTBLOCK);
        if (map.br_startblock == HOLESTARTBLOCK) {
                /*
                 * We don't allocate unless we're asked to.
                 */
                if (!(flags & XFS_QMOPT_DQALLOC))
                        return ENOENT;

                ASSERT(tp);
                error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
                                        dqp->q_fileoffset, &bp);
                if (error)
                        return error;
                tp = *tpp;
        } else {
                trace_xfs_dqtobp_read(dqp);

                /*
                 * Store the blkno etc so that we don't have to do the
                 * mapping all the time.
                 */
                dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

                error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                           dqp->q_blkno,
                                           mp->m_quotainfo->qi_dqchunklen,
                                           0, &bp);
                if (error || !bp)
                        return XFS_ERROR(error);
        }

        ASSERT(xfs_buf_islocked(bp));

        /*
         * Calculate the location of the dquot inside the buffer.
         */
        ddq = bp->b_addr + dqp->q_bufoffset;

        /*
         * A simple sanity check in case we got a corrupted dquot...
         */
        error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
                           flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
                           "dqtobp");
        if (error) {
                if (!(flags & XFS_QMOPT_DQREPAIR)) {
                        xfs_trans_brelse(tp, bp);
                        return XFS_ERROR(EIO);
                }
        }

        *O_bpp = bp;
        *O_ddpp = ddq;

        return 0;
}

/*
 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
 * and release the buffer immediately.
 *
 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if one is needed.
 */
int
xfs_qm_dqread(
        struct xfs_mount        *mp,
        xfs_dqid_t              id,
        uint                    type,
        uint                    flags,
        struct xfs_dquot        **O_dqpp)
{
        struct xfs_dquot        *dqp;
        struct xfs_disk_dquot   *ddqp;
        struct xfs_buf          *bp;
        struct xfs_trans        *tp = NULL;
        int                     error;
        int                     cancelflags = 0;

        dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

        dqp->dq_flags = type;
        dqp->q_core.d_id = cpu_to_be32(id);
        dqp->q_mount = mp;
        INIT_LIST_HEAD(&dqp->q_lru);
        mutex_init(&dqp->q_qlock);
        init_waitqueue_head(&dqp->q_pinwait);

        /*
         * We use a counting completion as the flush "lock": complete it
         * once up front so that a single holder can acquire it without
         * blocking.
         */
        init_completion(&dqp->q_flush);
        complete(&dqp->q_flush);
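
        /*
         * For reference, the flush-lock helpers built on q_flush live in
         * xfs_dquot.h; roughly (a sketch, see that header for the
         * authoritative definitions):
         *
         *      xfs_dqflock(dqp):        wait_for_completion(&dqp->q_flush);
         *      xfs_dqflock_nowait(dqp): try_wait_for_completion(&dqp->q_flush);
         *      xfs_dqfunlock(dqp):      complete(&dqp->q_flush);
         */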

        /*
         * Make sure group quotas have a different lock class than user
         * quotas.
         */
        if (!(type & XFS_DQ_USER))
                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);

        XFS_STATS_INC(xs_qm_dquot);

        trace_xfs_dqread(dqp);

        if (flags & XFS_QMOPT_DQALLOC) {
                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
                error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
                                XFS_WRITE_LOG_RES(mp) +
                                /*
                                 * Round the chunklen up to the next multiple
                                 * of 128 (the buf log item chunk size).
                                 */
                                BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
                                0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);
                if (error)
                        goto error1;
                cancelflags = XFS_TRANS_RELEASE_LOG_RES;
        }

        /*
         * Get a pointer to the on-disk dquot and the buffer containing it;
         * dqp already knows its own type (GROUP/USER).
         */
        error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
        if (error) {
                /*
                 * This can happen if quotas got turned off (ESRCH),
                 * or if the dquot didn't exist on disk and we ask to
                 * allocate (ENOENT).
                 */
                trace_xfs_dqread_fail(dqp);
                cancelflags |= XFS_TRANS_ABORT;
                goto error1;
        }

        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
        xfs_qm_dquot_logitem_init(dqp);

        /*
         * Reservation counters are defined as reservation plus current usage,
         * so we don't have to add them up every time.
         */
        dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
        dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
        dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
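
        /*
         * Example (illustrative numbers): with 100 blocks in use on disk,
         * q_res_bcount starts at 100; a transaction that reserves 20 more
         * bumps it to 120, and limit checks compare that single value
         * against the block limits directly.
         */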

        /* Mark the buf so that this will stay incore a little longer */
        xfs_buf_set_ref(bp, XFS_DQUOT_REF);

        /*
         * We got the buffer with an xfs_trans_read_buf() call (in dqtobp()),
         * so we need to release it with xfs_trans_brelse().
         * The strategy here is identical to that of inodes: we lock
         * the dquot in xfs_qm_dqget() before making it accessible to
         * others. This is because dquots, like inodes, need a good level of
         * concurrency, and we don't want to take locks on the entire buffer
         * for dquot accesses.
         * Note also that the dquot buffer may even be dirty at this point, if
         * this particular dquot was repaired. We still aren't afraid to
         * brelse it because we have the changes incore.
         */
        ASSERT(xfs_buf_islocked(bp));
        xfs_trans_brelse(tp, bp);

        if (tp) {
                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
                if (error)
                        goto error0;
        }

        *O_dqpp = dqp;
        return error;

error1:
        if (tp)
                xfs_trans_cancel(tp, cancelflags);
error0:
        xfs_qm_dqdestroy(dqp);
        *O_dqpp = NULL;
        return error;
}

/*
 * Given the file system, an inode OR an id, and a type (UDQUOT/GDQUOT),
 * return a locked dquot, doing an allocation (if requested) as needed.
 * When both an inode and an id are given, the inode's id takes precedence.
 * That is, if the id changes while we don't hold the ilock inside this
 * function, the new dquot is returned, not necessarily the one requested
 * in the id argument.
 */
int
xfs_qm_dqget(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,      /* locked inode (optional) */
        xfs_dqid_t      id,       /* uid/projid/gid depending on type */
        uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
        uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
        xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
{
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
        struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
        struct xfs_dquot        *dqp;
        int                     error;

        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
        if ((!XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
            (!XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
            (!XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
                return ESRCH;
        }

#ifdef DEBUG
        if (xfs_do_dqerror) {
                if ((xfs_dqerror_target == mp->m_ddev_targp) &&
                    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
                        xfs_debug(mp, "Returning error in dqget");
                        return EIO;
                }
        }

        ASSERT(type == XFS_DQ_USER ||
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
        if (ip) {
                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
                ASSERT(xfs_inode_dquot(ip, type) == NULL);
        }
#endif

restart:
        mutex_lock(&qi->qi_tree_lock);
        dqp = radix_tree_lookup(tree, id);
        if (dqp) {
                xfs_dqlock(dqp);
                if (dqp->dq_flags & XFS_DQ_FREEING) {
                        xfs_dqunlock(dqp);
                        mutex_unlock(&qi->qi_tree_lock);
                        trace_xfs_dqget_freeing(dqp);
                        delay(1);
                        goto restart;
                }

                dqp->q_nrefs++;
                mutex_unlock(&qi->qi_tree_lock);

                trace_xfs_dqget_hit(dqp);
                XFS_STATS_INC(xs_qm_dqcachehits);
                *O_dqpp = dqp;
                return 0;
        }
        mutex_unlock(&qi->qi_tree_lock);
        XFS_STATS_INC(xs_qm_dqcachemisses);

        /*
         * Dquot cache miss. We don't want to keep the inode lock across
         * a (potential) disk read. Also we don't want to deal with the lock
         * ordering between the quota inode and this inode. OTOH, dropping
         * the inode lock here means dealing with a chown that can happen
         * before we re-acquire the lock.
         */
        if (ip)
                xfs_iunlock(ip, XFS_ILOCK_EXCL);

        error = xfs_qm_dqread(mp, id, type, flags, &dqp);

        if (ip)
                xfs_ilock(ip, XFS_ILOCK_EXCL);

        if (error)
                return error;

        if (ip) {
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
                 */
                if (xfs_this_quota_on(mp, type)) {
                        struct xfs_dquot        *dqp1;

                        dqp1 = xfs_inode_dquot(ip, type);
                        if (dqp1) {
                                xfs_qm_dqdestroy(dqp);
                                dqp = dqp1;
                                xfs_dqlock(dqp);
                                goto dqret;
                        }
                } else {
                        /* inode stays locked on return */
                        xfs_qm_dqdestroy(dqp);
                        return XFS_ERROR(ESRCH);
                }
        }

        mutex_lock(&qi->qi_tree_lock);
        error = -radix_tree_insert(tree, id, dqp);
        if (unlikely(error)) {
                WARN_ON(error != EEXIST);

                /*
                 * Duplicate found. Just throw away the new dquot and start
                 * over.
                 */
                mutex_unlock(&qi->qi_tree_lock);
                trace_xfs_dqget_dup(dqp);
                xfs_qm_dqdestroy(dqp);
                XFS_STATS_INC(xs_qm_dquot_dups);
                goto restart;
        }

        /*
         * We return a locked dquot to the caller, with a reference taken.
         */
        xfs_dqlock(dqp);
        dqp->q_nrefs = 1;

        qi->qi_dquots++;
        mutex_unlock(&qi->qi_tree_lock);

dqret:
        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
        trace_xfs_dqget_miss(dqp);
        *O_dqpp = dqp;
        return 0;
}
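
/*
 * Typical usage of the get/put pair (an illustrative sketch, not code
 * quoted from any caller):
 *
 *      error = xfs_qm_dqget(mp, NULL, id, XFS_DQ_USER, 0, &dqp);
 *      if (error)
 *              return error;
 *      ... use dqp: its q_qlock is held and a reference is owned ...
 *      xfs_qm_dqput(dqp);      (drops the lock and the reference)
 */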

STATIC void
xfs_qm_dqput_final(
        struct xfs_dquot        *dqp)
{
        struct xfs_quotainfo    *qi = dqp->q_mount->m_quotainfo;
        struct xfs_dquot        *gdqp;

        trace_xfs_dqput_free(dqp);

        mutex_lock(&qi->qi_lru_lock);
        if (list_empty(&dqp->q_lru)) {
                list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
                qi->qi_lru_count++;
                XFS_STATS_INC(xs_qm_dquot_unused);
        }
        mutex_unlock(&qi->qi_lru_lock);

        /*
         * If we just added a udquot to the freelist, then we want to release
         * the gdquot reference that it (probably) has. Otherwise it'll keep
         * the gdquot from getting reclaimed.
         */
        gdqp = dqp->q_gdquot;
        if (gdqp) {
                xfs_dqlock(gdqp);
                dqp->q_gdquot = NULL;
        }
        xfs_dqunlock(dqp);

        /*
         * If we had a group quota hint, release it now.
         */
        if (gdqp)
                xfs_qm_dqput(gdqp);
}

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If there is a group quota attached to this dquot, carefully release that
 * too without tripping over deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
        struct xfs_dquot        *dqp)
{
        ASSERT(dqp->q_nrefs > 0);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));

        trace_xfs_dqput(dqp);

        if (--dqp->q_nrefs > 0)
                xfs_dqunlock(dqp);
        else
                xfs_qm_dqput_final(dqp);
}
 805
 806/*
 807 * Release a dquot. Flush it if dirty, then dqput() it.
 808 * dquot must not be locked.
 809 */
 810void
 811xfs_qm_dqrele(
 812        xfs_dquot_t     *dqp)
 813{
 814        if (!dqp)
 815                return;
 816
 817        trace_xfs_dqrele(dqp);
 818
 819        xfs_dqlock(dqp);
 820        /*
 821         * We don't care to flush it if the dquot is dirty here.
 822         * That will create stutters that we want to avoid.
 823         * Instead we do a delayed write when we try to reclaim
 824         * a dirty dquot. Also xfs_sync will take part of the burden...
 825         */
 826        xfs_qm_dqput(dqp);
 827}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock.  This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
        struct xfs_buf          *bp,
        struct xfs_log_item     *lip)
{
        xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
        xfs_dquot_t             *dqp = qip->qli_dquot;
        struct xfs_ail          *ailp = lip->li_ailp;

        /*
         * We only want to pull the item from the AIL if its location in
         * the log has not changed since we started the flush, i.e. if the
         * dquot's lsn has not changed.  First we check the lsn outside the
         * lock since it's cheaper, and then we recheck while holding the
         * lock before removing the dquot from the AIL.
         */
        if ((lip->li_flags & XFS_LI_IN_AIL) &&
            lip->li_lsn == qip->qli_flush_lsn) {

                /* xfs_trans_ail_delete() drops the AIL lock. */
                spin_lock(&ailp->xa_lock);
                if (lip->li_lsn == qip->qli_flush_lsn)
                        xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
                else
                        spin_unlock(&ailp->xa_lock);
        }

        /*
         * Release the dq's flush lock since we're done with it.
         */
        xfs_dqfunlock(dqp);
}

/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock held by the caller.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim.  The dquot is still locked on return.  This behavior
 * is identical to that of inodes.
 */
int
xfs_qm_dqflush(
        struct xfs_dquot        *dqp,
        struct xfs_buf          **bpp)
{
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_buf          *bp;
        struct xfs_disk_dquot   *ddqp;
        int                     error;

        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        ASSERT(!completion_done(&dqp->q_flush));

        trace_xfs_dqflush(dqp);

        *bpp = NULL;

        xfs_qm_dqunpin_wait(dqp);

        /*
         * This may have been unpinned because the filesystem is shutting
         * down forcibly. If that's the case we must not write this dquot
         * to disk, because the log record didn't make it to disk.
         *
         * We also have to remove the log item from the AIL in this case,
         * as we wait for an empty AIL as part of the unmount process.
         */
        if (XFS_FORCED_SHUTDOWN(mp)) {
                struct xfs_log_item     *lip = &dqp->q_logitem.qli_item;
                dqp->dq_flags &= ~XFS_DQ_DIRTY;

                spin_lock(&mp->m_ail->xa_lock);
                if (lip->li_flags & XFS_LI_IN_AIL)
                        xfs_trans_ail_delete(mp->m_ail, lip,
                                             SHUTDOWN_CORRUPT_INCORE);
                else
                        spin_unlock(&mp->m_ail->xa_lock);
                error = XFS_ERROR(EIO);
                goto out_unlock;
        }

        /*
         * Get the buffer containing the on-disk dquot.
         */
        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
                                   mp->m_quotainfo->qi_dqchunklen, 0, &bp);
        if (error)
                goto out_unlock;

        /*
         * Calculate the location of the dquot inside the buffer.
         */
        ddqp = bp->b_addr + dqp->q_bufoffset;

        /*
         * A simple sanity check in case we got a corrupted dquot...
         */
        error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
                           XFS_QMOPT_DOWARN, "dqflush (incore copy)");
        if (error) {
                xfs_buf_relse(bp);
                xfs_dqfunlock(dqp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                return XFS_ERROR(EIO);
        }

        /* This is the only portion of data that needs to persist */
        memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

        /*
         * Clear the dirty field and remember the flush lsn for later use.
         */
        dqp->dq_flags &= ~XFS_DQ_DIRTY;

        xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
                                        &dqp->q_logitem.qli_item.li_lsn);

        /*
         * Attach an iodone routine so that we can remove this dquot from the
         * AIL and release the flush lock once the dquot is synced to disk.
         */
        xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
                                  &dqp->q_logitem.qli_item);

        /*
         * If the buffer is pinned then push on the log so we won't
         * get stuck waiting in the write for too long.
         */
        if (xfs_buf_ispinned(bp)) {
                trace_xfs_dqflush_force(dqp);
                xfs_log_force(mp, 0);
        }

        trace_xfs_dqflush_done(dqp);
        *bpp = bp;
        return 0;

out_unlock:
        xfs_dqfunlock(dqp);
        return XFS_ERROR(EIO);
}
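
/*
 * On success the caller owns the (locked) buffer and is expected to write
 * it out, e.g. via the on-stack delwri list pattern (an illustrative
 * sketch, not a quotation of any caller):
 *
 *      error = xfs_qm_dqflush(dqp, &bp);
 *      if (!error) {
 *              xfs_buf_delwri_queue(bp, buffer_list);
 *              xfs_buf_relse(bp);
 *      }
 */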

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
 */
void
xfs_dqlock2(
        xfs_dquot_t     *d1,
        xfs_dquot_t     *d2)
{
        if (d1 && d2) {
                ASSERT(d1 != d2);
                if (be32_to_cpu(d1->q_core.d_id) >
                    be32_to_cpu(d2->q_core.d_id)) {
                        mutex_lock(&d2->q_qlock);
                        mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
                } else {
                        mutex_lock(&d1->q_qlock);
                        mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
                }
        } else if (d1) {
                mutex_lock(&d1->q_qlock);
        } else if (d2) {
                mutex_lock(&d2->q_qlock);
        }
}
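
/*
 * Ordering by id rules out the classic ABBA deadlock (an illustrative
 * scenario): if one thread locked dquot 5 then 9 while another locked
 * 9 then 5, each could block forever on the lock the other holds.
 * With both threads taking 5 before 9, the loser simply waits on 5 and
 * no cycle can form.  mutex_lock_nested() tells lockdep that the second
 * acquisition is a deliberate nesting, not a self-deadlock.
 */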

int __init
xfs_qm_init(void)
{
        xfs_qm_dqzone =
                kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
        if (!xfs_qm_dqzone)
                goto out;

        xfs_qm_dqtrxzone =
                kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
        if (!xfs_qm_dqtrxzone)
                goto out_free_dqzone;

        return 0;

out_free_dqzone:
        kmem_zone_destroy(xfs_qm_dqzone);
out:
        return -ENOMEM;
}

void
xfs_qm_exit(void)
{
        kmem_zone_destroy(xfs_qm_dqtrxzone);
        kmem_zone_destroy(xfs_qm_dqzone);
}