linux/fs/xfs/quota/xfs_qm.c
   1/*
   2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_bit.h"
  21#include "xfs_log.h"
  22#include "xfs_inum.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_dir2.h"
  27#include "xfs_alloc.h"
  28#include "xfs_dmapi.h"
  29#include "xfs_quota.h"
  30#include "xfs_mount.h"
  31#include "xfs_bmap_btree.h"
  32#include "xfs_alloc_btree.h"
  33#include "xfs_ialloc_btree.h"
  34#include "xfs_dir2_sf.h"
  35#include "xfs_attr_sf.h"
  36#include "xfs_dinode.h"
  37#include "xfs_inode.h"
  38#include "xfs_btree.h"
  39#include "xfs_ialloc.h"
  40#include "xfs_itable.h"
  41#include "xfs_rtalloc.h"
  42#include "xfs_error.h"
  43#include "xfs_bmap.h"
  44#include "xfs_rw.h"
  45#include "xfs_attr.h"
  46#include "xfs_buf_item.h"
  47#include "xfs_trans_space.h"
  48#include "xfs_utils.h"
  49#include "xfs_qm.h"
  50
  51/*
  52 * The global quota manager. There is only one of these for the entire
  53 * system, _not_ one per file system. XQM keeps track of the overall
  54 * quota functionality, including maintaining the freelist and hash
  55 * tables of dquots.
  56 */
  57struct mutex    xfs_Gqm_lock;
  58struct xfs_qm   *xfs_Gqm;
  59uint            ndquot;
  60
  61kmem_zone_t     *qm_dqzone;
  62kmem_zone_t     *qm_dqtrxzone;
  63
  64static cred_t   xfs_zerocr;
  65
  66STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
  67STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
  68
  69STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
  70STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
  71
  72STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
  73STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
  74STATIC int      xfs_qm_shake(int, gfp_t);
  75
  76static struct shrinker xfs_qm_shaker = {
  77        .shrink = xfs_qm_shake,
  78        .seeks = DEFAULT_SEEKS,
  79};
  80
  81#ifdef DEBUG
  82extern struct mutex     qcheck_lock;
  83#endif
  84
  85#ifdef QUOTADEBUG
  86#define XQM_LIST_PRINT(l, NXT, title) \
  87{ \
  88        xfs_dquot_t     *dqp; int i = 0; \
  89        cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
  90        for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
  91                cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
  92                                  "bcnt = %d, icnt = %d, refs = %d", \
  93                        ++i, (int) be32_to_cpu(dqp->q_core.d_id), \
  94                        DQFLAGTO_TYPESTR(dqp),       \
  95                        (int) be64_to_cpu(dqp->q_core.d_bcount), \
  96                        (int) be64_to_cpu(dqp->q_core.d_icount), \
  97                        (int) dqp->q_nrefs);  } \
  98}
  99#else
 100#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
 101#endif
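
/*
 * Editor's sketch (standalone userspace C, not XFS code): why the
 * disabled variant of XQM_LIST_PRINT above expands to do { } while (0).
 * A macro that expands to a bare { ... } block breaks when followed by
 * a semicolon in an if/else, because "{ ... };" terminates the if
 * before the else. Wrapping the body in do { ... } while (0) turns it
 * into a single statement that consumes the trailing semicolon.
 */
#include <stdio.h>

#define PRINT_TWICE(s)  do { puts(s); puts(s); } while (0)

int main(void)
{
	int verbose = 1;

	if (verbose)
		PRINT_TWICE("hello");	/* safe: expands to one statement */
	else
		puts("quiet");
	return 0;
}
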
 102
 103/*
 104 * Initialize the XQM structure.
 105 * Note that there is not one quota manager per file system.
 106 */
 107STATIC struct xfs_qm *
 108xfs_Gqm_init(void)
 109{
 110        xfs_dqhash_t    *udqhash, *gdqhash;
 111        xfs_qm_t        *xqm;
 112        size_t          hsize;
 113        uint            i;
 114
 115        /*
 116         * Initialize the dquot hash tables.
 117         */
 118        udqhash = kmem_zalloc_greedy(&hsize,
 119                                     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
 120                                     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
 121                                     KM_SLEEP | KM_MAYFAIL | KM_LARGE);
 122        gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
 123        hsize /= sizeof(xfs_dqhash_t);
 124        ndquot = hsize << 8;
 125
 126        xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
 127        xqm->qm_dqhashmask = hsize - 1;
 128        xqm->qm_usr_dqhtable = udqhash;
 129        xqm->qm_grp_dqhtable = gdqhash;
 130        ASSERT(xqm->qm_usr_dqhtable != NULL);
 131        ASSERT(xqm->qm_grp_dqhtable != NULL);
 132
 133        for (i = 0; i < hsize; i++) {
 134                xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
 135                xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
 136        }
 137
 138        /*
 139         * Freelist of all dquots of all file systems
 140         */
 141        xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
 142
 143        /*
  144         * dquot zone. We register our own low-memory callback.
 145         */
 146        if (!qm_dqzone) {
 147                xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
 148                                                "xfs_dquots");
 149                qm_dqzone = xqm->qm_dqzone;
 150        } else
 151                xqm->qm_dqzone = qm_dqzone;
 152
 153        register_shrinker(&xfs_qm_shaker);
 154
 155        /*
 156         * The t_dqinfo portion of transactions.
 157         */
 158        if (!qm_dqtrxzone) {
 159                xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
 160                                                   "xfs_dqtrx");
 161                qm_dqtrxzone = xqm->qm_dqtrxzone;
 162        } else
 163                xqm->qm_dqtrxzone = qm_dqtrxzone;
 164
 165        atomic_set(&xqm->qm_totaldquots, 0);
 166        xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
 167        xqm->qm_nrefs = 0;
 168#ifdef DEBUG
 169        mutex_init(&qcheck_lock);
 170#endif
 171        return xqm;
 172}
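
/*
 * Editor's sketch (standalone userspace C, not XFS code): how
 * qm_dqhashmask = hsize - 1 turns a dquot id into a bucket index.
 * This assumes, as the code above does, that the bucket count is a
 * power of two, so id & (nbuckets - 1) equals id % nbuckets while
 * avoiding the division.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned int hash_bucket(uint32_t id, unsigned int mask)
{
	return id & mask;			/* mask == nbuckets - 1 */
}

int main(void)
{
	unsigned int nbuckets = 1024;		/* must be a power of two */
	unsigned int mask = nbuckets - 1;

	assert((nbuckets & mask) == 0);		/* power-of-two check */
	printf("id 5000 -> bucket %u\n", hash_bucket(5000, mask));
	return 0;
}
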
 173
 174/*
 175 * Destroy the global quota manager when its reference count goes to zero.
 176 */
 177STATIC void
 178xfs_qm_destroy(
 179        struct xfs_qm   *xqm)
 180{
 181        int             hsize, i;
 182
 183        ASSERT(xqm != NULL);
 184        ASSERT(xqm->qm_nrefs == 0);
 185        unregister_shrinker(&xfs_qm_shaker);
 186        hsize = xqm->qm_dqhashmask + 1;
 187        for (i = 0; i < hsize; i++) {
 188                xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
 189                xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
 190        }
 191        kmem_free(xqm->qm_usr_dqhtable);
 192        kmem_free(xqm->qm_grp_dqhtable);
 193        xqm->qm_usr_dqhtable = NULL;
 194        xqm->qm_grp_dqhtable = NULL;
 195        xqm->qm_dqhashmask = 0;
 196        xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
 197#ifdef DEBUG
 198        mutex_destroy(&qcheck_lock);
 199#endif
 200        kmem_free(xqm);
 201}
 202
 203/*
 204 * Called at mount time to let XQM know that another file system is
 205 * starting quotas. This isn't crucial information as the individual mount
 206 * structures are pretty independent, but it helps the XQM keep a
 207 * global view of what's going on.
 208 */
 209/* ARGSUSED */
 210STATIC int
 211xfs_qm_hold_quotafs_ref(
 212        struct xfs_mount *mp)
 213{
 214        /*
 215         * Need to lock the xfs_Gqm structure for things like this. For example,
 216         * the structure could disappear between the entry to this routine and
 217         * a HOLD operation if not locked.
 218         */
 219        mutex_lock(&xfs_Gqm_lock);
 220
 221        if (xfs_Gqm == NULL)
 222                xfs_Gqm = xfs_Gqm_init();
 223        /*
 224         * We can keep a list of all filesystems with quotas mounted for
 225         * debugging and statistical purposes, but ...
 226         * Just take a reference and get out.
 227         */
 228        xfs_Gqm->qm_nrefs++;
 229        mutex_unlock(&xfs_Gqm_lock);
 230
 231        return 0;
 232}
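
/*
 * Editor's sketch (standalone userspace C, not XFS code): the
 * hold/release pattern used by xfs_qm_hold_quotafs_ref() above.
 * A lazily created singleton is protected by a mutex and destroyed
 * when the last reference is dropped; allocation failure handling is
 * elided, and all names are illustrative.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct manager { int nrefs; };

static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static struct manager *g_mgr;

static void manager_hold(void)
{
	pthread_mutex_lock(&g_lock);	/* g_mgr may vanish if unlocked */
	if (!g_mgr)
		g_mgr = calloc(1, sizeof(*g_mgr));
	g_mgr->nrefs++;
	pthread_mutex_unlock(&g_lock);
}

static void manager_rele(void)
{
	pthread_mutex_lock(&g_lock);
	if (--g_mgr->nrefs == 0) {
		free(g_mgr);
		g_mgr = NULL;		/* next hold re-creates it */
	}
	pthread_mutex_unlock(&g_lock);
}

int main(void)
{
	manager_hold();			/* first mount creates it */
	manager_hold();			/* later mounts just take a ref */
	manager_rele();
	manager_rele();			/* last release destroys it */
	printf("manager is %s\n", g_mgr ? "alive" : "gone");
	return 0;
}
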
 233
 234
 235/*
 236 * Release the reference that a filesystem took at mount time,
 237 * so that we know when we need to destroy the entire quota manager.
 238 */
 239/* ARGSUSED */
 240STATIC void
 241xfs_qm_rele_quotafs_ref(
 242        struct xfs_mount *mp)
 243{
 244        xfs_dquot_t     *dqp, *nextdqp;
 245
 246        ASSERT(xfs_Gqm);
 247        ASSERT(xfs_Gqm->qm_nrefs > 0);
 248
 249        /*
 250         * Go thru the freelist and destroy all inactive dquots.
 251         */
 252        xfs_qm_freelist_lock(xfs_Gqm);
 253
 254        for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
 255             dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
 256                xfs_dqlock(dqp);
 257                nextdqp = dqp->dq_flnext;
 258                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
 259                        ASSERT(dqp->q_mount == NULL);
 260                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
 261                        ASSERT(dqp->HL_PREVP == NULL);
 262                        ASSERT(dqp->MPL_PREVP == NULL);
 263                        XQM_FREELIST_REMOVE(dqp);
 264                        xfs_dqunlock(dqp);
 265                        xfs_qm_dqdestroy(dqp);
 266                } else {
 267                        xfs_dqunlock(dqp);
 268                }
 269                dqp = nextdqp;
 270        }
 271        xfs_qm_freelist_unlock(xfs_Gqm);
 272
 273        /*
 274         * Destroy the entire XQM. If somebody mounts with quotaon, this'll
 275         * be restarted.
 276         */
 277        mutex_lock(&xfs_Gqm_lock);
 278        if (--xfs_Gqm->qm_nrefs == 0) {
 279                xfs_qm_destroy(xfs_Gqm);
 280                xfs_Gqm = NULL;
 281        }
 282        mutex_unlock(&xfs_Gqm_lock);
 283}
 284
 285/*
 286 * Just destroy the quotainfo structure.
 287 */
 288void
 289xfs_qm_unmount(
 290        struct xfs_mount        *mp)
 291{
 292        if (mp->m_quotainfo) {
 293                xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 294                xfs_qm_destroy_quotainfo(mp);
 295        }
 296}
 297
 298
 299/*
 300 * This is called from xfs_mountfs to start quotas and initialize all
 301 * necessary data structures like quotainfo.  This is also responsible for
 302 * running a quotacheck as necessary.  We are guaranteed that the superblock
 303 * is consistently read in at this point.
 304 *
 305 * If we fail here, the mount will continue with quota turned off. We don't
  306 * need to indicate success or failure at all.
 307 */
 308void
 309xfs_qm_mount_quotas(
 310        xfs_mount_t     *mp)
 311{
 312        int             error = 0;
 313        uint            sbf;
 314
 315        /*
  316         * If quotas on realtime volumes are not supported, we disable
 317         * quotas immediately.
 318         */
 319        if (mp->m_sb.sb_rextents) {
 320                cmn_err(CE_NOTE,
 321                        "Cannot turn on quotas for realtime filesystem %s",
 322                        mp->m_fsname);
 323                mp->m_qflags = 0;
 324                goto write_changes;
 325        }
 326
 327        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 328
 329        /*
 330         * Allocate the quotainfo structure inside the mount struct, and
 331         * create quotainode(s), and change/rev superblock if necessary.
 332         */
 333        error = xfs_qm_init_quotainfo(mp);
 334        if (error) {
 335                /*
 336                 * We must turn off quotas.
 337                 */
 338                ASSERT(mp->m_quotainfo == NULL);
 339                mp->m_qflags = 0;
 340                goto write_changes;
 341        }
 342        /*
 343         * If any of the quotas are not consistent, do a quotacheck.
 344         */
 345        if (XFS_QM_NEED_QUOTACHECK(mp)) {
 346                error = xfs_qm_quotacheck(mp);
 347                if (error) {
 348                        /* Quotacheck failed and disabled quotas. */
 349                        return;
 350                }
 351        }
 352        /* 
 353         * If one type of quotas is off, then it will lose its
 354         * quotachecked status, since we won't be doing accounting for
 355         * that type anymore.
 356         */
 357        if (!XFS_IS_UQUOTA_ON(mp))
 358                mp->m_qflags &= ~XFS_UQUOTA_CHKD;
 359        if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
 360                mp->m_qflags &= ~XFS_OQUOTA_CHKD;
 361
 362 write_changes:
 363        /*
 364         * We actually don't have to acquire the m_sb_lock at all.
 365         * This can only be called from mount, and that's single threaded. XXX
 366         */
 367        spin_lock(&mp->m_sb_lock);
 368        sbf = mp->m_sb.sb_qflags;
 369        mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
 370        spin_unlock(&mp->m_sb_lock);
 371
 372        if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
 373                if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
 374                        /*
 375                         * We could only have been turning quotas off.
 376                         * We aren't in very good shape actually because
 377                         * the incore structures are convinced that quotas are
  378                          * off, but the on-disk superblock doesn't know that!
 379                         */
 380                        ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
 381                        xfs_fs_cmn_err(CE_ALERT, mp,
 382                                "XFS mount_quotas: Superblock update failed!");
 383                }
 384        }
 385
 386        if (error) {
 387                xfs_fs_cmn_err(CE_WARN, mp,
 388                        "Failed to initialize disk quotas.");
 389                return;
 390        }
 391
 392#ifdef QUOTADEBUG
 393        if (XFS_IS_QUOTA_ON(mp))
 394                xfs_qm_internalqcheck(mp);
 395#endif
 396}
 397
 398/*
 399 * Called from the vfsops layer.
 400 */
 401void
 402xfs_qm_unmount_quotas(
 403        xfs_mount_t     *mp)
 404{
 405        /*
 406         * Release the dquots that root inode, et al might be holding,
 407         * before we flush quotas and blow away the quotainfo structure.
 408         */
 409        ASSERT(mp->m_rootip);
 410        xfs_qm_dqdetach(mp->m_rootip);
 411        if (mp->m_rbmip)
 412                xfs_qm_dqdetach(mp->m_rbmip);
 413        if (mp->m_rsumip)
 414                xfs_qm_dqdetach(mp->m_rsumip);
 415
 416        /*
 417         * Release the quota inodes.
 418         */
 419        if (mp->m_quotainfo) {
 420                if (mp->m_quotainfo->qi_uquotaip) {
 421                        IRELE(mp->m_quotainfo->qi_uquotaip);
 422                        mp->m_quotainfo->qi_uquotaip = NULL;
 423                }
 424                if (mp->m_quotainfo->qi_gquotaip) {
 425                        IRELE(mp->m_quotainfo->qi_gquotaip);
 426                        mp->m_quotainfo->qi_gquotaip = NULL;
 427                }
 428        }
 429}
 430
 431/*
 432 * Flush all dquots of the given file system to disk. The dquots are
 433 * _not_ purged from memory here, just their data written to disk.
 434 */
 435STATIC int
 436xfs_qm_dqflush_all(
 437        xfs_mount_t     *mp,
 438        int             flags)
 439{
 440        int             recl;
 441        xfs_dquot_t     *dqp;
 442        int             niters;
 443        int             error;
 444
 445        if (mp->m_quotainfo == NULL)
 446                return 0;
 447        niters = 0;
 448again:
 449        xfs_qm_mplist_lock(mp);
 450        FOREACH_DQUOT_IN_MP(dqp, mp) {
 451                xfs_dqlock(dqp);
 452                if (! XFS_DQ_IS_DIRTY(dqp)) {
 453                        xfs_dqunlock(dqp);
 454                        continue;
 455                }
 456                xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
 457                /* XXX a sentinel would be better */
 458                recl = XFS_QI_MPLRECLAIMS(mp);
 459                if (!xfs_dqflock_nowait(dqp)) {
 460                        /*
 461                         * If we can't grab the flush lock then check
 462                         * to see if the dquot has been flushed delayed
 463                         * write.  If so, grab its buffer and send it
 464                         * out immediately.  We'll be able to acquire
 465                         * the flush lock when the I/O completes.
 466                         */
 467                        xfs_qm_dqflock_pushbuf_wait(dqp);
 468                }
 469                /*
 470                 * Let go of the mplist lock. We don't want to hold it
 471                 * across a disk write.
 472                 */
 473                xfs_qm_mplist_unlock(mp);
 474                error = xfs_qm_dqflush(dqp, flags);
 475                xfs_dqunlock(dqp);
 476                if (error)
 477                        return error;
 478
 479                xfs_qm_mplist_lock(mp);
 480                if (recl != XFS_QI_MPLRECLAIMS(mp)) {
 481                        xfs_qm_mplist_unlock(mp);
 482                        /* XXX restart limit */
 483                        goto again;
 484                }
 485        }
 486
 487        xfs_qm_mplist_unlock(mp);
 488        /* return ! busy */
 489        return 0;
 490}
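
/*
 * Editor's sketch (standalone userspace C, not XFS code): the restart
 * dance in xfs_qm_dqflush_all() above. The list lock cannot be held
 * across a blocking write, so the walker records a generation counter,
 * drops the lock, does the work, retakes the lock, and restarts from
 * the head if the list changed in the window. Names are illustrative.
 */
#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int dirty; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;
static unsigned long generation;	/* bumped on every list change */

static void flush_node(struct node *n)	/* stands in for the disk write */
{
	n->dirty = 0;
}

static void flush_all(void)
{
	struct node *n;
	unsigned long gen;

again:
	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next) {
		if (!n->dirty)
			continue;
		gen = generation;
		pthread_mutex_unlock(&list_lock); /* never hold across "I/O" */
		flush_node(n);
		pthread_mutex_lock(&list_lock);
		if (gen != generation) {
			/* list changed under us: n->next is stale */
			pthread_mutex_unlock(&list_lock);
			goto again;
		}
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct node a = { .next = NULL, .dirty = 1 };

	head = &a;
	flush_all();
	printf("dirty after flush: %d\n", a.dirty);
	return 0;
}
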
 491/*
 492 * Release the group dquot pointers the user dquots may be
 493 * carrying around as a hint. mplist is locked on entry and exit.
 494 */
 495STATIC void
 496xfs_qm_detach_gdquots(
 497        xfs_mount_t     *mp)
 498{
 499        xfs_dquot_t     *dqp, *gdqp;
 500        int             nrecl;
 501
 502 again:
 503        ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
 504        dqp = XFS_QI_MPLNEXT(mp);
 505        while (dqp) {
 506                xfs_dqlock(dqp);
 507                if ((gdqp = dqp->q_gdquot)) {
 508                        xfs_dqlock(gdqp);
 509                        dqp->q_gdquot = NULL;
 510                }
 511                xfs_dqunlock(dqp);
 512
 513                if (gdqp) {
 514                        /*
 515                         * Can't hold the mplist lock across a dqput.
 516                         * XXXmust convert to marker based iterations here.
 517                         */
 518                        nrecl = XFS_QI_MPLRECLAIMS(mp);
 519                        xfs_qm_mplist_unlock(mp);
 520                        xfs_qm_dqput(gdqp);
 521
 522                        xfs_qm_mplist_lock(mp);
 523                        if (nrecl != XFS_QI_MPLRECLAIMS(mp))
 524                                goto again;
 525                }
 526                dqp = dqp->MPL_NEXT;
 527        }
 528}
 529
 530/*
 531 * Go through all the incore dquots of this file system and take them
 532 * off the mplist and hashlist, if the dquot type matches the dqtype
 533 * parameter. This is used when turning off quota accounting for
 534 * users and/or groups, as well as when the filesystem is unmounting.
 535 */
 536STATIC int
 537xfs_qm_dqpurge_int(
 538        xfs_mount_t     *mp,
 539        uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
 540{
 541        xfs_dquot_t     *dqp;
 542        uint            dqtype;
 543        int             nrecl;
 544        xfs_dquot_t     *nextdqp;
 545        int             nmisses;
 546
 547        if (mp->m_quotainfo == NULL)
 548                return 0;
 549
 550        dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
 551        dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
 552        dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
 553
 554        xfs_qm_mplist_lock(mp);
 555
 556        /*
 557         * In the first pass through all incore dquots of this filesystem,
 558         * we release the group dquot pointers the user dquots may be
 559         * carrying around as a hint. We need to do this irrespective of
 560         * what's being turned off.
 561         */
 562        xfs_qm_detach_gdquots(mp);
 563
 564      again:
 565        nmisses = 0;
 566        ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
 567        /*
 568         * Try to get rid of all of the unwanted dquots. The idea is to
 569         * get them off mplist and hashlist, but leave them on freelist.
 570         */
 571        dqp = XFS_QI_MPLNEXT(mp);
 572        while (dqp) {
 573                /*
 574                 * It's OK to look at the type without taking dqlock here.
 575                 * We're holding the mplist lock here, and that's needed for
 576                 * a dqreclaim.
 577                 */
 578                if ((dqp->dq_flags & dqtype) == 0) {
 579                        dqp = dqp->MPL_NEXT;
 580                        continue;
 581                }
 582
 583                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
 584                        nrecl = XFS_QI_MPLRECLAIMS(mp);
 585                        xfs_qm_mplist_unlock(mp);
 586                        mutex_lock(&dqp->q_hash->qh_lock);
 587                        xfs_qm_mplist_lock(mp);
 588
 589                        /*
 590                         * XXXTheoretically, we can get into a very long
 591                         * ping pong game here.
 592                         * No one can be adding dquots to the mplist at
 593                         * this point, but somebody might be taking things off.
 594                         */
 595                        if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
 596                                mutex_unlock(&dqp->q_hash->qh_lock);
 597                                goto again;
 598                        }
 599                }
 600
 601                /*
 602                 * Take the dquot off the mplist and hashlist. It may remain on
 603                 * freelist in INACTIVE state.
 604                 */
 605                nextdqp = dqp->MPL_NEXT;
 606                nmisses += xfs_qm_dqpurge(dqp);
 607                dqp = nextdqp;
 608        }
 609        xfs_qm_mplist_unlock(mp);
 610        return nmisses;
 611}
 612
 613int
 614xfs_qm_dqpurge_all(
 615        xfs_mount_t     *mp,
 616        uint            flags)
 617{
 618        int             ndquots;
 619
 620        /*
 621         * Purge the dquot cache.
 622         * None of the dquots should really be busy at this point.
 623         */
 624        if (mp->m_quotainfo) {
 625                while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
 626                        delay(ndquots * 10);
 627                }
 628        }
 629        return 0;
 630}
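
/*
 * Editor's sketch (standalone userspace C, not XFS code): the shape of
 * the purge loop above. Keep retrying until a pass reports no busy
 * entries, backing off in proportion to how many were still busy;
 * usleep() stands in for the kernel's delay().
 */
#include <unistd.h>

static int busy_left = 3;		/* fake workload */

static int purge_pass(void)		/* returns busy entries left */
{
	return busy_left ? busy_left-- : 0;
}

static void purge_all(void)
{
	int busy;

	while ((busy = purge_pass()) != 0)
		usleep(busy * 10000);	/* ~10ms per busy entry */
}

int main(void)
{
	purge_all();
	return 0;
}
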
 631
 632STATIC int
 633xfs_qm_dqattach_one(
 634        xfs_inode_t     *ip,
 635        xfs_dqid_t      id,
 636        uint            type,
 637        uint            doalloc,
 638        xfs_dquot_t     *udqhint, /* hint */
 639        xfs_dquot_t     **IO_idqpp)
 640{
 641        xfs_dquot_t     *dqp;
 642        int             error;
 643
 644        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 645        error = 0;
 646
 647        /*
 648         * See if we already have it in the inode itself. IO_idqpp is
 649         * &i_udquot or &i_gdquot. This made the code look weird, but
 650         * made the logic a lot simpler.
 651         */
 652        dqp = *IO_idqpp;
 653        if (dqp) {
 654                xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
 655                return 0;
 656        }
 657
 658        /*
 659         * udqhint is the i_udquot field in inode, and is non-NULL only
 660         * when the type arg is group/project. Its purpose is to save a
 661         * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
 662         * the user dquot.
 663         */
 664        if (udqhint) {
 665                ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
 666                xfs_dqlock(udqhint);
 667
 668                /*
 669                 * No need to take dqlock to look at the id.
 670                 *
 671                 * The ID can't change until it gets reclaimed, and it won't
 672                 * be reclaimed as long as we have a ref from inode and we
 673                 * hold the ilock.
 674                 */
 675                dqp = udqhint->q_gdquot;
 676                if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
 677                        xfs_dqlock(dqp);
 678                        XFS_DQHOLD(dqp);
 679                        ASSERT(*IO_idqpp == NULL);
 680                        *IO_idqpp = dqp;
 681
 682                        xfs_dqunlock(dqp);
 683                        xfs_dqunlock(udqhint);
 684                        return 0;
 685                }
 686
 687                /*
 688                 * We can't hold a dquot lock when we call the dqget code.
 689                 * We'll deadlock in no time, because of (not conforming to)
 690                 * lock ordering - the inodelock comes before any dquot lock,
 691                 * and we may drop and reacquire the ilock in xfs_qm_dqget().
 692                 */
 693                xfs_dqunlock(udqhint);
 694        }
 695
 696        /*
 697         * Find the dquot from somewhere. This bumps the
 698         * reference count of dquot and returns it locked.
 699         * This can return ENOENT if dquot didn't exist on
 700         * disk and we didn't ask it to allocate;
 701         * ESRCH if quotas got turned off suddenly.
 702         */
 703        error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
 704        if (error)
 705                return error;
 706
 707        xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
 708
 709        /*
 710         * dqget may have dropped and re-acquired the ilock, but it guarantees
 711         * that the dquot returned is the one that should go in the inode.
 712         */
 713        *IO_idqpp = dqp;
 714        xfs_dqunlock(dqp);
 715        return 0;
 716}
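
/*
 * Editor's sketch (standalone userspace C, not XFS code): the udqhint
 * shortcut in xfs_qm_dqattach_one() above. A cached pointer is trusted
 * only if its id still matches the id being asked for; otherwise fall
 * back to the slow lookup. Locking and reference counting are elided
 * to show just the control flow; names are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

struct dquot { uint32_t id; int refs; };

static struct dquot table[] = { { 10, 0 }, { 20, 0 }, { 30, 0 } };

static struct dquot *slow_lookup(uint32_t id)	/* stands in for dqget */
{
	for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].id == id)
			return &table[i];
	return NULL;
}

static struct dquot *get_dquot(struct dquot *hint, uint32_t id)
{
	if (hint && hint->id == id) {
		hint->refs++;		/* hint hit: just take a reference */
		return hint;
	}
	return slow_lookup(id);		/* hint stale or absent */
}

int main(void)
{
	struct dquot *g = get_dquot(&table[1], 20);	/* hint hit */
	struct dquot *h = get_dquot(&table[1], 30);	/* hint miss */

	printf("%u %u\n", g->id, h->id);
	return 0;
}
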
 717
 718
 719/*
 720 * Given a udquot and gdquot, attach a ptr to the group dquot in the
 721 * udquot as a hint for future lookups. The idea sounds simple, but the
 722 * execution isn't, because the udquot might have a group dquot attached
 723 * already and getting rid of that gets us into lock ordering constraints.
 724 * The process is complicated more by the fact that the dquots may or may not
 725 * be locked on entry.
 726 */
 727STATIC void
 728xfs_qm_dqattach_grouphint(
 729        xfs_dquot_t     *udq,
 730        xfs_dquot_t     *gdq)
 731{
 732        xfs_dquot_t     *tmp;
 733
 734        xfs_dqlock(udq);
 735
 736        if ((tmp = udq->q_gdquot)) {
 737                if (tmp == gdq) {
 738                        xfs_dqunlock(udq);
 739                        return;
 740                }
 741
 742                udq->q_gdquot = NULL;
 743                /*
 744                 * We can't keep any dqlocks when calling dqrele,
 745                 * because the freelist lock comes before dqlocks.
 746                 */
 747                xfs_dqunlock(udq);
 748                /*
 749                 * we took a hard reference once upon a time in dqget,
 750                 * so give it back when the udquot no longer points at it
 751                 * dqput() does the unlocking of the dquot.
 752                 */
 753                xfs_qm_dqrele(tmp);
 754
 755                xfs_dqlock(udq);
 756                xfs_dqlock(gdq);
 757
 758        } else {
 759                ASSERT(XFS_DQ_IS_LOCKED(udq));
 760                xfs_dqlock(gdq);
 761        }
 762
 763        ASSERT(XFS_DQ_IS_LOCKED(udq));
 764        ASSERT(XFS_DQ_IS_LOCKED(gdq));
 765        /*
 766         * Somebody could have attached a gdquot here,
 767         * when we dropped the uqlock. If so, just do nothing.
 768         */
 769        if (udq->q_gdquot == NULL) {
 770                XFS_DQHOLD(gdq);
 771                udq->q_gdquot = gdq;
 772        }
 773
 774        xfs_dqunlock(gdq);
 775        xfs_dqunlock(udq);
 776}
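
/*
 * Editor's sketch (standalone userspace C, not XFS code): the recheck
 * at the end of xfs_qm_dqattach_grouphint() above. Whenever locks are
 * dropped and retaken to respect lock ordering, the condition that
 * motivated the operation must be tested again, since another thread
 * may have done the work in the unlocked window.
 */
#include <pthread.h>
#include <stddef.h>

struct obj {
	pthread_mutex_t lock;
	struct obj *hint;		/* plays the role of q_gdquot */
};

static void attach_hint(struct obj *u, struct obj *g)
{
	pthread_mutex_lock(&u->lock);
	if (u->hint == g) {			/* already attached */
		pthread_mutex_unlock(&u->lock);
		return;
	}
	if (u->hint) {
		u->hint = NULL;
		pthread_mutex_unlock(&u->lock);	/* can't release while locked */
		/* ... drop the old hint's reference here ... */
		pthread_mutex_lock(&u->lock);
	}
	/* Somebody may have attached a hint while we held no lock. */
	if (u->hint == NULL)
		u->hint = g;
	pthread_mutex_unlock(&u->lock);
}

int main(void)
{
	struct obj g = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct obj u = { PTHREAD_MUTEX_INITIALIZER, NULL };

	attach_hint(&u, &g);
	return u.hint == &g ? 0 : 1;
}
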
 777
 778
 779/*
 780 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
 781 * into account.
 782 * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
 783 * Inode may get unlocked and relocked in here, and the caller must deal with
 784 * the consequences.
 785 */
 786int
 787xfs_qm_dqattach_locked(
 788        xfs_inode_t     *ip,
 789        uint            flags)
 790{
 791        xfs_mount_t     *mp = ip->i_mount;
 792        uint            nquotas = 0;
 793        int             error = 0;
 794
 795        if (!XFS_IS_QUOTA_RUNNING(mp) ||
 796            !XFS_IS_QUOTA_ON(mp) ||
 797            !XFS_NOT_DQATTACHED(mp, ip) ||
 798            ip->i_ino == mp->m_sb.sb_uquotino ||
 799            ip->i_ino == mp->m_sb.sb_gquotino)
 800                return 0;
 801
 802        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 803
 804        if (XFS_IS_UQUOTA_ON(mp)) {
 805                error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
 806                                                flags & XFS_QMOPT_DQALLOC,
 807                                                NULL, &ip->i_udquot);
 808                if (error)
 809                        goto done;
 810                nquotas++;
 811        }
 812
 813        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 814        if (XFS_IS_OQUOTA_ON(mp)) {
 815                error = XFS_IS_GQUOTA_ON(mp) ?
 816                        xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
 817                                                flags & XFS_QMOPT_DQALLOC,
 818                                                ip->i_udquot, &ip->i_gdquot) :
 819                        xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
 820                                                flags & XFS_QMOPT_DQALLOC,
 821                                                ip->i_udquot, &ip->i_gdquot);
 822                /*
 823                 * Don't worry about the udquot that we may have
 824                 * attached above. It'll get detached, if not already.
 825                 */
 826                if (error)
 827                        goto done;
 828                nquotas++;
 829        }
 830
 831        /*
 832         * Attach this group quota to the user quota as a hint.
 833         * This WON'T, in general, result in a thrash.
 834         */
 835        if (nquotas == 2) {
 836                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 837                ASSERT(ip->i_udquot);
 838                ASSERT(ip->i_gdquot);
 839
 840                /*
 841                 * We may or may not have the i_udquot locked at this point,
 842                 * but this check is OK since we don't depend on the i_gdquot to
 843                 * be accurate 100% all the time. It is just a hint, and this
 844                 * will succeed in general.
 845                 */
 846                if (ip->i_udquot->q_gdquot == ip->i_gdquot)
 847                        goto done;
 848                /*
 849                 * Attach i_gdquot to the gdquot hint inside the i_udquot.
 850                 */
 851                xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
 852        }
 853
 854 done:
 855#ifdef QUOTADEBUG
 856        if (! error) {
 857                if (XFS_IS_UQUOTA_ON(mp))
 858                        ASSERT(ip->i_udquot);
 859                if (XFS_IS_OQUOTA_ON(mp))
 860                        ASSERT(ip->i_gdquot);
 861        }
 862        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 863#endif
 864        return error;
 865}
 866
 867int
 868xfs_qm_dqattach(
 869        struct xfs_inode        *ip,
 870        uint                    flags)
 871{
 872        int                     error;
 873
 874        xfs_ilock(ip, XFS_ILOCK_EXCL);
 875        error = xfs_qm_dqattach_locked(ip, flags);
 876        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 877
 878        return error;
 879}
 880
 881/*
 882 * Release dquots (and their references) if any.
  883 * The inode should be locked EXCL except when this is called by
 884 * xfs_ireclaim.
 885 */
 886void
 887xfs_qm_dqdetach(
 888        xfs_inode_t     *ip)
 889{
 890        if (!(ip->i_udquot || ip->i_gdquot))
 891                return;
 892
 893        ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
 894        ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
 895        if (ip->i_udquot) {
 896                xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
 897                xfs_qm_dqrele(ip->i_udquot);
 898                ip->i_udquot = NULL;
 899        }
 900        if (ip->i_gdquot) {
 901                xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
 902                xfs_qm_dqrele(ip->i_gdquot);
 903                ip->i_gdquot = NULL;
 904        }
 905}
 906
 907int
 908xfs_qm_sync(
 909        xfs_mount_t     *mp,
 910        int             flags)
 911{
 912        int             recl, restarts;
 913        xfs_dquot_t     *dqp;
 914        uint            flush_flags;
 915        int             error;
 916
 917        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 918                return 0;
 919
 920        flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
 921        restarts = 0;
 922
 923  again:
 924        xfs_qm_mplist_lock(mp);
 925        /*
  926         * dqpurge_all() also takes the mplist lock and iterates through all dquots
 927         * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
 928         * when we have the mplist lock, we know that dquots will be consistent
 929         * as long as we have it locked.
 930         */
 931        if (! XFS_IS_QUOTA_ON(mp)) {
 932                xfs_qm_mplist_unlock(mp);
 933                return 0;
 934        }
 935        FOREACH_DQUOT_IN_MP(dqp, mp) {
 936                /*
 937                 * If this is vfs_sync calling, then skip the dquots that
  938         * don't 'seem' to be dirty, i.e. don't acquire dqlock.
 939                 * This is very similar to what xfs_sync does with inodes.
 940                 */
 941                if (flags & SYNC_TRYLOCK) {
 942                        if (!XFS_DQ_IS_DIRTY(dqp))
 943                                continue;
 944                        if (!xfs_qm_dqlock_nowait(dqp))
 945                                continue;
 946                } else {
 947                        xfs_dqlock(dqp);
 948                }
 949
 950                /*
 951                 * Now, find out for sure if this dquot is dirty or not.
 952                 */
 953                if (! XFS_DQ_IS_DIRTY(dqp)) {
 954                        xfs_dqunlock(dqp);
 955                        continue;
 956                }
 957
 958                /* XXX a sentinel would be better */
 959                recl = XFS_QI_MPLRECLAIMS(mp);
 960                if (!xfs_dqflock_nowait(dqp)) {
 961                        if (flags & SYNC_TRYLOCK) {
 962                                xfs_dqunlock(dqp);
 963                                continue;
 964                        }
 965                        /*
  966                          * If we can't grab the flush lock, the caller still
  967                          * wants us to give this our best shot, so see if we
  968                          * can give a push to the buffer before we wait on
  969                          * the flush lock. At this point, we know that
 970                         * even though the dquot is being flushed,
 971                         * it has (new) dirty data.
 972                         */
 973                        xfs_qm_dqflock_pushbuf_wait(dqp);
 974                }
 975                /*
 976                 * Let go of the mplist lock. We don't want to hold it
 977                 * across a disk write
 978                 */
 979                xfs_qm_mplist_unlock(mp);
 980                xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
 981                error = xfs_qm_dqflush(dqp, flush_flags);
 982                xfs_dqunlock(dqp);
 983                if (error && XFS_FORCED_SHUTDOWN(mp))
 984                        return 0;       /* Need to prevent umount failure */
 985                else if (error)
 986                        return error;
 987
 988                xfs_qm_mplist_lock(mp);
 989                if (recl != XFS_QI_MPLRECLAIMS(mp)) {
 990                        if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
 991                                break;
 992
 993                        xfs_qm_mplist_unlock(mp);
 994                        goto again;
 995                }
 996        }
 997
 998        xfs_qm_mplist_unlock(mp);
 999        return 0;
1000}
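
/*
 * Editor's sketch (standalone userspace C, not XFS code): the
 * SYNC_TRYLOCK policy in xfs_qm_sync() above. A best-effort pass
 * skips objects whose lock is contended instead of blocking, which is
 * what a periodic sync wants; a waiting sync blocks instead.
 */
#include <pthread.h>
#include <stdio.h>

struct obj { pthread_mutex_t lock; int dirty; };

static void sync_one(struct obj *o, int trylock)
{
	if (trylock) {
		if (pthread_mutex_trylock(&o->lock) != 0)
			return;		/* busy: catch it on the next pass */
	} else {
		pthread_mutex_lock(&o->lock);	/* SYNC_WAIT: block */
	}
	if (o->dirty) {
		/* ... write it out here ... */
		o->dirty = 0;
	}
	pthread_mutex_unlock(&o->lock);
}

int main(void)
{
	struct obj o = { PTHREAD_MUTEX_INITIALIZER, 1 };

	sync_one(&o, 1);
	printf("dirty: %d\n", o.dirty);
	return 0;
}
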
1001
1002/*
1003 * The hash chains and the mplist use the same xfs_dqhash structure as
1004 * their list head, but we can take the mplist qh_lock and one of the
1005 * hash qh_locks at the same time without any problem as they aren't
1006 * related.
1007 */
1008static struct lock_class_key xfs_quota_mplist_class;
1009
1010/*
1011 * This initializes all the quota information that's kept in the
1012 * mount structure
1013 */
1014STATIC int
1015xfs_qm_init_quotainfo(
1016        xfs_mount_t     *mp)
1017{
1018        xfs_quotainfo_t *qinf;
1019        int             error;
1020        xfs_dquot_t     *dqp;
1021
1022        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1023
1024        /*
1025         * Tell XQM that we exist as soon as possible.
1026         */
1027        if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1028                return error;
1029        }
1030
1031        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1032
1033        /*
1034         * See if quotainodes are setup, and if not, allocate them,
1035         * and change the superblock accordingly.
1036         */
1037        if ((error = xfs_qm_init_quotainos(mp))) {
1038                kmem_free(qinf);
1039                mp->m_quotainfo = NULL;
1040                return error;
1041        }
1042
1043        xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1044        lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class);
1045
1046        qinf->qi_dqreclaims = 0;
1047
1048        /* mutex used to serialize quotaoffs */
1049        mutex_init(&qinf->qi_quotaofflock);
1050
1051        /* Precalc some constants */
1052        qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1053        ASSERT(qinf->qi_dqchunklen);
1054        qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1055        do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1056
1057        mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1058
1059        /*
1060         * We try to get the limits from the superuser's limits fields.
1061         * This is quite hacky, but it is standard quota practice.
1062         * We look at the USR dquot with id == 0 first, but if user quotas
 1063 * are not enabled we fall back to the GRP dquot with id == 0.
1064         * We don't really care to keep separate default limits for user
1065         * and group quotas, at least not at this point.
1066         */
1067        error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1068                             XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1069                             (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1070                                XFS_DQ_PROJ),
1071                             XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1072                             &dqp);
1073        if (! error) {
1074                xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1075
1076                /*
 1077                  * The warnings and timers set the grace period given to
 1078                  * a user or group before further writes are refused.
 1079                  * If the stored value is zero, a default is used.
1080                 */
1081                qinf->qi_btimelimit = ddqp->d_btimer ?
1082                        be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1083                qinf->qi_itimelimit = ddqp->d_itimer ?
1084                        be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1085                qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1086                        be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1087                qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1088                        be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1089                qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1090                        be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1091                qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1092                        be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1093                qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1094                qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1095                qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1096                qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1097                qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1098                qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1099 
1100                /*
1101                 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1102                 * we don't want this dquot cached. We haven't done a
1103                 * quotacheck yet, and quotacheck doesn't like incore dquots.
1104                 */
1105                xfs_qm_dqdestroy(dqp);
1106        } else {
1107                qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1108                qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1109                qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1110                qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1111                qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1112                qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1113        }
1114
1115        return 0;
1116}
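
/*
 * Editor's sketch (standalone userspace C, not XFS code): the
 * qi_dqperchunk precalculation above. A chunk length in 512-byte
 * basic blocks is converted to bytes (BBTOB) and divided by the
 * on-disk record size. The 136-byte record size here is illustrative,
 * not the real sizeof(xfs_dqblk_t).
 */
#include <stdio.h>

#define BBSIZE	512				/* bytes per basic block */

int main(void)
{
	unsigned int chunklen_bb = 8;		/* e.g. one 4KB chunk */
	unsigned int recsize = 136;		/* hypothetical record size */
	unsigned int perchunk = (chunklen_bb * BBSIZE) / recsize;

	printf("%u records fit in a %u-byte chunk\n",
	       perchunk, chunklen_bb * BBSIZE);
	return 0;
}
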
1117
1118
1119/*
1120 * Gets called when unmounting a filesystem or when all quotas get
1121 * turned off.
1122 * This purges the quota inodes, destroys locks and frees itself.
1123 */
1124void
1125xfs_qm_destroy_quotainfo(
1126        xfs_mount_t     *mp)
1127{
1128        xfs_quotainfo_t *qi;
1129
1130        qi = mp->m_quotainfo;
1131        ASSERT(qi != NULL);
1132        ASSERT(xfs_Gqm != NULL);
1133
1134        /*
1135         * Release the reference that XQM kept, so that we know
1136         * when the XQM structure should be freed. We cannot assume
1137         * that xfs_Gqm is non-null after this point.
1138         */
1139        xfs_qm_rele_quotafs_ref(mp);
1140
1141        xfs_qm_list_destroy(&qi->qi_dqlist);
1142
1143        if (qi->qi_uquotaip) {
1144                IRELE(qi->qi_uquotaip);
1145                qi->qi_uquotaip = NULL; /* paranoia */
1146        }
1147        if (qi->qi_gquotaip) {
1148                IRELE(qi->qi_gquotaip);
1149                qi->qi_gquotaip = NULL;
1150        }
1151        mutex_destroy(&qi->qi_quotaofflock);
1152        kmem_free(qi);
1153        mp->m_quotainfo = NULL;
1154}
1155
1156
1157
1158/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1159
1160/* ARGSUSED */
1161STATIC void
1162xfs_qm_list_init(
1163        xfs_dqlist_t    *list,
1164        char            *str,
1165        int             n)
1166{
1167        mutex_init(&list->qh_lock);
1168        list->qh_next = NULL;
1169        list->qh_version = 0;
1170        list->qh_nelems = 0;
1171}
1172
1173STATIC void
1174xfs_qm_list_destroy(
1175        xfs_dqlist_t    *list)
1176{
1177        mutex_destroy(&(list->qh_lock));
1178}
1179
1180
1181/*
1182 * Stripped down version of dqattach. This doesn't attach, or even look at the
1183 * dquots attached to the inode. The rationale is that there won't be any
1184 * attached at the time this is called from quotacheck.
1185 */
1186STATIC int
1187xfs_qm_dqget_noattach(
1188        xfs_inode_t     *ip,
1189        xfs_dquot_t     **O_udqpp,
1190        xfs_dquot_t     **O_gdqpp)
1191{
1192        int             error;
1193        xfs_mount_t     *mp;
1194        xfs_dquot_t     *udqp, *gdqp;
1195
1196        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1197        mp = ip->i_mount;
1198        udqp = NULL;
1199        gdqp = NULL;
1200
1201        if (XFS_IS_UQUOTA_ON(mp)) {
1202                ASSERT(ip->i_udquot == NULL);
1203                /*
1204                 * We want the dquot allocated if it doesn't exist.
1205                 */
1206                if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1207                                         XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1208                                         &udqp))) {
1209                        /*
1210                         * Shouldn't be able to turn off quotas here.
1211                         */
1212                        ASSERT(error != ESRCH);
1213                        ASSERT(error != ENOENT);
1214                        return error;
1215                }
1216                ASSERT(udqp);
1217        }
1218
1219        if (XFS_IS_OQUOTA_ON(mp)) {
1220                ASSERT(ip->i_gdquot == NULL);
1221                if (udqp)
1222                        xfs_dqunlock(udqp);
1223                error = XFS_IS_GQUOTA_ON(mp) ?
1224                                xfs_qm_dqget(mp, ip,
1225                                             ip->i_d.di_gid, XFS_DQ_GROUP,
1226                                             XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1227                                             &gdqp) :
1228                                xfs_qm_dqget(mp, ip,
1229                                             ip->i_d.di_projid, XFS_DQ_PROJ,
1230                                             XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1231                                             &gdqp);
1232                if (error) {
1233                        if (udqp)
1234                                xfs_qm_dqrele(udqp);
1235                        ASSERT(error != ESRCH);
1236                        ASSERT(error != ENOENT);
1237                        return error;
1238                }
1239                ASSERT(gdqp);
1240
1241                /* Reacquire the locks in the right order */
1242                if (udqp) {
1243                        if (! xfs_qm_dqlock_nowait(udqp)) {
1244                                xfs_dqunlock(gdqp);
1245                                xfs_dqlock(udqp);
1246                                xfs_dqlock(gdqp);
1247                        }
1248                }
1249        }
1250
1251        *O_udqpp = udqp;
1252        *O_gdqpp = gdqp;
1253
1254#ifdef QUOTADEBUG
1255        if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1256        if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1257#endif
1258        return 0;
1259}
1260
1261/*
 1262 * Create an inode and return with a reference already taken, but unlocked.
 1263 * This is how we create quota inodes.
1264 */
1265STATIC int
1266xfs_qm_qino_alloc(
1267        xfs_mount_t     *mp,
1268        xfs_inode_t     **ip,
1269        __int64_t       sbfields,
1270        uint            flags)
1271{
1272        xfs_trans_t     *tp;
1273        int             error;
1274        int             committed;
1275
1276        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1277        if ((error = xfs_trans_reserve(tp,
1278                                      XFS_QM_QINOCREATE_SPACE_RES(mp),
1279                                      XFS_CREATE_LOG_RES(mp), 0,
1280                                      XFS_TRANS_PERM_LOG_RES,
1281                                      XFS_CREATE_LOG_COUNT))) {
1282                xfs_trans_cancel(tp, 0);
1283                return error;
1284        }
1285
1286        if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1287                                   &xfs_zerocr, 0, 1, ip, &committed))) {
1288                xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1289                                 XFS_TRANS_ABORT);
1290                return error;
1291        }
1292
1293        /*
1294         * Keep an extra reference to this quota inode. This inode is
1295         * locked exclusively and joined to the transaction already.
1296         */
1297        ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1298        IHOLD(*ip);
1299
1300        /*
1301         * Make the changes in the superblock, and log those too.
1302         * sbfields arg may contain fields other than *QUOTINO;
1303         * VERSIONNUM for example.
1304         */
1305        spin_lock(&mp->m_sb_lock);
1306        if (flags & XFS_QMOPT_SBVERSION) {
1307#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1308                unsigned oldv = mp->m_sb.sb_versionnum;
1309#endif
1310                ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1311                ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1312                                   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1313                       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1314                        XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1315
1316                xfs_sb_version_addquota(&mp->m_sb);
1317                mp->m_sb.sb_uquotino = NULLFSINO;
1318                mp->m_sb.sb_gquotino = NULLFSINO;
1319
1320                /* qflags will get updated _after_ quotacheck */
1321                mp->m_sb.sb_qflags = 0;
1322#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1323                cmn_err(CE_NOTE,
1324                        "Old superblock version %x, converting to %x.",
1325                        oldv, mp->m_sb.sb_versionnum);
1326#endif
1327        }
1328        if (flags & XFS_QMOPT_UQUOTA)
1329                mp->m_sb.sb_uquotino = (*ip)->i_ino;
1330        else
1331                mp->m_sb.sb_gquotino = (*ip)->i_ino;
1332        spin_unlock(&mp->m_sb_lock);
1333        xfs_mod_sb(tp, sbfields);
1334
1335        if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1336                xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1337                return error;
1338        }
1339        return 0;
1340}
1341
1342
1343STATIC void
1344xfs_qm_reset_dqcounts(
1345        xfs_mount_t     *mp,
1346        xfs_buf_t       *bp,
1347        xfs_dqid_t      id,
1348        uint            type)
1349{
1350        xfs_disk_dquot_t        *ddq;
1351        int                     j;
1352
1353        xfs_buftrace("RESET DQUOTS", bp);
1354        /*
1355         * Reset all counters and timers. They'll be
1356         * started afresh by xfs_qm_quotacheck.
1357         */
1358#ifdef DEBUG
1359        j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1360        do_div(j, sizeof(xfs_dqblk_t));
1361        ASSERT(XFS_QM_DQPERBLK(mp) == j);
1362#endif
1363        ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1364        for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1365                /*
1366                 * Do a sanity check, and if needed, repair the dqblk. Don't
1367                 * output any warnings because it's perfectly possible to
1368                 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1369                 */
1370                (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1371                                      "xfs_quotacheck");
1372                ddq->d_bcount = 0;
1373                ddq->d_icount = 0;
1374                ddq->d_rtbcount = 0;
1375                ddq->d_btimer = 0;
1376                ddq->d_itimer = 0;
1377                ddq->d_rtbtimer = 0;
1378                ddq->d_bwarns = 0;
1379                ddq->d_iwarns = 0;
1380                ddq->d_rtbwarns = 0;
1381                ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1382        }
1383}
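
/*
 * Editor's sketch (standalone userspace C, not XFS code): the buffer
 * walk in xfs_qm_reset_dqcounts() above. A raw block buffer holds an
 * array of fixed-size on-disk records; the cursor points at the header
 * of each record but advances by the full record size, mirroring
 * "(xfs_disk_dquot_t *)((xfs_dqblk_t *)ddq + 1)". Sizes are
 * illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rec_hdr	{ uint64_t count; };			/* fields we reset */
struct rec	{ struct rec_hdr hdr; char pad[120]; };	/* full record */

static union { unsigned char bytes[4096]; uint64_t align; } buf;

int main(void)
{
	size_t nrecs = sizeof(buf.bytes) / sizeof(struct rec);
	struct rec_hdr *h = (struct rec_hdr *)buf.bytes;

	memset(buf.bytes, 0xff, sizeof(buf.bytes));	/* "dirty" contents */
	for (size_t i = 0; i < nrecs; i++) {
		h->count = 0;				/* reset the header */
		h = (struct rec_hdr *)((struct rec *)h + 1); /* full stride */
	}
	printf("reset %zu records\n", nrecs);
	return 0;
}
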
1384
1385STATIC int
1386xfs_qm_dqiter_bufs(
1387        xfs_mount_t     *mp,
1388        xfs_dqid_t      firstid,
1389        xfs_fsblock_t   bno,
1390        xfs_filblks_t   blkcnt,
1391        uint            flags)
1392{
1393        xfs_buf_t       *bp;
1394        int             error;
1395        int             notcommitted;
1396        int             incr;
1397        int             type;
1398
1399        ASSERT(blkcnt > 0);
1400        notcommitted = 0;
1401        incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1402                XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1403        type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1404                (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1405        error = 0;
1406
1407        /*
1408         * Blkcnt arg can be a very big number, and might even be
1409         * larger than the log itself. So, we have to break it up into
1410         * manageable-sized transactions.
1411         * Note that we don't start a permanent transaction here; we might
1412         * not be able to get a log reservation for the whole thing up front,
1413         * and we don't really care to either, because we just discard
1414         * everything if we were to crash in the middle of this loop.
1415         */
1416        while (blkcnt--) {
1417                error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1418                              XFS_FSB_TO_DADDR(mp, bno),
1419                              (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1420                if (error)
1421                        break;
1422
1423                xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1424                xfs_bdwrite(mp, bp);
1425                /*
 1426                  * go to the next block.
1427                 */
1428                bno++;
1429                firstid += XFS_QM_DQPERBLK(mp);
1430        }
1431        return error;
1432}
1433
1434/*
1435 * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1436 * caller supplied function for every chunk of dquots that we find.
1437 */
1438STATIC int
1439xfs_qm_dqiterate(
1440        xfs_mount_t     *mp,
1441        xfs_inode_t     *qip,
1442        uint            flags)
1443{
1444        xfs_bmbt_irec_t         *map;
1445        int                     i, nmaps;       /* number of map entries */
1446        int                     error;          /* return value */
1447        xfs_fileoff_t           lblkno;
1448        xfs_filblks_t           maxlblkcnt;
1449        xfs_dqid_t              firstid;
1450        xfs_fsblock_t           rablkno;
1451        xfs_filblks_t           rablkcnt;
1452
1453        error = 0;
1454        /*
1455         * This looks racy, but we can't keep an inode lock across a
1456         * trans_reserve. But, this gets called during quotacheck, and that
1457         * happens only at mount time which is single threaded.
1458         */
1459        if (qip->i_d.di_nblocks == 0)
1460                return 0;
1461
1462        map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1463
1464        lblkno = 0;
1465        maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1466        do {
1467                nmaps = XFS_DQITER_MAP_SIZE;
1468                /*
1469                 * We aren't changing the inode itself. Just changing
1470                 * some of its data. No new blocks are added here, and
1471                 * the inode is never added to the transaction.
1472                 */
1473                xfs_ilock(qip, XFS_ILOCK_SHARED);
1474                error = xfs_bmapi(NULL, qip, lblkno,
1475                                  maxlblkcnt - lblkno,
1476                                  XFS_BMAPI_METADATA,
1477                                  NULL,
1478                                  0, map, &nmaps, NULL, NULL);
1479                xfs_iunlock(qip, XFS_ILOCK_SHARED);
1480                if (error)
1481                        break;
1482
1483                ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1484                for (i = 0; i < nmaps; i++) {
1485                        ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1486                        ASSERT(map[i].br_blockcount);
1487
1488
1489                        lblkno += map[i].br_blockcount;
1490
1491                        if (map[i].br_startblock == HOLESTARTBLOCK)
1492                                continue;
1493
1494                        firstid = (xfs_dqid_t) map[i].br_startoff *
1495                                XFS_QM_DQPERBLK(mp);
1496                        /*
1497                         * Do a read-ahead on the next extent.
1498                         */
1499                        if ((i+1 < nmaps) &&
1500                            (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1501                                rablkcnt =  map[i+1].br_blockcount;
1502                                rablkno = map[i+1].br_startblock;
1503                                while (rablkcnt--) {
1504                                        xfs_baread(mp->m_ddev_targp,
1505                                               XFS_FSB_TO_DADDR(mp, rablkno),
1506                                               (int)XFS_QI_DQCHUNKLEN(mp));
1507                                        rablkno++;
1508                                }
1509                        }
1510                        /*
1511                         * Iterate through all the blocks in the extent and
1512                         * reset the counters of all the dquots inside them.
1513                         */
1514                        if ((error = xfs_qm_dqiter_bufs(mp,
1515                                                       firstid,
1516                                                       map[i].br_startblock,
1517                                                       map[i].br_blockcount,
1518                                                       flags))) {
1519                                break;
1520                        }
1521                }
1522
1523                if (error)
1524                        break;
1525        } while (nmaps > 0);
1526
1527        kmem_free(map);
1528
1529        return error;
1530}
1531
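/*
 * Editor's sketch (assumptions marked): the shape of the mapping loop
 * above with the XFS specifics stripped away. get_extents() stands in
 * for xfs_bmapi(), and a negative startblock models HOLESTARTBLOCK;
 * both names are hypothetical. The loop terminates when the mapper
 * returns no more extents.
 */
struct ext_sketch {
        long long               startblock;     /* < 0 means a hole */
        unsigned long long      startoff;
        unsigned long long      blockcount;
};

static int walk_extents_sketch(
        int (*get_extents)(unsigned long long off, struct ext_sketch *map,
                           int *nmaps),
        int (*visit)(const struct ext_sketch *ext))
{
        struct ext_sketch       map[8];
        unsigned long long      lblkno = 0;
        int                     i, nmaps, error;

        do {
                nmaps = 8;
                error = get_extents(lblkno, map, &nmaps);
                if (error)
                        break;
                for (i = 0; i < nmaps; i++) {
                        /* always advance, even past holes */
                        lblkno += map[i].blockcount;
                        if (map[i].startblock < 0)
                                continue;       /* hole: nothing on disk */
                        error = visit(&map[i]);
                        if (error)
                                break;
                }
                if (error)
                        break;
        } while (nmaps > 0);                    /* empty map means EOF */
        return error;
}
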
1532/*
1533 * Called by xfs_qm_dqusage_adjust while doing a quotacheck. Given a
1534 * dquot (USR or GRP/PRJ, it doesn't matter) and the inode's usage,
1535 * this updates the dquot's incore copy as well as the buffer copy,
1536 * so that once the quotacheck is done we can just log all the buffers
1537 * rather than logging numerous updates to individual dquots.
1538 */
1539STATIC void
1540xfs_qm_quotacheck_dqadjust(
1541        xfs_dquot_t             *dqp,
1542        xfs_qcnt_t              nblks,
1543        xfs_qcnt_t              rtblks)
1544{
1545        ASSERT(XFS_DQ_IS_LOCKED(dqp));
1546        xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1547        /*
1548         * Adjust the inode count and the block count to reflect this inode's
1549         * resource usage.
1550         */
1551        be64_add_cpu(&dqp->q_core.d_icount, 1);
1552        dqp->q_res_icount++;
1553        if (nblks) {
1554                be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1555                dqp->q_res_bcount += nblks;
1556        }
1557        if (rtblks) {
1558                be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1559                dqp->q_res_rtbcount += rtblks;
1560        }
1561
1562        /*
1563         * Set default limits, adjust timers (since we changed usages)
1564         */
1565        if (! XFS_IS_SUSER_DQUOT(dqp)) {
1566                xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1567                xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1568        }
1569
1570        dqp->dq_flags |= XFS_DQ_DIRTY;
1571}
1572
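/*
 * Editor's sketch: what be64_add_cpu() does to the big-endian on-disk
 * counters above -- convert to CPU order, add, convert back. This
 * portable rendition byte-swaps unconditionally, which is only correct
 * on a little-endian host (an assumption); the kernel helper compiles
 * to a plain add on big-endian machines.
 */
#include <stdint.h>

static uint64_t bswap64_sketch(uint64_t x)
{
        x = (x >> 32) | (x << 32);
        x = ((x & 0xffff0000ffff0000ULL) >> 16) |
            ((x & 0x0000ffff0000ffffULL) << 16);
        x = ((x & 0xff00ff00ff00ff00ULL) >>  8) |
            ((x & 0x00ff00ff00ff00ffULL) <<  8);
        return x;
}

/* add 'delta' to a 64-bit big-endian counter in place */
static void be64_add_sketch(uint64_t *be_counter, int64_t delta)
{
        *be_counter = bswap64_sketch(bswap64_sketch(*be_counter) +
                                     (uint64_t)delta);
}
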
1573STATIC int
1574xfs_qm_get_rtblks(
1575        xfs_inode_t     *ip,
1576        xfs_qcnt_t      *O_rtblks)
1577{
1578        xfs_filblks_t   rtblks;                 /* total rt blks */
1579        xfs_extnum_t    idx;                    /* extent record index */
1580        xfs_ifork_t     *ifp;                   /* inode fork pointer */
1581        xfs_extnum_t    nextents;               /* number of extent entries */
1582        int             error;
1583
1584        ASSERT(XFS_IS_REALTIME_INODE(ip));
1585        ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1586        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1587                if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1588                        return error;
1589        }
1590        rtblks = 0;
1591        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1592        for (idx = 0; idx < nextents; idx++)
1593                rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1594        *O_rtblks = (xfs_qcnt_t)rtblks;
1595        return 0;
1596}
1597
1598/*
1599 * Callback routine supplied to bulkstat(). Given an inumber, find its
1600 * dquots and update them to account for resources taken by that inode.
1601 */
1602/* ARGSUSED */
1603STATIC int
1604xfs_qm_dqusage_adjust(
1605        xfs_mount_t     *mp,            /* mount point for filesystem */
1606        xfs_ino_t       ino,            /* inode number to get data for */
1607        void            __user *buffer, /* not used */
1608        int             ubsize,         /* not used */
1609        void            *private_data,  /* not used */
1610        xfs_daddr_t     bno,            /* starting block of inode cluster */
1611        int             *ubused,        /* not used */
1612        void            *dip,           /* on-disk inode pointer (not used) */
1613        int             *res)           /* result code value */
1614{
1615        xfs_inode_t     *ip;
1616        xfs_dquot_t     *udqp, *gdqp;
1617        xfs_qcnt_t      nblks, rtblks;
1618        int             error;
1619
1620        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1621
1622        /*
1623         * rootino must have its resources accounted for; the quota inodes,
1624         * however, must not, so skip them here.
1625         */
1626        if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1627                *res = BULKSTAT_RV_NOTHING;
1628                return XFS_ERROR(EINVAL);
1629        }
1630
1631        /*
1632         * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1633         * interface expects the inode to be exclusively locked because that's
1634         * the case in all other instances. It's OK that we do this because
1635         * quotacheck is done only at mount time.
1636         */
1637        if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1638                *res = BULKSTAT_RV_NOTHING;
1639                return error;
1640        }
1641
1642        /*
1643         * Obtain the locked dquots. In case of an error (e.g. an
1644         * allocation fails with ENOSPC), we return the error to
1645         * bulkstat, so that it can be propagated to quotacheck(),
1646         * which then disables quotas for the file system.
1647         */
1648        if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1649                xfs_iput(ip, XFS_ILOCK_EXCL);
1650                *res = BULKSTAT_RV_GIVEUP;
1651                return error;
1652        }
1653
1654        rtblks = 0;
1655        if (! XFS_IS_REALTIME_INODE(ip)) {
1656                nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1657        } else {
1658                /*
1659                 * Walk through the extent list and count the realtime blocks.
1660                 */
1661                if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1662                        xfs_iput(ip, XFS_ILOCK_EXCL);
1663                        if (udqp)
1664                                xfs_qm_dqput(udqp);
1665                        if (gdqp)
1666                                xfs_qm_dqput(gdqp);
1667                        *res = BULKSTAT_RV_GIVEUP;
1668                        return error;
1669                }
1670                nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1671        }
1672        ASSERT(ip->i_delayed_blks == 0);
1673
1674        /*
1675         * We can't release the inode while holding its dquot locks.
1676         * The inode can go inactive and might then try to acquire
1677         * the dquot locks. So, just unlock here and IRELE at the end.
1678         */
1679        xfs_iunlock(ip, XFS_ILOCK_EXCL);
1680
1681        /*
1682         * Add the (disk blocks and inode) resources occupied by this
1683         * inode to its dquots. We do this adjustment in the incore dquot,
1684         * and also copy the changes to its buffer.
1685         * We don't care about putting these changes in a transaction
1686         * envelope because if we crash in the middle of a 'quotacheck'
1687         * we have to start from the beginning anyway.
1688         * Once we're done, we'll log all the dquot bufs.
1689         *
1690         * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1691         * and quotaoffs don't race. (Quotachecks happen at mount time only).
1692         */
1693        if (XFS_IS_UQUOTA_ON(mp)) {
1694                ASSERT(udqp);
1695                xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1696                xfs_qm_dqput(udqp);
1697        }
1698        if (XFS_IS_OQUOTA_ON(mp)) {
1699                ASSERT(gdqp);
1700                xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1701                xfs_qm_dqput(gdqp);
1702        }
1703        /*
1704         * Now release the inode. This will send it to 'inactive', and
1705         * possibly even free blocks.
1706         */
1707        IRELE(ip);
1708
1709        /*
1710         * Go on to the next inode.
1711         */
1712        *res = BULKSTAT_RV_DIDONE;
1713        return 0;
1714}
1715
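/*
 * Editor's sketch: the bulkstat callback contract used above, reduced
 * to a toy. The walker hands every inode number to the callback; the
 * callback reports what happened through *res, and only GIVEUP aborts
 * the whole scan -- NOTHING merely skips the inode (as the quota
 * inodes are skipped above). All names here are hypothetical.
 */
enum { RV_SKETCH_NOTHING, RV_SKETCH_DIDONE, RV_SKETCH_GIVEUP };

static int walk_inodes_sketch(
        unsigned long long nino,
        int (*adjust)(unsigned long long ino, int *res))
{
        unsigned long long      ino;
        int                     res = RV_SKETCH_NOTHING, error = 0;

        for (ino = 0; ino < nino; ino++) {
                error = adjust(ino, &res);
                if (res == RV_SKETCH_GIVEUP)
                        break;          /* unrecoverable: stop the scan */
                error = 0;              /* NOTHING/DIDONE: keep going */
        }
        return error;
}
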
1716/*
1717 * Walk through all the filesystem inodes and construct a consistent view
1718 * of the disk quota world. If the quotacheck fails, disable quotas.
1719 */
1720int
1721xfs_qm_quotacheck(
1722        xfs_mount_t     *mp)
1723{
1724        int             done, count, error;
1725        xfs_ino_t       lastino;
1726        size_t          structsz;
1727        xfs_inode_t     *uip, *gip;
1728        uint            flags;
1729
1730        count = INT_MAX;
1731        structsz = 1;
1732        lastino = 0;
1733        flags = 0;
1734
1735        ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1736        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1737
1738        /*
1739         * There should be no cached dquots: the (simplistic) quotacheck
1740         * algorithm assumes it is starting from a clean slate.
1741         */
1742        ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1743
1744        cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1745
1746        /*
1747         * First we go through all the dquots on disk, USR and GRP/PRJ, and
1748         * reset their counters to zero. We need a clean slate.
1749         * We don't log our changes until later.
1750         */
1751        if ((uip = XFS_QI_UQIP(mp))) {
1752                if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1753                        goto error_return;
1754                flags |= XFS_UQUOTA_CHKD;
1755        }
1756
1757        if ((gip = XFS_QI_GQIP(mp))) {
1758                if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1759                                        XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1760                        goto error_return;
1761                flags |= XFS_OQUOTA_CHKD;
1762        }
1763
1764        do {
1765                /*
1766                 * Iterate through all the inodes in the file system,
1767                 * adjusting the corresponding dquot counters in core.
1768                 */
1769                if ((error = xfs_bulkstat(mp, &lastino, &count,
1770                                     xfs_qm_dqusage_adjust, NULL,
1771                                     structsz, NULL, BULKSTAT_FG_IGET, &done)))
1772                        break;
1773
1774        } while (! done);
1775
1776        /*
1777         * We've made all the changes that we need to make incore.
1778         * Flush them down to disk buffers if everything was updated
1779         * successfully.
1780         */
1781        if (!error)
1782                error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1783
1784        /*
1785         * We can get this error if we couldn't do a dquot allocation inside
1786         * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1787         * dirty dquots that might be cached, we just want to get rid of them
1788         * and turn quotaoff. The dquots won't be attached to any of the inodes
1789         * at this point (because we intentionally didn't in dqget_noattach).
1790         */
1791        if (error) {
1792                xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1793                goto error_return;
1794        }
1795
1796        /*
1797         * We didn't log anything, because if we crashed, we'll have to
1798         * start the quotacheck from scratch anyway. However, we must make
1799         * sure that our dquot changes are secure before we put the
1800         * quotacheck'd stamp on the superblock. So, here we do a synchronous
1801         * flush.
1802         */
1803        XFS_bflush(mp->m_ddev_targp);
1804
1805        /*
1806         * If one type of quota is off, then it loses its
1807         * quotachecked status, since we won't be doing accounting for
1808         * that type anymore.
1809         */
1810        mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1811        mp->m_qflags |= flags;
1812
1813        XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1814
1815 error_return:
1816        if (error) {
1817                cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1818                        "Disabling quotas.",
1819                        mp->m_fsname, error);
1820                /*
1821                 * We must turn off quotas.
1822                 */
1823                ASSERT(mp->m_quotainfo != NULL);
1824                ASSERT(xfs_Gqm != NULL);
1825                xfs_qm_destroy_quotainfo(mp);
1826                if (xfs_mount_reset_sbqflags(mp)) {
1827                        cmn_err(CE_WARN, "XFS quotacheck %s: "
1828                                "Failed to reset quota flags.", mp->m_fsname);
1829                }
1830        } else {
1831                cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1832        }
1833        return error;
1834}
1835
1836/*
1837 * This is called after the superblock has been read in and we're ready to
1838 * iget the quota inodes.
1839 */
1840STATIC int
1841xfs_qm_init_quotainos(
1842        xfs_mount_t     *mp)
1843{
1844        xfs_inode_t     *uip, *gip;
1845        int             error;
1846        __int64_t       sbflags;
1847        uint            flags;
1848
1849        ASSERT(mp->m_quotainfo);
1850        uip = gip = NULL;
1851        sbflags = 0;
1852        flags = 0;
1853
1854        /*
1855         * Get the uquota and gquota inodes
1856         */
1857        if (xfs_sb_version_hasquota(&mp->m_sb)) {
1858                if (XFS_IS_UQUOTA_ON(mp) &&
1859                    mp->m_sb.sb_uquotino != NULLFSINO) {
1860                        ASSERT(mp->m_sb.sb_uquotino > 0);
1861                        if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1862                                             0, 0, &uip, 0)))
1863                                return XFS_ERROR(error);
1864                }
1865                if (XFS_IS_OQUOTA_ON(mp) &&
1866                    mp->m_sb.sb_gquotino != NULLFSINO) {
1867                        ASSERT(mp->m_sb.sb_gquotino > 0);
1868                        if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1869                                             0, 0, &gip, 0))) {
1870                                if (uip)
1871                                        IRELE(uip);
1872                                return XFS_ERROR(error);
1873                        }
1874                }
1875        } else {
1876                flags |= XFS_QMOPT_SBVERSION;
1877                sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1878                            XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1879        }
1880
1881        /*
1882         * Create the two inodes, if they don't exist already. The changes
1883         * made above will get added to a transaction and logged in one of
1884         * the qino_alloc calls below.  If the device is readonly,
1885         * temporarily switch to read-write to do this.
1886         */
1887        if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1888                if ((error = xfs_qm_qino_alloc(mp, &uip,
1889                                              sbflags | XFS_SB_UQUOTINO,
1890                                              flags | XFS_QMOPT_UQUOTA)))
1891                        return XFS_ERROR(error);
1892
1893                flags &= ~XFS_QMOPT_SBVERSION;
1894        }
1895        if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1896                flags |= (XFS_IS_GQUOTA_ON(mp) ?
1897                                XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1898                error = xfs_qm_qino_alloc(mp, &gip,
1899                                          sbflags | XFS_SB_GQUOTINO, flags);
1900                if (error) {
1901                        if (uip)
1902                                IRELE(uip);
1903
1904                        return XFS_ERROR(error);
1905                }
1906        }
1907
1908        XFS_QI_UQIP(mp) = uip;
1909        XFS_QI_GQIP(mp) = gip;
1910
1911        return 0;
1912}
1913
1914
1915/*
1916 * Traverse the freelist of dquots and attempt to reclaim a maximum of
1917 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1918 * favor the lookup function ...
1919 * XXXsup merge this with qm_reclaim_one().
1920 */
1921STATIC int
1922xfs_qm_shake_freelist(
1923        int howmany)
1924{
1925        int             nreclaimed;
1926        xfs_dqhash_t    *hash;
1927        xfs_dquot_t     *dqp, *nextdqp;
1928        int             restarts;
1929        int             nflushes;
1930
1931        if (howmany <= 0)
1932                return 0;
1933
1934        nreclaimed = 0;
1935        restarts = 0;
1936        nflushes = 0;
1937
1938#ifdef QUOTADEBUG
1939        cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
1940#endif
1941        /* lock order is: hashchainlock, freelistlock, mplistlock */
1942 tryagain:
1943        xfs_qm_freelist_lock(xfs_Gqm);
1944
1945        for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
1946             ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
1947              nreclaimed < howmany); ) {
1948                xfs_dqlock(dqp);
1949
1950                /*
1951                 * We are racing with dqlookup here. Naturally we don't
1952                 * want to reclaim a dquot that lookup wants.
1953                 */
1954                if (dqp->dq_flags & XFS_DQ_WANT) {
1955                        xfs_dqunlock(dqp);
1956                        xfs_qm_freelist_unlock(xfs_Gqm);
1957                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1958                                return nreclaimed;
1959                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1960                        goto tryagain;
1961                }
1962
1963                /*
1964                 * If the dquot is inactive, we are assured that it is
1965                 * not on the mplist or the hashlist, and that makes our
1966                 * life easier.
1967                 */
1968                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1969                        ASSERT(dqp->q_mount == NULL);
1970                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1971                        ASSERT(dqp->HL_PREVP == NULL);
1972                        ASSERT(dqp->MPL_PREVP == NULL);
1973                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1974                        nextdqp = dqp->dq_flnext;
1975                        goto off_freelist;
1976                }
1977
1978                ASSERT(dqp->MPL_PREVP);
1979                /*
1980                 * Try to grab the flush lock. If this dquot is in the process of
1981                 * getting flushed to disk, we don't want to reclaim it.
1982                 */
1983                if (!xfs_dqflock_nowait(dqp)) {
1984                        xfs_dqunlock(dqp);
1985                        dqp = dqp->dq_flnext;
1986                        continue;
1987                }
1988
1989                /*
1990                 * We have the flush lock so we know that this is not in the
1991                 * process of being flushed. So, if this is dirty, flush it
1992                 * DELWRI so that we don't get a freelist infested with
1993                 * dirty dquots.
1994                 */
1995                if (XFS_DQ_IS_DIRTY(dqp)) {
1996                        int     error;
1997                        xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
1998                        /*
1999                         * We flush it as a delayed write, so don't bother
2000                         * releasing the mplock.
2001                         */
2002                        error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2003                        if (error) {
2004                                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2005                        "xfs_qm_shake_freelist: dquot %p flush failed", dqp);
2006                        }
2007                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2008                        dqp = dqp->dq_flnext;
2009                        continue;
2010                }
2011                /*
2012                 * We're trying to get the hashlock out of order. This races
2013                 * with dqlookup; so, we give up and move on to the next dquot
2014                 * if we can't get the hashlock. This way, we won't starve
2015                 * a dqlookup process that holds the hashlock while it is
2016                 * waiting for the freelist lock.
2017                 */
2018                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2019                        xfs_dqfunlock(dqp);
2020                        xfs_dqunlock(dqp);
2021                        dqp = dqp->dq_flnext;
2022                        continue;
2023                }
2024                /*
2025                 * This races with dquot allocation code as well as dqflush_all
2026                 * and reclaim code. So, if we fail to grab the mplist lock,
2027                 * give up everything and start over.
2028                 */
2029                hash = dqp->q_hash;
2030                ASSERT(hash);
2031                if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2032                        /* XXX put a sentinel so that we can come back here */
2033                        xfs_dqfunlock(dqp);
2034                        xfs_dqunlock(dqp);
2035                        mutex_unlock(&hash->qh_lock);
2036                        xfs_qm_freelist_unlock(xfs_Gqm);
2037                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2038                                return nreclaimed;
2039                        goto tryagain;
2040                }
2041                xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
2042#ifdef QUOTADEBUG
2043                cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2044                        dqp, be32_to_cpu(dqp->q_core.d_id));
2045#endif
2046                ASSERT(dqp->q_nrefs == 0);
2047                nextdqp = dqp->dq_flnext;
2048                XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2049                XQM_HASHLIST_REMOVE(hash, dqp);
2050                xfs_dqfunlock(dqp);
2051                xfs_qm_mplist_unlock(dqp->q_mount);
2052                mutex_unlock(&hash->qh_lock);
2053
2054 off_freelist:
2055                XQM_FREELIST_REMOVE(dqp);
2056                xfs_dqunlock(dqp);
2057                nreclaimed++;
2058                XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2059                xfs_qm_dqdestroy(dqp);
2060                dqp = nextdqp;
2061        }
2062        xfs_qm_freelist_unlock(xfs_Gqm);
2063        return nreclaimed;
2064}
2065
2066
2067/*
2068 * The kmem_shake interface is invoked when memory is running low.
2069 */
2070/* ARGSUSED */
2071STATIC int
2072xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2073{
2074        int     ndqused, nfree, n;
2075
2076        if (!kmem_shake_allow(gfp_mask))
2077                return 0;
2078        if (!xfs_Gqm)
2079                return 0;
2080
2081        nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2082        /* incore dquots in all f/s's */
2083        ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2084
2085        ASSERT(ndqused >= 0);
2086
2087        if (nfree <= ndqused && nfree < ndquot)
2088                return 0;
2089
2090        ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2091        n = nfree - ndqused - ndquot;           /* # over target */
2092
2093        return xfs_qm_shake_freelist(MAX(nfree, n));
2094}
2095
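/*
 * Editor's sketch: the shaker's sizing arithmetic in isolation. With a
 * free ratio R, the manager aims to keep roughly R free dquots per
 * dquot in use, plus the global ndquot floor; only the surplus beyond
 * that target is eligible for reclaim. Names and values are made up.
 */
static int shake_surplus_sketch(int total, int nfree, int ratio, int floor)
{
        int ndqused = total - nfree;            /* dquots in active use */

        if (nfree <= ndqused && nfree < floor)
                return 0;                       /* freelist is lean: keep it */
        return nfree - ndqused * ratio - floor; /* # over target */
}
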
2096
2097/*
2098 * Just pop the least recently used dquot off the freelist and
2099 * recycle it. The returned dquot is locked.
2100 */
2101STATIC xfs_dquot_t *
2102xfs_qm_dqreclaim_one(void)
2103{
2104        xfs_dquot_t     *dqpout;
2105        xfs_dquot_t     *dqp;
2106        int             restarts;
2107        int             nflushes;
2108
2109        restarts = 0;
2110        dqpout = NULL;
2111        nflushes = 0;
2112
2113        /* lock order: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2114 startagain:
2115        xfs_qm_freelist_lock(xfs_Gqm);
2116
2117        FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2118                xfs_dqlock(dqp);
2119
2120                /*
2121                 * We are racing with dqlookup here. Naturally we don't
2122                 * want to reclaim a dquot that lookup wants. We release the
2123                 * freelist lock and start over, so that lookup will grab
2124                 * both the dquot and the freelist lock.
2125                 */
2126                if (dqp->dq_flags & XFS_DQ_WANT) {
2127                        ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2128                        xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
2129                        xfs_dqunlock(dqp);
2130                        xfs_qm_freelist_unlock(xfs_Gqm);
2131                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2132                                return NULL;
2133                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2134                        goto startagain;
2135                }
2136
2137                /*
2138                 * If the dquot is inactive, we are assured that it is
2139                 * not on the mplist or the hashlist, and that makes our
2140                 * life easier.
2141                 */
2142                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2143                        ASSERT(dqp->q_mount == NULL);
2144                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2145                        ASSERT(dqp->HL_PREVP == NULL);
2146                        ASSERT(dqp->MPL_PREVP == NULL);
2147                        XQM_FREELIST_REMOVE(dqp);
2148                        xfs_dqunlock(dqp);
2149                        dqpout = dqp;
2150                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2151                        break;
2152                }
2153
2154                ASSERT(dqp->q_hash);
2155                ASSERT(dqp->MPL_PREVP);
2156
2157                /*
2158                 * Try to grab the flush lock. If this dquot is in the process of
2159                 * getting flushed to disk, we don't want to reclaim it.
2160                 */
2161                if (!xfs_dqflock_nowait(dqp)) {
2162                        xfs_dqunlock(dqp);
2163                        continue;
2164                }
2165
2166                /*
2167                 * We have the flush lock so we know that this is not in the
2168                 * process of being flushed. So, if this is dirty, flush it
2169                 * DELWRI so that we don't get a freelist infested with
2170                 * dirty dquots.
2171                 */
2172                if (XFS_DQ_IS_DIRTY(dqp)) {
2173                        int     error;
2174                        xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2175                        /*
2176                         * We flush it as a delayed write, so don't bother
2177                         * releasing the freelist lock.
2178                         */
2179                        error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2180                        if (error) {
2181                                xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2182                        "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2183                        }
2184                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2185                        continue;
2186                }
2187
2188                if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2189                        xfs_dqfunlock(dqp);
2190                        xfs_dqunlock(dqp);
2191                        continue;
2192                }
2193
2194                if (!mutex_trylock(&dqp->q_hash->qh_lock))
2195                        goto mplistunlock;
2196
2197                ASSERT(dqp->q_nrefs == 0);
2198                xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2199                XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2200                XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2201                XQM_FREELIST_REMOVE(dqp);
2202                dqpout = dqp;
2203                mutex_unlock(&dqp->q_hash->qh_lock);
2204 mplistunlock:
2205                xfs_qm_mplist_unlock(dqp->q_mount);
2206                xfs_dqfunlock(dqp);
2207                xfs_dqunlock(dqp);
2208                if (dqpout)
2209                        break;
2210        }
2211
2212        xfs_qm_freelist_unlock(xfs_Gqm);
2213        return dqpout;
2214}
2215
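/*
 * Editor's sketch: the trylock-and-back-off idiom both reclaim paths
 * above rely on. When a lock must be taken against the documented
 * ordering, it is only ever trylocked; on failure everything is
 * dropped and the walk restarts, rather than risking an ABBA
 * deadlock. POSIX mutexes stand in for the XFS locks (an assumption).
 */
#include <pthread.h>
#include <sched.h>

static void lock_out_of_order_sketch(pthread_mutex_t *held_rank,
                                     pthread_mutex_t *higher_rank)
{
        for (;;) {
                pthread_mutex_lock(held_rank);
                /* higher_rank outranks held_rank: trylock only */
                if (pthread_mutex_trylock(higher_rank) == 0)
                        return;         /* got both; caller unlocks */
                pthread_mutex_unlock(held_rank);
                sched_yield();          /* back off before retrying */
        }
}
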
2216
2217/*------------------------------------------------------------------*/
2218
2219/*
2220 * Return a new incore dquot. Depending on the number of
2221 * dquots in the system, we either allocate a new one on the kernel heap,
2222 * or reclaim a free one.
2223 * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2224 * to reclaim an existing one from the freelist.
2225 */
2226boolean_t
2227xfs_qm_dqalloc_incore(
2228        xfs_dquot_t **O_dqpp)
2229{
2230        xfs_dquot_t     *dqp;
2231
2232        /*
2233         * Check against high water mark to see if we want to pop
2234         * a nincompoop dquot off the freelist.
2235         */
2236        if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2237                /*
2238                 * Try to recycle a dquot from the freelist.
2239                 */
2240                if ((dqp = xfs_qm_dqreclaim_one())) {
2241                        XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2242                        /*
2243                         * Just zero the core here. The rest will get
2244                         * reinitialized by caller. XXX we shouldn't even
2245                         * do this zero ...
2246                         */
2247                        memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2248                        *O_dqpp = dqp;
2249                        return B_FALSE;
2250                }
2251                XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2252        }
2253
2254        /*
2255         * Allocate a brand new dquot on the kernel heap and return it
2256         * to the caller to initialize.
2257         */
2258        ASSERT(xfs_Gqm->qm_dqzone != NULL);
2259        *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2260        atomic_inc(&xfs_Gqm->qm_totaldquots);
2261
2262        return B_TRUE;
2263}
2264
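/*
 * Editor's sketch: the recycle-before-allocate policy above, reduced
 * to its skeleton. Past a high-water mark we prefer recycling an
 * object off a free list to growing the heap; only when the freelist
 * yields nothing do we allocate. Everything here is hypothetical.
 */
#include <stdlib.h>

struct obj_sketch {
        struct obj_sketch *next;
};

static struct obj_sketch *freelist_sketch;

static struct obj_sketch *get_obj_sketch(int total, int highwater)
{
        if (total >= highwater && freelist_sketch) {
                struct obj_sketch *o = freelist_sketch;

                freelist_sketch = o->next;      /* recycled (B_FALSE path) */
                return o;
        }
        /* fresh allocation (B_TRUE path) */
        return calloc(1, sizeof(struct obj_sketch));
}
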
2265
2266/*
2267 * Start a transaction and write the incore superblock changes to
2268 * disk. The flags parameter indicates which fields have changed.
2269 */
2270int
2271xfs_qm_write_sb_changes(
2272        xfs_mount_t     *mp,
2273        __int64_t       flags)
2274{
2275        xfs_trans_t     *tp;
2276        int             error;
2277
2278#ifdef QUOTADEBUG
2279        cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2280#endif
2281        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2282        if ((error = xfs_trans_reserve(tp, 0,
2283                                      mp->m_sb.sb_sectsize + 128, 0,
2284                                      0,
2285                                      XFS_DEFAULT_LOG_COUNT))) {
2286                xfs_trans_cancel(tp, 0);
2287                return error;
2288        }
2289
2290        xfs_mod_sb(tp, flags);
2291        error = xfs_trans_commit(tp, 0);
2292
2293        return error;
2294}
2295
2296
2297/* --------------- utility functions for vnodeops ---------------- */
2298
2299
2300/*
2301 * Given an inode and a uid and gid (from cred_t), make sure that we have
2302 * allocated the relevant dquot(s) on disk, and that we won't exceed inode
2303 * quotas by creating this file.
2304 * This also attaches dquot(s) to the given inode after locking it,
2305 * and returns the dquots corresponding to the uid and/or gid.
2306 *
2307 * in   : inode (unlocked)
2308 * out  : udquot, gdquot with references taken and unlocked
2309 */
2310int
2311xfs_qm_vop_dqalloc(
2312        struct xfs_inode        *ip,
2313        uid_t                   uid,
2314        gid_t                   gid,
2315        prid_t                  prid,
2316        uint                    flags,
2317        struct xfs_dquot        **O_udqpp,
2318        struct xfs_dquot        **O_gdqpp)
2319{
2320        struct xfs_mount        *mp = ip->i_mount;
2321        struct xfs_dquot        *uq, *gq;
2322        int                     error;
2323        uint                    lockflags;
2324
2325        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2326                return 0;
2327
2328        lockflags = XFS_ILOCK_EXCL;
2329        xfs_ilock(ip, lockflags);
2330
2331        if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2332                gid = ip->i_d.di_gid;
2333
2334        /*
2335         * Attach the dquot(s) to this inode, doing a dquot allocation
2336         * if necessary. The dquot(s) will not be locked.
2337         */
2338        if (XFS_NOT_DQATTACHED(mp, ip)) {
2339                error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
2340                if (error) {
2341                        xfs_iunlock(ip, lockflags);
2342                        return error;
2343                }
2344        }
2345
2346        uq = gq = NULL;
2347        if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2348                if (ip->i_d.di_uid != uid) {
2349                        /*
2350                         * What we need is the dquot that has this uid;
2351                         * if we send the inode to dqget, the uid of the
2352                         * inode takes priority over the uid argument.
2353                         * Since we're not sending the inode, we must
2354                         * unlock it here before calling dqget, because
2355                         * otherwise we'd deadlock by doing a trans_reserve
2356                         * while holding the ilock.
2357                         */
2358                        xfs_iunlock(ip, lockflags);
2359                        if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2360                                                 XFS_DQ_USER,
2361                                                 XFS_QMOPT_DQALLOC |
2362                                                 XFS_QMOPT_DOWARN,
2363                                                 &uq))) {
2364                                ASSERT(error != ENOENT);
2365                                return error;
2366                        }
2367                        /*
2368                         * Get the ilock in the right order.
2369                         */
2370                        xfs_dqunlock(uq);
2371                        lockflags = XFS_ILOCK_SHARED;
2372                        xfs_ilock(ip, lockflags);
2373                } else {
2374                        /*
2375                         * Take an extra reference, because we'll return
2376                         * this to the caller.
2377                         */
2378                        ASSERT(ip->i_udquot);
2379                        uq = ip->i_udquot;
2380                        xfs_dqlock(uq);
2381                        XFS_DQHOLD(uq);
2382                        xfs_dqunlock(uq);
2383                }
2384        }
2385        if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2386                if (ip->i_d.di_gid != gid) {
2387                        xfs_iunlock(ip, lockflags);
2388                        if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2389                                                 XFS_DQ_GROUP,
2390                                                 XFS_QMOPT_DQALLOC |
2391                                                 XFS_QMOPT_DOWARN,
2392                                                 &gq))) {
2393                                if (uq)
2394                                        xfs_qm_dqrele(uq);
2395                                ASSERT(error != ENOENT);
2396                                return error;
2397                        }
2398                        xfs_dqunlock(gq);
2399                        lockflags = XFS_ILOCK_SHARED;
2400                        xfs_ilock(ip, lockflags);
2401                } else {
2402                        ASSERT(ip->i_gdquot);
2403                        gq = ip->i_gdquot;
2404                        xfs_dqlock(gq);
2405                        XFS_DQHOLD(gq);
2406                        xfs_dqunlock(gq);
2407                }
2408        } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2409                if (ip->i_d.di_projid != prid) {
2410                        xfs_iunlock(ip, lockflags);
2411                        if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2412                                                 XFS_DQ_PROJ,
2413                                                 XFS_QMOPT_DQALLOC |
2414                                                 XFS_QMOPT_DOWARN,
2415                                                 &gq))) {
2416                                if (uq)
2417                                        xfs_qm_dqrele(uq);
2418                                ASSERT(error != ENOENT);
2419                                return error;
2420                        }
2421                        xfs_dqunlock(gq);
2422                        lockflags = XFS_ILOCK_SHARED;
2423                        xfs_ilock(ip, lockflags);
2424                } else {
2425                        ASSERT(ip->i_gdquot);
2426                        gq = ip->i_gdquot;
2427                        xfs_dqlock(gq);
2428                        XFS_DQHOLD(gq);
2429                        xfs_dqunlock(gq);
2430                }
2431        }
2432        if (uq)
2433                xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2434
2435        xfs_iunlock(ip, lockflags);
2436        if (O_udqpp)
2437                *O_udqpp = uq;
2438        else if (uq)
2439                xfs_qm_dqrele(uq);
2440        if (O_gdqpp)
2441                *O_gdqpp = gq;
2442        else if (gq)
2443                xfs_qm_dqrele(gq);
2444        return 0;
2445}
2446
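/*
 * Editor's sketch: the lock juggling above, in miniature. dqget() may
 * need to start a transaction, which must never happen while the
 * inode lock is held, so the exclusive ilock is dropped around the
 * call and re-taken in shared mode afterwards. A POSIX rwlock stands
 * in for the XFS ilock; all names are hypothetical.
 */
#include <pthread.h>

/* caller enters holding 'ilock' exclusively; returns holding it shared */
static int dqget_drop_ilock_sketch(pthread_rwlock_t *ilock,
                                   int (*dqget)(void))
{
        int error;

        pthread_rwlock_unlock(ilock);   /* no trans_reserve under ilock */
        error = dqget();                /* may block reserving log space */
        pthread_rwlock_rdlock(ilock);   /* shared is enough from here on */
        return error;
}
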
2447/*
2448 * Actually transfer ownership, and do dquot modifications.
2449 * These were already reserved.
2450 */
2451xfs_dquot_t *
2452xfs_qm_vop_chown(
2453        xfs_trans_t     *tp,
2454        xfs_inode_t     *ip,
2455        xfs_dquot_t     **IO_olddq,
2456        xfs_dquot_t     *newdq)
2457{
2458        xfs_dquot_t     *prevdq;
2459        uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2460                                 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2461
2462
2463        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2464        ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2465
2466        /* old dquot */
2467        prevdq = *IO_olddq;
2468        ASSERT(prevdq);
2469        ASSERT(prevdq != newdq);
2470
2471        xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2472        xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2473
2474        /* the sparkling new dquot */
2475        xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2476        xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2477
2478        /*
2479         * Take an extra reference, because the inode
2480         * is going to keep this dquot pointer even
2481         * after the trans_commit.
2482         */
2483        xfs_dqlock(newdq);
2484        XFS_DQHOLD(newdq);
2485        xfs_dqunlock(newdq);
2486        *IO_olddq = newdq;
2487
2488        return prevdq;
2489}
2490
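/*
 * Editor's sketch: the ownership transfer above as bare arithmetic.
 * Moving an inode between owners debits the old dquot and credits the
 * new one by exactly the inode's usage, so the filesystem-wide totals
 * are conserved. The struct is a made-up stand-in for the dquot
 * counters modified via xfs_trans_mod_dquot().
 */
struct qcounts_sketch {
        long long bcount;       /* blocks charged to this owner */
        long long icount;       /* inodes charged to this owner */
};

static void chown_transfer_sketch(struct qcounts_sketch *olddq,
                                  struct qcounts_sketch *newdq,
                                  long long nblocks)
{
        olddq->bcount -= nblocks;
        olddq->icount -= 1;
        newdq->bcount += nblocks;
        newdq->icount += 1;
}
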
2491/*
2492 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2493 */
2494int
2495xfs_qm_vop_chown_reserve(
2496        xfs_trans_t     *tp,
2497        xfs_inode_t     *ip,
2498        xfs_dquot_t     *udqp,
2499        xfs_dquot_t     *gdqp,
2500        uint            flags)
2501{
2502        xfs_mount_t     *mp = ip->i_mount;
2503        uint            delblks, blkflags, prjflags = 0;
2504        xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2505        int             error;
2506
2507
2508        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2509        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2510
2511        delblks = ip->i_delayed_blks;
2512        delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2513        blkflags = XFS_IS_REALTIME_INODE(ip) ?
2514                        XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2515
2516        if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2517            ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2518                delblksudq = udqp;
2519                /*
2520                 * If there are delayed allocation blocks, then we have to
2521                 * unreserve those from the old dquot, and add them to the
2522                 * new dquot.
2523                 */
2524                if (delblks) {
2525                        ASSERT(ip->i_udquot);
2526                        unresudq = ip->i_udquot;
2527                }
2528        }
2529        if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2530                if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2531                     ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2532                        prjflags = XFS_QMOPT_ENOSPC;
2533
2534                if (prjflags ||
2535                    (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2536                     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2537                        delblksgdq = gdqp;
2538                        if (delblks) {
2539                                ASSERT(ip->i_gdquot);
2540                                unresgdq = ip->i_gdquot;
2541                        }
2542                }
2543        }
2544
2545        if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2546                                delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2547                                flags | blkflags | prjflags)))
2548                return error;
2549
2550        /*
2551         * Do the delayed-block reservations/unreservations now. Since these
2552         * are done without the help of a transaction, a failed reservation
2553         * won't have its earlier reservations automatically undone by the
2554         * trans code. So, we have to undo them manually here.
2555         */
2556        if (delblks) {
2557                /*
2558                 * Do the reservations first. Unreservation can't fail.
2559                 */
2560                ASSERT(delblksudq || delblksgdq);
2561                ASSERT(unresudq || unresgdq);
2562                if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2563                                delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2564                                flags | blkflags | prjflags)))
2565                        return error;
2566                xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2567                                unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2568                                blkflags);
2569        }
2570
2571        return 0;
2572}
2573
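/*
 * Editor's sketch: the reserve-then-unreserve ordering used for the
 * delayed blocks above. Outside a transaction nothing is rolled back
 * for us, so we reserve against the new owner first and only then
 * release the old owner's reservation; if the reserve fails we return
 * with both sides untouched. reserve_sketch() is a made-up helper --
 * a negative delta models an unreservation, which cannot fail.
 */
struct resv_sketch {
        long long reserved;
        long long limit;
};

static int reserve_sketch(struct resv_sketch *r, long long delta)
{
        if (delta > 0 && r->reserved + delta > r->limit)
                return -1;              /* would overrun the quota */
        r->reserved += delta;
        return 0;
}

static int move_delalloc_sketch(struct resv_sketch *oldq,
                                struct resv_sketch *newq,
                                long long delblks)
{
        if (reserve_sketch(newq, delblks))
                return -1;              /* nothing done, nothing to undo */
        reserve_sketch(oldq, -delblks); /* unreserve: cannot fail */
        return 0;
}
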
2574int
2575xfs_qm_vop_rename_dqattach(
2576        struct xfs_inode        **i_tab)
2577{
2578        struct xfs_mount        *mp = i_tab[0]->i_mount;
2579        int                     i;
2580
2581        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2582                return 0;
2583
2584        for (i = 0; (i < 4 && i_tab[i]); i++) {
2585                struct xfs_inode        *ip = i_tab[i];
2586                int                     error;
2587
2588                /*
2589                 * Watch out for duplicate entries in the table.
2590                 */
2591                if (i == 0 || ip != i_tab[i-1]) {
2592                        if (XFS_NOT_DQATTACHED(mp, ip)) {
2593                                error = xfs_qm_dqattach(ip, 0);
2594                                if (error)
2595                                        return error;
2596                        }
2597                }
2598        }
2599        return 0;
2600}
2601
2602void
2603xfs_qm_vop_create_dqattach(
2604        struct xfs_trans        *tp,
2605        struct xfs_inode        *ip,
2606        struct xfs_dquot        *udqp,
2607        struct xfs_dquot        *gdqp)
2608{
2609        struct xfs_mount        *mp = tp->t_mountp;
2610
2611        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2612                return;
2613
2614        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2615        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2616
2617        if (udqp) {
2618                xfs_dqlock(udqp);
2619                XFS_DQHOLD(udqp);
2620                xfs_dqunlock(udqp);
2621                ASSERT(ip->i_udquot == NULL);
2622                ip->i_udquot = udqp;
2623                ASSERT(XFS_IS_UQUOTA_ON(mp));
2624                ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2625                xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2626        }
2627        if (gdqp) {
2628                xfs_dqlock(gdqp);
2629                XFS_DQHOLD(gdqp);
2630                xfs_dqunlock(gdqp);
2631                ASSERT(ip->i_gdquot == NULL);
2632                ip->i_gdquot = gdqp;
2633                ASSERT(XFS_IS_OQUOTA_ON(mp));
2634                ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2635                        ip->i_d.di_gid : ip->i_d.di_projid) ==
2636                                be32_to_cpu(gdqp->q_core.d_id));
2637                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2638        }
2639}
2640
2641/* ------------- list stuff -----------------*/
2642STATIC void
2643xfs_qm_freelist_init(xfs_frlist_t *ql)
2644{
2645        ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2646        mutex_init(&ql->qh_lock);
2647        ql->qh_version = 0;
2648        ql->qh_nelems = 0;
2649}
2650
2651STATIC void
2652xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2653{
2654        xfs_dquot_t     *dqp, *nextdqp;
2655
2656        mutex_lock(&ql->qh_lock);
2657        for (dqp = ql->qh_next;
2658             dqp != (xfs_dquot_t *)ql; ) {
2659                xfs_dqlock(dqp);
2660                nextdqp = dqp->dq_flnext;
2661#ifdef QUOTADEBUG
2662                cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2663#endif
2664                XQM_FREELIST_REMOVE(dqp);
2665                xfs_dqunlock(dqp);
2666                xfs_qm_dqdestroy(dqp);
2667                dqp = nextdqp;
2668        }
2669        mutex_unlock(&ql->qh_lock);
2670        mutex_destroy(&ql->qh_lock);
2671
2672        ASSERT(ql->qh_nelems == 0);
2673}
2674
2675STATIC void
2676xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2677{
2678        dq->dq_flnext = ql->qh_next;
2679        dq->dq_flprev = (xfs_dquot_t *)ql;
2680        ql->qh_next = dq;
2681        dq->dq_flnext->dq_flprev = dq;
2682        xfs_Gqm->qm_dqfreelist.qh_nelems++;
2683        xfs_Gqm->qm_dqfreelist.qh_version++;
2684}
2685
2686void
2687xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2688{
2689        xfs_dquot_t *next = dq->dq_flnext;
2690        xfs_dquot_t *prev = dq->dq_flprev;
2691
2692        next->dq_flprev = prev;
2693        prev->dq_flnext = next;
2694        dq->dq_flnext = dq->dq_flprev = dq;
2695        xfs_Gqm->qm_dqfreelist.qh_nelems--;
2696        xfs_Gqm->qm_dqfreelist.qh_version++;
2697}
2698
2699void
2700xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2701{
2702        xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2703}
2704
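/*
 * Editor's sketch: the freelist above is a circular doubly-linked list
 * whose head is a sentinel -- the head is cast to a dquot and linked
 * exactly like an element, which is why insert/unlink need no NULL or
 * empty-list special cases. A minimal self-contained rendition:
 */
struct fl_node_sketch {
        struct fl_node_sketch *next, *prev;
};

static void fl_init_sketch(struct fl_node_sketch *head)
{
        head->next = head->prev = head;         /* empty list: self-linked */
}

static void fl_insert_sketch(struct fl_node_sketch *after,
                             struct fl_node_sketch *n)
{
        n->next = after->next;
        n->prev = after;
        after->next->prev = n;
        after->next = n;
}

static void fl_unlink_sketch(struct fl_node_sketch *n)
{
        n->next->prev = n->prev;
        n->prev->next = n->next;
        n->next = n->prev = n;                  /* mark detached, as above */
}

/* append == insert after the last element, i.e. after head->prev */
static void fl_append_sketch(struct fl_node_sketch *head,
                             struct fl_node_sketch *n)
{
        fl_insert_sketch(head->prev, n);
}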