linux/fs/xfs/libxfs/xfs_ag_resv.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2016 Oracle.  All Rights Reserved.
   3 *
   4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version 2
   9 * of the License, or (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it would be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write the Free Software Foundation,
  18 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19 */
  20#include "xfs.h"
  21#include "xfs_fs.h"
  22#include "xfs_shared.h"
  23#include "xfs_format.h"
  24#include "xfs_log_format.h"
  25#include "xfs_trans_resv.h"
  26#include "xfs_sb.h"
  27#include "xfs_mount.h"
  28#include "xfs_defer.h"
  29#include "xfs_alloc.h"
  30#include "xfs_error.h"
  31#include "xfs_trace.h"
  32#include "xfs_cksum.h"
  33#include "xfs_trans.h"
  34#include "xfs_bit.h"
  35#include "xfs_bmap.h"
  36#include "xfs_bmap_btree.h"
  37#include "xfs_ag_resv.h"
  38#include "xfs_trans_space.h"
  39#include "xfs_rmap_btree.h"
  40#include "xfs_btree.h"
  41#include "xfs_refcount_btree.h"
  42
  43/*
  44 * Per-AG Block Reservations
  45 *
  46 * For some kinds of allocation group metadata structures, it is advantageous
  47 * to reserve a small number of blocks in each AG so that future expansions of
  48 * that data structure do not encounter ENOSPC because errors during a btree
  49 * split cause the filesystem to go offline.
  50 *
  51 * Prior to the introduction of reflink, this wasn't an issue because the free
  52 * space btrees maintain a reserve of space (the AGFL) to handle any expansion
  53 * that may be necessary; and allocations of other metadata (inodes, BMBT,
  54 * dir/attr) aren't restricted to a single AG.  However, with reflink it is
  55 * possible to allocate all the space in an AG, have subsequent reflink/CoW
  56 * activity expand the refcount btree, and discover that there's no space left
  57 * to handle that expansion.  Since we can calculate the maximum size of the
  58 * refcount btree, we can reserve space for it and avoid ENOSPC.
  59 *
 * Handling per-AG reservations consists of four changes to the allocator's
  61 * behavior:  First, because these reservations are always needed, we decrease
  62 * the ag_max_usable counter to reflect the size of the AG after the reserved
  63 * blocks are taken.  Second, the reservations must be reflected in the
  64 * fdblocks count to maintain proper accounting.  Third, each AG must maintain
  65 * its own reserved block counter so that we can calculate the amount of space
  66 * that must remain free to maintain the reservations.  Fourth, the "remaining
  67 * reserved blocks" count must be used when calculating the length of the
  68 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
  69 * functions.  In other words, we maintain a virtual allocation via in-core
  70 * accounting tricks so that we don't have to clean up after a crash. :)
  71 *
  72 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
  73 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
  74 * function.  It might seem a little funny to maintain a reservoir of blocks
  75 * to feed another reservoir, but the AGFL only holds enough blocks to get
  76 * through the next transaction.  The per-AG reservation is to ensure (we
  77 * hope) that each AG never runs out of blocks.  Each data structure wanting
  78 * to use the reservation system should update ask/used in xfs_ag_resv_init.
  79 */
  80
  81/*
  82 * Are we critically low on blocks?  For now we'll define that as the number
  83 * of blocks we can get our hands on being less than 10% of what we reserved
  84 * or less than some arbitrary number (maximum btree height).
  85 */
  86bool
  87xfs_ag_resv_critical(
  88        struct xfs_perag                *pag,
  89        enum xfs_ag_resv_type           type)
  90{
  91        xfs_extlen_t                    avail;
  92        xfs_extlen_t                    orig;
  93
  94        switch (type) {
  95        case XFS_AG_RESV_METADATA:
  96                avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
  97                orig = pag->pag_meta_resv.ar_asked;
  98                break;
  99        case XFS_AG_RESV_AGFL:
 100                avail = pag->pagf_freeblks + pag->pagf_flcount -
 101                        pag->pag_meta_resv.ar_reserved;
 102                orig = pag->pag_agfl_resv.ar_asked;
 103                break;
 104        default:
 105                ASSERT(0);
 106                return false;
 107        }
 108
 109        trace_xfs_ag_resv_critical(pag, type, avail);
 110
 111        /* Critically low if less than 10% or max btree height remains. */
 112        return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
 113                        pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL,
 114                        XFS_RANDOM_AG_RESV_CRITICAL);
 115}
 116
 117/*
 118 * How many blocks are reserved but not used, and therefore must not be
 119 * allocated away?
 120 */
 121xfs_extlen_t
 122xfs_ag_resv_needed(
 123        struct xfs_perag                *pag,
 124        enum xfs_ag_resv_type           type)
 125{
 126        xfs_extlen_t                    len;
 127
 128        len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
 129        switch (type) {
 130        case XFS_AG_RESV_METADATA:
 131        case XFS_AG_RESV_AGFL:
 132                len -= xfs_perag_resv(pag, type)->ar_reserved;
 133                break;
 134        case XFS_AG_RESV_NONE:
 135                /* empty */
 136                break;
 137        default:
 138                ASSERT(0);
 139        }
 140
 141        trace_xfs_ag_resv_needed(pag, type, len);
 142
 143        return len;
 144}
 145
 146/* Clean out a reservation */
 147static int
 148__xfs_ag_resv_free(
 149        struct xfs_perag                *pag,
 150        enum xfs_ag_resv_type           type)
 151{
 152        struct xfs_ag_resv              *resv;
 153        xfs_extlen_t                    oldresv;
 154        int                             error;
 155
 156        trace_xfs_ag_resv_free(pag, type, 0);
 157
 158        resv = xfs_perag_resv(pag, type);
 159        pag->pag_mount->m_ag_max_usable += resv->ar_asked;
 160        /*
 161         * AGFL blocks are always considered "free", so whatever
 162         * was reserved at mount time must be given back at umount.
 163         */
 164        if (type == XFS_AG_RESV_AGFL)
 165                oldresv = resv->ar_orig_reserved;
 166        else
 167                oldresv = resv->ar_reserved;
 168        error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
 169        resv->ar_reserved = 0;
 170        resv->ar_asked = 0;
 171
 172        if (error)
 173                trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
 174                                error, _RET_IP_);
 175        return error;
 176}
 177
 178/* Free a per-AG reservation. */
 179int
 180xfs_ag_resv_free(
 181        struct xfs_perag                *pag)
 182{
 183        int                             error;
 184        int                             err2;
 185
 186        error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
 187        err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
 188        if (err2 && !error)
 189                error = err2;
 190        return error;
 191}
 192
 193static int
 194__xfs_ag_resv_init(
 195        struct xfs_perag                *pag,
 196        enum xfs_ag_resv_type           type,
 197        xfs_extlen_t                    ask,
 198        xfs_extlen_t                    used)
 199{
 200        struct xfs_mount                *mp = pag->pag_mount;
 201        struct xfs_ag_resv              *resv;
 202        int                             error;
 203
 204        resv = xfs_perag_resv(pag, type);
 205        if (used > ask)
 206                ask = used;
 207        resv->ar_asked = ask;
 208        resv->ar_reserved = resv->ar_orig_reserved = ask - used;
 209        mp->m_ag_max_usable -= ask;
 210
 211        trace_xfs_ag_resv_init(pag, type, ask);
 212
 213        error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
 214        if (error)
 215                trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
 216                                error, _RET_IP_);
 217
 218        return error;
 219}
 220
 221/* Create a per-AG block reservation. */
 222int
 223xfs_ag_resv_init(
 224        struct xfs_perag                *pag)
 225{
 226        xfs_extlen_t                    ask;
 227        xfs_extlen_t                    used;
 228        int                             error = 0;
 229
 230        /* Create the metadata reservation. */
 231        if (pag->pag_meta_resv.ar_asked == 0) {
 232                ask = used = 0;
 233
 234                error = xfs_refcountbt_calc_reserves(pag->pag_mount,
 235                                pag->pag_agno, &ask, &used);
 236                if (error)
 237                        goto out;
 238
 239                error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
 240                                ask, used);
 241                if (error)
 242                        goto out;
 243        }
 244
 245        /* Create the AGFL metadata reservation */
 246        if (pag->pag_agfl_resv.ar_asked == 0) {
 247                ask = used = 0;
 248
 249                error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
 250                                &ask, &used);
 251                if (error)
 252                        goto out;
 253
 254                error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
 255                if (error)
 256                        goto out;
 257        }
 258
 259out:
 260        return error;
 261}
 262
 263/* Allocate a block from the reservation. */
 264void
 265xfs_ag_resv_alloc_extent(
 266        struct xfs_perag                *pag,
 267        enum xfs_ag_resv_type           type,
 268        struct xfs_alloc_arg            *args)
 269{
 270        struct xfs_ag_resv              *resv;
 271        xfs_extlen_t                    len;
 272        uint                            field;
 273
 274        trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
 275
 276        switch (type) {
 277        case XFS_AG_RESV_METADATA:
 278        case XFS_AG_RESV_AGFL:
 279                resv = xfs_perag_resv(pag, type);
 280                break;
 281        default:
 282                ASSERT(0);
 283                /* fall through */
 284        case XFS_AG_RESV_NONE:
 285                field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
 286                                       XFS_TRANS_SB_FDBLOCKS;
 287                xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
 288                return;
 289        }
 290
 291        len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
 292        resv->ar_reserved -= len;
 293        if (type == XFS_AG_RESV_AGFL)
 294                return;
 295        /* Allocations of reserved blocks only need on-disk sb updates... */
 296        xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
 297        /* ...but non-reserved blocks need in-core and on-disk updates. */
 298        if (args->len > len)
 299                xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
 300                                -((int64_t)args->len - len));
 301}
 302
 303/* Free a block to the reservation. */
 304void
 305xfs_ag_resv_free_extent(
 306        struct xfs_perag                *pag,
 307        enum xfs_ag_resv_type           type,
 308        struct xfs_trans                *tp,
 309        xfs_extlen_t                    len)
 310{
 311        xfs_extlen_t                    leftover;
 312        struct xfs_ag_resv              *resv;
 313
 314        trace_xfs_ag_resv_free_extent(pag, type, len);
 315
 316        switch (type) {
 317        case XFS_AG_RESV_METADATA:
 318        case XFS_AG_RESV_AGFL:
 319                resv = xfs_perag_resv(pag, type);
 320                break;
 321        default:
 322                ASSERT(0);
 323                /* fall through */
 324        case XFS_AG_RESV_NONE:
 325                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
 326                return;
 327        }
 328
 329        leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
 330        resv->ar_reserved += leftover;
 331        if (type == XFS_AG_RESV_AGFL)
 332                return;
 333        /* Freeing into the reserved pool only requires on-disk update... */
 334        xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
 335        /* ...but freeing beyond that requires in-core and on-disk update. */
 336        if (len > leftover)
 337                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
 338}
 339