linux/fs/xfs/xfs_trans_ail.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2008 Dave Chinner
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_trace.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_log.h"

#ifdef DEBUG
/*
 * Check that the list is sorted as it should be.
 *
 * Called with the ail lock held, but we don't want to assert fail with it
 * held otherwise we'll lock everything up and won't be able to debug the
 * cause. Hence we sample and check the state under the AIL lock and return if
 * everything is fine, otherwise we drop the lock and run the ASSERT checks.
 * Asserts may not be fatal, so pick the lock back up and continue onwards.
 */
STATIC void
xfs_ail_check(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
	__must_hold(&ailp->ail_lock)
{
	struct xfs_log_item	*prev_lip;
	struct xfs_log_item	*next_lip;
	xfs_lsn_t		prev_lsn = NULLCOMMITLSN;
	xfs_lsn_t		next_lsn = NULLCOMMITLSN;
	xfs_lsn_t		lsn;
	bool			in_ail;

	if (list_empty(&ailp->ail_head))
		return;

	/*
	 * Sample then check the next and previous entries are valid.
	 */
	in_ail = test_bit(XFS_LI_IN_AIL, &lip->li_flags);
	prev_lip = list_entry(lip->li_ail.prev, struct xfs_log_item, li_ail);
	if (&prev_lip->li_ail != &ailp->ail_head)
		prev_lsn = prev_lip->li_lsn;
	next_lip = list_entry(lip->li_ail.next, struct xfs_log_item, li_ail);
	if (&next_lip->li_ail != &ailp->ail_head)
		next_lsn = next_lip->li_lsn;
	lsn = lip->li_lsn;

	if (in_ail &&
	    (prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0) &&
	    (next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0))
		return;

	spin_unlock(&ailp->ail_lock);
	ASSERT(in_ail);
	ASSERT(prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0);
	ASSERT(next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0);
	spin_lock(&ailp->ail_lock);
}
#else /* !DEBUG */
#define xfs_ail_check(a,l)
#endif /* DEBUG */
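
/*
 * The pattern above generalises to any debug check that must not run its
 * assertions while holding a spinlock: sample the state into locals under
 * the lock, take the fast path out if everything is consistent, and only
 * drop the lock to run the potentially slow, possibly non-fatal ASSERTs
 * on the sampled values. A minimal sketch of the shape:
 *
 *	spin_lock(&lock);
 *	snapshot = sample_state();
 *	if (state_ok(snapshot))
 *		return;
 *	spin_unlock(&lock);
 *	ASSERT(state_ok(snapshot));
 *	spin_lock(&lock);
 *
 * On the fast path the lock stays held, matching the contract on entry;
 * the final spin_lock() restores that contract after the asserts run.
 */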

/*
 * Return a pointer to the last item in the AIL.  If the AIL is empty, then
 * return NULL.
 */
static struct xfs_log_item *
xfs_ail_max(
	struct xfs_ail	*ailp)
{
	if (list_empty(&ailp->ail_head))
		return NULL;

	return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
}

/*
 * Return a pointer to the item which follows the given item in the AIL.  If
 * the given item is the last item in the list, then return NULL.
 */
static struct xfs_log_item *
xfs_ail_next(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	if (lip->li_ail.next == &ailp->ail_head)
		return NULL;

	return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
}
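
/*
 * The counterpart helper xfs_ail_min() is not defined in this file; it
 * lives in xfs_trans_priv.h so other code can use it too. A sketch of
 * its expected shape, for reference when reading the callers below:
 *
 *	static inline struct xfs_log_item *
 *	xfs_ail_min(
 *		struct xfs_ail	*ailp)
 *	{
 *		return list_first_entry_or_null(&ailp->ail_head,
 *				struct xfs_log_item, li_ail);
 *	}
 */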

/*
 * This is called by the log manager code to determine the LSN of the tail of
 * the log.  This is exactly the LSN of the first item in the AIL.  If the AIL
 * is empty, then this function returns 0.
 *
 * We need the AIL lock in order to get a coherent read of the lsn of the
 * first item in the AIL.
 */
static xfs_lsn_t
__xfs_ail_min_lsn(
	struct xfs_ail		*ailp)
{
	struct xfs_log_item	*lip = xfs_ail_min(ailp);

	if (lip)
		return lip->li_lsn;
	return 0;
}

xfs_lsn_t
xfs_ail_min_lsn(
	struct xfs_ail		*ailp)
{
	xfs_lsn_t		lsn;

	spin_lock(&ailp->ail_lock);
	lsn = __xfs_ail_min_lsn(ailp);
	spin_unlock(&ailp->ail_lock);

	return lsn;
}

/*
 * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
 */
static xfs_lsn_t
xfs_ail_max_lsn(
	struct xfs_ail		*ailp)
{
	xfs_lsn_t		lsn = 0;
	struct xfs_log_item	*lip;

	spin_lock(&ailp->ail_lock);
	lip = xfs_ail_max(ailp);
	if (lip)
		lsn = lip->li_lsn;
	spin_unlock(&ailp->ail_lock);

	return lsn;
}

/*
 * The cursor keeps track of where our current traversal is up to by tracking
 * the next item in the list for us. However, for this to be safe, removing an
 * object from the AIL needs to invalidate any cursor that points to it. Hence
 * the traversal cursor needs to be linked to the struct xfs_ail so that
 * deletion can search all the active cursors for invalidation.
 */
STATIC void
xfs_trans_ail_cursor_init(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur)
{
	cur->item = NULL;
	list_add_tail(&cur->list, &ailp->ail_cursors);
}

/*
 * Get the next item in the traversal and advance the cursor.  If the cursor
 * was invalidated (indicated by the low bit being set in the item pointer),
 * restart the traversal from the start of the AIL.
 */
struct xfs_log_item *
xfs_trans_ail_cursor_next(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur)
{
	struct xfs_log_item	*lip = cur->item;

	if ((uintptr_t)lip & 1)
		lip = xfs_ail_min(ailp);
	if (lip)
		cur->item = xfs_ail_next(ailp, lip);
	return lip;
}

/*
 * When the traversal is complete, we need to remove the cursor from the list
 * of traversing cursors.
 */
void
xfs_trans_ail_cursor_done(
	struct xfs_ail_cursor	*cur)
{
	cur->item = NULL;
	list_del_init(&cur->list);
}
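
/*
 * Putting the three cursor calls together, a typical AIL walk under the
 * ail_lock looks roughly like the sketch below (see xfsaild_push() for
 * the real thing; item processing may drop and retake the lock, which is
 * exactly what the cursor machinery makes safe):
 *
 *	struct xfs_ail_cursor	cur;
 *	struct xfs_log_item	*lip;
 *
 *	spin_lock(&ailp->ail_lock);
 *	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 *	     lip != NULL;
 *	     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
 *		...process lip; ail_lock may be dropped and reacquired...
 *	}
 *	xfs_trans_ail_cursor_done(&cur);
 *	spin_unlock(&ailp->ail_lock);
 */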

/*
 * Invalidate any cursor that is pointing to this item. This is called when an
 * item is removed from the AIL. Any cursor pointing to this object is now
 * invalid and the traversal needs to be terminated so it doesn't reference a
 * freed object. We set the low bit of the cursor item pointer so we can
 * distinguish between an invalidation and the end of the list when getting the
 * next item from the cursor.
 */
STATIC void
xfs_trans_ail_cursor_clear(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	struct xfs_ail_cursor	*cur;

	list_for_each_entry(cur, &ailp->ail_cursors, list) {
		if (cur->item == lip)
			cur->item = (struct xfs_log_item *)
					((uintptr_t)cur->item | 1);
	}
}
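
/*
 * The low bit works as an invalidation marker because log items are
 * heap-allocated structures and therefore at least word-aligned, so bit 0
 * of a valid item address is always zero. Conceptually:
 *
 *	cur->item = (struct xfs_log_item *)((uintptr_t)lip | 1);
 *
 * and on the reader side xfs_trans_ail_cursor_next() tests
 * (uintptr_t)lip & 1 and restarts the walk from xfs_ail_min() rather
 * than dereferencing the stale pointer.
 */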

/*
 * Find the first item in the AIL with the given @lsn by searching in ascending
 * LSN order and initialise the cursor to point to the next item for an
 * ascending traversal.  Pass a @lsn of zero to initialise the cursor to the
 * first item in the AIL. Returns NULL if the list is empty.
 */
struct xfs_log_item *
xfs_trans_ail_cursor_first(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	xfs_lsn_t		lsn)
{
	struct xfs_log_item	*lip;

	xfs_trans_ail_cursor_init(ailp, cur);

	if (lsn == 0) {
		lip = xfs_ail_min(ailp);
		goto out;
	}

	list_for_each_entry(lip, &ailp->ail_head, li_ail) {
		if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
			goto out;
	}
	return NULL;

out:
	if (lip)
		cur->item = xfs_ail_next(ailp, lip);
	return lip;
}

static struct xfs_log_item *
__xfs_trans_ail_cursor_last(
	struct xfs_ail		*ailp,
	xfs_lsn_t		lsn)
{
	struct xfs_log_item	*lip;

	list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
		if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
			return lip;
	}
	return NULL;
}

/*
 * Find the last item in the AIL with the given @lsn by searching in descending
 * LSN order and initialise the cursor to point to that item.  If there is no
 * item with the value of @lsn, then it sets the cursor to the last item with an
 * LSN lower than @lsn.  Returns NULL if the list is empty.
 */
struct xfs_log_item *
xfs_trans_ail_cursor_last(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	xfs_lsn_t		lsn)
{
	xfs_trans_ail_cursor_init(ailp, cur);
	cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
	return cur->item;
}

/*
 * Splice the log item list into the AIL at the given LSN. We splice to the
 * tail of the given LSN to maintain insert order for push traversals. The
 * cursor is optional, allowing repeated updates to the same LSN to avoid
 * repeated traversals.  This should not be called with an empty list.
 */
static void
xfs_ail_splice(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	struct list_head	*list,
	xfs_lsn_t		lsn)
{
	struct xfs_log_item	*lip;

	ASSERT(!list_empty(list));

	/*
	 * Use the cursor to determine the insertion point if one is
	 * provided.  If not, or if the one we got is not valid,
	 * find the place in the AIL where the items belong.
	 */
	lip = cur ? cur->item : NULL;
	if (!lip || (uintptr_t)lip & 1)
		lip = __xfs_trans_ail_cursor_last(ailp, lsn);

	/*
	 * If a cursor is provided, we know we're processing the AIL
	 * in lsn order, and future items to be spliced in will
	 * follow the last one being inserted now.  Update the
	 * cursor to point to that last item, now while we have a
	 * reliable pointer to it.
	 */
	if (cur)
		cur->item = list_entry(list->prev, struct xfs_log_item, li_ail);

	/*
	 * Finally perform the splice.  If lip is non-null, it points to the
	 * item in the AIL _after_ which the new items should go.  If lip is
	 * null, the AIL was either empty or every existing item has a higher
	 * LSN, so the new items go at the head of the AIL.
	 */
	if (lip)
		list_splice(list, &lip->li_ail);
	else
		list_splice(list, &ailp->ail_head);
}
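
/*
 * A worked example of the splice: with the AIL holding items at LSNs
 * 10, 20, 20, 30 and a new list of items all stamped with LSN 20, the
 * insertion point found above is the last existing item with LSN <= 20,
 * so insert order is preserved for push traversals:
 *
 *	before:	10 -> 20 -> 20 -> 30
 *	after:	10 -> 20 -> 20 -> [new 20s] -> 30
 *
 * If every existing item had an LSN greater than 20 (lip == NULL), the
 * new items would be spliced at the head instead.
 */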

/*
 * Delete the given item from the AIL and invalidate any cursors that
 * reference it.
 */
static void
xfs_ail_delete(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	xfs_ail_check(ailp, lip);
	list_del(&lip->li_ail);
	xfs_trans_ail_cursor_clear(ailp, lip);
}

/*
 * Requeue a failed buffer for writeback.
 *
 * We clear the log item failed state here as well, but we have to be careful
 * about reference counts because the only active reference counts on the buffer
 * may be the failed log items. Hence if we clear the log item failed state
 * before queuing the buffer for IO we can release all active references to
 * the buffer and free it, leading to use-after-free problems in
 * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
 * order we process them in - the buffer is locked, and we own the buffer list
 * so nothing on them is going to change while we are performing this action.
 *
 * Hence we can safely queue the buffer for IO before we clear the failed log
 * item state, therefore always having an active reference to the buffer and
 * avoiding the transient zero-reference state that leads to use-after-free.
 */
static inline int
xfsaild_resubmit_item(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	struct xfs_buf		*bp = lip->li_buf;

	if (!xfs_buf_trylock(bp))
		return XFS_ITEM_LOCKED;

	if (!xfs_buf_delwri_queue(bp, buffer_list)) {
		xfs_buf_unlock(bp);
		return XFS_ITEM_FLUSHING;
	}

	/* protected by ail_lock */
	list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
		if (bp->b_flags & _XBF_INODES)
			clear_bit(XFS_LI_FAILED, &lip->li_flags);
		else
			xfs_clear_li_failed(lip);
	}

	xfs_buf_unlock(bp);
	return XFS_ITEM_SUCCESS;
}

static inline uint
xfsaild_push_item(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	/*
	 * If log item pinning is enabled, skip the push and track the item as
	 * pinned. This can help induce head-behind-tail conditions.
	 */
	if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
		return XFS_ITEM_PINNED;

	/*
	 * Consider the item pinned if a push callback is not defined so the
	 * caller will force the log. This should only happen for intent items
	 * as they are unpinned once the associated done item is committed to
	 * the on-disk log.
	 */
	if (!lip->li_ops->iop_push)
		return XFS_ITEM_PINNED;
	if (test_bit(XFS_LI_FAILED, &lip->li_flags))
		return xfsaild_resubmit_item(lip, &ailp->ail_buf_list);
	return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
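
/*
 * The xfsaild_push() loop below maps these return codes to actions
 * roughly as follows:
 *
 *	XFS_ITEM_SUCCESS	pushed; advance ail_last_pushed_lsn
 *	XFS_ITEM_FLUSHING	already being flushed; advance the push LSN
 *				and count towards the flushing backoff
 *	XFS_ITEM_PINNED		needs a log force; counted as stuck and
 *				ail_log_flush is bumped
 *	XFS_ITEM_LOCKED		busy elsewhere; counted as stuck
 */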

static long
xfsaild_push(
	struct xfs_ail		*ailp)
{
	xfs_mount_t		*mp = ailp->ail_mount;
	struct xfs_ail_cursor	cur;
	struct xfs_log_item	*lip;
	xfs_lsn_t		lsn;
	xfs_lsn_t		target;
	long			tout;
	int			stuck = 0;
	int			flushing = 0;
	int			count = 0;

	/*
	 * If we encountered pinned items or did not finish writing out all
	 * buffers the last time we ran, force the log first and wait for it
	 * before pushing again.
	 */
	if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
	    (!list_empty_careful(&ailp->ail_buf_list) ||
	     xfs_ail_min_lsn(ailp))) {
		ailp->ail_log_flush = 0;

		XFS_STATS_INC(mp, xs_push_ail_flush);
		xfs_log_force(mp, XFS_LOG_SYNC);
	}

	spin_lock(&ailp->ail_lock);

	/* barrier matches the ail_target update in xfs_ail_push() */
	smp_rmb();
	target = ailp->ail_target;
	ailp->ail_target_prev = target;

	/* we're done if the AIL is empty or our push has reached the end */
	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
	if (!lip)
		goto out_done;

	XFS_STATS_INC(mp, xs_push_ail);

	lsn = lip->li_lsn;
	while (XFS_LSN_CMP(lip->li_lsn, target) <= 0) {
		int	lock_result;

		/*
		 * Note that iop_push may unlock and reacquire the AIL lock.  We
		 * rely on the AIL cursor implementation to be able to deal with
		 * the dropped lock.
		 */
		lock_result = xfsaild_push_item(ailp, lip);
		switch (lock_result) {
		case XFS_ITEM_SUCCESS:
			XFS_STATS_INC(mp, xs_push_ail_success);
			trace_xfs_ail_push(lip);

			ailp->ail_last_pushed_lsn = lsn;
			break;

		case XFS_ITEM_FLUSHING:
			/*
			 * The item or its backing buffer is already being
			 * flushed.  The typical reason for that is that an
			 * inode buffer is locked because we already pushed the
			 * updates to it as part of inode clustering.
			 *
			 * We do not want to stop flushing just because lots
			 * of items are already being flushed, but we need to
			 * re-try the flushing relatively soon if most of the
			 * AIL is being flushed.
			 */
			XFS_STATS_INC(mp, xs_push_ail_flushing);
			trace_xfs_ail_flushing(lip);

			flushing++;
			ailp->ail_last_pushed_lsn = lsn;
			break;

		case XFS_ITEM_PINNED:
			XFS_STATS_INC(mp, xs_push_ail_pinned);
			trace_xfs_ail_pinned(lip);

			stuck++;
			ailp->ail_log_flush++;
			break;
		case XFS_ITEM_LOCKED:
			XFS_STATS_INC(mp, xs_push_ail_locked);
			trace_xfs_ail_locked(lip);

			stuck++;
			break;
		default:
			ASSERT(0);
			break;
		}

		count++;

		/*
		 * Are there too many items we can't do anything with?
		 *
		 * If we are skipping too many items because we can't flush
		 * them or they are already being flushed, we back off and
		 * give them time to complete whatever operation is being
		 * done, i.e. remove pressure from the AIL while we can't make
		 * progress so traversals don't slow down further inserts and
		 * removals to/from the AIL.
		 *
		 * The value of 100 is an arbitrary magic number based on
		 * observation.
		 */
		if (stuck > 100)
			break;

		lip = xfs_trans_ail_cursor_next(ailp, &cur);
		if (lip == NULL)
			break;
		lsn = lip->li_lsn;
	}

out_done:
	xfs_trans_ail_cursor_done(&cur);
	spin_unlock(&ailp->ail_lock);

	if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
		ailp->ail_log_flush++;

	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
		/*
		 * We reached the target or the AIL is empty, so wait a bit
		 * longer for I/O to complete and remove pushed items from the
		 * AIL before we start the next scan from the start of the AIL.
		 */
		tout = 50;
		ailp->ail_last_pushed_lsn = 0;
	} else if (((stuck + flushing) * 100) / count > 90) {
		/*
		 * Either there is a lot of contention on the AIL or we are
		 * stuck due to operations in progress. "Stuck" in this case
		 * is defined as >90% of the items we tried to push were stuck.
		 *
		 * Backoff a bit more to allow some I/O to complete before
		 * restarting from the start of the AIL. This prevents us from
		 * spinning on the same items, and if they are pinned will
		 * allow the restart to issue a log force to unpin the stuck
		 * items.
		 */
		tout = 20;
		ailp->ail_last_pushed_lsn = 0;
	} else {
		/*
		 * Assume we have more work to do in a short while.
		 */
		tout = 10;
	}

	return tout;
}
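
/*
 * Summary of the backoff decisions above (timeouts in milliseconds):
 *
 *	tout = 50	target reached or nothing pushed; wait for I/O to
 *			complete, then restart the scan from the AIL head
 *	tout = 20	>90% of pushed items were stuck or flushing; back
 *			off harder and restart from the head so a log force
 *			can unpin the stuck items
 *	tout = 10	made progress; poll again shortly from where we
 *			left off
 *
 * xfsaild() additionally resets tout to 0 when it idles on an empty AIL,
 * so the next wakeup pushes immediately.
 */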

static int
xfsaild(
	void		*data)
{
	struct xfs_ail	*ailp = data;
	long		tout = 0;	/* milliseconds */
	unsigned int	noreclaim_flag;

	noreclaim_flag = memalloc_noreclaim_save();
	set_freezable();

	while (1) {
		if (tout && tout <= 20)
			set_current_state(TASK_KILLABLE);
		else
			set_current_state(TASK_INTERRUPTIBLE);

		/*
		 * Check kthread_should_stop() after we set the task state to
		 * guarantee that we either see the stop bit and exit or the
		 * task state is reset to runnable such that it's not scheduled
		 * out indefinitely and detects the stop bit at the next
		 * iteration. A memory barrier is included in the above task
		 * state set to serialize against kthread_stop().
		 */
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);

			/*
			 * The caller forces out the AIL before stopping the
			 * thread in the common case, which means the delwri
			 * queue is drained. In the shutdown case, the queue may
			 * still hold relogged buffers that haven't been
			 * submitted because they were pinned since added to the
			 * queue.
			 *
			 * Log I/O error processing stales the underlying buffer
			 * and clears the delwri state, expecting the buf to be
			 * removed on the next submission attempt. That won't
			 * happen if we're shutting down, so this is the last
			 * opportunity to release such buffers from the queue.
			 */
			ASSERT(list_empty(&ailp->ail_buf_list) ||
			       XFS_FORCED_SHUTDOWN(ailp->ail_mount));
			xfs_buf_delwri_cancel(&ailp->ail_buf_list);
			break;
		}

		spin_lock(&ailp->ail_lock);

		/*
		 * Idle if the AIL is empty and we are not racing with a target
		 * update. We check the AIL after we set the task to a sleep
		 * state to guarantee that we either catch an ail_target update
		 * or that a wake_up resets the state to TASK_RUNNING.
		 * Otherwise, we run the risk of sleeping indefinitely.
		 *
		 * The barrier matches the ail_target update in xfs_ail_push().
		 */
		smp_rmb();
		if (!xfs_ail_min(ailp) &&
		    ailp->ail_target == ailp->ail_target_prev &&
		    list_empty(&ailp->ail_buf_list)) {
			spin_unlock(&ailp->ail_lock);
			freezable_schedule();
			tout = 0;
			continue;
		}
		spin_unlock(&ailp->ail_lock);

		if (tout)
			freezable_schedule_timeout(msecs_to_jiffies(tout));

		__set_current_state(TASK_RUNNING);

		try_to_freeze();

		tout = xfsaild_push(ailp);
	}

	memalloc_noreclaim_restore(noreclaim_flag);
	return 0;
}

/*
 * This routine is called to move the tail of the AIL forward.  It does this by
 * trying to flush items in the AIL whose lsns are below the given
 * threshold_lsn.
 *
 * The push is run asynchronously by the xfsaild thread: we simply update the
 * push target and wake the thread, which means the caller needs to handle
 * waiting on the async flush for space to become available.
 *
 * We do this unlocked - we only need to know whether there is anything in the
 * AIL at the time we are called. We don't need to access the contents of
 * any of the objects, so the lock is not needed.
 */
void
xfs_ail_push(
	struct xfs_ail		*ailp,
	xfs_lsn_t		threshold_lsn)
{
	struct xfs_log_item	*lip;

	lip = xfs_ail_min(ailp);
	if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
	    XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
		return;

	/*
	 * Ensure that the new target is noticed by the push code before we
	 * wake the xfsaild; these barriers pair with the smp_rmb() calls in
	 * xfsaild_push() and xfsaild() that sample ail_target.
	 */
	smp_wmb();
	xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
	smp_wmb();

	wake_up_process(ailp->ail_task);
}
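
/*
 * For reference, a typical caller such as the log grant code
 * (xlog_grant_push_ail()) computes a threshold LSN from the amount of log
 * space it needs and then fires the target update, roughly:
 *
 *	threshold_lsn = <LSN that must become the new log tail>;
 *	xfs_ail_push(log->l_ailp, threshold_lsn);
 *
 * The call returns immediately; the pushing itself happens in xfsaild().
 */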

/*
 * Push out all items in the AIL immediately
 */
void
xfs_ail_push_all(
	struct xfs_ail	*ailp)
{
	xfs_lsn_t	threshold_lsn = xfs_ail_max_lsn(ailp);

	if (threshold_lsn)
		xfs_ail_push(ailp, threshold_lsn);
}

/*
 * Push out all items in the AIL immediately and wait until the AIL is empty.
 */
void
xfs_ail_push_all_sync(
	struct xfs_ail	*ailp)
{
	struct xfs_log_item	*lip;
	DEFINE_WAIT(wait);

	spin_lock(&ailp->ail_lock);
	while ((lip = xfs_ail_max(ailp)) != NULL) {
		prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
		ailp->ail_target = lip->li_lsn;
		wake_up_process(ailp->ail_task);
		spin_unlock(&ailp->ail_lock);
		schedule();
		spin_lock(&ailp->ail_lock);
	}
	spin_unlock(&ailp->ail_lock);

	finish_wait(&ailp->ail_empty, &wait);
}
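
/*
 * The handshake above pairs with xfs_ail_update_finish() below: whenever
 * the AIL empties, that function does wake_up_all(&ailp->ail_empty),
 * which ends the sleep in the loop above. Retargeting to the current
 * maximum LSN on every iteration keeps xfsaild pushing even as new items
 * are inserted while we wait.
 */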

void
xfs_ail_update_finish(
	struct xfs_ail		*ailp,
	xfs_lsn_t		old_lsn) __releases(ailp->ail_lock)
{
	struct xfs_mount	*mp = ailp->ail_mount;

	/* if the tail lsn hasn't changed, don't do updates or wakeups. */
	if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
		spin_unlock(&ailp->ail_lock);
		return;
	}

	if (!XFS_FORCED_SHUTDOWN(mp))
		xlog_assign_tail_lsn_locked(mp);

	if (list_empty(&ailp->ail_head))
		wake_up_all(&ailp->ail_empty);
	spin_unlock(&ailp->ail_lock);
	xfs_log_space_wake(mp);
}

/*
 * xfs_trans_ail_update_bulk - bulk AIL insertion operation.
 *
 * @xfs_trans_ail_update_bulk takes an array of log items that all need to be
 * positioned at the same LSN in the AIL. If an item is not in the AIL, it will
 * be added.  Otherwise, it will be repositioned by removing it and re-adding
 * it to the AIL. If we move the first item in the AIL, update the log tail to
 * match the new minimum LSN in the AIL.
 *
 * This function executes the update operations on all the items in the array
 * under a single acquisition of the AIL lock, so we need to check each log
 * item LSN to confirm it needs to be moved forward in the AIL.
 *
 * To optimise the insert operation, we delete all the items from the AIL in
 * the first pass, moving them into a temporary list, then splice the temporary
 * list into the correct position in the AIL. This avoids needing to do an
 * insert operation on every item.
 *
 * This function must be called with the AIL lock held.  The lock is dropped
 * before returning.
 */
void
xfs_trans_ail_update_bulk(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	struct xfs_log_item	**log_items,
	int			nr_items,
	xfs_lsn_t		lsn) __releases(ailp->ail_lock)
{
	struct xfs_log_item	*mlip;
	xfs_lsn_t		tail_lsn = 0;
	int			i;
	LIST_HEAD(tmp);

	ASSERT(nr_items > 0);		/* Not required, but true. */
	mlip = xfs_ail_min(ailp);

	for (i = 0; i < nr_items; i++) {
		struct xfs_log_item *lip = log_items[i];
		if (test_and_set_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
			/* check if we really need to move the item */
			if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
				continue;

			trace_xfs_ail_move(lip, lip->li_lsn, lsn);
			if (mlip == lip && !tail_lsn)
				tail_lsn = lip->li_lsn;

			xfs_ail_delete(ailp, lip);
		} else {
			trace_xfs_ail_insert(lip, 0, lsn);
		}
		lip->li_lsn = lsn;
		list_add(&lip->li_ail, &tmp);
	}

	if (!list_empty(&tmp))
		xfs_ail_splice(ailp, cur, &tmp, lsn);

	xfs_ail_update_finish(ailp, tail_lsn);
}

/* Insert a log item into the AIL. */
void
xfs_trans_ail_insert(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	spin_lock(&ailp->ail_lock);
	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
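
/*
 * Note the asymmetric locking above: the ail_lock taken here is released
 * inside xfs_trans_ail_update_bulk() by xfs_ail_update_finish(), so a
 * caller does not unlock anything itself (sketch):
 *
 *	xfs_trans_ail_insert(ailp, lip, commit_lsn);
 *	<ail_lock is not held on return>
 */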

/*
 * Delete one log item from the AIL.
 *
 * If this item was at the tail of the AIL, return the LSN of the log item so
 * that we can use it to check if the LSN of the tail of the log has moved
 * when finishing up the AIL delete process in xfs_ail_update_finish().
 */
xfs_lsn_t
xfs_ail_delete_one(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
	xfs_lsn_t		lsn = lip->li_lsn;

	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
	xfs_ail_delete(ailp, lip);
	clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
	lip->li_lsn = 0;

	if (mlip == lip)
		return lsn;
	return 0;
}

void
xfs_trans_ail_delete(
	struct xfs_log_item	*lip,
	int			shutdown_type)
{
	struct xfs_ail		*ailp = lip->li_ailp;
	struct xfs_mount	*mp = ailp->ail_mount;
	xfs_lsn_t		tail_lsn;

	spin_lock(&ailp->ail_lock);
	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
		spin_unlock(&ailp->ail_lock);
		if (shutdown_type && !XFS_FORCED_SHUTDOWN(mp)) {
			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
	"%s: attempting to delete a log item that is not in the AIL",
					__func__);
			xfs_force_shutdown(mp, shutdown_type);
		}
		return;
	}

	/* xfs_ail_update_finish() drops the AIL lock */
	xfs_clear_li_failed(lip);
	tail_lsn = xfs_ail_delete_one(ailp, lip);
	xfs_ail_update_finish(ailp, tail_lsn);
}

int
xfs_trans_ail_init(
	xfs_mount_t	*mp)
{
	struct xfs_ail	*ailp;

	ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
	if (!ailp)
		return -ENOMEM;

	ailp->ail_mount = mp;
	INIT_LIST_HEAD(&ailp->ail_head);
	INIT_LIST_HEAD(&ailp->ail_cursors);
	spin_lock_init(&ailp->ail_lock);
	INIT_LIST_HEAD(&ailp->ail_buf_list);
	init_waitqueue_head(&ailp->ail_empty);

	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
			ailp->ail_mount->m_super->s_id);
	if (IS_ERR(ailp->ail_task))
		goto out_free_ailp;

	mp->m_ail = ailp;
	return 0;

out_free_ailp:
	kmem_free(ailp);
	return -ENOMEM;
}
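
/*
 * kthread_run() returns an ERR_PTR() value rather than NULL on failure,
 * hence the IS_ERR() check above. The specific error is folded into
 * -ENOMEM on the way out; callers only see that setting up the AIL
 * machinery failed.
 */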

void
xfs_trans_ail_destroy(
	xfs_mount_t	*mp)
{
	struct xfs_ail	*ailp = mp->m_ail;

	kthread_stop(ailp->ail_task);
	kmem_free(ailp);
}
 920