linux/fs/jbd2/checkpoint.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * linux/fs/jbd2/checkpoint.c
   4 *
   5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
   6 *
   7 * Copyright 1999 Red Hat Software --- All Rights Reserved
   8 *
   9 * Checkpoint routines for the generic filesystem journaling code.
  10 * Part of the ext2fs journaling system.
  11 *
  12 * Checkpointing is the process of ensuring that a section of the log is
  13 * committed fully to disk, so that that portion of the log can be
  14 * reused.
  15 */
  16
  17#include <linux/time.h>
  18#include <linux/fs.h>
  19#include <linux/jbd2.h>
  20#include <linux/errno.h>
  21#include <linux/slab.h>
  22#include <linux/blkdev.h>
  23#include <trace/events/jbd2.h>
  24
  25/*
  26 * Unlink a buffer from a transaction checkpoint list.
  27 *
  28 * Called with j_list_lock held.
  29 */
  30static inline void __buffer_unlink_first(struct journal_head *jh)
  31{
  32        transaction_t *transaction = jh->b_cp_transaction;
  33
  34        jh->b_cpnext->b_cpprev = jh->b_cpprev;
  35        jh->b_cpprev->b_cpnext = jh->b_cpnext;
  36        if (transaction->t_checkpoint_list == jh) {
  37                transaction->t_checkpoint_list = jh->b_cpnext;
  38                if (transaction->t_checkpoint_list == jh)
  39                        transaction->t_checkpoint_list = NULL;
  40        }
  41}
  42
  43/*
  44 * Unlink a buffer from a transaction checkpoint(io) list.
  45 *
  46 * Called with j_list_lock held.
  47 */
  48static inline void __buffer_unlink(struct journal_head *jh)
  49{
  50        transaction_t *transaction = jh->b_cp_transaction;
  51
  52        __buffer_unlink_first(jh);
  53        if (transaction->t_checkpoint_io_list == jh) {
  54                transaction->t_checkpoint_io_list = jh->b_cpnext;
  55                if (transaction->t_checkpoint_io_list == jh)
  56                        transaction->t_checkpoint_io_list = NULL;
  57        }
  58}
  59
  60/*
  61 * Move a buffer from the checkpoint list to the checkpoint io list
  62 *
  63 * Called with j_list_lock held
  64 */
  65static inline void __buffer_relink_io(struct journal_head *jh)
  66{
  67        transaction_t *transaction = jh->b_cp_transaction;
  68
  69        __buffer_unlink_first(jh);
  70
  71        if (!transaction->t_checkpoint_io_list) {
  72                jh->b_cpnext = jh->b_cpprev = jh;
  73        } else {
  74                jh->b_cpnext = transaction->t_checkpoint_io_list;
  75                jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
  76                jh->b_cpprev->b_cpnext = jh;
  77                jh->b_cpnext->b_cpprev = jh;
  78        }
  79        transaction->t_checkpoint_io_list = jh;
  80}
  81
  82/*
 * Check whether a checkpoint buffer is still busy (i.e. cannot be released yet).
  84 *
  85 * Requires j_list_lock
  86 */
  87static inline bool __cp_buffer_busy(struct journal_head *jh)
  88{
  89        struct buffer_head *bh = jh2bh(jh);
  90
  91        return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
  92}
  93
  94/*
  95 * __jbd2_log_wait_for_space: wait until there is space in the journal.
  96 *
 * Called under j_state_lock *only*.  It will be unlocked if we have to wait
  98 * for a checkpoint to free up some space in the log.
  99 */
 100void __jbd2_log_wait_for_space(journal_t *journal)
 101__acquires(&journal->j_state_lock)
 102__releases(&journal->j_state_lock)
 103{
 104        int nblocks, space_left;
 105        /* assert_spin_locked(&journal->j_state_lock); */
 106
 107        nblocks = journal->j_max_transaction_buffers;
 108        while (jbd2_log_space_left(journal) < nblocks) {
 109                write_unlock(&journal->j_state_lock);
 110                mutex_lock_io(&journal->j_checkpoint_mutex);
 111
 112                /*
 113                 * Test again, another process may have checkpointed while we
 114                 * were waiting for the checkpoint lock. If there are no
 115                 * transactions ready to be checkpointed, try to recover
 116                 * journal space by calling cleanup_journal_tail(), and if
 117                 * that doesn't work, by waiting for the currently committing
 118                 * transaction to complete.  If there is absolutely no way
 119                 * to make progress, this is either a BUG or corrupted
 120                 * filesystem, so abort the journal and leave a stack
 121                 * trace for forensic evidence.
 122                 */
 123                write_lock(&journal->j_state_lock);
 124                if (journal->j_flags & JBD2_ABORT) {
 125                        mutex_unlock(&journal->j_checkpoint_mutex);
 126                        return;
 127                }
 128                spin_lock(&journal->j_list_lock);
 129                space_left = jbd2_log_space_left(journal);
 130                if (space_left < nblocks) {
 131                        int chkpt = journal->j_checkpoint_transactions != NULL;
 132                        tid_t tid = 0;
 133
 134                        if (journal->j_committing_transaction)
 135                                tid = journal->j_committing_transaction->t_tid;
 136                        spin_unlock(&journal->j_list_lock);
 137                        write_unlock(&journal->j_state_lock);
 138                        if (chkpt) {
 139                                jbd2_log_do_checkpoint(journal);
 140                        } else if (jbd2_cleanup_journal_tail(journal) == 0) {
 141                                /* We were able to recover space; yay! */
 142                                ;
 143                        } else if (tid) {
 144                                /*
 145                                 * jbd2_journal_commit_transaction() may want
 146                                 * to take the checkpoint_mutex if JBD2_FLUSHED
 147                                 * is set.  So we need to temporarily drop it.
 148                                 */
 149                                mutex_unlock(&journal->j_checkpoint_mutex);
 150                                jbd2_log_wait_commit(journal, tid);
 151                                write_lock(&journal->j_state_lock);
 152                                continue;
 153                        } else {
 154                                printk(KERN_ERR "%s: needed %d blocks and "
 155                                       "only had %d space available\n",
 156                                       __func__, nblocks, space_left);
 157                                printk(KERN_ERR "%s: no way to get more "
 158                                       "journal space in %s\n", __func__,
 159                                       journal->j_devname);
 160                                WARN_ON(1);
 161                                jbd2_journal_abort(journal, -EIO);
 162                        }
 163                        write_lock(&journal->j_state_lock);
 164                } else {
 165                        spin_unlock(&journal->j_list_lock);
 166                }
 167                mutex_unlock(&journal->j_checkpoint_mutex);
 168        }
 169}
 170
 171static void
 172__flush_batch(journal_t *journal, int *batch_count)
 173{
 174        int i;
 175        struct blk_plug plug;
 176
 177        blk_start_plug(&plug);
 178        for (i = 0; i < *batch_count; i++)
 179                write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
 180        blk_finish_plug(&plug);
 181
 182        for (i = 0; i < *batch_count; i++) {
 183                struct buffer_head *bh = journal->j_chkpt_bhs[i];
 184                BUFFER_TRACE(bh, "brelse");
 185                __brelse(bh);
 186        }
 187        *batch_count = 0;
 188}
 189
 190/*
 191 * Perform an actual checkpoint. We take the first transaction on the
 192 * list of transactions to be checkpointed and send all its buffers
 193 * to disk. We submit larger chunks of data at once.
 194 *
 195 * The journal should be locked before calling this function.
 196 * Called with j_checkpoint_mutex held.
 197 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
        struct journal_head     *jh;
        struct buffer_head      *bh;
        transaction_t           *transaction;
        tid_t                   this_tid;
        /* batch_count: number of buffers queued in journal->j_chkpt_bhs[] */
        int                     result, batch_count = 0;

        jbd_debug(1, "Start checkpoint\n");

        /*
         * First thing: if there are any transactions in the log which
         * don't need checkpointing, just eliminate them from the
         * journal straight away.
         */
        result = jbd2_cleanup_journal_tail(journal);
        trace_jbd2_checkpoint(journal, result);
        jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
        /*
         * A negative result is an error and zero means the cleanup
         * succeeded; in both cases no buffer writeback is attempted here.
         */
        if (result <= 0)
                return result;

        /*
         * OK, we need to start writing disk blocks.  Take one transaction
         * and write it.
         */
        spin_lock(&journal->j_list_lock);
        if (!journal->j_checkpoint_transactions)
                goto out;
        transaction = journal->j_checkpoint_transactions;
        /* Record the checkpoint start time only once per transaction. */
        if (transaction->t_chp_stats.cs_chp_time == 0)
                transaction->t_chp_stats.cs_chp_time = jiffies;
        /* Remember the tid so a recycled transaction_t can be detected. */
        this_tid = transaction->t_tid;
restart:
        /*
         * If someone cleaned up this transaction while we slept, we're
         * done (maybe it's a new transaction, but it fell at the same
         * address).
         */
        if (journal->j_checkpoint_transactions != transaction ||
            transaction->t_tid != this_tid)
                goto out;

        /* checkpoint all of the transaction's buffers */
        while (transaction->t_checkpoint_list) {
                jh = transaction->t_checkpoint_list;
                bh = jh2bh(jh);

                if (buffer_locked(bh)) {
                        /*
                         * Hold a reference on bh across the wait, since
                         * j_list_lock is dropped while waiting.
                         */
                        get_bh(bh);
                        spin_unlock(&journal->j_list_lock);
                        wait_on_buffer(bh);
                        /* the journal_head may have gone by now */
                        BUFFER_TRACE(bh, "brelse");
                        __brelse(bh);
                        /* "retry" flushes any pending batch, relocks, restarts. */
                        goto retry;
                }
                if (jh->b_transaction != NULL) {
                        /*
                         * The buffer is still attached to a transaction:
                         * start a commit of that transaction and wait for it.
                         */
                        transaction_t *t = jh->b_transaction;
                        tid_t tid = t->t_tid;

                        transaction->t_chp_stats.cs_forced_to_close++;
                        spin_unlock(&journal->j_list_lock);
                        if (unlikely(journal->j_flags & JBD2_UNMOUNT))
                                /*
                                 * The journal thread is dead; so
                                 * starting and waiting for a commit
                                 * to finish will cause us to wait for
                                 * a _very_ long time.
                                 */
                                printk(KERN_ERR
                "JBD2: %s: Waiting for Godot: block %llu\n",
                journal->j_devname, (unsigned long long) bh->b_blocknr);

                        /* Submit already-queued buffers before sleeping. */
                        if (batch_count)
                                __flush_batch(journal, &batch_count);
                        jbd2_log_start_commit(journal, tid);
                        /*
                         * jbd2_journal_commit_transaction() may want
                         * to take the checkpoint_mutex if JBD2_FLUSHED
                         * is set, jbd2_update_log_tail() called by
                         * jbd2_journal_commit_transaction() may also take
                         * checkpoint_mutex.  So we need to temporarily
                         * drop it.
                         */
                        mutex_unlock(&journal->j_checkpoint_mutex);
                        jbd2_log_wait_commit(journal, tid);
                        mutex_lock_io(&journal->j_checkpoint_mutex);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                if (!buffer_dirty(bh)) {
                        /* Not dirty: nothing to write, just drop it from the list. */
                        BUFFER_TRACE(bh, "remove from checkpoint");
                        if (__jbd2_journal_remove_checkpoint(jh))
                                /* The transaction was released; we're done */
                                goto out;
                        continue;
                }
                /*
                 * Important: we are about to write the buffer, and
                 * possibly block, while still holding the journal
                 * lock.  We cannot afford to let the transaction
                 * logic start messing around with this buffer before
                 * we write it to disk, as that would break
                 * recoverability.
                 */
                BUFFER_TRACE(bh, "queue");
                /* This reference is dropped by __flush_batch() via __brelse(). */
                get_bh(bh);
                J_ASSERT_BH(bh, !buffer_jwrite(bh));
                journal->j_chkpt_bhs[batch_count++] = bh;
                /* Move jh from t_checkpoint_list to t_checkpoint_io_list. */
                __buffer_relink_io(jh);
                transaction->t_chp_stats.cs_written++;
                /*
                 * Flush when the batch is full, or when a reschedule is
                 * needed or spin_needbreak() asks us to drop j_list_lock.
                 */
                if ((batch_count == JBD2_NR_BATCH) ||
                    need_resched() ||
                    spin_needbreak(&journal->j_list_lock))
                        goto unlock_and_flush;
        }

        /*
         * The labels below are also entered by goto from inside the loop
         * above: "unlock_and_flush" with j_list_lock held, "retry" with it
         * already dropped.
         */
        if (batch_count) {
                unlock_and_flush:
                        spin_unlock(&journal->j_list_lock);
                retry:
                        if (batch_count)
                                __flush_batch(journal, &batch_count);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
        }

        /*
         * Now we issued all of the transaction's buffers, let's deal
         * with the buffers that are out for I/O.
         */
restart2:
        /* Did somebody clean up the transaction in the meanwhile? */
        if (journal->j_checkpoint_transactions != transaction ||
            transaction->t_tid != this_tid)
                goto out;

        while (transaction->t_checkpoint_io_list) {
                jh = transaction->t_checkpoint_io_list;
                bh = jh2bh(jh);
                if (buffer_locked(bh)) {
                        /* Hold a reference on bh while waiting without j_list_lock. */
                        get_bh(bh);
                        spin_unlock(&journal->j_list_lock);
                        wait_on_buffer(bh);
                        /* the journal_head may have gone by now */
                        BUFFER_TRACE(bh, "brelse");
                        __brelse(bh);
                        spin_lock(&journal->j_list_lock);
                        goto restart2;
                }

                /*
                 * Now in whatever state the buffer currently is, we
                 * know that it has been written out and so we can
                 * drop it from the list
                 */
                if (__jbd2_journal_remove_checkpoint(jh))
                        break;
        }
out:
        spin_unlock(&journal->j_list_lock);
        /* Try to advance the log tail now that buffers may have been released. */
        result = jbd2_cleanup_journal_tail(journal);

        /* Propagate errors only; a positive result is reported as 0. */
        return (result < 0) ? result : 0;
}
 363
 364/*
 365 * Check the list of checkpoint transactions for the journal to see if
 366 * we have already got rid of any since the last update of the log tail
 367 * in the journal superblock.  If so, we can instantly roll the
 368 * superblock forward to remove those transactions from the log.
 369 *
 370 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 371 *
 372 * Called with the journal lock held.
 373 *
 374 * This is the only part of the journaling code which really needs to be
 375 * aware of transaction aborts.  Checkpointing involves writing to the
 376 * main filesystem area rather than to the journal, so it can proceed
 377 * even in abort state, but we must not update the super block if
 378 * checkpointing may have failed.  Otherwise, we would lose some metadata
 379 * buffers which should be written-back to the filesystem.
 380 */
 381
 382int jbd2_cleanup_journal_tail(journal_t *journal)
 383{
 384        tid_t           first_tid;
 385        unsigned long   blocknr;
 386
 387        if (is_journal_aborted(journal))
 388                return -EIO;
 389
 390        if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
 391                return 1;
 392        J_ASSERT(blocknr != 0);
 393
 394        /*
 395         * We need to make sure that any blocks that were recently written out
 396         * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
 397         * we drop the transactions from the journal. It's unlikely this will
 398         * be necessary, especially with an appropriately sized journal, but we
 399         * need this to guarantee correctness.  Fortunately
 400         * jbd2_cleanup_journal_tail() doesn't get called all that often.
 401         */
 402        if (journal->j_flags & JBD2_BARRIER)
 403                blkdev_issue_flush(journal->j_fs_dev);
 404
 405        return __jbd2_update_log_tail(journal, first_tid, blocknr);
 406}
 407
 408
 409/* Checkpoint list management */
 410
 411/*
 412 * journal_clean_one_cp_list
 413 *
 414 * Find all the written-back checkpoint buffers in the given list and
 415 * release them. If 'destroy' is set, clean all buffers unconditionally.
 416 *
 417 * Called with j_list_lock held.
 418 * Returns 1 if we freed the transaction, 0 otherwise.
 419 */
 420static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 421{
 422        struct journal_head *last_jh;
 423        struct journal_head *next_jh = jh;
 424
 425        if (!jh)
 426                return 0;
 427
 428        last_jh = jh->b_cpprev;
 429        do {
 430                jh = next_jh;
 431                next_jh = jh->b_cpnext;
 432
 433                if (!destroy && __cp_buffer_busy(jh))
 434                        return 0;
 435
 436                if (__jbd2_journal_remove_checkpoint(jh))
 437                        return 1;
 438                /*
 439                 * This function only frees up some memory
 440                 * if possible so we dont have an obligation
 441                 * to finish processing. Bail out if preemption
 442                 * requested:
 443                 */
 444                if (need_resched())
 445                        return 0;
 446        } while (jh != last_jh);
 447
 448        return 0;
 449}
 450
 451/*
 452 * journal_shrink_one_cp_list
 453 *
 454 * Find 'nr_to_scan' written-back checkpoint buffers in the given list
 455 * and try to release them. If the whole transaction is released, set
 456 * the 'released' parameter. Return the number of released checkpointed
 457 * buffers.
 458 *
 459 * Called with j_list_lock held.
 460 */
 461static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
 462                                                unsigned long *nr_to_scan,
 463                                                bool *released)
 464{
 465        struct journal_head *last_jh;
 466        struct journal_head *next_jh = jh;
 467        unsigned long nr_freed = 0;
 468        int ret;
 469
 470        if (!jh || *nr_to_scan == 0)
 471                return 0;
 472
 473        last_jh = jh->b_cpprev;
 474        do {
 475                jh = next_jh;
 476                next_jh = jh->b_cpnext;
 477
 478                (*nr_to_scan)--;
 479                if (__cp_buffer_busy(jh))
 480                        continue;
 481
 482                nr_freed++;
 483                ret = __jbd2_journal_remove_checkpoint(jh);
 484                if (ret) {
 485                        *released = true;
 486                        break;
 487                }
 488
 489                if (need_resched())
 490                        break;
 491        } while (jh != last_jh && *nr_to_scan);
 492
 493        return nr_freed;
 494}
 495
 496/*
 497 * jbd2_journal_shrink_checkpoint_list
 498 *
 499 * Find 'nr_to_scan' written-back checkpoint buffers in the journal
 500 * and try to release them. Return the number of released checkpointed
 501 * buffers.
 502 *
 * Takes and releases j_list_lock internally; the caller must not hold it.
 504 */
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
                                                  unsigned long *nr_to_scan)
{
        transaction_t *transaction, *last_transaction, *next_transaction;
        bool released;
        /* The tids below are only consumed by the tracepoint at "out". */
        tid_t first_tid = 0, last_tid = 0, next_tid = 0;
        tid_t tid = 0;
        unsigned long nr_freed = 0;
        /* Holds the initial budget; turned into "amount scanned" at "out". */
        unsigned long nr_scanned = *nr_to_scan;

again:
        /* j_list_lock is taken here and dropped again before each retry. */
        spin_lock(&journal->j_list_lock);
        if (!journal->j_checkpoint_transactions) {
                spin_unlock(&journal->j_list_lock);
                goto out;
        }

        /*
         * Get next shrink transaction, resume previous scan or start
         * over again. If some others do checkpoint and drop transaction
         * from the checkpoint list, we ignore saved j_shrink_transaction
         * and start over unconditionally.
         */
        if (journal->j_shrink_transaction)
                transaction = journal->j_shrink_transaction;
        else
                transaction = journal->j_checkpoint_transactions;

        /* Remember the tid of the first transaction visited (set once). */
        if (!first_tid)
                first_tid = transaction->t_tid;
        /* The transaction list is circular: head->t_cpprev is the last entry. */
        last_transaction = journal->j_checkpoint_transactions->t_cpprev;
        next_transaction = transaction;
        last_tid = last_transaction->t_tid;
        do {
                transaction = next_transaction;
                /* Fetch the successor first; the transaction may be released below. */
                next_transaction = transaction->t_cpnext;
                tid = transaction->t_tid;
                released = false;

                nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
                                                       nr_to_scan, &released);
                if (*nr_to_scan == 0)
                        break;
                if (need_resched() || spin_needbreak(&journal->j_list_lock))
                        break;
                /* Transaction released: skip its io list and move on. */
                if (released)
                        continue;

                nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
                                                       nr_to_scan, &released);
                if (*nr_to_scan == 0)
                        break;
                if (need_resched() || spin_needbreak(&journal->j_list_lock))
                        break;
        } while (transaction != last_transaction);

        /*
         * If the scan stopped before the last transaction, save a resume
         * point in j_shrink_transaction; otherwise clear it so the next
         * scan starts from the head of the checkpoint list.
         */
        if (transaction != last_transaction) {
                journal->j_shrink_transaction = next_transaction;
                next_tid = next_transaction->t_tid;
        } else {
                journal->j_shrink_transaction = NULL;
                next_tid = 0;
        }

        spin_unlock(&journal->j_list_lock);
        cond_resched();

        /* Keep going while budget remains and a resume point was recorded. */
        if (*nr_to_scan && next_tid)
                goto again;
out:
        /* Initial budget minus remaining budget = amount actually scanned. */
        nr_scanned -= *nr_to_scan;
        trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
                                          nr_freed, nr_scanned, next_tid);

        return nr_freed;
}
 581
 582/*
 583 * journal_clean_checkpoint_list
 584 *
 585 * Find all the written-back checkpoint buffers in the journal and release them.
 586 * If 'destroy' is set, release all buffers unconditionally.
 587 *
 588 * Called with j_list_lock held.
 589 */
 590void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 591{
 592        transaction_t *transaction, *last_transaction, *next_transaction;
 593        int ret;
 594
 595        transaction = journal->j_checkpoint_transactions;
 596        if (!transaction)
 597                return;
 598
 599        last_transaction = transaction->t_cpprev;
 600        next_transaction = transaction;
 601        do {
 602                transaction = next_transaction;
 603                next_transaction = transaction->t_cpnext;
 604                ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
 605                                                destroy);
 606                /*
 607                 * This function only frees up some memory if possible so we
 608                 * dont have an obligation to finish processing. Bail out if
 609                 * preemption requested:
 610                 */
 611                if (need_resched())
 612                        return;
 613                if (ret)
 614                        continue;
 615                /*
 616                 * It is essential that we are as careful as in the case of
 617                 * t_checkpoint_list with removing the buffer from the list as
 618                 * we can possibly see not yet submitted buffers on io_list
 619                 */
 620                ret = journal_clean_one_cp_list(transaction->
 621                                t_checkpoint_io_list, destroy);
 622                if (need_resched())
 623                        return;
 624                /*
 625                 * Stop scanning if we couldn't free the transaction. This
 626                 * avoids pointless scanning of transactions which still
 627                 * weren't checkpointed.
 628                 */
 629                if (!ret)
 630                        return;
 631        } while (transaction != last_transaction);
 632}
 633
 634/*
 635 * Remove buffers from all checkpoint lists as journal is aborted and we just
 636 * need to free memory
 637 */
 638void jbd2_journal_destroy_checkpoint(journal_t *journal)
 639{
 640        /*
 641         * We loop because __jbd2_journal_clean_checkpoint_list() may abort
 642         * early due to a need of rescheduling.
 643         */
 644        while (1) {
 645                spin_lock(&journal->j_list_lock);
 646                if (!journal->j_checkpoint_transactions) {
 647                        spin_unlock(&journal->j_list_lock);
 648                        break;
 649                }
 650                __jbd2_journal_clean_checkpoint_list(journal, true);
 651                spin_unlock(&journal->j_list_lock);
 652                cond_resched();
 653        }
 654}
 655
 656/*
 657 * journal_remove_checkpoint: called after a buffer has been committed
 658 * to disk (either by being write-back flushed to disk, or being
 659 * committed to the log).
 660 *
 661 * We cannot safely clean a transaction out of the log until all of the
 662 * buffer updates committed in that transaction have safely been stored
 663 * elsewhere on disk.  To achieve this, all of the buffers in a
 664 * transaction need to be maintained on the transaction's checkpoint
 665 * lists until they have been rewritten, at which point this function is
 666 * called to remove the buffer from the existing transaction's
 667 * checkpoint lists.
 668 *
 669 * The function returns 1 if it frees the transaction, 0 otherwise.
 670 * The function can free jh and bh.
 671 *
 672 * This function is called with j_list_lock held.
 673 */
 674int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 675{
 676        struct transaction_chp_stats_s *stats;
 677        transaction_t *transaction;
 678        journal_t *journal;
 679        struct buffer_head *bh = jh2bh(jh);
 680
 681        JBUFFER_TRACE(jh, "entry");
 682
 683        transaction = jh->b_cp_transaction;
 684        if (!transaction) {
 685                JBUFFER_TRACE(jh, "not on transaction");
 686                return 0;
 687        }
 688        journal = transaction->t_journal;
 689
 690        JBUFFER_TRACE(jh, "removing from transaction");
 691
 692        /*
 693         * If we have failed to write the buffer out to disk, the filesystem
 694         * may become inconsistent. We cannot abort the journal here since
 695         * we hold j_list_lock and we have to be careful about races with
 696         * jbd2_journal_destroy(). So mark the writeback IO error in the
 697         * journal here and we abort the journal later from a better context.
 698         */
 699        if (buffer_write_io_error(bh))
 700                set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
 701
 702        __buffer_unlink(jh);
 703        jh->b_cp_transaction = NULL;
 704        percpu_counter_dec(&journal->j_checkpoint_jh_count);
 705        jbd2_journal_put_journal_head(jh);
 706
 707        /* Is this transaction empty? */
 708        if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
 709                return 0;
 710
 711        /*
 712         * There is one special case to worry about: if we have just pulled the
 713         * buffer off a running or committing transaction's checkpoing list,
 714         * then even if the checkpoint list is empty, the transaction obviously
 715         * cannot be dropped!
 716         *
 717         * The locking here around t_state is a bit sleazy.
 718         * See the comment at the end of jbd2_journal_commit_transaction().
 719         */
 720        if (transaction->t_state != T_FINISHED)
 721                return 0;
 722
 723        /*
 724         * OK, that was the last buffer for the transaction, we can now
 725         * safely remove this transaction from the log.
 726         */
 727        stats = &transaction->t_chp_stats;
 728        if (stats->cs_chp_time)
 729                stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
 730                                                    jiffies);
 731        trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
 732                                    transaction->t_tid, stats);
 733
 734        __jbd2_journal_drop_transaction(journal, transaction);
 735        jbd2_journal_free_transaction(transaction);
 736        return 1;
 737}
 738
 739/*
 740 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 741 * list so that we know when it is safe to clean the transaction out of
 742 * the log.
 743 *
 744 * Called with the journal locked.
 745 * Called with j_list_lock held.
 746 */
 747void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 748                               transaction_t *transaction)
 749{
 750        JBUFFER_TRACE(jh, "entry");
 751        J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
 752        J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 753
 754        /* Get reference for checkpointing transaction */
 755        jbd2_journal_grab_journal_head(jh2bh(jh));
 756        jh->b_cp_transaction = transaction;
 757
 758        if (!transaction->t_checkpoint_list) {
 759                jh->b_cpnext = jh->b_cpprev = jh;
 760        } else {
 761                jh->b_cpnext = transaction->t_checkpoint_list;
 762                jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
 763                jh->b_cpprev->b_cpnext = jh;
 764                jh->b_cpnext->b_cpprev = jh;
 765        }
 766        transaction->t_checkpoint_list = jh;
 767        percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
 768}
 769
 770/*
 771 * We've finished with this transaction structure: adios...
 772 *
 773 * The transaction must have no links except for the checkpoint by this
 774 * point.
 775 *
 776 * Called with the journal locked.
 777 * Called with j_list_lock held.
 778 */
 779
 780void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 781{
 782        assert_spin_locked(&journal->j_list_lock);
 783
 784        journal->j_shrink_transaction = NULL;
 785        if (transaction->t_cpnext) {
 786                transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
 787                transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
 788                if (journal->j_checkpoint_transactions == transaction)
 789                        journal->j_checkpoint_transactions =
 790                                transaction->t_cpnext;
 791                if (journal->j_checkpoint_transactions == transaction)
 792                        journal->j_checkpoint_transactions = NULL;
 793        }
 794
 795        J_ASSERT(transaction->t_state == T_FINISHED);
 796        J_ASSERT(transaction->t_buffers == NULL);
 797        J_ASSERT(transaction->t_forget == NULL);
 798        J_ASSERT(transaction->t_shadow_list == NULL);
 799        J_ASSERT(transaction->t_checkpoint_list == NULL);
 800        J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 801        J_ASSERT(atomic_read(&transaction->t_updates) == 0);
 802        J_ASSERT(journal->j_committing_transaction != transaction);
 803        J_ASSERT(journal->j_running_transaction != transaction);
 804
 805        trace_jbd2_drop_transaction(journal, transaction);
 806
 807        jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
 808}
 809