linux/fs/jbd2/checkpoint.c
<<
>>
Prefs
   1/*
   2 * linux/fs/jbd2/checkpoint.c
   3 *
   4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
   5 *
   6 * Copyright 1999 Red Hat Software --- All Rights Reserved
   7 *
   8 * This file is part of the Linux kernel and is made available under
   9 * the terms of the GNU General Public License, version 2, or at your
  10 * option, any later version, incorporated herein by reference.
  11 *
  12 * Checkpoint routines for the generic filesystem journaling code.
  13 * Part of the ext2fs journaling system.
  14 *
  15 * Checkpointing is the process of ensuring that a section of the log is
  16 * committed fully to disk, so that that portion of the log can be
  17 * reused.
  18 */
  19
  20#include <linux/time.h>
  21#include <linux/fs.h>
  22#include <linux/jbd2.h>
  23#include <linux/errno.h>
  24#include <linux/slab.h>
  25#include <linux/blkdev.h>
  26#include <trace/events/jbd2.h>
  27
  28/*
  29 * Unlink a buffer from a transaction checkpoint list.
  30 *
  31 * Called with j_list_lock held.
  32 */
  33static inline void __buffer_unlink_first(struct journal_head *jh)
  34{
  35        transaction_t *transaction = jh->b_cp_transaction;
  36
  37        jh->b_cpnext->b_cpprev = jh->b_cpprev;
  38        jh->b_cpprev->b_cpnext = jh->b_cpnext;
  39        if (transaction->t_checkpoint_list == jh) {
  40                transaction->t_checkpoint_list = jh->b_cpnext;
  41                if (transaction->t_checkpoint_list == jh)
  42                        transaction->t_checkpoint_list = NULL;
  43        }
  44}
  45
  46/*
  47 * Unlink a buffer from a transaction checkpoint(io) list.
  48 *
  49 * Called with j_list_lock held.
  50 */
  51static inline void __buffer_unlink(struct journal_head *jh)
  52{
  53        transaction_t *transaction = jh->b_cp_transaction;
  54
  55        __buffer_unlink_first(jh);
  56        if (transaction->t_checkpoint_io_list == jh) {
  57                transaction->t_checkpoint_io_list = jh->b_cpnext;
  58                if (transaction->t_checkpoint_io_list == jh)
  59                        transaction->t_checkpoint_io_list = NULL;
  60        }
  61}
  62
  63/*
  64 * Move a buffer from the checkpoint list to the checkpoint io list
  65 *
  66 * Called with j_list_lock held
  67 */
  68static inline void __buffer_relink_io(struct journal_head *jh)
  69{
  70        transaction_t *transaction = jh->b_cp_transaction;
  71
  72        __buffer_unlink_first(jh);
  73
  74        if (!transaction->t_checkpoint_io_list) {
  75                jh->b_cpnext = jh->b_cpprev = jh;
  76        } else {
  77                jh->b_cpnext = transaction->t_checkpoint_io_list;
  78                jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
  79                jh->b_cpprev->b_cpnext = jh;
  80                jh->b_cpnext->b_cpprev = jh;
  81        }
  82        transaction->t_checkpoint_io_list = jh;
  83}
  84
  85/*
  86 * Try to release a checkpointed buffer from its transaction.
  87 * Returns 1 if we released it and 2 if we also released the
  88 * whole transaction.
  89 *
  90 * Requires j_list_lock
  91 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  92 */
  93static int __try_to_free_cp_buf(struct journal_head *jh)
  94{
  95        int ret = 0;
  96        struct buffer_head *bh = jh2bh(jh);
  97
  98        if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
  99            !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
 100                JBUFFER_TRACE(jh, "remove from checkpoint list");
 101                ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 102                jbd_unlock_bh_state(bh);
 103                jbd2_journal_remove_journal_head(bh);
 104                BUFFER_TRACE(bh, "release");
 105                __brelse(bh);
 106        } else {
 107                jbd_unlock_bh_state(bh);
 108        }
 109        return ret;
 110}
 111
 112/*
 113 * __jbd2_log_wait_for_space: wait until there is space in the journal.
 114 *
 115 * Called under j-state_lock *only*.  It will be unlocked if we have to wait
 116 * for a checkpoint to free up some space in the log.
 117 */
 118void __jbd2_log_wait_for_space(journal_t *journal)
 119{
 120        int nblocks, space_left;
 121        /* assert_spin_locked(&journal->j_state_lock); */
 122
 123        nblocks = jbd_space_needed(journal);
 124        while (__jbd2_log_space_left(journal) < nblocks) {
 125                if (journal->j_flags & JBD2_ABORT)
 126                        return;
 127                write_unlock(&journal->j_state_lock);
 128                mutex_lock(&journal->j_checkpoint_mutex);
 129
 130                /*
 131                 * Test again, another process may have checkpointed while we
 132                 * were waiting for the checkpoint lock. If there are no
 133                 * transactions ready to be checkpointed, try to recover
 134                 * journal space by calling cleanup_journal_tail(), and if
 135                 * that doesn't work, by waiting for the currently committing
 136                 * transaction to complete.  If there is absolutely no way
 137                 * to make progress, this is either a BUG or corrupted
 138                 * filesystem, so abort the journal and leave a stack
 139                 * trace for forensic evidence.
 140                 */
 141                write_lock(&journal->j_state_lock);
 142                spin_lock(&journal->j_list_lock);
 143                nblocks = jbd_space_needed(journal);
 144                space_left = __jbd2_log_space_left(journal);
 145                if (space_left < nblocks) {
 146                        int chkpt = journal->j_checkpoint_transactions != NULL;
 147                        tid_t tid = 0;
 148
 149                        if (journal->j_committing_transaction)
 150                                tid = journal->j_committing_transaction->t_tid;
 151                        spin_unlock(&journal->j_list_lock);
 152                        write_unlock(&journal->j_state_lock);
 153                        if (chkpt) {
 154                                jbd2_log_do_checkpoint(journal);
 155                        } else if (jbd2_cleanup_journal_tail(journal) == 0) {
 156                                /* We were able to recover space; yay! */
 157                                ;
 158                        } else if (tid) {
 159                                jbd2_log_wait_commit(journal, tid);
 160                        } else {
 161                                printk(KERN_ERR "%s: needed %d blocks and "
 162                                       "only had %d space available\n",
 163                                       __func__, nblocks, space_left);
 164                                printk(KERN_ERR "%s: no way to get more "
 165                                       "journal space in %s\n", __func__,
 166                                       journal->j_devname);
 167                                WARN_ON(1);
 168                                jbd2_journal_abort(journal, 0);
 169                        }
 170                        write_lock(&journal->j_state_lock);
 171                } else {
 172                        spin_unlock(&journal->j_list_lock);
 173                }
 174                mutex_unlock(&journal->j_checkpoint_mutex);
 175        }
 176}
 177
/*
 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
 * The caller must restart a list walk.  Wait for someone else to run
 * jbd_unlock_bh_state().
 *
 * Entered with j_list_lock held; returns with it released, so the caller
 * has to retake it before touching the checkpoint lists again.
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
        __releases(journal->j_list_lock)
{
        /* Pin the buffer_head across the window where no lock is held. */
        get_bh(bh);
        spin_unlock(&journal->j_list_lock);
        /*
         * Taking and immediately dropping the bh state lock simply waits
         * until the current holder lets go of it.
         */
        jbd_lock_bh_state(bh);
        jbd_unlock_bh_state(bh);
        put_bh(bh);
}
 192
/*
 * Clean up transaction's list of buffers submitted for io.
 * We wait for any pending IO to complete and remove any clean
 * buffers. Note that we take the buffers in the opposite ordering
 * from the one in which they were submitted for IO.
 *
 * Return 0 on success, and return <0 (-EIO) if some buffers have failed
 * to be written out.
 *
 * Called with j_list_lock held.  The lock may be dropped and retaken
 * while waiting, but it is held again on return.
 */
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
        struct journal_head *jh;
        struct buffer_head *bh;
        tid_t this_tid;
        int released = 0;
        int ret = 0;

        /* Remember the tid so a recycled transaction address is detected. */
        this_tid = transaction->t_tid;
restart:
        /* Did somebody clean up the transaction in the meanwhile? */
        if (journal->j_checkpoint_transactions != transaction ||
                        transaction->t_tid != this_tid)
                return ret;
        /*
         * 'released' becomes non-zero once the transaction itself has been
         * freed; it is tested first so the freed transaction is not read.
         */
        while (!released && transaction->t_checkpoint_io_list) {
                jh = transaction->t_checkpoint_io_list;
                bh = jh2bh(jh);
                if (!jbd_trylock_bh_state(bh)) {
                        /* jbd_sync_bh() drops j_list_lock; retake and rescan. */
                        jbd_sync_bh(journal, bh);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                if (buffer_locked(bh)) {
                        /* IO in flight: hold a reference and wait unlocked. */
                        atomic_inc(&bh->b_count);
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        wait_on_buffer(bh);
                        /* the journal_head may have gone by now */
                        BUFFER_TRACE(bh, "brelse");
                        __brelse(bh);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                /* Record the failure but keep draining the list. */
                if (unlikely(buffer_write_io_error(bh)))
                        ret = -EIO;

                /*
                 * Now in whatever state the buffer currently is, we know that
                 * it has been written out and so we can drop it from the list
                 */
                released = __jbd2_journal_remove_checkpoint(jh);
                jbd_unlock_bh_state(bh);
                jbd2_journal_remove_journal_head(bh);
                __brelse(bh);
        }

        return ret;
}
 252
 253static void
 254__flush_batch(journal_t *journal, int *batch_count)
 255{
 256        int i;
 257
 258        for (i = 0; i < *batch_count; i++)
 259                write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
 260
 261        for (i = 0; i < *batch_count; i++) {
 262                struct buffer_head *bh = journal->j_chkpt_bhs[i];
 263                clear_buffer_jwrite(bh);
 264                BUFFER_TRACE(bh, "brelse");
 265                __brelse(bh);
 266        }
 267        *batch_count = 0;
 268}
 269
/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return value:
 *   0  - the buffer was queued in journal->j_chkpt_bhs[] for a later batched
 *        write and j_list_lock is still held; the scan may continue.
 *   1  - something happened which requires us to abort the current scan of
 *        the checkpoint list; j_list_lock has been dropped.
 *  <0  - (-EIO) the buffer was clean but has a write IO error recorded; it
 *        has been removed from the checkpoint list and j_list_lock has been
 *        dropped as well.
 *
 * Called with j_list_lock held.
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
                            int *batch_count, transaction_t *transaction)
{
        struct buffer_head *bh = jh2bh(jh);
        int ret = 0;

        if (buffer_locked(bh)) {
                /* IO already in flight: take a reference and wait unlocked. */
                atomic_inc(&bh->b_count);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                wait_on_buffer(bh);
                /* the journal_head may have gone by now */
                BUFFER_TRACE(bh, "brelse");
                __brelse(bh);
                ret = 1;
        } else if (jh->b_transaction != NULL) {
                /*
                 * The buffer still belongs to a transaction: force that
                 * transaction to commit and wait for it.  The tid is sampled
                 * while the locks are still held.
                 */
                transaction_t *t = jh->b_transaction;
                tid_t tid = t->t_tid;

                transaction->t_chp_stats.cs_forced_to_close++;
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                if (unlikely(journal->j_flags & JBD2_UNMOUNT))
                        /*
                         * The journal thread is dead; so starting and
                         * waiting for a commit to finish will cause
                         * us to wait for a _very_ long time.
                         */
                        printk(KERN_ERR "JBD2: %s: "
                               "Waiting for Godot: block %llu\n",
                               journal->j_devname,
                               (unsigned long long) bh->b_blocknr);
                jbd2_log_start_commit(journal, tid);
                jbd2_log_wait_commit(journal, tid);
                ret = 1;
        } else if (!buffer_dirty(bh)) {
                /* Already clean: just drop it from the checkpoint list. */
                ret = 1;
                if (unlikely(buffer_write_io_error(bh)))
                        ret = -EIO;
                J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                BUFFER_TRACE(bh, "remove from checkpoint");
                __jbd2_journal_remove_checkpoint(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
                jbd2_journal_remove_journal_head(bh);
                __brelse(bh);
        } else {
                /*
                 * Important: we are about to write the buffer, and
                 * possibly block, while still holding the journal lock.
                 * We cannot afford to let the transaction logic start
                 * messing around with this buffer before we write it to
                 * disk, as that would break recoverability.
                 */
                BUFFER_TRACE(bh, "queue");
                /* This reference is released by __brelse() in __flush_batch(). */
                get_bh(bh);
                J_ASSERT_BH(bh, !buffer_jwrite(bh));
                set_buffer_jwrite(bh);
                journal->j_chkpt_bhs[*batch_count] = bh;
                __buffer_relink_io(jh);
                jbd_unlock_bh_state(bh);
                transaction->t_chp_stats.cs_written++;
                (*batch_count)++;
                /* Batch array is full: submit it now, which needs the lock dropped. */
                if (*batch_count == JBD2_NR_BATCH) {
                        spin_unlock(&journal->j_list_lock);
                        __flush_batch(journal, batch_count);
                        ret = 1;
                }
        }
        return ret;
}
 351
/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * Returns 0 on success or a negative error.  If the initial
 * jbd2_cleanup_journal_tail() call returns a value <= 0, that value is
 * returned immediately without writing anything.  On a buffer write error
 * the journal is aborted via jbd2_journal_abort() and the error is returned.
 *
 * Called with j_checkpoint_mutex held.  j_list_lock is taken and released
 * internally.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
        transaction_t *transaction;
        tid_t this_tid;
        int result;

        jbd_debug(1, "Start checkpoint\n");

        /*
         * First thing: if there are any transactions in the log which
         * don't need checkpointing, just eliminate them from the
         * journal straight away.
         */
        result = jbd2_cleanup_journal_tail(journal);
        trace_jbd2_checkpoint(journal, result);
        jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
        if (result <= 0)
                return result;

        /*
         * OK, we need to start writing disk blocks.  Take one transaction
         * and write it.
         */
        result = 0;
        spin_lock(&journal->j_list_lock);
        if (!journal->j_checkpoint_transactions)
                goto out;
        transaction = journal->j_checkpoint_transactions;
        /* Stamp the start time only on the first checkpoint attempt. */
        if (transaction->t_chp_stats.cs_chp_time == 0)
                transaction->t_chp_stats.cs_chp_time = jiffies;
        this_tid = transaction->t_tid;
restart:
        /*
         * If someone cleaned up this transaction while we slept, we're
         * done (maybe it's a new transaction, but it fell at the same
         * address).
         */
        if (journal->j_checkpoint_transactions == transaction &&
                        transaction->t_tid == this_tid) {
                int batch_count = 0;
                struct journal_head *jh;
                int retry = 0, err;

                /*
                 * 'retry' non-zero means j_list_lock has been dropped and
                 * the list has to be rescanned from the restart label.
                 */
                while (!retry && transaction->t_checkpoint_list) {
                        struct buffer_head *bh;

                        jh = transaction->t_checkpoint_list;
                        bh = jh2bh(jh);
                        if (!jbd_trylock_bh_state(bh)) {
                                /* jbd_sync_bh() releases j_list_lock. */
                                jbd_sync_bh(journal, bh);
                                retry = 1;
                                break;
                        }
                        retry = __process_buffer(journal, jh, &batch_count,
                                                 transaction);
                        /* Keep only the first error seen. */
                        if (retry < 0 && !result)
                                result = retry;
                        /* Yield voluntarily if the lock is still ours. */
                        if (!retry && (need_resched() ||
                                spin_needbreak(&journal->j_list_lock))) {
                                spin_unlock(&journal->j_list_lock);
                                retry = 1;
                                break;
                        }
                }

                /* Submit whatever is still queued; this needs the lock dropped. */
                if (batch_count) {
                        if (!retry) {
                                spin_unlock(&journal->j_list_lock);
                                retry = 1;
                        }
                        __flush_batch(journal, &batch_count);
                }

                if (retry) {
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
                /*
                 * Now we have cleaned up the first transaction's checkpoint
                 * list. Let's clean up the second one
                 */
                err = __wait_cp_io(journal, transaction);
                if (!result)
                        result = err;
        }
out:
        spin_unlock(&journal->j_list_lock);
        if (result < 0)
                jbd2_journal_abort(journal, result);
        else
                result = jbd2_cleanup_journal_tail(journal);

        /* A positive "nothing to clean" result is reported as success. */
        return (result < 0) ? result : 0;
}
 454
/*
 * Check the list of checkpoint transactions for the journal to see if
 * we have already got rid of any since the last update of the log tail
 * in the journal superblock.  If so, we can instantly roll the
 * superblock forward to remove those transactions from the log.
 *
 * Returns 0 when the tail was advanced, and 1 when there was nothing to
 * clean up (the journal is aborted, or the tail already sits at the oldest
 * remaining transaction).  No path in this function returns a negative
 * value, although callers in this file do check for one.
 *
 * Takes j_state_lock (for writing) and j_list_lock itself; the callers in
 * this file drop both before calling.
 *
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
 * even in abort state, but we must not update the super block if
 * checkpointing may have failed.  Otherwise, we would lose some metadata
 * buffers which should be written-back to the filesystem.
 */

int jbd2_cleanup_journal_tail(journal_t *journal)
{
        transaction_t * transaction;
        tid_t           first_tid;
        unsigned long   blocknr, freed;

        if (is_journal_aborted(journal))
                return 1;

        /* OK, work out the oldest transaction remaining in the log, and
         * the log block it starts at.
         *
         * If the log is now empty, we need to work out which is the
         * next transaction ID we will write, and where it will
         * start. */

        write_lock(&journal->j_state_lock);
        spin_lock(&journal->j_list_lock);
        /* Preference order: checkpoint list, then committing, then running. */
        transaction = journal->j_checkpoint_transactions;
        if (transaction) {
                first_tid = transaction->t_tid;
                blocknr = transaction->t_log_start;
        } else if ((transaction = journal->j_committing_transaction) != NULL) {
                first_tid = transaction->t_tid;
                blocknr = transaction->t_log_start;
        } else if ((transaction = journal->j_running_transaction) != NULL) {
                first_tid = transaction->t_tid;
                blocknr = journal->j_head;
        } else {
                first_tid = journal->j_transaction_sequence;
                blocknr = journal->j_head;
        }
        spin_unlock(&journal->j_list_lock);
        J_ASSERT(blocknr != 0);

        /* If the oldest pinned transaction is at the tail of the log
           already then there's not much we can do right now. */
        if (journal->j_tail_sequence == first_tid) {
                write_unlock(&journal->j_state_lock);
                return 1;
        }

        /* OK, update the superblock to recover the freed space.
         * Physical blocks come first: have we wrapped beyond the end of
         * the log?  */
        freed = blocknr - journal->j_tail;
        /* Unsigned wrap above is corrected by adding the log length. */
        if (blocknr < journal->j_tail)
                freed = freed + journal->j_last - journal->j_first;

        trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
        jbd_debug(1,
                  "Cleaning journal tail from %d to %d (offset %lu), "
                  "freeing %lu\n",
                  journal->j_tail_sequence, first_tid, blocknr, freed);

        /* Publish the new tail in memory while j_state_lock is still held. */
        journal->j_free += freed;
        journal->j_tail_sequence = first_tid;
        journal->j_tail = blocknr;
        write_unlock(&journal->j_state_lock);

        /*
         * If there is an external journal, we need to make sure that
         * any data blocks that were recently written out --- perhaps
         * by jbd2_log_do_checkpoint() --- are flushed out before we
         * drop the transactions from the external journal.  It's
         * unlikely this will be necessary, especially with a
         * appropriately sized journal, but we need this to guarantee
         * correctness.  Fortunately jbd2_cleanup_journal_tail()
         * doesn't get called all that often.
         */
        if ((journal->j_fs_dev != journal->j_dev) &&
            (journal->j_flags & JBD2_BARRIER))
                blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
        /* Re-check for abort: the on-disk superblock must not move then. */
        if (!(journal->j_flags & JBD2_ABORT))
                jbd2_journal_update_superblock(journal, 1);
        return 0;
}
 550
 551
 552/* Checkpoint list management */
 553
 554/*
 555 * journal_clean_one_cp_list
 556 *
 557 * Find all the written-back checkpoint buffers in the given list and release them.
 558 *
 559 * Called with the journal locked.
 560 * Called with j_list_lock held.
 561 * Returns number of bufers reaped (for debug)
 562 */
 563
 564static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 565{
 566        struct journal_head *last_jh;
 567        struct journal_head *next_jh = jh;
 568        int ret, freed = 0;
 569
 570        *released = 0;
 571        if (!jh)
 572                return 0;
 573
 574        last_jh = jh->b_cpprev;
 575        do {
 576                jh = next_jh;
 577                next_jh = jh->b_cpnext;
 578                /* Use trylock because of the ranking */
 579                if (jbd_trylock_bh_state(jh2bh(jh))) {
 580                        ret = __try_to_free_cp_buf(jh);
 581                        if (ret) {
 582                                freed++;
 583                                if (ret == 2) {
 584                                        *released = 1;
 585                                        return freed;
 586                                }
 587                        }
 588                }
 589                /*
 590                 * This function only frees up some memory
 591                 * if possible so we dont have an obligation
 592                 * to finish processing. Bail out if preemption
 593                 * requested:
 594                 */
 595                if (need_resched())
 596                        return freed;
 597        } while (jh != last_jh);
 598
 599        return freed;
 600}
 601
 602/*
 603 * journal_clean_checkpoint_list
 604 *
 605 * Find all the written-back checkpoint buffers in the journal and release them.
 606 *
 607 * Called with the journal locked.
 608 * Called with j_list_lock held.
 609 * Returns number of buffers reaped (for debug)
 610 */
 611
 612int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 613{
 614        transaction_t *transaction, *last_transaction, *next_transaction;
 615        int ret = 0;
 616        int released;
 617
 618        transaction = journal->j_checkpoint_transactions;
 619        if (!transaction)
 620                goto out;
 621
 622        last_transaction = transaction->t_cpprev;
 623        next_transaction = transaction;
 624        do {
 625                transaction = next_transaction;
 626                next_transaction = transaction->t_cpnext;
 627                ret += journal_clean_one_cp_list(transaction->
 628                                t_checkpoint_list, &released);
 629                /*
 630                 * This function only frees up some memory if possible so we
 631                 * dont have an obligation to finish processing. Bail out if
 632                 * preemption requested:
 633                 */
 634                if (need_resched())
 635                        goto out;
 636                if (released)
 637                        continue;
 638                /*
 639                 * It is essential that we are as careful as in the case of
 640                 * t_checkpoint_list with removing the buffer from the list as
 641                 * we can possibly see not yet submitted buffers on io_list
 642                 */
 643                ret += journal_clean_one_cp_list(transaction->
 644                                t_checkpoint_io_list, &released);
 645                if (need_resched())
 646                        goto out;
 647        } while (transaction != last_transaction);
 648out:
 649        return ret;
 650}
 651
/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk.  To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise
 * (including when @jh is not on any checkpoint list).
 *
 * This function is called with the journal locked.
 * This function is called with j_list_lock held.
 * This function is called with jbd_lock_bh_state(jh2bh(jh))
 */

int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
        struct transaction_chp_stats_s *stats;
        transaction_t *transaction;
        journal_t *journal;
        int ret = 0;

        JBUFFER_TRACE(jh, "entry");

        if ((transaction = jh->b_cp_transaction) == NULL) {
                JBUFFER_TRACE(jh, "not on transaction");
                goto out;
        }
        journal = transaction->t_journal;

        __buffer_unlink(jh);
        jh->b_cp_transaction = NULL;

        /* Other buffers still pin the transaction: nothing more to do. */
        if (transaction->t_checkpoint_list != NULL ||
            transaction->t_checkpoint_io_list != NULL)
                goto out;
        JBUFFER_TRACE(jh, "transaction has no more buffers");

        /*
         * There is one special case to worry about: if we have just pulled the
         * buffer off a running or committing transaction's checkpoint list,
         * then even if the checkpoint list is empty, the transaction obviously
         * cannot be dropped!
         *
         * The locking here around t_state is a bit sleazy.
         * See the comment at the end of jbd2_journal_commit_transaction().
         */
        if (transaction->t_state != T_FINISHED) {
                JBUFFER_TRACE(jh, "belongs to running/committing transaction");
                goto out;
        }

        /* OK, that was the last buffer for the transaction: we can now
           safely remove this transaction from the log */
        stats = &transaction->t_chp_stats;
        /* Turn the recorded start stamp into an elapsed time, if one was set. */
        if (stats->cs_chp_time)
                stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
                                                    jiffies);
        trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
                                    transaction->t_tid, stats);

        /* 'stats' points into the transaction and is dead after kfree(). */
        __jbd2_journal_drop_transaction(journal, transaction);
        kfree(transaction);

        /* Just in case anybody was waiting for more transactions to be
           checkpointed... */
        wake_up(&journal->j_wait_logspace);
        ret = 1;
out:
        JBUFFER_TRACE(jh, "exit");
        return ret;
}
 729
 730/*
 731 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 732 * list so that we know when it is safe to clean the transaction out of
 733 * the log.
 734 *
 735 * Called with the journal locked.
 736 * Called with j_list_lock held.
 737 */
 738void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 739                               transaction_t *transaction)
 740{
 741        JBUFFER_TRACE(jh, "entry");
 742        J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
 743        J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 744
 745        jh->b_cp_transaction = transaction;
 746
 747        if (!transaction->t_checkpoint_list) {
 748                jh->b_cpnext = jh->b_cpprev = jh;
 749        } else {
 750                jh->b_cpnext = transaction->t_checkpoint_list;
 751                jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
 752                jh->b_cpprev->b_cpnext = jh;
 753                jh->b_cpnext->b_cpprev = jh;
 754        }
 755        transaction->t_checkpoint_list = jh;
 756}
 757
 758/*
 759 * We've finished with this transaction structure: adios...
 760 *
 761 * The transaction must have no links except for the checkpoint by this
 762 * point.
 763 *
 764 * Called with the journal locked.
 765 * Called with j_list_lock held.
 766 */
 767
 768void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 769{
 770        assert_spin_locked(&journal->j_list_lock);
 771        if (transaction->t_cpnext) {
 772                transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
 773                transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
 774                if (journal->j_checkpoint_transactions == transaction)
 775                        journal->j_checkpoint_transactions =
 776                                transaction->t_cpnext;
 777                if (journal->j_checkpoint_transactions == transaction)
 778                        journal->j_checkpoint_transactions = NULL;
 779        }
 780
 781        J_ASSERT(transaction->t_state == T_FINISHED);
 782        J_ASSERT(transaction->t_buffers == NULL);
 783        J_ASSERT(transaction->t_forget == NULL);
 784        J_ASSERT(transaction->t_iobuf_list == NULL);
 785        J_ASSERT(transaction->t_shadow_list == NULL);
 786        J_ASSERT(transaction->t_log_list == NULL);
 787        J_ASSERT(transaction->t_checkpoint_list == NULL);
 788        J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 789        J_ASSERT(atomic_read(&transaction->t_updates) == 0);
 790        J_ASSERT(journal->j_committing_transaction != transaction);
 791        J_ASSERT(journal->j_running_transaction != transaction);
 792
 793        jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
 794}
 795