linux/fs/jfs/jfs_logmgr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *      jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *      log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *      group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *      TBD lazy commit:
  29 * transactions are committed asynchronously when the log page
  30 * containing their COMMIT record is paged out once it becomes full;
  31 *
  32 *      serialization:
  33 * . a per log lock serializes log write.
  34 * . a per log lock serializes group commit.
  35 * . a per log lock serializes log open/close;
  36 *
  37 *      TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
  40 * detection of split (out-of-order) write of physical sectors
  41 * of last logpage via timestamp at end of each sector
  42 * (with its mirror data array at trailer).
  43 *
  44 *      alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>          /* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  70/*
  71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
     *
     * log_redrive_list is the list head pointer; log_redrive_lock is the
     * spinlock that guards it.  Both are private to this file.
  72 */
  73static struct lbuf *log_redrive_list;
  74static DEFINE_SPINLOCK(log_redrive_lock);
  75
  76
  77/*
  78 *      log read/write serialization (per log)
     *
     * log->loglock is a mutex (mutex_init/mutex_lock/mutex_unlock below).
     * lmLog() takes LOG_LOCK() before updating the logsynclist and writing
     * the record, and drops it just before returning.
  79 */
  80#define LOG_LOCK_INIT(log)      mutex_init(&(log)->loglock)
  81#define LOG_LOCK(log)           mutex_lock(&((log)->loglock))
  82#define LOG_UNLOCK(log)         mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *      log group commit serialization (per log)
     *
     * log->gclock is a spinlock.  LOGGC_LOCK()/LOGGC_UNLOCK() use the
     * spin_lock_irq()/spin_unlock_irq() pair, so LOGGC_UNLOCK() re-enables
     * interrupts unconditionally; lmPostGC() takes the same lock with
     * spin_lock_irqsave()/spin_unlock_irqrestore() instead.
     * LOGGC_WAKEUP() wakes every waiter on the tblock's gcwait queue.
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)    spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)         spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)       spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)      wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *      log sync serialization (per log)
     *
     * LOGSYNC_DELTA(logsize):   the smaller of logsize/8 and 128 log pages
     *                           (128*LOGPSIZE).
     * LOGSYNC_BARRIER(logsize): logsize/4.
     * NOTE(review): logsize is presumably in bytes, since it is compared
     * against a multiple of LOGPSIZE - confirm against the callers.
  96 */
  97#define LOGSYNC_DELTA(logsize)          min((logsize)/8, 128*LOGPSIZE)
  98#define LOGSYNC_BARRIER(logsize)        ((logsize)/4)
     /* disabled alternative with both thresholds doubled: */
  99/*
 100#define LOGSYNC_DELTA(logsize)          min((logsize)/4, 256*LOGPSIZE)
 101#define LOGSYNC_BARRIER(logsize)        ((logsize)/2)
 102*/
 103
 104
 105/*
 106 *      log buffer cache synchronization
     *
     * jfsLCacheLock is a single file-wide spinlock (not per log).
     * LCACHE_LOCK()/LCACHE_UNLOCK() save and restore the interrupt state
     * in the caller-supplied 'flags' variable.
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define LCACHE_LOCK(flags)      spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define LCACHE_UNLOCK(flags)    spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
     *
     * LCACHE_SLEEP_COND() does nothing when 'cond' already holds; otherwise
     * it hands 'wq' and 'cond' to __SLEEP_COND() with LCACHE_LOCK(flags) and
     * LCACHE_UNLOCK(flags) as the lock and unlock commands.
     * LCACHE_WAKEUP() is a plain wake_up() on the given wait queue.
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)      \
 117do {                                            \
 118        if (cond)                               \
 119                break;                          \
 120        __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define LCACHE_WAKEUP(event)    wake_up(event)
 124
 125
 126/*
 127 *      lbuf buffer cache (lCache) control
     *
     * Each flag below is a distinct bit, so callers OR them together,
     * e.g. lbmWRITE | lbmRELEASE | lbmFREE in lmNextPage().
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define lbmREAD         0x0001  /* NOTE(review): presumably marks a read request - confirm in lbmRead() */
 131#define lbmWRITE        0x0002  /* enqueue at tail of write queue;
 132                                 * init pageout if at head of queue;
 133                                 */
 134#define lbmRELEASE      0x0004  /* remove from write queue
 135                                 * at completion of pageout;
 136                                 * do not free/recycle it yet:
 137                                 * caller will free it;
 138                                 */
 139#define lbmSYNC         0x0008  /* do not return to freelist
 140                                 * when removed from write queue;
 141                                 */
 142#define lbmFREE         0x0010  /* return to freelist
 143                                 * at completion of pageout;
 144                                 * the buffer may be recycled;
 145                                 */
 146#define lbmDONE         0x0020  /* NOTE(review): presumably set when the i/o has completed - confirm in lbmIODone() */
 147#define lbmERROR        0x0040  /* tested by lmPostGC(): if set in bp->l_flag, committed tblocks get tblkGC_ERROR */
 148#define lbmGC           0x0080  /* lbmIODone to perform post-GC processing
 149                                 * of log page
 150                                 */
 151#define lbmDIRECT       0x0100  /* NOTE(review): presumably used by lbmDirectWrite() - confirm */
 152
 153/*
 154 * Global list of active external journals
     *
     * NOTE(review): the users of these three objects are not in this part
     * of the file.  dummy_log is presumably the shared log returned by
     * open_dummy_log(), and jfs_log_mutex presumably serializes log
     * open/close - confirm against lmLogOpen()/lmLogClose().
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
     *
     * Prototypes for the file-local (static) helpers, so that they can be
     * called before their definitions.  Note that the second parameter of
     * lmGCwrite() is named cant_block here but cant_write in its
     * definition; it is the value passed on to lbmWrite() as cant_block.
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164                         struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168                           int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *      statistics
     *
     * Counters are only compiled in under CONFIG_JFS_STATISTICS.  Code in
     * this file bumps them through INCREMENT(): lmStat.commit in
     * lmWriteRecord(), lmStat.full_page and lmStat.partial_page in
     * lmGCwrite().
 190 */
 191#ifdef CONFIG_JFS_STATISTICS
 192static struct lmStat {
 193        uint commit;            /* # of commit */
 194        uint pagedone;          /* # of page written */
 195        uint submitted;         /* # of pages submitted */
 196        uint full_page;         /* # of full pages submitted */
 197        uint partial_page;      /* # of partial pages submitted */
 198} lmStat;
 199#endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202                                 int (*writer)(struct address_space *))
 203{
 204        struct jfs_sb_info *sbi;
 205
 206        list_for_each_entry(sbi, &log->sb_list, log_list) {
 207                writer(sbi->ipbmap->i_mapping);
 208                writer(sbi->ipimap->i_mapping);
 209                writer(sbi->direct_inode->i_mapping);
 210        }
 211}
 212
 213/*
 214 * NAME:        lmLog()
 215 *
 216 * FUNCTION:    write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:      lsn - offset to the next log record to write (end-of-log);
 221 *              -1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226          struct tlock * tlck)
 227{
 228        int lsn;
 229        int diffp, difft;
 230        struct metapage *mp = NULL;
 231        unsigned long flags;
 232
 233        jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234                 log, tblk, lrd, tlck);
 235
 236        LOG_LOCK(log);
 237
 238        /* log by (out-of-transaction) JFS ? */
 239        if (tblk == NULL)
 240                goto writeRecord;
 241
 242        /* log from page ? */
 243        if (tlck == NULL ||
 244            tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245                goto writeRecord;
 246
 247        /*
 248         *      initialize/update page/transaction recovery lsn
 249         */
 250        lsn = log->lsn;
 251
 252        LOGSYNC_LOCK(log, flags);
 253
 254        /*
 255         * initialize page lsn if first log write of the page
 256         */
 257        if (mp->lsn == 0) {
 258                mp->log = log;
 259                mp->lsn = lsn;
 260                log->count++;
 261
 262                /* insert page at tail of logsynclist */
 263                list_add_tail(&mp->synclist, &log->synclist);
 264        }
 265
 266        /*
 267         *      initialize/update lsn of tblock of the page
 268         *
 269         * transaction inherits oldest lsn of pages associated
 270         * with allocation/deallocation of resources (their
 271         * log records are used to reconstruct allocation map
 272         * at recovery time: inode for inode allocation map,
 273         * B+-tree index of extent descriptors for block
 274         * allocation map);
 275         * allocation map pages inherit transaction lsn at
 276         * commit time to allow forwarding log syncpt past log
 277         * records associated with allocation/deallocation of
 278         * resources only after persistent map of these map pages
 279         * have been updated and propagated to home.
 280         */
 281        /*
 282         * initialize transaction lsn:
 283         */
 284        if (tblk->lsn == 0) {
 285                /* inherit lsn of its first page logged */
 286                tblk->lsn = mp->lsn;
 287                log->count++;
 288
 289                /* insert tblock after the page on logsynclist */
 290                list_add(&tblk->synclist, &mp->synclist);
 291        }
 292        /*
 293         * update transaction lsn:
 294         */
 295        else {
 296                /* inherit oldest/smallest lsn of page */
 297                logdiff(diffp, mp->lsn, log);
 298                logdiff(difft, tblk->lsn, log);
 299                if (diffp < difft) {
 300                        /* update tblock lsn with page lsn */
 301                        tblk->lsn = mp->lsn;
 302
 303                        /* move tblock after page on logsynclist */
 304                        list_move(&tblk->synclist, &mp->synclist);
 305                }
 306        }
 307
 308        LOGSYNC_UNLOCK(log, flags);
 309
 310        /*
 311         *      write the log record
 312         */
 313      writeRecord:
 314        lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316        /*
 317         * forward log syncpt if log reached next syncpt trigger
 318         */
 319        logdiff(diffp, lsn, log);
 320        if (diffp >= log->nextsync)
 321                lsn = lmLogSync(log, 0);
 322
 323        /* update end-of-log lsn */
 324        log->lsn = lsn;
 325
 326        LOG_UNLOCK(log);
 327
 328        /* return end-of-log address */
 329        return lsn;
 330}
 331
 332/*
 333 * NAME:        lmWriteRecord()
 334 *
 335 * FUNCTION:    move the log record to current log page
 336 *
 337 * PARAMETER:   cd      - commit descriptor
 338 *
 339 * RETURN:      end-of-log address
 340 *
 341 * serialization: LOG_LOCK() held on entry/exit
 342 */
 343static int
 344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345              struct tlock * tlck)
 346{
 347        int lsn = 0;            /* end-of-log address */
 348        struct lbuf *bp;        /* dst log page buffer */
 349        struct logpage *lp;     /* dst log page */
 350        caddr_t dst;            /* destination address in log page */
 351        int dstoffset;          /* end-of-log offset in log page */
 352        int freespace;          /* free space in log page */
 353        caddr_t p;              /* src meta-data page */
 354        caddr_t src;
 355        int srclen;
 356        int nbytes;             /* number of bytes to move */
 357        int i;
 358        int len;
 359        struct linelock *linelock;
 360        struct lv *lv;
 361        struct lvd *lvd;
 362        int l2linesize;
 363
 364        len = 0;
 365
 366        /* retrieve destination log page to write */
 367        bp = (struct lbuf *) log->bp;
 368        lp = (struct logpage *) bp->l_ldata;
 369        dstoffset = log->eor;
 370
 371        /* any log data to write ? */
 372        if (tlck == NULL)
 373                goto moveLrd;
 374
 375        /*
 376         *      move log record data
 377         */
 378        /* retrieve source meta-data page to log */
 379        if (tlck->flag & tlckPAGELOCK) {
 380                p = (caddr_t) (tlck->mp->data);
 381                linelock = (struct linelock *) & tlck->lock;
 382        }
 383        /* retrieve source in-memory inode to log */
 384        else if (tlck->flag & tlckINODELOCK) {
 385                if (tlck->type & tlckDTREE)
 386                        p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387                else
 388                        p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389                linelock = (struct linelock *) & tlck->lock;
 390        }
 391#ifdef  _JFS_WIP
 392        else if (tlck->flag & tlckINLINELOCK) {
 393
 394                inlinelock = (struct inlinelock *) & tlck;
 395                p = (caddr_t) & inlinelock->pxd;
 396                linelock = (struct linelock *) & tlck;
 397        }
 398#endif                          /* _JFS_WIP */
 399        else {
 400                jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 401                return 0;       /* Probably should trap */
 402        }
 403        l2linesize = linelock->l2linesize;
 404
 405      moveData:
 406        ASSERT(linelock->index <= linelock->maxcnt);
 407
 408        lv = linelock->lv;
 409        for (i = 0; i < linelock->index; i++, lv++) {
 410                if (lv->length == 0)
 411                        continue;
 412
 413                /* is page full ? */
 414                if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 415                        /* page become full: move on to next page */
 416                        lmNextPage(log);
 417
 418                        bp = log->bp;
 419                        lp = (struct logpage *) bp->l_ldata;
 420                        dstoffset = LOGPHDRSIZE;
 421                }
 422
 423                /*
 424                 * move log vector data
 425                 */
 426                src = (u8 *) p + (lv->offset << l2linesize);
 427                srclen = lv->length << l2linesize;
 428                len += srclen;
 429                while (srclen > 0) {
 430                        freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 431                        nbytes = min(freespace, srclen);
 432                        dst = (caddr_t) lp + dstoffset;
 433                        memcpy(dst, src, nbytes);
 434                        dstoffset += nbytes;
 435
 436                        /* is page not full ? */
 437                        if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 438                                break;
 439
 440                        /* page become full: move on to next page */
 441                        lmNextPage(log);
 442
 443                        bp = (struct lbuf *) log->bp;
 444                        lp = (struct logpage *) bp->l_ldata;
 445                        dstoffset = LOGPHDRSIZE;
 446
 447                        srclen -= nbytes;
 448                        src += nbytes;
 449                }
 450
 451                /*
 452                 * move log vector descriptor
 453                 */
 454                len += 4;
 455                lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 456                lvd->offset = cpu_to_le16(lv->offset);
 457                lvd->length = cpu_to_le16(lv->length);
 458                dstoffset += 4;
 459                jfs_info("lmWriteRecord: lv offset:%d length:%d",
 460                         lv->offset, lv->length);
 461        }
 462
 463        if ((i = linelock->next)) {
 464                linelock = (struct linelock *) lid_to_tlock(i);
 465                goto moveData;
 466        }
 467
 468        /*
 469         *      move log record descriptor
 470         */
 471      moveLrd:
 472        lrd->length = cpu_to_le16(len);
 473
 474        src = (caddr_t) lrd;
 475        srclen = LOGRDSIZE;
 476
 477        while (srclen > 0) {
 478                freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 479                nbytes = min(freespace, srclen);
 480                dst = (caddr_t) lp + dstoffset;
 481                memcpy(dst, src, nbytes);
 482
 483                dstoffset += nbytes;
 484                srclen -= nbytes;
 485
 486                /* are there more to move than freespace of page ? */
 487                if (srclen)
 488                        goto pageFull;
 489
 490                /*
 491                 * end of log record descriptor
 492                 */
 493
 494                /* update last log record eor */
 495                log->eor = dstoffset;
 496                bp->l_eor = dstoffset;
 497                lsn = (log->page << L2LOGPSIZE) + dstoffset;
 498
 499                if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 500                        tblk->clsn = lsn;
 501                        jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 502                                 bp->l_eor);
 503
 504                        INCREMENT(lmStat.commit);       /* # of commit */
 505
 506                        /*
 507                         * enqueue tblock for group commit:
 508                         *
 509                         * enqueue tblock of non-trivial/synchronous COMMIT
 510                         * at tail of group commit queue
 511                         * (trivial/asynchronous COMMITs are ignored by
 512                         * group commit.)
 513                         */
 514                        LOGGC_LOCK(log);
 515
 516                        /* init tblock gc state */
 517                        tblk->flag = tblkGC_QUEUE;
 518                        tblk->bp = log->bp;
 519                        tblk->pn = log->page;
 520                        tblk->eor = log->eor;
 521
 522                        /* enqueue transaction to commit queue */
 523                        list_add_tail(&tblk->cqueue, &log->cqueue);
 524
 525                        LOGGC_UNLOCK(log);
 526                }
 527
 528                jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 529                        le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 530
 531                /* page not full ? */
 532                if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 533                        return lsn;
 534
 535              pageFull:
 536                /* page become full: move on to next page */
 537                lmNextPage(log);
 538
 539                bp = (struct lbuf *) log->bp;
 540                lp = (struct logpage *) bp->l_ldata;
 541                dstoffset = LOGPHDRSIZE;
 542                src += nbytes;
 543        }
 544
 545        return lsn;
 546}
 547
 548
 549/*
 550 * NAME:        lmNextPage()
 551 *
 552 * FUNCTION:    write current page and allocate next page.
 553 *
 554 * PARAMETER:   log
 555 *
 556 * RETURN:      0
 557 *
 558 * serialization: LOG_LOCK() held on entry/exit
 559 */
 560static int lmNextPage(struct jfs_log * log)
 561{
 562        struct logpage *lp;
 563        int lspn;               /* log sequence page number */
 564        int pn;                 /* current page number */
 565        struct lbuf *bp;
 566        struct lbuf *nextbp;
 567        struct tblock *tblk;
 568
 569        /* get current log page number and log sequence page number */
 570        pn = log->page;
 571        bp = log->bp;
 572        lp = (struct logpage *) bp->l_ldata;
 573        lspn = le32_to_cpu(lp->h.page);
 574
 575        LOGGC_LOCK(log);
 576
 577        /*
 578         *      write or queue the full page at the tail of write queue
 579         */
 580        /* get the tail tblk on commit queue */
 581        if (list_empty(&log->cqueue))
 582                tblk = NULL;
 583        else
 584                tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 585
 586        /* every tblk who has COMMIT record on the current page,
 587         * and has not been committed, must be on commit queue
 588         * since tblk is queued at commit queueu at the time
 589         * of writing its COMMIT record on the page before
 590         * page becomes full (even though the tblk thread
 591         * who wrote COMMIT record may have been suspended
 592         * currently);
 593         */
 594
 595        /* is page bound with outstanding tail tblk ? */
 596        if (tblk && tblk->pn == pn) {
 597                /* mark tblk for end-of-page */
 598                tblk->flag |= tblkGC_EOP;
 599
 600                if (log->cflag & logGC_PAGEOUT) {
 601                        /* if page is not already on write queue,
 602                         * just enqueue (no lbmWRITE to prevent redrive)
 603                         * buffer to wqueue to ensure correct serial order
 604                         * of the pages since log pages will be added
 605                         * continuously
 606                         */
 607                        if (bp->l_wqnext == NULL)
 608                                lbmWrite(log, bp, 0, 0);
 609                } else {
 610                        /*
 611                         * No current GC leader, initiate group commit
 612                         */
 613                        log->cflag |= logGC_PAGEOUT;
 614                        lmGCwrite(log, 0);
 615                }
 616        }
 617        /* page is not bound with outstanding tblk:
 618         * init write or mark it to be redriven (lbmWRITE)
 619         */
 620        else {
 621                /* finalize the page */
 622                bp->l_ceor = bp->l_eor;
 623                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 624                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 625        }
 626        LOGGC_UNLOCK(log);
 627
 628        /*
 629         *      allocate/initialize next page
 630         */
 631        /* if log wraps, the first data page of log is 2
 632         * (0 never used, 1 is superblock).
 633         */
 634        log->page = (pn == log->size - 1) ? 2 : pn + 1;
 635        log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
 636
 637        /* allocate/initialize next log page buffer */
 638        nextbp = lbmAllocate(log, log->page);
 639        nextbp->l_eor = log->eor;
 640        log->bp = nextbp;
 641
 642        /* initialize next log page */
 643        lp = (struct logpage *) nextbp->l_ldata;
 644        lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 645        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 646
 647        return 0;
 648}
 649
 650
 651/*
 652 * NAME:        lmGroupCommit()
 653 *
 654 * FUNCTION:    group commit
 655 *      initiate pageout of the pages with COMMIT in the order of
 656 *      page number - redrive pageout of the page at the head of
 657 *      pageout queue until full page has been written.
 658 *
 659 * RETURN:
 660 *
 661 * NOTE:
 662 *      LOGGC_LOCK serializes log group commit queue, and
 663 *      transaction blocks on the commit queue.
 664 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 665 */
 666int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 667{
 668        int rc = 0;
 669
 670        LOGGC_LOCK(log);
 671
 672        /* group committed already ? */
 673        if (tblk->flag & tblkGC_COMMITTED) {
 674                if (tblk->flag & tblkGC_ERROR)
 675                        rc = -EIO;
 676
 677                LOGGC_UNLOCK(log);
 678                return rc;
 679        }
 680        jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 681
 682        if (tblk->xflag & COMMIT_LAZY)
 683                tblk->flag |= tblkGC_LAZY;
 684
 685        if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 686            (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 687             || jfs_tlocks_low)) {
 688                /*
 689                 * No pageout in progress
 690                 *
 691                 * start group commit as its group leader.
 692                 */
 693                log->cflag |= logGC_PAGEOUT;
 694
 695                lmGCwrite(log, 0);
 696        }
 697
 698        if (tblk->xflag & COMMIT_LAZY) {
 699                /*
 700                 * Lazy transactions can leave now
 701                 */
 702                LOGGC_UNLOCK(log);
 703                return 0;
 704        }
 705
 706        /* lmGCwrite gives up LOGGC_LOCK, check again */
 707
 708        if (tblk->flag & tblkGC_COMMITTED) {
 709                if (tblk->flag & tblkGC_ERROR)
 710                        rc = -EIO;
 711
 712                LOGGC_UNLOCK(log);
 713                return rc;
 714        }
 715
 716        /* upcount transaction waiting for completion
 717         */
 718        log->gcrtc++;
 719        tblk->flag |= tblkGC_READY;
 720
 721        __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 722                     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 723
 724        /* removed from commit queue */
 725        if (tblk->flag & tblkGC_ERROR)
 726                rc = -EIO;
 727
 728        LOGGC_UNLOCK(log);
 729        return rc;
 730}
 731
 732/*
 733 * NAME:        lmGCwrite()
 734 *
 735 * FUNCTION:    group commit write
 736 *      initiate write of log page, building a group of all transactions
 737 *      with commit records on that page.
 738 *
 739 * RETURN:      None
 740 *
 741 * NOTE:
 742 *      LOGGC_LOCK must be held by caller.
 743 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 744 */
 745static void lmGCwrite(struct jfs_log * log, int cant_write)
 746{
 747        struct lbuf *bp;
 748        struct logpage *lp;
 749        int gcpn;               /* group commit page number */
 750        struct tblock *tblk;
 751        struct tblock *xtblk = NULL;
 752
 753        /*
 754         * build the commit group of a log page
 755         *
 756         * scan commit queue and make a commit group of all
 757         * transactions with COMMIT records on the same log page.
 758         */
 759        /* get the head tblk on the commit queue */
 760        gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 761
 762        list_for_each_entry(tblk, &log->cqueue, cqueue) {
 763                if (tblk->pn != gcpn)
 764                        break;
 765
 766                xtblk = tblk;
 767
 768                /* state transition: (QUEUE, READY) -> COMMIT */
 769                tblk->flag |= tblkGC_COMMIT;
 770        }
 771        tblk = xtblk;           /* last tblk of the page */
 772
 773        /*
 774         * pageout to commit transactions on the log page.
 775         */
 776        bp = (struct lbuf *) tblk->bp;
 777        lp = (struct logpage *) bp->l_ldata;
 778        /* is page already full ? */
 779        if (tblk->flag & tblkGC_EOP) {
 780                /* mark page to free at end of group commit of the page */
 781                tblk->flag &= ~tblkGC_EOP;
 782                tblk->flag |= tblkGC_FREE;
 783                bp->l_ceor = bp->l_eor;
 784                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 786                         cant_write);
 787                INCREMENT(lmStat.full_page);
 788        }
 789        /* page is not yet full */
 790        else {
 791                bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
 792                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 793                lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 794                INCREMENT(lmStat.partial_page);
 795        }
 796}
 797
 798/*
 799 * NAME:        lmPostGC()
 800 *
 801 * FUNCTION:    group commit post-processing
 802 *      Processes transactions after their commit records have been written
 803 *      to disk, redriving log I/O if necessary.
 804 *
 805 * RETURN:      None
 806 *
 807 * NOTE:
 808 *      This routine is called a interrupt time by lbmIODone
 809 */
 810static void lmPostGC(struct lbuf * bp)
 811{
 812        unsigned long flags;
 813        struct jfs_log *log = bp->l_log;
 814        struct logpage *lp;
 815        struct tblock *tblk, *temp;
 816
 817        //LOGGC_LOCK(log);
 818        spin_lock_irqsave(&log->gclock, flags);
 819        /*
 820         * current pageout of group commit completed.
 821         *
 822         * remove/wakeup transactions from commit queue who were
 823         * group committed with the current log page
 824         */
 825        list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 826                if (!(tblk->flag & tblkGC_COMMIT))
 827                        break;
 828                /* if transaction was marked GC_COMMIT then
 829                 * it has been shipped in the current pageout
 830                 * and made it to disk - it is committed.
 831                 */
 832
 833                if (bp->l_flag & lbmERROR)
 834                        tblk->flag |= tblkGC_ERROR;
 835
 836                /* remove it from the commit queue */
 837                list_del(&tblk->cqueue);
 838                tblk->flag &= ~tblkGC_QUEUE;
 839
 840                if (tblk == log->flush_tblk) {
 841                        /* we can stop flushing the log now */
 842                        clear_bit(log_FLUSH, &log->flag);
 843                        log->flush_tblk = NULL;
 844                }
 845
 846                jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 847                         tblk->flag);
 848
 849                if (!(tblk->xflag & COMMIT_FORCE))
 850                        /*
 851                         * Hand tblk over to lazy commit thread
 852                         */
 853                        txLazyUnlock(tblk);
 854                else {
 855                        /* state transition: COMMIT -> COMMITTED */
 856                        tblk->flag |= tblkGC_COMMITTED;
 857
 858                        if (tblk->flag & tblkGC_READY)
 859                                log->gcrtc--;
 860
 861                        LOGGC_WAKEUP(tblk);
 862                }
 863
 864                /* was page full before pageout ?
 865                 * (and this is the last tblk bound with the page)
 866                 */
 867                if (tblk->flag & tblkGC_FREE)
 868                        lbmFree(bp);
 869                /* did page become full after pageout ?
 870                 * (and this is the last tblk bound with the page)
 871                 */
 872                else if (tblk->flag & tblkGC_EOP) {
 873                        /* finalize the page */
 874                        lp = (struct logpage *) bp->l_ldata;
 875                        bp->l_ceor = bp->l_eor;
 876                        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 877                        jfs_info("lmPostGC: calling lbmWrite");
 878                        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 879                                 1);
 880                }
 881
 882        }
 883
 884        /* are there any transactions who have entered lnGroupCommit()
 885         * (whose COMMITs are after that of the last log page written.
 886         * They are waiting for new group commit (above at (SLEEP 1))
 887         * or lazy transactions are on a full (queued) log page,
 888         * select the latest ready transaction as new group leader and
 889         * wake her up to lead her group.
 890         */
 891        if ((!list_empty(&log->cqueue)) &&
 892            ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 893             test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 894                /*
 895                 * Call lmGCwrite with new group leader
 896                 */
 897                lmGCwrite(log, 1);
 898
 899        /* no transaction are ready yet (transactions are only just
 900         * queued (GC_QUEUE) and not entered for group commit yet).
 901         * the first transaction entering group commit
 902         * will elect herself as new group leader.
 903         */
 904        else
 905                log->cflag &= ~logGC_PAGEOUT;
 906
 907        //LOGGC_UNLOCK(log);
 908        spin_unlock_irqrestore(&log->gclock, flags);
 909        return;
 910}
 911
 912/*
 913 * NAME:        lmLogSync()
 914 *
 915 * FUNCTION:    write log SYNCPT record for specified log
 916 *      if new sync address is available
 917 *      (normally the case if sync() is executed by back-ground
 918 *      process).
 919 *      calculate new value of i_nextsync which determines when
 920 *      this code is called again.
 921 *
 922 * PARAMETERS:  log     - log structure
 923 *              hard_sync - 1 to force all metadata to be written
 924 *
 925 * RETURN:      0
 926 *
 927 * serialization: LOG_LOCK() held on entry/exit
 928 */
 929static int lmLogSync(struct jfs_log * log, int hard_sync)
 930{
 931        int logsize;
 932        int written;            /* written since last syncpt */
 933        int free;               /* free space left available */
 934        int delta;              /* additional delta to write normally */
 935        int more;               /* additional write granted */
 936        struct lrd lrd;
 937        int lsn;
 938        struct logsyncblk *lp;
 939        unsigned long flags;
 940
 941        /* push dirty metapages out to disk */
 942        if (hard_sync)
 943                write_special_inodes(log, filemap_fdatawrite);
 944        else
 945                write_special_inodes(log, filemap_flush);
 946
 947        /*
 948         *      forward syncpt
 949         */
 950        /* if last sync is same as last syncpt,
 951         * invoke sync point forward processing to update sync.
 952         */
 953
 954        if (log->sync == log->syncpt) {
 955                LOGSYNC_LOCK(log, flags);
 956                if (list_empty(&log->synclist))
 957                        log->sync = log->lsn;
 958                else {
 959                        lp = list_entry(log->synclist.next,
 960                                        struct logsyncblk, synclist);
 961                        log->sync = lp->lsn;
 962                }
 963                LOGSYNC_UNLOCK(log, flags);
 964
 965        }
 966
 967        /* if sync is different from last syncpt,
 968         * write a SYNCPT record with syncpt = sync.
 969         * reset syncpt = sync
 970         */
 971        if (log->sync != log->syncpt) {
 972                lrd.logtid = 0;
 973                lrd.backchain = 0;
 974                lrd.type = cpu_to_le16(LOG_SYNCPT);
 975                lrd.length = 0;
 976                lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 977                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 978
 979                log->syncpt = log->sync;
 980        } else
 981                lsn = log->lsn;
 982
 983        /*
 984         *      setup next syncpt trigger (SWAG)
 985         */
 986        logsize = log->logsize;
 987
 988        logdiff(written, lsn, log);
 989        free = logsize - written;
 990        delta = LOGSYNC_DELTA(logsize);
 991        more = min(free / 2, delta);
 992        if (more < 2 * LOGPSIZE) {
 993                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 994                /*
 995                 *      log wrapping
 996                 *
 997                 * option 1 - panic ? No.!
 998                 * option 2 - shutdown file systems
 999                 *            associated with log ?
1000                 * option 3 - extend log ?
1001                 * option 4 - second chance
1002                 *
1003                 * mark log wrapped, and continue.
1004                 * when all active transactions are completed,
1005                 * mark log valid for recovery.
1006                 * if crashed during invalid state, log state
1007                 * implies invalid log, forcing fsck().
1008                 */
1009                /* mark log state log wrap in log superblock */
1010                /* log->state = LOGWRAP; */
1011
1012                /* reset sync point computation */
1013                log->syncpt = log->sync = lsn;
1014                log->nextsync = delta;
1015        } else
1016                /* next syncpt trigger = written + more */
1017                log->nextsync = written + more;
1018
1019        /* if number of bytes written from last sync point is more
1020         * than 1/4 of the log size, stop new transactions from
1021         * starting until all current transactions are completed
1022         * by setting syncbarrier flag.
1023         */
1024        if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1025            (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1026                set_bit(log_SYNCBARRIER, &log->flag);
1027                jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1028                         log->syncpt);
1029                /*
1030                 * We may have to initiate group commit
1031                 */
1032                jfs_flush_journal(log, 0);
1033        }
1034
1035        return lsn;
1036}
1037
1038/*
1039 * NAME:        jfs_syncpt
1040 *
1041 * FUNCTION:    write log SYNCPT record for specified log
1042 *
1043 * PARAMETERS:  log       - log structure
1044 *              hard_sync - set to 1 to force metadata to be written
1045 */
1046void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047{       LOG_LOCK(log);
1048        if (!test_bit(log_QUIESCE, &log->flag))
1049                lmLogSync(log, hard_sync);
1050        LOG_UNLOCK(log);
1051}
1052
1053/*
1054 * NAME:        lmLogOpen()
1055 *
1056 * FUNCTION:    open the log on first open;
1057 *      insert filesystem in the active list of the log.
1058 *
1059 * PARAMETER:   ipmnt   - file system mount inode
1060 *              iplog   - log inode (out)
1061 *
1062 * RETURN:
1063 *
1064 * serialization:
1065 */
1066int lmLogOpen(struct super_block *sb)
1067{
1068        int rc;
1069        struct block_device *bdev;
1070        struct jfs_log *log;
1071        struct jfs_sb_info *sbi = JFS_SBI(sb);
1072
1073        if (sbi->flag & JFS_NOINTEGRITY)
1074                return open_dummy_log(sb);
1075
1076        if (sbi->mntflag & JFS_INLINELOG)
1077                return open_inline_log(sb);
1078
1079        mutex_lock(&jfs_log_mutex);
1080        list_for_each_entry(log, &jfs_external_logs, journal_list) {
1081                if (log->bdev->bd_dev == sbi->logdev) {
1082                        if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1083                                jfs_warn("wrong uuid on JFS journal");
1084                                mutex_unlock(&jfs_log_mutex);
1085                                return -EINVAL;
1086                        }
1087                        /*
1088                         * add file system to log active file system list
1089                         */
1090                        if ((rc = lmLogFileSystem(log, sbi, 1))) {
1091                                mutex_unlock(&jfs_log_mutex);
1092                                return rc;
1093                        }
1094                        goto journal_found;
1095                }
1096        }
1097
1098        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1099                mutex_unlock(&jfs_log_mutex);
1100                return -ENOMEM;
1101        }
1102        INIT_LIST_HEAD(&log->sb_list);
1103        init_waitqueue_head(&log->syncwait);
1104
1105        /*
1106         *      external log as separate logical volume
1107         *
1108         * file systems to log may have n-to-1 relationship;
1109         */
1110
1111        bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1112                                 log);
1113        if (IS_ERR(bdev)) {
1114                rc = PTR_ERR(bdev);
1115                goto free;
1116        }
1117
1118        log->bdev = bdev;
1119        uuid_copy(&log->uuid, &sbi->loguuid);
1120
1121        /*
1122         * initialize log:
1123         */
1124        if ((rc = lmLogInit(log)))
1125                goto close;
1126
1127        list_add(&log->journal_list, &jfs_external_logs);
1128
1129        /*
1130         * add file system to log active file system list
1131         */
1132        if ((rc = lmLogFileSystem(log, sbi, 1)))
1133                goto shutdown;
1134
1135journal_found:
1136        LOG_LOCK(log);
1137        list_add(&sbi->log_list, &log->sb_list);
1138        sbi->log = log;
1139        LOG_UNLOCK(log);
1140
1141        mutex_unlock(&jfs_log_mutex);
1142        return 0;
1143
1144        /*
1145         *      unwind on error
1146         */
1147      shutdown:         /* unwind lbmLogInit() */
1148        list_del(&log->journal_list);
1149        lbmLogShutdown(log);
1150
1151      close:            /* close external log device */
1152        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1153
1154      free:             /* free log descriptor */
1155        mutex_unlock(&jfs_log_mutex);
1156        kfree(log);
1157
1158        jfs_warn("lmLogOpen: exit(%d)", rc);
1159        return rc;
1160}
1161
1162static int open_inline_log(struct super_block *sb)
1163{
1164        struct jfs_log *log;
1165        int rc;
1166
1167        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168                return -ENOMEM;
1169        INIT_LIST_HEAD(&log->sb_list);
1170        init_waitqueue_head(&log->syncwait);
1171
1172        set_bit(log_INLINELOG, &log->flag);
1173        log->bdev = sb->s_bdev;
1174        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176            (L2LOGPSIZE - sb->s_blocksize_bits);
1177        log->l2bsize = sb->s_blocksize_bits;
1178        ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179
1180        /*
1181         * initialize log.
1182         */
1183        if ((rc = lmLogInit(log))) {
1184                kfree(log);
1185                jfs_warn("lmLogOpen: exit(%d)", rc);
1186                return rc;
1187        }
1188
1189        list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190        JFS_SBI(sb)->log = log;
1191
1192        return rc;
1193}
1194
1195static int open_dummy_log(struct super_block *sb)
1196{
1197        int rc;
1198
1199        mutex_lock(&jfs_log_mutex);
1200        if (!dummy_log) {
1201                dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202                if (!dummy_log) {
1203                        mutex_unlock(&jfs_log_mutex);
1204                        return -ENOMEM;
1205                }
1206                INIT_LIST_HEAD(&dummy_log->sb_list);
1207                init_waitqueue_head(&dummy_log->syncwait);
1208                dummy_log->no_integrity = 1;
1209                /* Make up some stuff */
1210                dummy_log->base = 0;
1211                dummy_log->size = 1024;
1212                rc = lmLogInit(dummy_log);
1213                if (rc) {
1214                        kfree(dummy_log);
1215                        dummy_log = NULL;
1216                        mutex_unlock(&jfs_log_mutex);
1217                        return rc;
1218                }
1219        }
1220
1221        LOG_LOCK(dummy_log);
1222        list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223        JFS_SBI(sb)->log = dummy_log;
1224        LOG_UNLOCK(dummy_log);
1225        mutex_unlock(&jfs_log_mutex);
1226
1227        return 0;
1228}
1229
1230/*
1231 * NAME:        lmLogInit()
1232 *
1233 * FUNCTION:    log initialization at first log open.
1234 *
1235 *      logredo() (or logformat()) should have been run previously.
1236 *      initialize the log from log superblock.
1237 *      set the log state in the superblock to LOGMOUNT and
1238 *      write SYNCPT log record.
1239 *
1240 * PARAMETER:   log     - log structure
1241 *
1242 * RETURN:      0       - if ok
1243 *              -EINVAL - bad log magic number or superblock dirty
1244 *              error returned from logwait()
1245 *
1246 * serialization: single first open thread
1247 */
1248int lmLogInit(struct jfs_log * log)
1249{
1250        int rc = 0;
1251        struct lrd lrd;
1252        struct logsuper *logsuper;
1253        struct lbuf *bpsuper;
1254        struct lbuf *bp;
1255        struct logpage *lp;
1256        int lsn = 0;
1257
1258        jfs_info("lmLogInit: log:0x%p", log);
1259
1260        /* initialize the group commit serialization lock */
1261        LOGGC_LOCK_INIT(log);
1262
1263        /* allocate/initialize the log write serialization lock */
1264        LOG_LOCK_INIT(log);
1265
1266        LOGSYNC_LOCK_INIT(log);
1267
1268        INIT_LIST_HEAD(&log->synclist);
1269
1270        INIT_LIST_HEAD(&log->cqueue);
1271        log->flush_tblk = NULL;
1272
1273        log->count = 0;
1274
1275        /*
1276         * initialize log i/o
1277         */
1278        if ((rc = lbmLogInit(log)))
1279                return rc;
1280
1281        if (!test_bit(log_INLINELOG, &log->flag))
1282                log->l2bsize = L2LOGPSIZE;
1283
1284        /* check for disabled journaling to disk */
1285        if (log->no_integrity) {
1286                /*
1287                 * Journal pages will still be filled.  When the time comes
1288                 * to actually do the I/O, the write is not done, and the
1289                 * endio routine is called directly.
1290                 */
1291                bp = lbmAllocate(log , 0);
1292                log->bp = bp;
1293                bp->l_pn = bp->l_eor = 0;
1294        } else {
1295                /*
1296                 * validate log superblock
1297                 */
1298                if ((rc = lbmRead(log, 1, &bpsuper)))
1299                        goto errout10;
1300
1301                logsuper = (struct logsuper *) bpsuper->l_ldata;
1302
1303                if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304                        jfs_warn("*** Log Format Error ! ***");
1305                        rc = -EINVAL;
1306                        goto errout20;
1307                }
1308
1309                /* logredo() should have been run successfully. */
1310                if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311                        jfs_warn("*** Log Is Dirty ! ***");
1312                        rc = -EINVAL;
1313                        goto errout20;
1314                }
1315
1316                /* initialize log from log superblock */
1317                if (test_bit(log_INLINELOG,&log->flag)) {
1318                        if (log->size != le32_to_cpu(logsuper->size)) {
1319                                rc = -EINVAL;
1320                                goto errout20;
1321                        }
1322                        jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323                                 log, (unsigned long long)log->base, log->size);
1324                } else {
1325                        if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326                                jfs_warn("wrong uuid on JFS log device");
1327                                rc = -EINVAL;
1328                                goto errout20;
1329                        }
1330                        log->size = le32_to_cpu(logsuper->size);
1331                        log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1332                        jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1333                                 log, (unsigned long long)log->base, log->size);
1334                }
1335
1336                log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1337                log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1338
1339                /*
1340                 * initialize for log append write mode
1341                 */
1342                /* establish current/end-of-log page/buffer */
1343                if ((rc = lbmRead(log, log->page, &bp)))
1344                        goto errout20;
1345
1346                lp = (struct logpage *) bp->l_ldata;
1347
1348                jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1349                         le32_to_cpu(logsuper->end), log->page, log->eor,
1350                         le16_to_cpu(lp->h.eor));
1351
1352                log->bp = bp;
1353                bp->l_pn = log->page;
1354                bp->l_eor = log->eor;
1355
1356                /* if current page is full, move on to next page */
1357                if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1358                        lmNextPage(log);
1359
1360                /*
1361                 * initialize log syncpoint
1362                 */
1363                /*
1364                 * write the first SYNCPT record with syncpoint = 0
1365                 * (i.e., log redo up to HERE !);
1366                 * remove current page from lbm write queue at end of pageout
1367                 * (to write log superblock update), but do not release to
1368                 * freelist;
1369                 */
1370                lrd.logtid = 0;
1371                lrd.backchain = 0;
1372                lrd.type = cpu_to_le16(LOG_SYNCPT);
1373                lrd.length = 0;
1374                lrd.log.syncpt.sync = 0;
1375                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1376                bp = log->bp;
1377                bp->l_ceor = bp->l_eor;
1378                lp = (struct logpage *) bp->l_ldata;
1379                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1380                lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1381                if ((rc = lbmIOWait(bp, 0)))
1382                        goto errout30;
1383
1384                /*
1385                 * update/write superblock
1386                 */
1387                logsuper->state = cpu_to_le32(LOGMOUNT);
1388                log->serial = le32_to_cpu(logsuper->serial) + 1;
1389                logsuper->serial = cpu_to_le32(log->serial);
1390                lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1391                if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1392                        goto errout30;
1393        }
1394
1395        /* initialize logsync parameters */
1396        log->logsize = (log->size - 2) << L2LOGPSIZE;
1397        log->lsn = lsn;
1398        log->syncpt = lsn;
1399        log->sync = log->syncpt;
1400        log->nextsync = LOGSYNC_DELTA(log->logsize);
1401
1402        jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1403                 log->lsn, log->syncpt, log->sync);
1404
1405        /*
1406         * initialize for lazy/group commit
1407         */
1408        log->clsn = lsn;
1409
1410        return 0;
1411
1412        /*
1413         *      unwind on error
1414         */
1415      errout30:         /* release log page */
1416        log->wqueue = NULL;
1417        bp->l_wqnext = NULL;
1418        lbmFree(bp);
1419
1420      errout20:         /* release log superblock */
1421        lbmFree(bpsuper);
1422
1423      errout10:         /* unwind lbmLogInit() */
1424        lbmLogShutdown(log);
1425
1426        jfs_warn("lmLogInit: exit(%d)", rc);
1427        return rc;
1428}
1429
1430
1431/*
1432 * NAME:        lmLogClose()
1433 *
1434 * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1435 *              and close it on last close.
1436 *
1437 * PARAMETER:   sb      - superblock
1438 *
1439 * RETURN:      errors from subroutines
1440 *
1441 * serialization:
1442 */
1443int lmLogClose(struct super_block *sb)
1444{
1445        struct jfs_sb_info *sbi = JFS_SBI(sb);
1446        struct jfs_log *log = sbi->log;
1447        struct block_device *bdev;
1448        int rc = 0;
1449
1450        jfs_info("lmLogClose: log:0x%p", log);
1451
1452        mutex_lock(&jfs_log_mutex);
1453        LOG_LOCK(log);
1454        list_del(&sbi->log_list);
1455        LOG_UNLOCK(log);
1456        sbi->log = NULL;
1457
1458        /*
1459         * We need to make sure all of the "written" metapages
1460         * actually make it to disk
1461         */
1462        sync_blockdev(sb->s_bdev);
1463
1464        if (test_bit(log_INLINELOG, &log->flag)) {
1465                /*
1466                 *      in-line log in host file system
1467                 */
1468                rc = lmLogShutdown(log);
1469                kfree(log);
1470                goto out;
1471        }
1472
1473        if (!log->no_integrity)
1474                lmLogFileSystem(log, sbi, 0);
1475
1476        if (!list_empty(&log->sb_list))
1477                goto out;
1478
1479        /*
1480         * TODO: ensure that the dummy_log is in a state to allow
1481         * lbmLogShutdown to deallocate all the buffers and call
1482         * kfree against dummy_log.  For now, leave dummy_log & its
1483         * buffers in memory, and resuse if another no-integrity mount
1484         * is requested.
1485         */
1486        if (log->no_integrity)
1487                goto out;
1488
1489        /*
1490         *      external log as separate logical volume
1491         */
1492        list_del(&log->journal_list);
1493        bdev = log->bdev;
1494        rc = lmLogShutdown(log);
1495
1496        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1497
1498        kfree(log);
1499
1500      out:
1501        mutex_unlock(&jfs_log_mutex);
1502        jfs_info("lmLogClose: exit(%d)", rc);
1503        return rc;
1504}
1505
1506
1507/*
1508 * NAME:        jfs_flush_journal()
1509 *
1510 * FUNCTION:    initiate write of any outstanding transactions to the journal
1511 *              and optionally wait until they are all written to disk
1512 *
1513 *              wait == 0  flush until latest txn is committed, don't wait
1514 *              wait == 1  flush until latest txn is committed, wait
1515 *              wait > 1   flush until all txn's are complete, wait
1516 */
1517void jfs_flush_journal(struct jfs_log *log, int wait)
1518{
1519        int i;
1520        struct tblock *target = NULL;
1521
1522        /* jfs_write_inode may call us during read-only mount */
1523        if (!log)
1524                return;
1525
1526        jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1527
1528        LOGGC_LOCK(log);
1529
1530        if (!list_empty(&log->cqueue)) {
1531                /*
1532                 * This ensures that we will keep writing to the journal as long
1533                 * as there are unwritten commit records
1534                 */
1535                target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1536
1537                if (test_bit(log_FLUSH, &log->flag)) {
1538                        /*
1539                         * We're already flushing.
1540                         * if flush_tblk is NULL, we are flushing everything,
1541                         * so leave it that way.  Otherwise, update it to the
1542                         * latest transaction
1543                         */
1544                        if (log->flush_tblk)
1545                                log->flush_tblk = target;
1546                } else {
1547                        /* Only flush until latest transaction is committed */
1548                        log->flush_tblk = target;
1549                        set_bit(log_FLUSH, &log->flag);
1550
1551                        /*
1552                         * Initiate I/O on outstanding transactions
1553                         */
1554                        if (!(log->cflag & logGC_PAGEOUT)) {
1555                                log->cflag |= logGC_PAGEOUT;
1556                                lmGCwrite(log, 0);
1557                        }
1558                }
1559        }
1560        if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1561                /* Flush until all activity complete */
1562                set_bit(log_FLUSH, &log->flag);
1563                log->flush_tblk = NULL;
1564        }
1565
1566        if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1567                DECLARE_WAITQUEUE(__wait, current);
1568
1569                add_wait_queue(&target->gcwait, &__wait);
1570                set_current_state(TASK_UNINTERRUPTIBLE);
1571                LOGGC_UNLOCK(log);
1572                schedule();
1573                LOGGC_LOCK(log);
1574                remove_wait_queue(&target->gcwait, &__wait);
1575        }
1576        LOGGC_UNLOCK(log);
1577
1578        if (wait < 2)
1579                return;
1580
1581        write_special_inodes(log, filemap_fdatawrite);
1582
1583        /*
1584         * If there was recent activity, we may need to wait
1585         * for the lazycommit thread to catch up
1586         */
1587        if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1588                for (i = 0; i < 200; i++) {     /* Too much? */
1589                        msleep(250);
1590                        write_special_inodes(log, filemap_fdatawrite);
1591                        if (list_empty(&log->cqueue) &&
1592                            list_empty(&log->synclist))
1593                                break;
1594                }
1595        }
1596        assert(list_empty(&log->cqueue));
1597
1598#ifdef CONFIG_JFS_DEBUG
1599        if (!list_empty(&log->synclist)) {
1600                struct logsyncblk *lp;
1601
1602                printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1603                list_for_each_entry(lp, &log->synclist, synclist) {
1604                        if (lp->xflag & COMMIT_PAGE) {
1605                                struct metapage *mp = (struct metapage *)lp;
1606                                print_hex_dump(KERN_ERR, "metapage: ",
1607                                               DUMP_PREFIX_ADDRESS, 16, 4,
1608                                               mp, sizeof(struct metapage), 0);
1609                                print_hex_dump(KERN_ERR, "page: ",
1610                                               DUMP_PREFIX_ADDRESS, 16,
1611                                               sizeof(long), mp->page,
1612                                               sizeof(struct page), 0);
1613                        } else
1614                                print_hex_dump(KERN_ERR, "tblock:",
1615                                               DUMP_PREFIX_ADDRESS, 16, 4,
1616                                               lp, sizeof(struct tblock), 0);
1617                }
1618        }
1619#else
1620        WARN_ON(!list_empty(&log->synclist));
1621#endif
1622        clear_bit(log_FLUSH, &log->flag);
1623}
1624
1625/*
1626 * NAME:        lmLogShutdown()
1627 *
1628 * FUNCTION:    log shutdown at last LogClose().
1629 *
1630 *              write log syncpt record.
1631 *              update super block to set redone flag to 0.
1632 *
1633 * PARAMETER:   log     - log inode
1634 *
1635 * RETURN:      0       - success
1636 *
1637 * serialization: single last close thread
1638 */
1639int lmLogShutdown(struct jfs_log * log)
1640{
1641        int rc;
1642        struct lrd lrd;
1643        int lsn;
1644        struct logsuper *logsuper;
1645        struct lbuf *bpsuper;
1646        struct lbuf *bp;
1647        struct logpage *lp;
1648
1649        jfs_info("lmLogShutdown: log:0x%p", log);
1650
1651        jfs_flush_journal(log, 2);
1652
1653        /*
1654         * write the last SYNCPT record with syncpoint = 0
1655         * (i.e., log redo up to HERE !)
1656         */
1657        lrd.logtid = 0;
1658        lrd.backchain = 0;
1659        lrd.type = cpu_to_le16(LOG_SYNCPT);
1660        lrd.length = 0;
1661        lrd.log.syncpt.sync = 0;
1662
1663        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1664        bp = log->bp;
1665        lp = (struct logpage *) bp->l_ldata;
1666        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1667        lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1668        lbmIOWait(log->bp, lbmFREE);
1669        log->bp = NULL;
1670
1671        /*
1672         * synchronous update log superblock
1673         * mark log state as shutdown cleanly
1674         * (i.e., Log does not need to be replayed).
1675         */
1676        if ((rc = lbmRead(log, 1, &bpsuper)))
1677                goto out;
1678
1679        logsuper = (struct logsuper *) bpsuper->l_ldata;
1680        logsuper->state = cpu_to_le32(LOGREDONE);
1681        logsuper->end = cpu_to_le32(lsn);
1682        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1683        rc = lbmIOWait(bpsuper, lbmFREE);
1684
1685        jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1686                 lsn, log->page, log->eor);
1687
1688      out:
1689        /*
1690         * shutdown per log i/o
1691         */
1692        lbmLogShutdown(log);
1693
1694        if (rc) {
1695                jfs_warn("lmLogShutdown: exit(%d)", rc);
1696        }
1697        return rc;
1698}
1699
1700
1701/*
1702 * NAME:        lmLogFileSystem()
1703 *
1704 * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1705 *      file system into/from log active file system list.
1706 *
1707 * PARAMETE:    log     - pointer to logs inode.
1708 *              fsdev   - kdev_t of filesystem.
1709 *              serial  - pointer to returned log serial number
1710 *              activate - insert/remove device from active list.
1711 *
1712 * RETURN:      0       - success
1713 *              errors returned by vms_iowait().
1714 */
1715static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1716                           int activate)
1717{
1718        int rc = 0;
1719        int i;
1720        struct logsuper *logsuper;
1721        struct lbuf *bpsuper;
1722        uuid_t *uuid = &sbi->uuid;
1723
1724        /*
1725         * insert/remove file system device to log active file system list.
1726         */
1727        if ((rc = lbmRead(log, 1, &bpsuper)))
1728                return rc;
1729
1730        logsuper = (struct logsuper *) bpsuper->l_ldata;
1731        if (activate) {
1732                for (i = 0; i < MAX_ACTIVE; i++)
1733                        if (uuid_is_null(&logsuper->active[i].uuid)) {
1734                                uuid_copy(&logsuper->active[i].uuid, uuid);
1735                                sbi->aggregate = i;
1736                                break;
1737                        }
1738                if (i == MAX_ACTIVE) {
1739                        jfs_warn("Too many file systems sharing journal!");
1740                        lbmFree(bpsuper);
1741                        return -EMFILE; /* Is there a better rc? */
1742                }
1743        } else {
1744                for (i = 0; i < MAX_ACTIVE; i++)
1745                        if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1746                                uuid_copy(&logsuper->active[i].uuid,
1747                                          &uuid_null);
1748                                break;
1749                        }
1750                if (i == MAX_ACTIVE) {
1751                        jfs_warn("Somebody stomped on the journal!");
1752                        lbmFree(bpsuper);
1753                        return -EIO;
1754                }
1755
1756        }
1757
1758        /*
1759         * synchronous write log superblock:
1760         *
1761         * write sidestream bypassing write queue:
1762         * at file system mount, log super block is updated for
1763         * activation of the file system before any log record
1764         * (MOUNT record) of the file system, and at file system
1765         * unmount, all meta data for the file system has been
1766         * flushed before log super block is updated for deactivation
1767         * of the file system.
1768         */
1769        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1770        rc = lbmIOWait(bpsuper, lbmFREE);
1771
1772        return rc;
1773}
1774
1775/*
1776 *              log buffer manager (lbm)
1777 *              ------------------------
1778 *
1779 * special purpose buffer manager supporting log i/o requirements.
1780 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
1788 */
1789
1790/*
1791 *      lbmLogInit()
1792 *
1793 * initialize per log I/O setup at lmLogInit()
1794 */
1795static int lbmLogInit(struct jfs_log * log)
1796{                               /* log inode */
1797        int i;
1798        struct lbuf *lbuf;
1799
1800        jfs_info("lbmLogInit: log:0x%p", log);
1801
1802        /* initialize current buffer cursor */
1803        log->bp = NULL;
1804
1805        /* initialize log device write queue */
1806        log->wqueue = NULL;
1807
1808        /*
1809         * Each log has its own buffer pages allocated to it.  These are
1810         * not managed by the page cache.  This ensures that a transaction
1811         * writing to the log does not block trying to allocate a page from
1812         * the page cache (for the log).  This would be bad, since page
1813         * allocation waits on the kswapd thread that may be committing inodes
1814         * which would cause log activity.  Was that clear?  I'm trying to
1815         * avoid deadlock here.
1816         */
1817        init_waitqueue_head(&log->free_wait);
1818
1819        log->lbuf_free = NULL;
1820
1821        for (i = 0; i < LOGPAGES;) {
1822                char *buffer;
1823                uint offset;
1824                struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1825
1826                if (!page)
1827                        goto error;
1828                buffer = page_address(page);
1829                for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1830                        lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1831                        if (lbuf == NULL) {
1832                                if (offset == 0)
1833                                        __free_page(page);
1834                                goto error;
1835                        }
1836                        if (offset) /* we already have one reference */
1837                                get_page(page);
1838                        lbuf->l_offset = offset;
1839                        lbuf->l_ldata = buffer + offset;
1840                        lbuf->l_page = page;
1841                        lbuf->l_log = log;
1842                        init_waitqueue_head(&lbuf->l_ioevent);
1843
1844                        lbuf->l_freelist = log->lbuf_free;
1845                        log->lbuf_free = lbuf;
1846                        i++;
1847                }
1848        }
1849
1850        return (0);
1851
1852      error:
1853        lbmLogShutdown(log);
1854        return -ENOMEM;
1855}
1856
1857
1858/*
1859 *      lbmLogShutdown()
1860 *
1861 * finalize per log I/O setup at lmLogShutdown()
1862 */
1863static void lbmLogShutdown(struct jfs_log * log)
1864{
1865        struct lbuf *lbuf;
1866
1867        jfs_info("lbmLogShutdown: log:0x%p", log);
1868
1869        lbuf = log->lbuf_free;
1870        while (lbuf) {
1871                struct lbuf *next = lbuf->l_freelist;
1872                __free_page(lbuf->l_page);
1873                kfree(lbuf);
1874                lbuf = next;
1875        }
1876}
1877
1878
1879/*
1880 *      lbmAllocate()
1881 *
1882 * allocate an empty log buffer
1883 */
1884static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1885{
1886        struct lbuf *bp;
1887        unsigned long flags;
1888
1889        /*
1890         * recycle from log buffer freelist if any
1891         */
1892        LCACHE_LOCK(flags);
1893        LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1894        log->lbuf_free = bp->l_freelist;
1895        LCACHE_UNLOCK(flags);
1896
1897        bp->l_flag = 0;
1898
1899        bp->l_wqnext = NULL;
1900        bp->l_freelist = NULL;
1901
1902        bp->l_pn = pn;
1903        bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1904        bp->l_ceor = 0;
1905
1906        return bp;
1907}
1908
1909
/*
 *      lbmFree()
 *
 * release a log buffer to freelist:
 * locked wrapper around lbmfree() for callers that do not already
 * hold LCACHE_LOCK.
 */
static void lbmFree(struct lbuf * bp)
{
        unsigned long flags;

        LCACHE_LOCK(flags);
        lbmfree(bp);
        LCACHE_UNLOCK(flags);
}
1925
1926static void lbmfree(struct lbuf * bp)
1927{
1928        struct jfs_log *log = bp->l_log;
1929
1930        assert(bp->l_wqnext == NULL);
1931
1932        /*
1933         * return the buffer to head of freelist
1934         */
1935        bp->l_freelist = log->lbuf_free;
1936        log->lbuf_free = bp;
1937
1938        wake_up(&log->free_wait);
1939        return;
1940}
1941
1942
1943/*
1944 * NAME:        lbmRedrive
1945 *
1946 * FUNCTION:    add a log buffer to the log redrive list
1947 *
1948 * PARAMETER:
1949 *      bp      - log buffer
1950 *
1951 * NOTES:
1952 *      Takes log_redrive_lock.
1953 */
1954static inline void lbmRedrive(struct lbuf *bp)
1955{
1956        unsigned long flags;
1957
1958        spin_lock_irqsave(&log_redrive_lock, flags);
1959        bp->l_redrive_next = log_redrive_list;
1960        log_redrive_list = bp;
1961        spin_unlock_irqrestore(&log_redrive_lock, flags);
1962
1963        wake_up_process(jfsIOthread);
1964}
1965
1966
1967/*
1968 *      lbmRead()
1969 */
1970static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1971{
1972        struct bio *bio;
1973        struct lbuf *bp;
1974
1975        /*
1976         * allocate a log buffer
1977         */
1978        *bpp = bp = lbmAllocate(log, pn);
1979        jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1980
1981        bp->l_flag |= lbmREAD;
1982
1983        bio = bio_alloc(GFP_NOFS, 1);
1984
1985        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1986        bio_set_dev(bio, log->bdev);
1987
1988        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1989        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1990
1991        bio->bi_end_io = lbmIODone;
1992        bio->bi_private = bp;
1993        bio->bi_opf = REQ_OP_READ;
1994        /*check if journaling to disk has been disabled*/
1995        if (log->no_integrity) {
1996                bio->bi_iter.bi_size = 0;
1997                lbmIODone(bio);
1998        } else {
1999                submit_bio(bio);
2000        }
2001
2002        wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2003
2004        return 0;
2005}
2006
2007
2008/*
2009 *      lbmWrite()
2010 *
2011 * buffer at head of pageout queue stays after completion of
2012 * partial-page pageout and redriven by explicit initiation of
2013 * pageout by caller until full-page pageout is completed and
2014 * released.
2015 *
2016 * device driver i/o done redrives pageout of new buffer at
2017 * head of pageout queue when current buffer at head of pageout
2018 * queue is released at the completion of its full-page pageout.
2019 *
2020 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2021 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2022 */
2023static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2024                     int cant_block)
2025{
2026        struct lbuf *tail;
2027        unsigned long flags;
2028
2029        jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2030
2031        /* map the logical block address to physical block address */
2032        bp->l_blkno =
2033            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2034
2035        LCACHE_LOCK(flags);             /* disable+lock */
2036
2037        /*
2038         * initialize buffer for device driver
2039         */
2040        bp->l_flag = flag;
2041
2042        /*
2043         *      insert bp at tail of write queue associated with log
2044         *
2045         * (request is either for bp already/currently at head of queue
2046         * or new bp to be inserted at tail)
2047         */
2048        tail = log->wqueue;
2049
2050        /* is buffer not already on write queue ? */
2051        if (bp->l_wqnext == NULL) {
2052                /* insert at tail of wqueue */
2053                if (tail == NULL) {
2054                        log->wqueue = bp;
2055                        bp->l_wqnext = bp;
2056                } else {
2057                        log->wqueue = bp;
2058                        bp->l_wqnext = tail->l_wqnext;
2059                        tail->l_wqnext = bp;
2060                }
2061
2062                tail = bp;
2063        }
2064
2065        /* is buffer at head of wqueue and for write ? */
2066        if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2067                LCACHE_UNLOCK(flags);   /* unlock+enable */
2068                return;
2069        }
2070
2071        LCACHE_UNLOCK(flags);   /* unlock+enable */
2072
2073        if (cant_block)
2074                lbmRedrive(bp);
2075        else if (flag & lbmSYNC)
2076                lbmStartIO(bp);
2077        else {
2078                LOGGC_UNLOCK(log);
2079                lbmStartIO(bp);
2080                LOGGC_LOCK(log);
2081        }
2082}
2083
2084
2085/*
2086 *      lbmDirectWrite()
2087 *
2088 * initiate pageout bypassing write queue for sidestream
2089 * (e.g., log superblock) write;
2090 */
2091static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2092{
2093        jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2094                 bp, flag, bp->l_pn);
2095
2096        /*
2097         * initialize buffer for device driver
2098         */
2099        bp->l_flag = flag | lbmDIRECT;
2100
2101        /* map the logical block address to physical block address */
2102        bp->l_blkno =
2103            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2104
2105        /*
2106         *      initiate pageout of the page
2107         */
2108        lbmStartIO(bp);
2109}
2110
2111
2112/*
2113 * NAME:        lbmStartIO()
2114 *
2115 * FUNCTION:    Interface to DD strategy routine
2116 *
2117 * RETURN:      none
2118 *
2119 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2120 */
2121static void lbmStartIO(struct lbuf * bp)
2122{
2123        struct bio *bio;
2124        struct jfs_log *log = bp->l_log;
2125
2126        jfs_info("lbmStartIO");
2127
2128        bio = bio_alloc(GFP_NOFS, 1);
2129        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2130        bio_set_dev(bio, log->bdev);
2131
2132        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2133        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2134
2135        bio->bi_end_io = lbmIODone;
2136        bio->bi_private = bp;
2137        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2138
2139        /* check if journaling to disk has been disabled */
2140        if (log->no_integrity) {
2141                bio->bi_iter.bi_size = 0;
2142                lbmIODone(bio);
2143        } else {
2144                submit_bio(bio);
2145                INCREMENT(lmStat.submitted);
2146        }
2147}
2148
2149
2150/*
2151 *      lbmIOWait()
2152 */
2153static int lbmIOWait(struct lbuf * bp, int flag)
2154{
2155        unsigned long flags;
2156        int rc = 0;
2157
2158        jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2159
2160        LCACHE_LOCK(flags);             /* disable+lock */
2161
2162        LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2163
2164        rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2165
2166        if (flag & lbmFREE)
2167                lbmfree(bp);
2168
2169        LCACHE_UNLOCK(flags);   /* unlock+enable */
2170
2171        jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2172        return rc;
2173}
2174
2175/*
2176 *      lbmIODone()
2177 *
2178 * executed at INTIODONE level
2179 */
2180static void lbmIODone(struct bio *bio)
2181{
2182        struct lbuf *bp = bio->bi_private;
2183        struct lbuf *nextbp, *tail;
2184        struct jfs_log *log;
2185        unsigned long flags;
2186
2187        /*
2188         * get back jfs buffer bound to the i/o buffer
2189         */
2190        jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2191
2192        LCACHE_LOCK(flags);             /* disable+lock */
2193
2194        bp->l_flag |= lbmDONE;
2195
2196        if (bio->bi_status) {
2197                bp->l_flag |= lbmERROR;
2198
2199                jfs_err("lbmIODone: I/O error in JFS log");
2200        }
2201
2202        bio_put(bio);
2203
2204        /*
2205         *      pagein completion
2206         */
2207        if (bp->l_flag & lbmREAD) {
2208                bp->l_flag &= ~lbmREAD;
2209
2210                LCACHE_UNLOCK(flags);   /* unlock+enable */
2211
2212                /* wakeup I/O initiator */
2213                LCACHE_WAKEUP(&bp->l_ioevent);
2214
2215                return;
2216        }
2217
2218        /*
2219         *      pageout completion
2220         *
2221         * the bp at the head of write queue has completed pageout.
2222         *
2223         * if single-commit/full-page pageout, remove the current buffer
2224         * from head of pageout queue, and redrive pageout with
2225         * the new buffer at head of pageout queue;
2226         * otherwise, the partial-page pageout buffer stays at
2227         * the head of pageout queue to be redriven for pageout
2228         * by lmGroupCommit() until full-page pageout is completed.
2229         */
2230        bp->l_flag &= ~lbmWRITE;
2231        INCREMENT(lmStat.pagedone);
2232
2233        /* update committed lsn */
2234        log = bp->l_log;
2235        log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2236
2237        if (bp->l_flag & lbmDIRECT) {
2238                LCACHE_WAKEUP(&bp->l_ioevent);
2239                LCACHE_UNLOCK(flags);
2240                return;
2241        }
2242
2243        tail = log->wqueue;
2244
2245        /* single element queue */
2246        if (bp == tail) {
2247                /* remove head buffer of full-page pageout
2248                 * from log device write queue
2249                 */
2250                if (bp->l_flag & lbmRELEASE) {
2251                        log->wqueue = NULL;
2252                        bp->l_wqnext = NULL;
2253                }
2254        }
2255        /* multi element queue */
2256        else {
2257                /* remove head buffer of full-page pageout
2258                 * from log device write queue
2259                 */
2260                if (bp->l_flag & lbmRELEASE) {
2261                        nextbp = tail->l_wqnext = bp->l_wqnext;
2262                        bp->l_wqnext = NULL;
2263
2264                        /*
2265                         * redrive pageout of next page at head of write queue:
2266                         * redrive next page without any bound tblk
2267                         * (i.e., page w/o any COMMIT records), or
2268                         * first page of new group commit which has been
2269                         * queued after current page (subsequent pageout
2270                         * is performed synchronously, except page without
2271                         * any COMMITs) by lmGroupCommit() as indicated
2272                         * by lbmWRITE flag;
2273                         */
2274                        if (nextbp->l_flag & lbmWRITE) {
2275                                /*
2276                                 * We can't do the I/O at interrupt time.
2277                                 * The jfsIO thread can do it
2278                                 */
2279                                lbmRedrive(nextbp);
2280                        }
2281                }
2282        }
2283
2284        /*
2285         *      synchronous pageout:
2286         *
2287         * buffer has not necessarily been removed from write queue
2288         * (e.g., synchronous write of partial-page with COMMIT):
2289         * leave buffer for i/o initiator to dispose
2290         */
2291        if (bp->l_flag & lbmSYNC) {
2292                LCACHE_UNLOCK(flags);   /* unlock+enable */
2293
2294                /* wakeup I/O initiator */
2295                LCACHE_WAKEUP(&bp->l_ioevent);
2296        }
2297
2298        /*
2299         *      Group Commit pageout:
2300         */
2301        else if (bp->l_flag & lbmGC) {
2302                LCACHE_UNLOCK(flags);
2303                lmPostGC(bp);
2304        }
2305
2306        /*
2307         *      asynchronous pageout:
2308         *
2309         * buffer must have been removed from write queue:
2310         * insert buffer at head of freelist where it can be recycled
2311         */
2312        else {
2313                assert(bp->l_flag & lbmRELEASE);
2314                assert(bp->l_flag & lbmFREE);
2315                lbmfree(bp);
2316
2317                LCACHE_UNLOCK(flags);   /* unlock+enable */
2318        }
2319}
2320
2321int jfsIOWait(void *arg)
2322{
2323        struct lbuf *bp;
2324
2325        do {
2326                spin_lock_irq(&log_redrive_lock);
2327                while ((bp = log_redrive_list)) {
2328                        log_redrive_list = bp->l_redrive_next;
2329                        bp->l_redrive_next = NULL;
2330                        spin_unlock_irq(&log_redrive_lock);
2331                        lbmStartIO(bp);
2332                        spin_lock_irq(&log_redrive_lock);
2333                }
2334
2335                if (freezing(current)) {
2336                        spin_unlock_irq(&log_redrive_lock);
2337                        try_to_freeze();
2338                } else {
2339                        set_current_state(TASK_INTERRUPTIBLE);
2340                        spin_unlock_irq(&log_redrive_lock);
2341                        schedule();
2342                }
2343        } while (!kthread_should_stop());
2344
2345        jfs_info("jfsIOWait being killed!");
2346        return 0;
2347}
2348
2349/*
2350 * NAME:        lmLogFormat()/jfs_logform()
2351 *
2352 * FUNCTION:    format file system log
2353 *
2354 * PARAMETERS:
2355 *      log     - volume log
2356 *      logAddress - start address of log space in FS block
2357 *      logSize - length of log space in FS block;
2358 *
2359 * RETURN:      0       - success
2360 *              -EIO    - i/o error
2361 *
2362 * XXX: We're synchronously writing one page at a time.  This needs to
2363 *      be improved by writing multiple pages at once.
2364 */
2365int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2366{
2367        int rc = -EIO;
2368        struct jfs_sb_info *sbi;
2369        struct logsuper *logsuper;
2370        struct logpage *lp;
2371        int lspn;               /* log sequence page number */
2372        struct lrd *lrd_ptr;
2373        int npages = 0;
2374        struct lbuf *bp;
2375
2376        jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2377                 (long long)logAddress, logSize);
2378
2379        sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2380
2381        /* allocate a log buffer */
2382        bp = lbmAllocate(log, 1);
2383
2384        npages = logSize >> sbi->l2nbperpage;
2385
2386        /*
2387         *      log space:
2388         *
2389         * page 0 - reserved;
2390         * page 1 - log superblock;
2391         * page 2 - log data page: A SYNC log record is written
2392         *          into this page at logform time;
2393         * pages 3-N - log data page: set to empty log data pages;
2394         */
2395        /*
2396         *      init log superblock: log page 1
2397         */
2398        logsuper = (struct logsuper *) bp->l_ldata;
2399
2400        logsuper->magic = cpu_to_le32(LOGMAGIC);
2401        logsuper->version = cpu_to_le32(LOGVERSION);
2402        logsuper->state = cpu_to_le32(LOGREDONE);
2403        logsuper->flag = cpu_to_le32(sbi->mntflag);     /* ? */
2404        logsuper->size = cpu_to_le32(npages);
2405        logsuper->bsize = cpu_to_le32(sbi->bsize);
2406        logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2407        logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2408
2409        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2410        bp->l_blkno = logAddress + sbi->nbperpage;
2411        lbmStartIO(bp);
2412        if ((rc = lbmIOWait(bp, 0)))
2413                goto exit;
2414
2415        /*
2416         *      init pages 2 to npages-1 as log data pages:
2417         *
2418         * log page sequence number (lpsn) initialization:
2419         *
2420         * pn:   0     1     2     3                 n-1
2421         *       +-----+-----+=====+=====+===.....===+=====+
2422         * lspn:             N-1   0     1           N-2
2423         *                   <--- N page circular file ---->
2424         *
2425         * the N (= npages-2) data pages of the log is maintained as
2426         * a circular file for the log records;
2427         * lpsn grows by 1 monotonically as each log page is written
2428         * to the circular file of the log;
2429         * and setLogpage() will not reset the page number even if
2430         * the eor is equal to LOGPHDRSIZE. In order for binary search
2431         * still work in find log end process, we have to simulate the
2432         * log wrap situation at the log format time.
2433         * The 1st log page written will have the highest lpsn. Then
2434         * the succeeding log pages will have ascending order of
2435         * the lspn starting from 0, ... (N-2)
2436         */
2437        lp = (struct logpage *) bp->l_ldata;
2438        /*
2439         * initialize 1st log page to be written: lpsn = N - 1,
2440         * write a SYNCPT log record is written to this page
2441         */
2442        lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2443        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2444
2445        lrd_ptr = (struct lrd *) &lp->data;
2446        lrd_ptr->logtid = 0;
2447        lrd_ptr->backchain = 0;
2448        lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2449        lrd_ptr->length = 0;
2450        lrd_ptr->log.syncpt.sync = 0;
2451
2452        bp->l_blkno += sbi->nbperpage;
2453        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2454        lbmStartIO(bp);
2455        if ((rc = lbmIOWait(bp, 0)))
2456                goto exit;
2457
2458        /*
2459         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2460         */
2461        for (lspn = 0; lspn < npages - 3; lspn++) {
2462                lp->h.page = lp->t.page = cpu_to_le32(lspn);
2463                lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2464
2465                bp->l_blkno += sbi->nbperpage;
2466                bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2467                lbmStartIO(bp);
2468                if ((rc = lbmIOWait(bp, 0)))
2469                        goto exit;
2470        }
2471
2472        rc = 0;
2473exit:
2474        /*
2475         *      finalize log
2476         */
2477        /* release the buffer */
2478        lbmFree(bp);
2479
2480        return rc;
2481}
2482
2483#ifdef CONFIG_JFS_STATISTICS
2484int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2485{
2486        seq_printf(m,
2487                       "JFS Logmgr stats\n"
2488                       "================\n"
2489                       "commits = %d\n"
2490                       "writes submitted = %d\n"
2491                       "writes completed = %d\n"
2492                       "full pages submitted = %d\n"
2493                       "partial pages submitted = %d\n",
2494                       lmStat.commit,
2495                       lmStat.submitted,
2496                       lmStat.pagedone,
2497                       lmStat.full_page,
2498                       lmStat.partial_page);
2499        return 0;
2500}
2501#endif /* CONFIG_JFS_STATISTICS */
2502