linux/fs/jfs/jfs_logmgr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *      jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *      log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *      group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *      TBD lazy commit:
  29 * transactions are committed asynchronously when the log page
  30 * containing their COMMIT records is paged out once it becomes full;
  31 *
  32 *      serialization:
  33 * . a per log lock serialize log write.
  34 * . a per log lock serialize group commit.
  35 * . a per log lock serialize log open/close;
  36 *
  37 *      TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
  40 * detection of split (out-of-order) write of physical sectors
  41 * of last logpage via timestamp at end of each sector
  42 * (with its mirror data array at trailer).
  43 *
  44 *      alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>          /* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  /*
   * lbufs ready to be redriven (jfsIO thread).
   * The list is protected by log_redrive_lock.
   */
  static struct lbuf *log_redrive_list;
  static DEFINE_SPINLOCK(log_redrive_lock);

  /*
   *      per-log serialization of log read/write
   *
   * Built on the loglock mutex embedded in the log.
   */
  #define LOG_LOCK_INIT(lg)       mutex_init(&(lg)->loglock)
  #define LOG_LOCK(lg)            mutex_lock(&((lg)->loglock))
  #define LOG_UNLOCK(lg)          mutex_unlock(&((lg)->loglock))

  /*
   *      per-log serialization of group commit
   *
   * Built on the gclock spinlock embedded in the log; LOGGC_LOCK and
   * LOGGC_UNLOCK use the _irq spinlock variants.  LOGGC_WAKEUP wakes
   * every waiter on a transaction block's gcwait queue.
   */
  #define LOGGC_LOCK_INIT(lg)     spin_lock_init(&(lg)->gclock)
  #define LOGGC_LOCK(lg)          spin_lock_irq(&(lg)->gclock)
  #define LOGGC_UNLOCK(lg)        spin_unlock_irq(&(lg)->gclock)
  #define LOGGC_WAKEUP(tb)        wake_up_all(&(tb)->gcwait)
  93
  /*
   *      log sync serialization (per log)
   *
   * LOGSYNC_DELTA:   one eighth of the log size, capped at 128 log pages.
   * LOGSYNC_BARRIER: one quarter of the log size.
   *
   * A looser alternative is kept here for reference only (not compiled):
   *      LOGSYNC_DELTA(logsize)   = min((logsize)/4, 256*LOGPSIZE)
   *      LOGSYNC_BARRIER(logsize) = ((logsize)/2)
   */
  #define LOGSYNC_DELTA(sz)               min((sz)/8, 128*LOGPSIZE)
  #define LOGSYNC_BARRIER(sz)             ((sz)/4)
 103
 104
 /*
  *      log buffer cache synchronization
  *
  * A single global spinlock, always taken with the irqsave/irqrestore
  * variants; the caller supplies the flags word.
  */
 static DEFINE_SPINLOCK(jfsLCacheLock);

 #define LCACHE_LOCK(fl)         spin_lock_irqsave(&jfsLCacheLock, fl)
 #define LCACHE_UNLOCK(fl)       spin_unlock_irqrestore(&jfsLCacheLock, fl)

 /*
  * Sleep on waitq until the condition holds, skipping the sleep entirely
  * when it already does.  The lock/unlock commands handed to
  * __SLEEP_COND are LCACHE_LOCK/LCACHE_UNLOCK.
  * See __SLEEP_COND in jfs_locks.h
  */
 #define LCACHE_SLEEP_COND(waitq, test, fl)      \
 do {                                            \
         if (test)                               \
                 break;                          \
         __SLEEP_COND(waitq, test, LCACHE_LOCK(fl), LCACHE_UNLOCK(fl)); \
 } while (0)

 #define LCACHE_WAKEUP(waitq)    wake_up(waitq)
 124
 125
 /*
  *      lbuf buffer cache (lCache) control
  *
  * log buffer manager pageout control flags (cumulative, inclusive)
  */
 #define lbmREAD         0x0001

 /* enqueue at tail of write queue; init pageout if at head of queue */
 #define lbmWRITE        0x0002

 /*
  * remove from write queue at completion of pageout;
  * do not free/recycle it yet: caller will free it
  */
 #define lbmRELEASE      0x0004

 /* do not return to freelist when removed from write queue */
 #define lbmSYNC         0x0008

 /* return to freelist at completion of pageout; the buffer may be recycled */
 #define lbmFREE         0x0010

 #define lbmDONE         0x0020

 /* tested on bp->l_flag by lmPostGC, which then marks tblocks tblkGC_ERROR */
 #define lbmERROR        0x0040

 /* lbmIODone to perform post-GC processing of log page */
 #define lbmGC           0x0080

 #define lbmDIRECT       0x0100
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164                         struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168                           int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 /*
  *      statistics
  *
  * Counters exist only when CONFIG_JFS_STATISTICS is set.  In this file
  * lmWriteRecord bumps "commit" and lmGCwrite bumps "full_page" and
  * "partial_page" through INCREMENT().
  */
 #ifdef CONFIG_JFS_STATISTICS
 static struct lmStat {
         /* # of commit */
         uint commit;
         /* # of page written */
         uint pagedone;
         /* # of pages submitted */
         uint submitted;
         /* # of full pages submitted */
         uint full_page;
         /* # of partial pages submitted */
         uint partial_page;
 } lmStat;
 #endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202                                 int (*writer)(struct address_space *))
 203{
 204        struct jfs_sb_info *sbi;
 205
 206        list_for_each_entry(sbi, &log->sb_list, log_list) {
 207                writer(sbi->ipbmap->i_mapping);
 208                writer(sbi->ipimap->i_mapping);
 209                writer(sbi->direct_inode->i_mapping);
 210        }
 211}
 212
 213/*
 214 * NAME:        lmLog()
 215 *
 216 * FUNCTION:    write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:      lsn - offset to the next log record to write (end-of-log);
 221 *              -1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226          struct tlock * tlck)
 227{
 228        int lsn;
 229        int diffp, difft;
 230        struct metapage *mp = NULL;
 231        unsigned long flags;
 232
 233        jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234                 log, tblk, lrd, tlck);
 235
 236        LOG_LOCK(log);
 237
 238        /* log by (out-of-transaction) JFS ? */
 239        if (tblk == NULL)
 240                goto writeRecord;
 241
 242        /* log from page ? */
 243        if (tlck == NULL ||
 244            tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245                goto writeRecord;
 246
 247        /*
 248         *      initialize/update page/transaction recovery lsn
 249         */
 250        lsn = log->lsn;
 251
 252        LOGSYNC_LOCK(log, flags);
 253
 254        /*
 255         * initialize page lsn if first log write of the page
 256         */
 257        if (mp->lsn == 0) {
 258                mp->log = log;
 259                mp->lsn = lsn;
 260                log->count++;
 261
 262                /* insert page at tail of logsynclist */
 263                list_add_tail(&mp->synclist, &log->synclist);
 264        }
 265
 266        /*
 267         *      initialize/update lsn of tblock of the page
 268         *
 269         * transaction inherits oldest lsn of pages associated
 270         * with allocation/deallocation of resources (their
 271         * log records are used to reconstruct allocation map
 272         * at recovery time: inode for inode allocation map,
 273         * B+-tree index of extent descriptors for block
 274         * allocation map);
 275         * allocation map pages inherit transaction lsn at
 276         * commit time to allow forwarding log syncpt past log
 277         * records associated with allocation/deallocation of
 278         * resources only after persistent map of these map pages
 279         * have been updated and propagated to home.
 280         */
 281        /*
 282         * initialize transaction lsn:
 283         */
 284        if (tblk->lsn == 0) {
 285                /* inherit lsn of its first page logged */
 286                tblk->lsn = mp->lsn;
 287                log->count++;
 288
 289                /* insert tblock after the page on logsynclist */
 290                list_add(&tblk->synclist, &mp->synclist);
 291        }
 292        /*
 293         * update transaction lsn:
 294         */
 295        else {
 296                /* inherit oldest/smallest lsn of page */
 297                logdiff(diffp, mp->lsn, log);
 298                logdiff(difft, tblk->lsn, log);
 299                if (diffp < difft) {
 300                        /* update tblock lsn with page lsn */
 301                        tblk->lsn = mp->lsn;
 302
 303                        /* move tblock after page on logsynclist */
 304                        list_move(&tblk->synclist, &mp->synclist);
 305                }
 306        }
 307
 308        LOGSYNC_UNLOCK(log, flags);
 309
 310        /*
 311         *      write the log record
 312         */
 313      writeRecord:
 314        lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316        /*
 317         * forward log syncpt if log reached next syncpt trigger
 318         */
 319        logdiff(diffp, lsn, log);
 320        if (diffp >= log->nextsync)
 321                lsn = lmLogSync(log, 0);
 322
 323        /* update end-of-log lsn */
 324        log->lsn = lsn;
 325
 326        LOG_UNLOCK(log);
 327
 328        /* return end-of-log address */
 329        return lsn;
 330}
 331
 332/*
 333 * NAME:        lmWriteRecord()
 334 *
 335 * FUNCTION:    move the log record to current log page
 336 *
 337 * PARAMETER:   cd      - commit descriptor
 338 *
 339 * RETURN:      end-of-log address
 340 *
 341 * serialization: LOG_LOCK() held on entry/exit
 342 */
 343static int
 344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 345              struct tlock * tlck)
 346{
 347        int lsn = 0;            /* end-of-log address */
 348        struct lbuf *bp;        /* dst log page buffer */
 349        struct logpage *lp;     /* dst log page */
 350        caddr_t dst;            /* destination address in log page */
 351        int dstoffset;          /* end-of-log offset in log page */
 352        int freespace;          /* free space in log page */
 353        caddr_t p;              /* src meta-data page */
 354        caddr_t src;
 355        int srclen;
 356        int nbytes;             /* number of bytes to move */
 357        int i;
 358        int len;
 359        struct linelock *linelock;
 360        struct lv *lv;
 361        struct lvd *lvd;
 362        int l2linesize;
 363
 364        len = 0;
 365
 366        /* retrieve destination log page to write */
 367        bp = (struct lbuf *) log->bp;
 368        lp = (struct logpage *) bp->l_ldata;
 369        dstoffset = log->eor;
 370
 371        /* any log data to write ? */
 372        if (tlck == NULL)
 373                goto moveLrd;
 374
 375        /*
 376         *      move log record data
 377         */
 378        /* retrieve source meta-data page to log */
 379        if (tlck->flag & tlckPAGELOCK) {
 380                p = (caddr_t) (tlck->mp->data);
 381                linelock = (struct linelock *) & tlck->lock;
 382        }
 383        /* retrieve source in-memory inode to log */
 384        else if (tlck->flag & tlckINODELOCK) {
 385                if (tlck->type & tlckDTREE)
 386                        p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
 387                else
 388                        p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
 389                linelock = (struct linelock *) & tlck->lock;
 390        }
 391#ifdef  _JFS_WIP
 392        else if (tlck->flag & tlckINLINELOCK) {
 393
 394                inlinelock = (struct inlinelock *) & tlck;
 395                p = (caddr_t) & inlinelock->pxd;
 396                linelock = (struct linelock *) & tlck;
 397        }
 398#endif                          /* _JFS_WIP */
 399        else {
 400                jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
 401                return 0;       /* Probably should trap */
 402        }
 403        l2linesize = linelock->l2linesize;
 404
 405      moveData:
 406        ASSERT(linelock->index <= linelock->maxcnt);
 407
 408        lv = linelock->lv;
 409        for (i = 0; i < linelock->index; i++, lv++) {
 410                if (lv->length == 0)
 411                        continue;
 412
 413                /* is page full ? */
 414                if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
 415                        /* page become full: move on to next page */
 416                        lmNextPage(log);
 417
 418                        bp = log->bp;
 419                        lp = (struct logpage *) bp->l_ldata;
 420                        dstoffset = LOGPHDRSIZE;
 421                }
 422
 423                /*
 424                 * move log vector data
 425                 */
 426                src = (u8 *) p + (lv->offset << l2linesize);
 427                srclen = lv->length << l2linesize;
 428                len += srclen;
 429                while (srclen > 0) {
 430                        freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 431                        nbytes = min(freespace, srclen);
 432                        dst = (caddr_t) lp + dstoffset;
 433                        memcpy(dst, src, nbytes);
 434                        dstoffset += nbytes;
 435
 436                        /* is page not full ? */
 437                        if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 438                                break;
 439
 440                        /* page become full: move on to next page */
 441                        lmNextPage(log);
 442
 443                        bp = (struct lbuf *) log->bp;
 444                        lp = (struct logpage *) bp->l_ldata;
 445                        dstoffset = LOGPHDRSIZE;
 446
 447                        srclen -= nbytes;
 448                        src += nbytes;
 449                }
 450
 451                /*
 452                 * move log vector descriptor
 453                 */
 454                len += 4;
 455                lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
 456                lvd->offset = cpu_to_le16(lv->offset);
 457                lvd->length = cpu_to_le16(lv->length);
 458                dstoffset += 4;
 459                jfs_info("lmWriteRecord: lv offset:%d length:%d",
 460                         lv->offset, lv->length);
 461        }
 462
 463        if ((i = linelock->next)) {
 464                linelock = (struct linelock *) lid_to_tlock(i);
 465                goto moveData;
 466        }
 467
 468        /*
 469         *      move log record descriptor
 470         */
 471      moveLrd:
 472        lrd->length = cpu_to_le16(len);
 473
 474        src = (caddr_t) lrd;
 475        srclen = LOGRDSIZE;
 476
 477        while (srclen > 0) {
 478                freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
 479                nbytes = min(freespace, srclen);
 480                dst = (caddr_t) lp + dstoffset;
 481                memcpy(dst, src, nbytes);
 482
 483                dstoffset += nbytes;
 484                srclen -= nbytes;
 485
 486                /* are there more to move than freespace of page ? */
 487                if (srclen)
 488                        goto pageFull;
 489
 490                /*
 491                 * end of log record descriptor
 492                 */
 493
 494                /* update last log record eor */
 495                log->eor = dstoffset;
 496                bp->l_eor = dstoffset;
 497                lsn = (log->page << L2LOGPSIZE) + dstoffset;
 498
 499                if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
 500                        tblk->clsn = lsn;
 501                        jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
 502                                 bp->l_eor);
 503
 504                        INCREMENT(lmStat.commit);       /* # of commit */
 505
 506                        /*
 507                         * enqueue tblock for group commit:
 508                         *
 509                         * enqueue tblock of non-trivial/synchronous COMMIT
 510                         * at tail of group commit queue
 511                         * (trivial/asynchronous COMMITs are ignored by
 512                         * group commit.)
 513                         */
 514                        LOGGC_LOCK(log);
 515
 516                        /* init tblock gc state */
 517                        tblk->flag = tblkGC_QUEUE;
 518                        tblk->bp = log->bp;
 519                        tblk->pn = log->page;
 520                        tblk->eor = log->eor;
 521
 522                        /* enqueue transaction to commit queue */
 523                        list_add_tail(&tblk->cqueue, &log->cqueue);
 524
 525                        LOGGC_UNLOCK(log);
 526                }
 527
 528                jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
 529                        le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
 530
 531                /* page not full ? */
 532                if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
 533                        return lsn;
 534
 535              pageFull:
 536                /* page become full: move on to next page */
 537                lmNextPage(log);
 538
 539                bp = (struct lbuf *) log->bp;
 540                lp = (struct logpage *) bp->l_ldata;
 541                dstoffset = LOGPHDRSIZE;
 542                src += nbytes;
 543        }
 544
 545        return lsn;
 546}
 547
 548
 549/*
 550 * NAME:        lmNextPage()
 551 *
 552 * FUNCTION:    write current page and allocate next page.
 553 *
 554 * PARAMETER:   log
 555 *
 556 * RETURN:      0
 557 *
 558 * serialization: LOG_LOCK() held on entry/exit
 559 */
 560static int lmNextPage(struct jfs_log * log)
 561{
 562        struct logpage *lp;
 563        int lspn;               /* log sequence page number */
 564        int pn;                 /* current page number */
 565        struct lbuf *bp;
 566        struct lbuf *nextbp;
 567        struct tblock *tblk;
 568
 569        /* get current log page number and log sequence page number */
 570        pn = log->page;
 571        bp = log->bp;
 572        lp = (struct logpage *) bp->l_ldata;
 573        lspn = le32_to_cpu(lp->h.page);
 574
 575        LOGGC_LOCK(log);
 576
 577        /*
 578         *      write or queue the full page at the tail of write queue
 579         */
 580        /* get the tail tblk on commit queue */
 581        if (list_empty(&log->cqueue))
 582                tblk = NULL;
 583        else
 584                tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 585
 586        /* every tblk who has COMMIT record on the current page,
 587         * and has not been committed, must be on commit queue
 588         * since tblk is queued at commit queueu at the time
 589         * of writing its COMMIT record on the page before
 590         * page becomes full (even though the tblk thread
 591         * who wrote COMMIT record may have been suspended
 592         * currently);
 593         */
 594
 595        /* is page bound with outstanding tail tblk ? */
 596        if (tblk && tblk->pn == pn) {
 597                /* mark tblk for end-of-page */
 598                tblk->flag |= tblkGC_EOP;
 599
 600                if (log->cflag & logGC_PAGEOUT) {
 601                        /* if page is not already on write queue,
 602                         * just enqueue (no lbmWRITE to prevent redrive)
 603                         * buffer to wqueue to ensure correct serial order
 604                         * of the pages since log pages will be added
 605                         * continuously
 606                         */
 607                        if (bp->l_wqnext == NULL)
 608                                lbmWrite(log, bp, 0, 0);
 609                } else {
 610                        /*
 611                         * No current GC leader, initiate group commit
 612                         */
 613                        log->cflag |= logGC_PAGEOUT;
 614                        lmGCwrite(log, 0);
 615                }
 616        }
 617        /* page is not bound with outstanding tblk:
 618         * init write or mark it to be redriven (lbmWRITE)
 619         */
 620        else {
 621                /* finalize the page */
 622                bp->l_ceor = bp->l_eor;
 623                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 624                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 625        }
 626        LOGGC_UNLOCK(log);
 627
 628        /*
 629         *      allocate/initialize next page
 630         */
 631        /* if log wraps, the first data page of log is 2
 632         * (0 never used, 1 is superblock).
 633         */
 634        log->page = (pn == log->size - 1) ? 2 : pn + 1;
 635        log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
 636
 637        /* allocate/initialize next log page buffer */
 638        nextbp = lbmAllocate(log, log->page);
 639        nextbp->l_eor = log->eor;
 640        log->bp = nextbp;
 641
 642        /* initialize next log page */
 643        lp = (struct logpage *) nextbp->l_ldata;
 644        lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 645        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 646
 647        return 0;
 648}
 649
 650
 651/*
 652 * NAME:        lmGroupCommit()
 653 *
 654 * FUNCTION:    group commit
 655 *      initiate pageout of the pages with COMMIT in the order of
 656 *      page number - redrive pageout of the page at the head of
 657 *      pageout queue until full page has been written.
 658 *
 659 * RETURN:
 660 *
 661 * NOTE:
 662 *      LOGGC_LOCK serializes log group commit queue, and
 663 *      transaction blocks on the commit queue.
 664 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 665 */
 666int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 667{
 668        int rc = 0;
 669
 670        LOGGC_LOCK(log);
 671
 672        /* group committed already ? */
 673        if (tblk->flag & tblkGC_COMMITTED) {
 674                if (tblk->flag & tblkGC_ERROR)
 675                        rc = -EIO;
 676
 677                LOGGC_UNLOCK(log);
 678                return rc;
 679        }
 680        jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 681
 682        if (tblk->xflag & COMMIT_LAZY)
 683                tblk->flag |= tblkGC_LAZY;
 684
 685        if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 686            (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 687             || jfs_tlocks_low)) {
 688                /*
 689                 * No pageout in progress
 690                 *
 691                 * start group commit as its group leader.
 692                 */
 693                log->cflag |= logGC_PAGEOUT;
 694
 695                lmGCwrite(log, 0);
 696        }
 697
 698        if (tblk->xflag & COMMIT_LAZY) {
 699                /*
 700                 * Lazy transactions can leave now
 701                 */
 702                LOGGC_UNLOCK(log);
 703                return 0;
 704        }
 705
 706        /* lmGCwrite gives up LOGGC_LOCK, check again */
 707
 708        if (tblk->flag & tblkGC_COMMITTED) {
 709                if (tblk->flag & tblkGC_ERROR)
 710                        rc = -EIO;
 711
 712                LOGGC_UNLOCK(log);
 713                return rc;
 714        }
 715
 716        /* upcount transaction waiting for completion
 717         */
 718        log->gcrtc++;
 719        tblk->flag |= tblkGC_READY;
 720
 721        __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 722                     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 723
 724        /* removed from commit queue */
 725        if (tblk->flag & tblkGC_ERROR)
 726                rc = -EIO;
 727
 728        LOGGC_UNLOCK(log);
 729        return rc;
 730}
 731
 732/*
 733 * NAME:        lmGCwrite()
 734 *
 735 * FUNCTION:    group commit write
 736 *      initiate write of log page, building a group of all transactions
 737 *      with commit records on that page.
 738 *
 739 * RETURN:      None
 740 *
 741 * NOTE:
 742 *      LOGGC_LOCK must be held by caller.
 743 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 744 */
 745static void lmGCwrite(struct jfs_log * log, int cant_write)
 746{
 747        struct lbuf *bp;
 748        struct logpage *lp;
 749        int gcpn;               /* group commit page number */
 750        struct tblock *tblk;
 751        struct tblock *xtblk = NULL;
 752
 753        /*
 754         * build the commit group of a log page
 755         *
 756         * scan commit queue and make a commit group of all
 757         * transactions with COMMIT records on the same log page.
 758         */
 759        /* get the head tblk on the commit queue */
 760        gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 761
 762        list_for_each_entry(tblk, &log->cqueue, cqueue) {
 763                if (tblk->pn != gcpn)
 764                        break;
 765
 766                xtblk = tblk;
 767
 768                /* state transition: (QUEUE, READY) -> COMMIT */
 769                tblk->flag |= tblkGC_COMMIT;
 770        }
 771        tblk = xtblk;           /* last tblk of the page */
 772
 773        /*
 774         * pageout to commit transactions on the log page.
 775         */
 776        bp = (struct lbuf *) tblk->bp;
 777        lp = (struct logpage *) bp->l_ldata;
 778        /* is page already full ? */
 779        if (tblk->flag & tblkGC_EOP) {
 780                /* mark page to free at end of group commit of the page */
 781                tblk->flag &= ~tblkGC_EOP;
 782                tblk->flag |= tblkGC_FREE;
 783                bp->l_ceor = bp->l_eor;
 784                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 786                         cant_write);
 787                INCREMENT(lmStat.full_page);
 788        }
 789        /* page is not yet full */
 790        else {
 791                bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
 792                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 793                lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 794                INCREMENT(lmStat.partial_page);
 795        }
 796}
 797
 798/*
 799 * NAME:        lmPostGC()
 800 *
 801 * FUNCTION:    group commit post-processing
 802 *      Processes transactions after their commit records have been written
 803 *      to disk, redriving log I/O if necessary.
 804 *
 805 * RETURN:      None
 806 *
 807 * NOTE:
 808 *      This routine is called a interrupt time by lbmIODone
 809 */
 810static void lmPostGC(struct lbuf * bp)
 811{
 812        unsigned long flags;
 813        struct jfs_log *log = bp->l_log;
 814        struct logpage *lp;
 815        struct tblock *tblk, *temp;
 816
 817        //LOGGC_LOCK(log);
 818        spin_lock_irqsave(&log->gclock, flags);
 819        /*
 820         * current pageout of group commit completed.
 821         *
 822         * remove/wakeup transactions from commit queue who were
 823         * group committed with the current log page
 824         */
 825        list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 826                if (!(tblk->flag & tblkGC_COMMIT))
 827                        break;
 828                /* if transaction was marked GC_COMMIT then
 829                 * it has been shipped in the current pageout
 830                 * and made it to disk - it is committed.
 831                 */
 832
 833                if (bp->l_flag & lbmERROR)
 834                        tblk->flag |= tblkGC_ERROR;
 835
 836                /* remove it from the commit queue */
 837                list_del(&tblk->cqueue);
 838                tblk->flag &= ~tblkGC_QUEUE;
 839
 840                if (tblk == log->flush_tblk) {
 841                        /* we can stop flushing the log now */
 842                        clear_bit(log_FLUSH, &log->flag);
 843                        log->flush_tblk = NULL;
 844                }
 845
 846                jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 847                         tblk->flag);
 848
 849                if (!(tblk->xflag & COMMIT_FORCE))
 850                        /*
 851                         * Hand tblk over to lazy commit thread
 852                         */
 853                        txLazyUnlock(tblk);
 854                else {
 855                        /* state transition: COMMIT -> COMMITTED */
 856                        tblk->flag |= tblkGC_COMMITTED;
 857
 858                        if (tblk->flag & tblkGC_READY)
 859                                log->gcrtc--;
 860
 861                        LOGGC_WAKEUP(tblk);
 862                }
 863
 864                /* was page full before pageout ?
 865                 * (and this is the last tblk bound with the page)
 866                 */
 867                if (tblk->flag & tblkGC_FREE)
 868                        lbmFree(bp);
 869                /* did page become full after pageout ?
 870                 * (and this is the last tblk bound with the page)
 871                 */
 872                else if (tblk->flag & tblkGC_EOP) {
 873                        /* finalize the page */
 874                        lp = (struct logpage *) bp->l_ldata;
 875                        bp->l_ceor = bp->l_eor;
 876                        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 877                        jfs_info("lmPostGC: calling lbmWrite");
 878                        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 879                                 1);
 880                }
 881
 882        }
 883
 884        /* are there any transactions who have entered lnGroupCommit()
 885         * (whose COMMITs are after that of the last log page written.
 886         * They are waiting for new group commit (above at (SLEEP 1))
 887         * or lazy transactions are on a full (queued) log page,
 888         * select the latest ready transaction as new group leader and
 889         * wake her up to lead her group.
 890         */
 891        if ((!list_empty(&log->cqueue)) &&
 892            ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 893             test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 894                /*
 895                 * Call lmGCwrite with new group leader
 896                 */
 897                lmGCwrite(log, 1);
 898
 899        /* no transaction are ready yet (transactions are only just
 900         * queued (GC_QUEUE) and not entered for group commit yet).
 901         * the first transaction entering group commit
 902         * will elect herself as new group leader.
 903         */
 904        else
 905                log->cflag &= ~logGC_PAGEOUT;
 906
 907        //LOGGC_UNLOCK(log);
 908        spin_unlock_irqrestore(&log->gclock, flags);
 909        return;
 910}
 911
 912/*
 913 * NAME:        lmLogSync()
 914 *
 915 * FUNCTION:    write log SYNCPT record for specified log
 916 *      if new sync address is available
 917 *      (normally the case if sync() is executed by back-ground
 918 *      process).
 919 *      calculate new value of i_nextsync which determines when
 920 *      this code is called again.
 921 *
 922 * PARAMETERS:  log     - log structure
 923 *              hard_sync - 1 to force all metadata to be written
 924 *
 925 * RETURN:      0
 926 *
 927 * serialization: LOG_LOCK() held on entry/exit
 928 */
 929static int lmLogSync(struct jfs_log * log, int hard_sync)
 930{
 931        int logsize;
 932        int written;            /* written since last syncpt */
 933        int free;               /* free space left available */
 934        int delta;              /* additional delta to write normally */
 935        int more;               /* additional write granted */
 936        struct lrd lrd;
 937        int lsn;
 938        struct logsyncblk *lp;
 939        unsigned long flags;
 940
 941        /* push dirty metapages out to disk */
 942        if (hard_sync)
 943                write_special_inodes(log, filemap_fdatawrite);
 944        else
 945                write_special_inodes(log, filemap_flush);
 946
 947        /*
 948         *      forward syncpt
 949         */
 950        /* if last sync is same as last syncpt,
 951         * invoke sync point forward processing to update sync.
 952         */
 953
 954        if (log->sync == log->syncpt) {
 955                LOGSYNC_LOCK(log, flags);
 956                if (list_empty(&log->synclist))
 957                        log->sync = log->lsn;
 958                else {
 959                        lp = list_entry(log->synclist.next,
 960                                        struct logsyncblk, synclist);
 961                        log->sync = lp->lsn;
 962                }
 963                LOGSYNC_UNLOCK(log, flags);
 964
 965        }
 966
 967        /* if sync is different from last syncpt,
 968         * write a SYNCPT record with syncpt = sync.
 969         * reset syncpt = sync
 970         */
 971        if (log->sync != log->syncpt) {
 972                lrd.logtid = 0;
 973                lrd.backchain = 0;
 974                lrd.type = cpu_to_le16(LOG_SYNCPT);
 975                lrd.length = 0;
 976                lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 977                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 978
 979                log->syncpt = log->sync;
 980        } else
 981                lsn = log->lsn;
 982
 983        /*
 984         *      setup next syncpt trigger (SWAG)
 985         */
 986        logsize = log->logsize;
 987
 988        logdiff(written, lsn, log);
 989        free = logsize - written;
 990        delta = LOGSYNC_DELTA(logsize);
 991        more = min(free / 2, delta);
 992        if (more < 2 * LOGPSIZE) {
 993                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 994                /*
 995                 *      log wrapping
 996                 *
 997                 * option 1 - panic ? No.!
 998                 * option 2 - shutdown file systems
 999                 *            associated with log ?
1000                 * option 3 - extend log ?
1001                 * option 4 - second chance
1002                 *
1003                 * mark log wrapped, and continue.
1004                 * when all active transactions are completed,
1005                 * mark log valid for recovery.
1006                 * if crashed during invalid state, log state
1007                 * implies invalid log, forcing fsck().
1008                 */
1009                /* mark log state log wrap in log superblock */
1010                /* log->state = LOGWRAP; */
1011
1012                /* reset sync point computation */
1013                log->syncpt = log->sync = lsn;
1014                log->nextsync = delta;
1015        } else
1016                /* next syncpt trigger = written + more */
1017                log->nextsync = written + more;
1018
1019        /* if number of bytes written from last sync point is more
1020         * than 1/4 of the log size, stop new transactions from
1021         * starting until all current transactions are completed
1022         * by setting syncbarrier flag.
1023         */
1024        if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1025            (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1026                set_bit(log_SYNCBARRIER, &log->flag);
1027                jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1028                         log->syncpt);
1029                /*
1030                 * We may have to initiate group commit
1031                 */
1032                jfs_flush_journal(log, 0);
1033        }
1034
1035        return lsn;
1036}
1037
1038/*
1039 * NAME:        jfs_syncpt
1040 *
1041 * FUNCTION:    write log SYNCPT record for specified log
1042 *
1043 * PARAMETERS:  log       - log structure
1044 *              hard_sync - set to 1 to force metadata to be written
1045 */
1046void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047{       LOG_LOCK(log);
1048        if (!test_bit(log_QUIESCE, &log->flag))
1049                lmLogSync(log, hard_sync);
1050        LOG_UNLOCK(log);
1051}
1052
1053/*
1054 * NAME:        lmLogOpen()
1055 *
1056 * FUNCTION:    open the log on first open;
1057 *      insert filesystem in the active list of the log.
1058 *
1059 * PARAMETER:   ipmnt   - file system mount inode
1060 *              iplog   - log inode (out)
1061 *
1062 * RETURN:
1063 *
1064 * serialization:
1065 */
1066int lmLogOpen(struct super_block *sb)
1067{
1068        int rc;
1069        struct block_device *bdev;
1070        struct jfs_log *log;
1071        struct jfs_sb_info *sbi = JFS_SBI(sb);
1072
1073        if (sbi->flag & JFS_NOINTEGRITY)
1074                return open_dummy_log(sb);
1075
1076        if (sbi->mntflag & JFS_INLINELOG)
1077                return open_inline_log(sb);
1078
1079        mutex_lock(&jfs_log_mutex);
1080        list_for_each_entry(log, &jfs_external_logs, journal_list) {
1081                if (log->bdev->bd_dev == sbi->logdev) {
1082                        if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1083                                jfs_warn("wrong uuid on JFS journal");
1084                                mutex_unlock(&jfs_log_mutex);
1085                                return -EINVAL;
1086                        }
1087                        /*
1088                         * add file system to log active file system list
1089                         */
1090                        if ((rc = lmLogFileSystem(log, sbi, 1))) {
1091                                mutex_unlock(&jfs_log_mutex);
1092                                return rc;
1093                        }
1094                        goto journal_found;
1095                }
1096        }
1097
1098        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1099                mutex_unlock(&jfs_log_mutex);
1100                return -ENOMEM;
1101        }
1102        INIT_LIST_HEAD(&log->sb_list);
1103        init_waitqueue_head(&log->syncwait);
1104
1105        /*
1106         *      external log as separate logical volume
1107         *
1108         * file systems to log may have n-to-1 relationship;
1109         */
1110
1111        bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1112                                 log);
1113        if (IS_ERR(bdev)) {
1114                rc = PTR_ERR(bdev);
1115                goto free;
1116        }
1117
1118        log->bdev = bdev;
1119        uuid_copy(&log->uuid, &sbi->loguuid);
1120
1121        /*
1122         * initialize log:
1123         */
1124        if ((rc = lmLogInit(log)))
1125                goto close;
1126
1127        list_add(&log->journal_list, &jfs_external_logs);
1128
1129        /*
1130         * add file system to log active file system list
1131         */
1132        if ((rc = lmLogFileSystem(log, sbi, 1)))
1133                goto shutdown;
1134
1135journal_found:
1136        LOG_LOCK(log);
1137        list_add(&sbi->log_list, &log->sb_list);
1138        sbi->log = log;
1139        LOG_UNLOCK(log);
1140
1141        mutex_unlock(&jfs_log_mutex);
1142        return 0;
1143
1144        /*
1145         *      unwind on error
1146         */
1147      shutdown:         /* unwind lbmLogInit() */
1148        list_del(&log->journal_list);
1149        lbmLogShutdown(log);
1150
1151      close:            /* close external log device */
1152        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1153
1154      free:             /* free log descriptor */
1155        mutex_unlock(&jfs_log_mutex);
1156        kfree(log);
1157
1158        jfs_warn("lmLogOpen: exit(%d)", rc);
1159        return rc;
1160}
1161
1162static int open_inline_log(struct super_block *sb)
1163{
1164        struct jfs_log *log;
1165        int rc;
1166
1167        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168                return -ENOMEM;
1169        INIT_LIST_HEAD(&log->sb_list);
1170        init_waitqueue_head(&log->syncwait);
1171
1172        set_bit(log_INLINELOG, &log->flag);
1173        log->bdev = sb->s_bdev;
1174        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176            (L2LOGPSIZE - sb->s_blocksize_bits);
1177        log->l2bsize = sb->s_blocksize_bits;
1178        ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179
1180        /*
1181         * initialize log.
1182         */
1183        if ((rc = lmLogInit(log))) {
1184                kfree(log);
1185                jfs_warn("lmLogOpen: exit(%d)", rc);
1186                return rc;
1187        }
1188
1189        list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190        JFS_SBI(sb)->log = log;
1191
1192        return rc;
1193}
1194
1195static int open_dummy_log(struct super_block *sb)
1196{
1197        int rc;
1198
1199        mutex_lock(&jfs_log_mutex);
1200        if (!dummy_log) {
1201                dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202                if (!dummy_log) {
1203                        mutex_unlock(&jfs_log_mutex);
1204                        return -ENOMEM;
1205                }
1206                INIT_LIST_HEAD(&dummy_log->sb_list);
1207                init_waitqueue_head(&dummy_log->syncwait);
1208                dummy_log->no_integrity = 1;
1209                /* Make up some stuff */
1210                dummy_log->base = 0;
1211                dummy_log->size = 1024;
1212                rc = lmLogInit(dummy_log);
1213                if (rc) {
1214                        kfree(dummy_log);
1215                        dummy_log = NULL;
1216                        mutex_unlock(&jfs_log_mutex);
1217                        return rc;
1218                }
1219        }
1220
1221        LOG_LOCK(dummy_log);
1222        list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223        JFS_SBI(sb)->log = dummy_log;
1224        LOG_UNLOCK(dummy_log);
1225        mutex_unlock(&jfs_log_mutex);
1226
1227        return 0;
1228}
1229
1230/*
1231 * NAME:        lmLogInit()
1232 *
1233 * FUNCTION:    log initialization at first log open.
1234 *
1235 *      logredo() (or logformat()) should have been run previously.
1236 *      initialize the log from log superblock.
1237 *      set the log state in the superblock to LOGMOUNT and
1238 *      write SYNCPT log record.
1239 *
1240 * PARAMETER:   log     - log structure
1241 *
1242 * RETURN:      0       - if ok
1243 *              -EINVAL - bad log magic number or superblock dirty
1244 *              error returned from logwait()
1245 *
1246 * serialization: single first open thread
1247 */
1248int lmLogInit(struct jfs_log * log)
1249{
1250        int rc = 0;
1251        struct lrd lrd;
1252        struct logsuper *logsuper;
1253        struct lbuf *bpsuper;
1254        struct lbuf *bp;
1255        struct logpage *lp;
1256        int lsn = 0;
1257
1258        jfs_info("lmLogInit: log:0x%p", log);
1259
1260        /* initialize the group commit serialization lock */
1261        LOGGC_LOCK_INIT(log);
1262
1263        /* allocate/initialize the log write serialization lock */
1264        LOG_LOCK_INIT(log);
1265
1266        LOGSYNC_LOCK_INIT(log);
1267
1268        INIT_LIST_HEAD(&log->synclist);
1269
1270        INIT_LIST_HEAD(&log->cqueue);
1271        log->flush_tblk = NULL;
1272
1273        log->count = 0;
1274
1275        /*
1276         * initialize log i/o
1277         */
1278        if ((rc = lbmLogInit(log)))
1279                return rc;
1280
1281        if (!test_bit(log_INLINELOG, &log->flag))
1282                log->l2bsize = L2LOGPSIZE;
1283
1284        /* check for disabled journaling to disk */
1285        if (log->no_integrity) {
1286                /*
1287                 * Journal pages will still be filled.  When the time comes
1288                 * to actually do the I/O, the write is not done, and the
1289                 * endio routine is called directly.
1290                 */
1291                bp = lbmAllocate(log , 0);
1292                log->bp = bp;
1293                bp->l_pn = bp->l_eor = 0;
1294        } else {
1295                /*
1296                 * validate log superblock
1297                 */
1298                if ((rc = lbmRead(log, 1, &bpsuper)))
1299                        goto errout10;
1300
1301                logsuper = (struct logsuper *) bpsuper->l_ldata;
1302
1303                if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304                        jfs_warn("*** Log Format Error ! ***");
1305                        rc = -EINVAL;
1306                        goto errout20;
1307                }
1308
1309                /* logredo() should have been run successfully. */
1310                if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311                        jfs_warn("*** Log Is Dirty ! ***");
1312                        rc = -EINVAL;
1313                        goto errout20;
1314                }
1315
1316                /* initialize log from log superblock */
1317                if (test_bit(log_INLINELOG,&log->flag)) {
1318                        if (log->size != le32_to_cpu(logsuper->size)) {
1319                                rc = -EINVAL;
1320                                goto errout20;
1321                        }
1322                        jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323                                 log, (unsigned long long)log->base, log->size);
1324                } else {
1325                        if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326                                jfs_warn("wrong uuid on JFS log device");
1327                                goto errout20;
1328                        }
1329                        log->size = le32_to_cpu(logsuper->size);
1330                        log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1331                        jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1332                                 log, (unsigned long long)log->base, log->size);
1333                }
1334
1335                log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1336                log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1337
1338                /*
1339                 * initialize for log append write mode
1340                 */
1341                /* establish current/end-of-log page/buffer */
1342                if ((rc = lbmRead(log, log->page, &bp)))
1343                        goto errout20;
1344
1345                lp = (struct logpage *) bp->l_ldata;
1346
1347                jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1348                         le32_to_cpu(logsuper->end), log->page, log->eor,
1349                         le16_to_cpu(lp->h.eor));
1350
1351                log->bp = bp;
1352                bp->l_pn = log->page;
1353                bp->l_eor = log->eor;
1354
1355                /* if current page is full, move on to next page */
1356                if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1357                        lmNextPage(log);
1358
1359                /*
1360                 * initialize log syncpoint
1361                 */
1362                /*
1363                 * write the first SYNCPT record with syncpoint = 0
1364                 * (i.e., log redo up to HERE !);
1365                 * remove current page from lbm write queue at end of pageout
1366                 * (to write log superblock update), but do not release to
1367                 * freelist;
1368                 */
1369                lrd.logtid = 0;
1370                lrd.backchain = 0;
1371                lrd.type = cpu_to_le16(LOG_SYNCPT);
1372                lrd.length = 0;
1373                lrd.log.syncpt.sync = 0;
1374                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1375                bp = log->bp;
1376                bp->l_ceor = bp->l_eor;
1377                lp = (struct logpage *) bp->l_ldata;
1378                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1379                lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1380                if ((rc = lbmIOWait(bp, 0)))
1381                        goto errout30;
1382
1383                /*
1384                 * update/write superblock
1385                 */
1386                logsuper->state = cpu_to_le32(LOGMOUNT);
1387                log->serial = le32_to_cpu(logsuper->serial) + 1;
1388                logsuper->serial = cpu_to_le32(log->serial);
1389                lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1390                if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1391                        goto errout30;
1392        }
1393
1394        /* initialize logsync parameters */
1395        log->logsize = (log->size - 2) << L2LOGPSIZE;
1396        log->lsn = lsn;
1397        log->syncpt = lsn;
1398        log->sync = log->syncpt;
1399        log->nextsync = LOGSYNC_DELTA(log->logsize);
1400
1401        jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1402                 log->lsn, log->syncpt, log->sync);
1403
1404        /*
1405         * initialize for lazy/group commit
1406         */
1407        log->clsn = lsn;
1408
1409        return 0;
1410
1411        /*
1412         *      unwind on error
1413         */
1414      errout30:         /* release log page */
1415        log->wqueue = NULL;
1416        bp->l_wqnext = NULL;
1417        lbmFree(bp);
1418
1419      errout20:         /* release log superblock */
1420        lbmFree(bpsuper);
1421
1422      errout10:         /* unwind lbmLogInit() */
1423        lbmLogShutdown(log);
1424
1425        jfs_warn("lmLogInit: exit(%d)", rc);
1426        return rc;
1427}
1428
1429
1430/*
1431 * NAME:        lmLogClose()
1432 *
1433 * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1434 *              and close it on last close.
1435 *
1436 * PARAMETER:   sb      - superblock
1437 *
1438 * RETURN:      errors from subroutines
1439 *
1440 * serialization:
1441 */
1442int lmLogClose(struct super_block *sb)
1443{
1444        struct jfs_sb_info *sbi = JFS_SBI(sb);
1445        struct jfs_log *log = sbi->log;
1446        struct block_device *bdev;
1447        int rc = 0;
1448
1449        jfs_info("lmLogClose: log:0x%p", log);
1450
1451        mutex_lock(&jfs_log_mutex);
1452        LOG_LOCK(log);
1453        list_del(&sbi->log_list);
1454        LOG_UNLOCK(log);
1455        sbi->log = NULL;
1456
1457        /*
1458         * We need to make sure all of the "written" metapages
1459         * actually make it to disk
1460         */
1461        sync_blockdev(sb->s_bdev);
1462
1463        if (test_bit(log_INLINELOG, &log->flag)) {
1464                /*
1465                 *      in-line log in host file system
1466                 */
1467                rc = lmLogShutdown(log);
1468                kfree(log);
1469                goto out;
1470        }
1471
1472        if (!log->no_integrity)
1473                lmLogFileSystem(log, sbi, 0);
1474
1475        if (!list_empty(&log->sb_list))
1476                goto out;
1477
1478        /*
1479         * TODO: ensure that the dummy_log is in a state to allow
1480         * lbmLogShutdown to deallocate all the buffers and call
1481         * kfree against dummy_log.  For now, leave dummy_log & its
1482         * buffers in memory, and resuse if another no-integrity mount
1483         * is requested.
1484         */
1485        if (log->no_integrity)
1486                goto out;
1487
1488        /*
1489         *      external log as separate logical volume
1490         */
1491        list_del(&log->journal_list);
1492        bdev = log->bdev;
1493        rc = lmLogShutdown(log);
1494
1495        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1496
1497        kfree(log);
1498
1499      out:
1500        mutex_unlock(&jfs_log_mutex);
1501        jfs_info("lmLogClose: exit(%d)", rc);
1502        return rc;
1503}
1504
1505
1506/*
1507 * NAME:        jfs_flush_journal()
1508 *
1509 * FUNCTION:    initiate write of any outstanding transactions to the journal
1510 *              and optionally wait until they are all written to disk
1511 *
1512 *              wait == 0  flush until latest txn is committed, don't wait
1513 *              wait == 1  flush until latest txn is committed, wait
1514 *              wait > 1   flush until all txn's are complete, wait
1515 */
1516void jfs_flush_journal(struct jfs_log *log, int wait)
1517{
1518        int i;
1519        struct tblock *target = NULL;
1520
1521        /* jfs_write_inode may call us during read-only mount */
1522        if (!log)
1523                return;
1524
1525        jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1526
1527        LOGGC_LOCK(log);
1528
1529        if (!list_empty(&log->cqueue)) {
1530                /*
1531                 * This ensures that we will keep writing to the journal as long
1532                 * as there are unwritten commit records
1533                 */
1534                target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1535
1536                if (test_bit(log_FLUSH, &log->flag)) {
1537                        /*
1538                         * We're already flushing.
1539                         * if flush_tblk is NULL, we are flushing everything,
1540                         * so leave it that way.  Otherwise, update it to the
1541                         * latest transaction
1542                         */
1543                        if (log->flush_tblk)
1544                                log->flush_tblk = target;
1545                } else {
1546                        /* Only flush until latest transaction is committed */
1547                        log->flush_tblk = target;
1548                        set_bit(log_FLUSH, &log->flag);
1549
1550                        /*
1551                         * Initiate I/O on outstanding transactions
1552                         */
1553                        if (!(log->cflag & logGC_PAGEOUT)) {
1554                                log->cflag |= logGC_PAGEOUT;
1555                                lmGCwrite(log, 0);
1556                        }
1557                }
1558        }
1559        if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1560                /* Flush until all activity complete */
1561                set_bit(log_FLUSH, &log->flag);
1562                log->flush_tblk = NULL;
1563        }
1564
1565        if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1566                DECLARE_WAITQUEUE(__wait, current);
1567
1568                add_wait_queue(&target->gcwait, &__wait);
1569                set_current_state(TASK_UNINTERRUPTIBLE);
1570                LOGGC_UNLOCK(log);
1571                schedule();
1572                LOGGC_LOCK(log);
1573                remove_wait_queue(&target->gcwait, &__wait);
1574        }
1575        LOGGC_UNLOCK(log);
1576
1577        if (wait < 2)
1578                return;
1579
1580        write_special_inodes(log, filemap_fdatawrite);
1581
1582        /*
1583         * If there was recent activity, we may need to wait
1584         * for the lazycommit thread to catch up
1585         */
1586        if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1587                for (i = 0; i < 200; i++) {     /* Too much? */
1588                        msleep(250);
1589                        write_special_inodes(log, filemap_fdatawrite);
1590                        if (list_empty(&log->cqueue) &&
1591                            list_empty(&log->synclist))
1592                                break;
1593                }
1594        }
1595        assert(list_empty(&log->cqueue));
1596
1597#ifdef CONFIG_JFS_DEBUG
1598        if (!list_empty(&log->synclist)) {
1599                struct logsyncblk *lp;
1600
1601                printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1602                list_for_each_entry(lp, &log->synclist, synclist) {
1603                        if (lp->xflag & COMMIT_PAGE) {
1604                                struct metapage *mp = (struct metapage *)lp;
1605                                print_hex_dump(KERN_ERR, "metapage: ",
1606                                               DUMP_PREFIX_ADDRESS, 16, 4,
1607                                               mp, sizeof(struct metapage), 0);
1608                                print_hex_dump(KERN_ERR, "page: ",
1609                                               DUMP_PREFIX_ADDRESS, 16,
1610                                               sizeof(long), mp->page,
1611                                               sizeof(struct page), 0);
1612                        } else
1613                                print_hex_dump(KERN_ERR, "tblock:",
1614                                               DUMP_PREFIX_ADDRESS, 16, 4,
1615                                               lp, sizeof(struct tblock), 0);
1616                }
1617        }
1618#else
1619        WARN_ON(!list_empty(&log->synclist));
1620#endif
1621        clear_bit(log_FLUSH, &log->flag);
1622}
1623
1624/*
1625 * NAME:        lmLogShutdown()
1626 *
1627 * FUNCTION:    log shutdown at last LogClose().
1628 *
1629 *              write log syncpt record.
1630 *              update super block to set redone flag to 0.
1631 *
1632 * PARAMETER:   log     - log inode
1633 *
1634 * RETURN:      0       - success
1635 *
1636 * serialization: single last close thread
1637 */
1638int lmLogShutdown(struct jfs_log * log)
1639{
1640        int rc;
1641        struct lrd lrd;
1642        int lsn;
1643        struct logsuper *logsuper;
1644        struct lbuf *bpsuper;
1645        struct lbuf *bp;
1646        struct logpage *lp;
1647
1648        jfs_info("lmLogShutdown: log:0x%p", log);
1649
1650        jfs_flush_journal(log, 2);
1651
1652        /*
1653         * write the last SYNCPT record with syncpoint = 0
1654         * (i.e., log redo up to HERE !)
1655         */
1656        lrd.logtid = 0;
1657        lrd.backchain = 0;
1658        lrd.type = cpu_to_le16(LOG_SYNCPT);
1659        lrd.length = 0;
1660        lrd.log.syncpt.sync = 0;
1661
1662        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1663        bp = log->bp;
1664        lp = (struct logpage *) bp->l_ldata;
1665        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1666        lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1667        lbmIOWait(log->bp, lbmFREE);
1668        log->bp = NULL;
1669
1670        /*
1671         * synchronous update log superblock
1672         * mark log state as shutdown cleanly
1673         * (i.e., Log does not need to be replayed).
1674         */
1675        if ((rc = lbmRead(log, 1, &bpsuper)))
1676                goto out;
1677
1678        logsuper = (struct logsuper *) bpsuper->l_ldata;
1679        logsuper->state = cpu_to_le32(LOGREDONE);
1680        logsuper->end = cpu_to_le32(lsn);
1681        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1682        rc = lbmIOWait(bpsuper, lbmFREE);
1683
1684        jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1685                 lsn, log->page, log->eor);
1686
1687      out:
1688        /*
1689         * shutdown per log i/o
1690         */
1691        lbmLogShutdown(log);
1692
1693        if (rc) {
1694                jfs_warn("lmLogShutdown: exit(%d)", rc);
1695        }
1696        return rc;
1697}
1698
1699
1700/*
1701 * NAME:        lmLogFileSystem()
1702 *
1703 * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1704 *      file system into/from log active file system list.
1705 *
1706 * PARAMETE:    log     - pointer to logs inode.
1707 *              fsdev   - kdev_t of filesystem.
1708 *              serial  - pointer to returned log serial number
1709 *              activate - insert/remove device from active list.
1710 *
1711 * RETURN:      0       - success
1712 *              errors returned by vms_iowait().
1713 */
1714static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1715                           int activate)
1716{
1717        int rc = 0;
1718        int i;
1719        struct logsuper *logsuper;
1720        struct lbuf *bpsuper;
1721        uuid_t *uuid = &sbi->uuid;
1722
1723        /*
1724         * insert/remove file system device to log active file system list.
1725         */
1726        if ((rc = lbmRead(log, 1, &bpsuper)))
1727                return rc;
1728
1729        logsuper = (struct logsuper *) bpsuper->l_ldata;
1730        if (activate) {
1731                for (i = 0; i < MAX_ACTIVE; i++)
1732                        if (uuid_is_null(&logsuper->active[i].uuid)) {
1733                                uuid_copy(&logsuper->active[i].uuid, uuid);
1734                                sbi->aggregate = i;
1735                                break;
1736                        }
1737                if (i == MAX_ACTIVE) {
1738                        jfs_warn("Too many file systems sharing journal!");
1739                        lbmFree(bpsuper);
1740                        return -EMFILE; /* Is there a better rc? */
1741                }
1742        } else {
1743                for (i = 0; i < MAX_ACTIVE; i++)
1744                        if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1745                                uuid_copy(&logsuper->active[i].uuid,
1746                                          &uuid_null);
1747                                break;
1748                        }
1749                if (i == MAX_ACTIVE) {
1750                        jfs_warn("Somebody stomped on the journal!");
1751                        lbmFree(bpsuper);
1752                        return -EIO;
1753                }
1754
1755        }
1756
1757        /*
1758         * synchronous write log superblock:
1759         *
1760         * write sidestream bypassing write queue:
1761         * at file system mount, log super block is updated for
1762         * activation of the file system before any log record
1763         * (MOUNT record) of the file system, and at file system
1764         * unmount, all meta data for the file system has been
1765         * flushed before log super block is updated for deactivation
1766         * of the file system.
1767         */
1768        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1769        rc = lbmIOWait(bpsuper, lbmFREE);
1770
1771        return rc;
1772}
1773
/*
 *		log buffer manager (lbm)
 *		------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log pages in pageout or waiting for pageout in serial pageout.
 */
1788
1789/*
1790 *      lbmLogInit()
1791 *
1792 * initialize per log I/O setup at lmLogInit()
1793 */
1794static int lbmLogInit(struct jfs_log * log)
1795{                               /* log inode */
1796        int i;
1797        struct lbuf *lbuf;
1798
1799        jfs_info("lbmLogInit: log:0x%p", log);
1800
1801        /* initialize current buffer cursor */
1802        log->bp = NULL;
1803
1804        /* initialize log device write queue */
1805        log->wqueue = NULL;
1806
1807        /*
1808         * Each log has its own buffer pages allocated to it.  These are
1809         * not managed by the page cache.  This ensures that a transaction
1810         * writing to the log does not block trying to allocate a page from
1811         * the page cache (for the log).  This would be bad, since page
1812         * allocation waits on the kswapd thread that may be committing inodes
1813         * which would cause log activity.  Was that clear?  I'm trying to
1814         * avoid deadlock here.
1815         */
1816        init_waitqueue_head(&log->free_wait);
1817
1818        log->lbuf_free = NULL;
1819
1820        for (i = 0; i < LOGPAGES;) {
1821                char *buffer;
1822                uint offset;
1823                struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1824
1825                if (!page)
1826                        goto error;
1827                buffer = page_address(page);
1828                for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1829                        lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1830                        if (lbuf == NULL) {
1831                                if (offset == 0)
1832                                        __free_page(page);
1833                                goto error;
1834                        }
1835                        if (offset) /* we already have one reference */
1836                                get_page(page);
1837                        lbuf->l_offset = offset;
1838                        lbuf->l_ldata = buffer + offset;
1839                        lbuf->l_page = page;
1840                        lbuf->l_log = log;
1841                        init_waitqueue_head(&lbuf->l_ioevent);
1842
1843                        lbuf->l_freelist = log->lbuf_free;
1844                        log->lbuf_free = lbuf;
1845                        i++;
1846                }
1847        }
1848
1849        return (0);
1850
1851      error:
1852        lbmLogShutdown(log);
1853        return -ENOMEM;
1854}
1855
1856
1857/*
1858 *      lbmLogShutdown()
1859 *
1860 * finalize per log I/O setup at lmLogShutdown()
1861 */
1862static void lbmLogShutdown(struct jfs_log * log)
1863{
1864        struct lbuf *lbuf;
1865
1866        jfs_info("lbmLogShutdown: log:0x%p", log);
1867
1868        lbuf = log->lbuf_free;
1869        while (lbuf) {
1870                struct lbuf *next = lbuf->l_freelist;
1871                __free_page(lbuf->l_page);
1872                kfree(lbuf);
1873                lbuf = next;
1874        }
1875}
1876
1877
1878/*
1879 *      lbmAllocate()
1880 *
1881 * allocate an empty log buffer
1882 */
1883static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1884{
1885        struct lbuf *bp;
1886        unsigned long flags;
1887
1888        /*
1889         * recycle from log buffer freelist if any
1890         */
1891        LCACHE_LOCK(flags);
1892        LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1893        log->lbuf_free = bp->l_freelist;
1894        LCACHE_UNLOCK(flags);
1895
1896        bp->l_flag = 0;
1897
1898        bp->l_wqnext = NULL;
1899        bp->l_freelist = NULL;
1900
1901        bp->l_pn = pn;
1902        bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1903        bp->l_ceor = 0;
1904
1905        return bp;
1906}
1907
1908
/*
 *	lbmFree()
 *
 * release a log buffer to the freelist
 *
 * Locking wrapper: performs lbmfree() with LCACHE_LOCK held.  Use
 * lbmfree() directly when the lock is already held.
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long flags;

	LCACHE_LOCK(flags);
	lbmfree(bp);
	LCACHE_UNLOCK(flags);
}
1924
1925static void lbmfree(struct lbuf * bp)
1926{
1927        struct jfs_log *log = bp->l_log;
1928
1929        assert(bp->l_wqnext == NULL);
1930
1931        /*
1932         * return the buffer to head of freelist
1933         */
1934        bp->l_freelist = log->lbuf_free;
1935        log->lbuf_free = bp;
1936
1937        wake_up(&log->free_wait);
1938        return;
1939}
1940
1941
1942/*
1943 * NAME:        lbmRedrive
1944 *
1945 * FUNCTION:    add a log buffer to the log redrive list
1946 *
1947 * PARAMETER:
1948 *      bp      - log buffer
1949 *
1950 * NOTES:
1951 *      Takes log_redrive_lock.
1952 */
1953static inline void lbmRedrive(struct lbuf *bp)
1954{
1955        unsigned long flags;
1956
1957        spin_lock_irqsave(&log_redrive_lock, flags);
1958        bp->l_redrive_next = log_redrive_list;
1959        log_redrive_list = bp;
1960        spin_unlock_irqrestore(&log_redrive_lock, flags);
1961
1962        wake_up_process(jfsIOthread);
1963}
1964
1965
1966/*
1967 *      lbmRead()
1968 */
1969static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1970{
1971        struct bio *bio;
1972        struct lbuf *bp;
1973
1974        /*
1975         * allocate a log buffer
1976         */
1977        *bpp = bp = lbmAllocate(log, pn);
1978        jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1979
1980        bp->l_flag |= lbmREAD;
1981
1982        bio = bio_alloc(GFP_NOFS, 1);
1983
1984        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1985        bio_set_dev(bio, log->bdev);
1986
1987        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1988        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1989
1990        bio->bi_end_io = lbmIODone;
1991        bio->bi_private = bp;
1992        bio->bi_opf = REQ_OP_READ;
1993        /*check if journaling to disk has been disabled*/
1994        if (log->no_integrity) {
1995                bio->bi_iter.bi_size = 0;
1996                lbmIODone(bio);
1997        } else {
1998                submit_bio(bio);
1999        }
2000
2001        wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2002
2003        return 0;
2004}
2005
2006
2007/*
2008 *      lbmWrite()
2009 *
2010 * buffer at head of pageout queue stays after completion of
2011 * partial-page pageout and redriven by explicit initiation of
2012 * pageout by caller until full-page pageout is completed and
2013 * released.
2014 *
2015 * device driver i/o done redrives pageout of new buffer at
2016 * head of pageout queue when current buffer at head of pageout
2017 * queue is released at the completion of its full-page pageout.
2018 *
2019 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2020 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2021 */
2022static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2023                     int cant_block)
2024{
2025        struct lbuf *tail;
2026        unsigned long flags;
2027
2028        jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2029
2030        /* map the logical block address to physical block address */
2031        bp->l_blkno =
2032            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2033
2034        LCACHE_LOCK(flags);             /* disable+lock */
2035
2036        /*
2037         * initialize buffer for device driver
2038         */
2039        bp->l_flag = flag;
2040
2041        /*
2042         *      insert bp at tail of write queue associated with log
2043         *
2044         * (request is either for bp already/currently at head of queue
2045         * or new bp to be inserted at tail)
2046         */
2047        tail = log->wqueue;
2048
2049        /* is buffer not already on write queue ? */
2050        if (bp->l_wqnext == NULL) {
2051                /* insert at tail of wqueue */
2052                if (tail == NULL) {
2053                        log->wqueue = bp;
2054                        bp->l_wqnext = bp;
2055                } else {
2056                        log->wqueue = bp;
2057                        bp->l_wqnext = tail->l_wqnext;
2058                        tail->l_wqnext = bp;
2059                }
2060
2061                tail = bp;
2062        }
2063
2064        /* is buffer at head of wqueue and for write ? */
2065        if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2066                LCACHE_UNLOCK(flags);   /* unlock+enable */
2067                return;
2068        }
2069
2070        LCACHE_UNLOCK(flags);   /* unlock+enable */
2071
2072        if (cant_block)
2073                lbmRedrive(bp);
2074        else if (flag & lbmSYNC)
2075                lbmStartIO(bp);
2076        else {
2077                LOGGC_UNLOCK(log);
2078                lbmStartIO(bp);
2079                LOGGC_LOCK(log);
2080        }
2081}
2082
2083
2084/*
2085 *      lbmDirectWrite()
2086 *
2087 * initiate pageout bypassing write queue for sidestream
2088 * (e.g., log superblock) write;
2089 */
2090static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2091{
2092        jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2093                 bp, flag, bp->l_pn);
2094
2095        /*
2096         * initialize buffer for device driver
2097         */
2098        bp->l_flag = flag | lbmDIRECT;
2099
2100        /* map the logical block address to physical block address */
2101        bp->l_blkno =
2102            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2103
2104        /*
2105         *      initiate pageout of the page
2106         */
2107        lbmStartIO(bp);
2108}
2109
2110
2111/*
2112 * NAME:        lbmStartIO()
2113 *
2114 * FUNCTION:    Interface to DD strategy routine
2115 *
2116 * RETURN:      none
2117 *
2118 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2119 */
2120static void lbmStartIO(struct lbuf * bp)
2121{
2122        struct bio *bio;
2123        struct jfs_log *log = bp->l_log;
2124
2125        jfs_info("lbmStartIO");
2126
2127        bio = bio_alloc(GFP_NOFS, 1);
2128        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2129        bio_set_dev(bio, log->bdev);
2130
2131        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2132        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2133
2134        bio->bi_end_io = lbmIODone;
2135        bio->bi_private = bp;
2136        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2137
2138        /* check if journaling to disk has been disabled */
2139        if (log->no_integrity) {
2140                bio->bi_iter.bi_size = 0;
2141                lbmIODone(bio);
2142        } else {
2143                submit_bio(bio);
2144                INCREMENT(lmStat.submitted);
2145        }
2146}
2147
2148
2149/*
2150 *      lbmIOWait()
2151 */
2152static int lbmIOWait(struct lbuf * bp, int flag)
2153{
2154        unsigned long flags;
2155        int rc = 0;
2156
2157        jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2158
2159        LCACHE_LOCK(flags);             /* disable+lock */
2160
2161        LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2162
2163        rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2164
2165        if (flag & lbmFREE)
2166                lbmfree(bp);
2167
2168        LCACHE_UNLOCK(flags);   /* unlock+enable */
2169
2170        jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2171        return rc;
2172}
2173
2174/*
2175 *      lbmIODone()
2176 *
2177 * executed at INTIODONE level
2178 */
2179static void lbmIODone(struct bio *bio)
2180{
2181        struct lbuf *bp = bio->bi_private;
2182        struct lbuf *nextbp, *tail;
2183        struct jfs_log *log;
2184        unsigned long flags;
2185
2186        /*
2187         * get back jfs buffer bound to the i/o buffer
2188         */
2189        jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2190
2191        LCACHE_LOCK(flags);             /* disable+lock */
2192
2193        bp->l_flag |= lbmDONE;
2194
2195        if (bio->bi_status) {
2196                bp->l_flag |= lbmERROR;
2197
2198                jfs_err("lbmIODone: I/O error in JFS log");
2199        }
2200
2201        bio_put(bio);
2202
2203        /*
2204         *      pagein completion
2205         */
2206        if (bp->l_flag & lbmREAD) {
2207                bp->l_flag &= ~lbmREAD;
2208
2209                LCACHE_UNLOCK(flags);   /* unlock+enable */
2210
2211                /* wakeup I/O initiator */
2212                LCACHE_WAKEUP(&bp->l_ioevent);
2213
2214                return;
2215        }
2216
2217        /*
2218         *      pageout completion
2219         *
2220         * the bp at the head of write queue has completed pageout.
2221         *
2222         * if single-commit/full-page pageout, remove the current buffer
2223         * from head of pageout queue, and redrive pageout with
2224         * the new buffer at head of pageout queue;
2225         * otherwise, the partial-page pageout buffer stays at
2226         * the head of pageout queue to be redriven for pageout
2227         * by lmGroupCommit() until full-page pageout is completed.
2228         */
2229        bp->l_flag &= ~lbmWRITE;
2230        INCREMENT(lmStat.pagedone);
2231
2232        /* update committed lsn */
2233        log = bp->l_log;
2234        log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2235
2236        if (bp->l_flag & lbmDIRECT) {
2237                LCACHE_WAKEUP(&bp->l_ioevent);
2238                LCACHE_UNLOCK(flags);
2239                return;
2240        }
2241
2242        tail = log->wqueue;
2243
2244        /* single element queue */
2245        if (bp == tail) {
2246                /* remove head buffer of full-page pageout
2247                 * from log device write queue
2248                 */
2249                if (bp->l_flag & lbmRELEASE) {
2250                        log->wqueue = NULL;
2251                        bp->l_wqnext = NULL;
2252                }
2253        }
2254        /* multi element queue */
2255        else {
2256                /* remove head buffer of full-page pageout
2257                 * from log device write queue
2258                 */
2259                if (bp->l_flag & lbmRELEASE) {
2260                        nextbp = tail->l_wqnext = bp->l_wqnext;
2261                        bp->l_wqnext = NULL;
2262
2263                        /*
2264                         * redrive pageout of next page at head of write queue:
2265                         * redrive next page without any bound tblk
2266                         * (i.e., page w/o any COMMIT records), or
2267                         * first page of new group commit which has been
2268                         * queued after current page (subsequent pageout
2269                         * is performed synchronously, except page without
2270                         * any COMMITs) by lmGroupCommit() as indicated
2271                         * by lbmWRITE flag;
2272                         */
2273                        if (nextbp->l_flag & lbmWRITE) {
2274                                /*
2275                                 * We can't do the I/O at interrupt time.
2276                                 * The jfsIO thread can do it
2277                                 */
2278                                lbmRedrive(nextbp);
2279                        }
2280                }
2281        }
2282
2283        /*
2284         *      synchronous pageout:
2285         *
2286         * buffer has not necessarily been removed from write queue
2287         * (e.g., synchronous write of partial-page with COMMIT):
2288         * leave buffer for i/o initiator to dispose
2289         */
2290        if (bp->l_flag & lbmSYNC) {
2291                LCACHE_UNLOCK(flags);   /* unlock+enable */
2292
2293                /* wakeup I/O initiator */
2294                LCACHE_WAKEUP(&bp->l_ioevent);
2295        }
2296
2297        /*
2298         *      Group Commit pageout:
2299         */
2300        else if (bp->l_flag & lbmGC) {
2301                LCACHE_UNLOCK(flags);
2302                lmPostGC(bp);
2303        }
2304
2305        /*
2306         *      asynchronous pageout:
2307         *
2308         * buffer must have been removed from write queue:
2309         * insert buffer at head of freelist where it can be recycled
2310         */
2311        else {
2312                assert(bp->l_flag & lbmRELEASE);
2313                assert(bp->l_flag & lbmFREE);
2314                lbmfree(bp);
2315
2316                LCACHE_UNLOCK(flags);   /* unlock+enable */
2317        }
2318}
2319
2320int jfsIOWait(void *arg)
2321{
2322        struct lbuf *bp;
2323
2324        do {
2325                spin_lock_irq(&log_redrive_lock);
2326                while ((bp = log_redrive_list)) {
2327                        log_redrive_list = bp->l_redrive_next;
2328                        bp->l_redrive_next = NULL;
2329                        spin_unlock_irq(&log_redrive_lock);
2330                        lbmStartIO(bp);
2331                        spin_lock_irq(&log_redrive_lock);
2332                }
2333
2334                if (freezing(current)) {
2335                        spin_unlock_irq(&log_redrive_lock);
2336                        try_to_freeze();
2337                } else {
2338                        set_current_state(TASK_INTERRUPTIBLE);
2339                        spin_unlock_irq(&log_redrive_lock);
2340                        schedule();
2341                }
2342        } while (!kthread_should_stop());
2343
2344        jfs_info("jfsIOWait being killed!");
2345        return 0;
2346}
2347
2348/*
2349 * NAME:        lmLogFormat()/jfs_logform()
2350 *
2351 * FUNCTION:    format file system log
2352 *
2353 * PARAMETERS:
2354 *      log     - volume log
2355 *      logAddress - start address of log space in FS block
2356 *      logSize - length of log space in FS block;
2357 *
2358 * RETURN:      0       - success
2359 *              -EIO    - i/o error
2360 *
2361 * XXX: We're synchronously writing one page at a time.  This needs to
2362 *      be improved by writing multiple pages at once.
2363 */
2364int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2365{
2366        int rc = -EIO;
2367        struct jfs_sb_info *sbi;
2368        struct logsuper *logsuper;
2369        struct logpage *lp;
2370        int lspn;               /* log sequence page number */
2371        struct lrd *lrd_ptr;
2372        int npages = 0;
2373        struct lbuf *bp;
2374
2375        jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2376                 (long long)logAddress, logSize);
2377
2378        sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2379
2380        /* allocate a log buffer */
2381        bp = lbmAllocate(log, 1);
2382
2383        npages = logSize >> sbi->l2nbperpage;
2384
2385        /*
2386         *      log space:
2387         *
2388         * page 0 - reserved;
2389         * page 1 - log superblock;
2390         * page 2 - log data page: A SYNC log record is written
2391         *          into this page at logform time;
2392         * pages 3-N - log data page: set to empty log data pages;
2393         */
2394        /*
2395         *      init log superblock: log page 1
2396         */
2397        logsuper = (struct logsuper *) bp->l_ldata;
2398
2399        logsuper->magic = cpu_to_le32(LOGMAGIC);
2400        logsuper->version = cpu_to_le32(LOGVERSION);
2401        logsuper->state = cpu_to_le32(LOGREDONE);
2402        logsuper->flag = cpu_to_le32(sbi->mntflag);     /* ? */
2403        logsuper->size = cpu_to_le32(npages);
2404        logsuper->bsize = cpu_to_le32(sbi->bsize);
2405        logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2406        logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2407
2408        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2409        bp->l_blkno = logAddress + sbi->nbperpage;
2410        lbmStartIO(bp);
2411        if ((rc = lbmIOWait(bp, 0)))
2412                goto exit;
2413
2414        /*
2415         *      init pages 2 to npages-1 as log data pages:
2416         *
2417         * log page sequence number (lpsn) initialization:
2418         *
2419         * pn:   0     1     2     3                 n-1
2420         *       +-----+-----+=====+=====+===.....===+=====+
2421         * lspn:             N-1   0     1           N-2
2422         *                   <--- N page circular file ---->
2423         *
2424         * the N (= npages-2) data pages of the log is maintained as
2425         * a circular file for the log records;
2426         * lpsn grows by 1 monotonically as each log page is written
2427         * to the circular file of the log;
2428         * and setLogpage() will not reset the page number even if
2429         * the eor is equal to LOGPHDRSIZE. In order for binary search
2430         * still work in find log end process, we have to simulate the
2431         * log wrap situation at the log format time.
2432         * The 1st log page written will have the highest lpsn. Then
2433         * the succeeding log pages will have ascending order of
2434         * the lspn starting from 0, ... (N-2)
2435         */
2436        lp = (struct logpage *) bp->l_ldata;
2437        /*
2438         * initialize 1st log page to be written: lpsn = N - 1,
2439         * write a SYNCPT log record is written to this page
2440         */
2441        lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2442        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2443
2444        lrd_ptr = (struct lrd *) &lp->data;
2445        lrd_ptr->logtid = 0;
2446        lrd_ptr->backchain = 0;
2447        lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2448        lrd_ptr->length = 0;
2449        lrd_ptr->log.syncpt.sync = 0;
2450
2451        bp->l_blkno += sbi->nbperpage;
2452        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2453        lbmStartIO(bp);
2454        if ((rc = lbmIOWait(bp, 0)))
2455                goto exit;
2456
2457        /*
2458         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2459         */
2460        for (lspn = 0; lspn < npages - 3; lspn++) {
2461                lp->h.page = lp->t.page = cpu_to_le32(lspn);
2462                lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2463
2464                bp->l_blkno += sbi->nbperpage;
2465                bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2466                lbmStartIO(bp);
2467                if ((rc = lbmIOWait(bp, 0)))
2468                        goto exit;
2469        }
2470
2471        rc = 0;
2472exit:
2473        /*
2474         *      finalize log
2475         */
2476        /* release the buffer */
2477        lbmFree(bp);
2478
2479        return rc;
2480}
2481
2482#ifdef CONFIG_JFS_STATISTICS
2483int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2484{
2485        seq_printf(m,
2486                       "JFS Logmgr stats\n"
2487                       "================\n"
2488                       "commits = %d\n"
2489                       "writes submitted = %d\n"
2490                       "writes completed = %d\n"
2491                       "full pages submitted = %d\n"
2492                       "partial pages submitted = %d\n",
2493                       lmStat.commit,
2494                       lmStat.submitted,
2495                       lmStat.pagedone,
2496                       lmStat.full_page,
2497                       lmStat.partial_page);
2498        return 0;
2499}
2500#endif /* CONFIG_JFS_STATISTICS */
2501