linux/fs/jfs/jfs_logmgr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6
   7/*
   8 *      jfs_logmgr.c: log manager
   9 *
  10 * for related information, see transaction manager (jfs_txnmgr.c), and
  11 * recovery manager (jfs_logredo.c).
  12 *
  13 * note: for detail, RTFS.
  14 *
  15 *      log buffer manager:
  16 * special purpose buffer manager supporting log i/o requirements.
  17 * per log serial pageout of logpage
  18 * queuing i/o requests and redrive i/o at iodone
  19 * maintain current logpage buffer
  20 * no caching since append only
  21 * appropriate jfs buffer cache buffers as needed
  22 *
  23 *      group commit:
  24 * transactions which wrote COMMIT records in the same in-memory
  25 * log page during the pageout of previous/current log page(s) are
  26 * committed together by the pageout of the page.
  27 *
  28 *      TBD lazy commit:
 * transactions are committed asynchronously when the log page
 * containing their COMMIT record is paged out once it becomes full;
  31 *
  32 *      serialization:
  33 * . a per log lock serialize log write.
  34 * . a per log lock serialize group commit.
  35 * . a per log lock serialize log open/close;
  36 *
  37 *      TBD log integrity:
  38 * careful-write (ping-pong) of last logpage to recover from crash
  39 * in overwrite.
 * detection of split (out-of-order) write of physical sectors
 * of last logpage via timestamp at end of each sector
 * (with its mirror data array at trailer).
  43 *
  44 *      alternatives:
  45 * lsn - 64-bit monotonically increasing integer vs
  46 * 32-bit lspn and page eor.
  47 */
  48
  49#include <linux/fs.h>
  50#include <linux/blkdev.h>
  51#include <linux/interrupt.h>
  52#include <linux/completion.h>
  53#include <linux/kthread.h>
  54#include <linux/buffer_head.h>          /* for sync_blockdev() */
  55#include <linux/bio.h>
  56#include <linux/freezer.h>
  57#include <linux/export.h>
  58#include <linux/delay.h>
  59#include <linux/mutex.h>
  60#include <linux/seq_file.h>
  61#include <linux/slab.h>
  62#include "jfs_incore.h"
  63#include "jfs_filsys.h"
  64#include "jfs_metapage.h"
  65#include "jfs_superblock.h"
  66#include "jfs_txnmgr.h"
  67#include "jfs_debug.h"
  68
  69
  70/*
  71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
  72 */
  73static struct lbuf *log_redrive_list;
  74static DEFINE_SPINLOCK(log_redrive_lock);
  75
  76
  77/*
  78 *      log read/write serialization (per log)
  79 */
  80#define LOG_LOCK_INIT(log)      mutex_init(&(log)->loglock)
  81#define LOG_LOCK(log)           mutex_lock(&((log)->loglock))
  82#define LOG_UNLOCK(log)         mutex_unlock(&((log)->loglock))
  83
  84
  85/*
  86 *      log group commit serialization (per log)
  87 */
  88
  89#define LOGGC_LOCK_INIT(log)    spin_lock_init(&(log)->gclock)
  90#define LOGGC_LOCK(log)         spin_lock_irq(&(log)->gclock)
  91#define LOGGC_UNLOCK(log)       spin_unlock_irq(&(log)->gclock)
  92#define LOGGC_WAKEUP(tblk)      wake_up_all(&(tblk)->gcwait)
  93
  94/*
  95 *      log sync serialization (per log)
  96 */
  97#define LOGSYNC_DELTA(logsize)          min((logsize)/8, 128*LOGPSIZE)
  98#define LOGSYNC_BARRIER(logsize)        ((logsize)/4)
  99/*
 100#define LOGSYNC_DELTA(logsize)          min((logsize)/4, 256*LOGPSIZE)
 101#define LOGSYNC_BARRIER(logsize)        ((logsize)/2)
 102*/
 103
 104
 105/*
 106 *      log buffer cache synchronization
 107 */
 108static DEFINE_SPINLOCK(jfsLCacheLock);
 109
 110#define LCACHE_LOCK(flags)      spin_lock_irqsave(&jfsLCacheLock, flags)
 111#define LCACHE_UNLOCK(flags)    spin_unlock_irqrestore(&jfsLCacheLock, flags)
 112
 113/*
 114 * See __SLEEP_COND in jfs_locks.h
 115 */
 116#define LCACHE_SLEEP_COND(wq, cond, flags)      \
 117do {                                            \
 118        if (cond)                               \
 119                break;                          \
 120        __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
 121} while (0)
 122
 123#define LCACHE_WAKEUP(event)    wake_up(event)
 124
 125
 126/*
 127 *      lbuf buffer cache (lCache) control
 128 */
 129/* log buffer manager pageout control (cumulative, inclusive) */
 130#define lbmREAD         0x0001
 131#define lbmWRITE        0x0002  /* enqueue at tail of write queue;
 132                                 * init pageout if at head of queue;
 133                                 */
 134#define lbmRELEASE      0x0004  /* remove from write queue
 135                                 * at completion of pageout;
 136                                 * do not free/recycle it yet:
 137                                 * caller will free it;
 138                                 */
 139#define lbmSYNC         0x0008  /* do not return to freelist
 140                                 * when removed from write queue;
 141                                 */
 142#define lbmFREE         0x0010  /* return to freelist
 143                                 * at completion of pageout;
 144                                 * the buffer may be recycled;
 145                                 */
 146#define lbmDONE         0x0020
 147#define lbmERROR        0x0040
 148#define lbmGC           0x0080  /* lbmIODone to perform post-GC processing
 149                                 * of log page
 150                                 */
 151#define lbmDIRECT       0x0100
 152
 153/*
 154 * Global list of active external journals
 155 */
 156static LIST_HEAD(jfs_external_logs);
 157static struct jfs_log *dummy_log;
 158static DEFINE_MUTEX(jfs_log_mutex);
 159
 160/*
 161 * forward references
 162 */
 163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
 164                         struct lrd * lrd, struct tlock * tlck);
 165
 166static int lmNextPage(struct jfs_log * log);
 167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
 168                           int activate);
 169
 170static int open_inline_log(struct super_block *sb);
 171static int open_dummy_log(struct super_block *sb);
 172static int lbmLogInit(struct jfs_log * log);
 173static void lbmLogShutdown(struct jfs_log * log);
 174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
 175static void lbmFree(struct lbuf * bp);
 176static void lbmfree(struct lbuf * bp);
 177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
 178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
 179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
 180static int lbmIOWait(struct lbuf * bp, int flag);
 181static bio_end_io_t lbmIODone;
 182static void lbmStartIO(struct lbuf * bp);
 183static void lmGCwrite(struct jfs_log * log, int cant_block);
 184static int lmLogSync(struct jfs_log * log, int hard_sync);
 185
 186
 187
 188/*
 189 *      statistics
 190 */
 191#ifdef CONFIG_JFS_STATISTICS
 192static struct lmStat {
 193        uint commit;            /* # of commit */
 194        uint pagedone;          /* # of page written */
 195        uint submitted;         /* # of pages submitted */
 196        uint full_page;         /* # of full pages submitted */
 197        uint partial_page;      /* # of partial pages submitted */
 198} lmStat;
 199#endif
 200
 201static void write_special_inodes(struct jfs_log *log,
 202                                 int (*writer)(struct address_space *))
 203{
 204        struct jfs_sb_info *sbi;
 205
 206        list_for_each_entry(sbi, &log->sb_list, log_list) {
 207                writer(sbi->ipbmap->i_mapping);
 208                writer(sbi->ipimap->i_mapping);
 209                writer(sbi->direct_inode->i_mapping);
 210        }
 211}
 212
 213/*
 214 * NAME:        lmLog()
 215 *
 216 * FUNCTION:    write a log record;
 217 *
 218 * PARAMETER:
 219 *
 220 * RETURN:      lsn - offset to the next log record to write (end-of-log);
 221 *              -1  - error;
 222 *
 223 * note: todo: log error handler
 224 */
 225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 226          struct tlock * tlck)
 227{
 228        int lsn;
 229        int diffp, difft;
 230        struct metapage *mp = NULL;
 231        unsigned long flags;
 232
 233        jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 234                 log, tblk, lrd, tlck);
 235
 236        LOG_LOCK(log);
 237
 238        /* log by (out-of-transaction) JFS ? */
 239        if (tblk == NULL)
 240                goto writeRecord;
 241
 242        /* log from page ? */
 243        if (tlck == NULL ||
 244            tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
 245                goto writeRecord;
 246
 247        /*
 248         *      initialize/update page/transaction recovery lsn
 249         */
 250        lsn = log->lsn;
 251
 252        LOGSYNC_LOCK(log, flags);
 253
 254        /*
 255         * initialize page lsn if first log write of the page
 256         */
 257        if (mp->lsn == 0) {
 258                mp->log = log;
 259                mp->lsn = lsn;
 260                log->count++;
 261
 262                /* insert page at tail of logsynclist */
 263                list_add_tail(&mp->synclist, &log->synclist);
 264        }
 265
 266        /*
 267         *      initialize/update lsn of tblock of the page
 268         *
 269         * transaction inherits oldest lsn of pages associated
 270         * with allocation/deallocation of resources (their
 271         * log records are used to reconstruct allocation map
 272         * at recovery time: inode for inode allocation map,
 273         * B+-tree index of extent descriptors for block
 274         * allocation map);
 275         * allocation map pages inherit transaction lsn at
 276         * commit time to allow forwarding log syncpt past log
 277         * records associated with allocation/deallocation of
 278         * resources only after persistent map of these map pages
 279         * have been updated and propagated to home.
 280         */
 281        /*
 282         * initialize transaction lsn:
 283         */
 284        if (tblk->lsn == 0) {
 285                /* inherit lsn of its first page logged */
 286                tblk->lsn = mp->lsn;
 287                log->count++;
 288
 289                /* insert tblock after the page on logsynclist */
 290                list_add(&tblk->synclist, &mp->synclist);
 291        }
 292        /*
 293         * update transaction lsn:
 294         */
 295        else {
 296                /* inherit oldest/smallest lsn of page */
 297                logdiff(diffp, mp->lsn, log);
 298                logdiff(difft, tblk->lsn, log);
 299                if (diffp < difft) {
 300                        /* update tblock lsn with page lsn */
 301                        tblk->lsn = mp->lsn;
 302
 303                        /* move tblock after page on logsynclist */
 304                        list_move(&tblk->synclist, &mp->synclist);
 305                }
 306        }
 307
 308        LOGSYNC_UNLOCK(log, flags);
 309
 310        /*
 311         *      write the log record
 312         */
 313      writeRecord:
 314        lsn = lmWriteRecord(log, tblk, lrd, tlck);
 315
 316        /*
 317         * forward log syncpt if log reached next syncpt trigger
 318         */
 319        logdiff(diffp, lsn, log);
 320        if (diffp >= log->nextsync)
 321                lsn = lmLogSync(log, 0);
 322
 323        /* update end-of-log lsn */
 324        log->lsn = lsn;
 325
 326        LOG_UNLOCK(log);
 327
 328        /* return end-of-log address */
 329        return lsn;
 330}
 331
/*
 * NAME:        lmWriteRecord()
 *
 * FUNCTION:    move the log record to current log page: first the data
 *              named by the tlock's line locks (each vector followed by
 *              its descriptor), then the log record descriptor itself.
 *              Moves on to the next log page via lmNextPage() whenever
 *              the current one fills up.
 *
 * PARAMETER:   log     - log whose current page buffer (log->bp) is written
 *              tblk    - transaction block; only dereferenced when lrd is
 *                        a LOG_COMMIT record
 *              lrd     - log record descriptor; lrd->length is set here
 *              tlck    - transaction lock naming the data to log, or NULL
 *                        when only the descriptor is written
 *
 * RETURN:      end-of-log address;
 *              0 if tlck is neither a page lock nor an inode lock
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
              struct tlock * tlck)
{
        int lsn = 0;            /* end-of-log address */
        struct lbuf *bp;        /* dst log page buffer */
        struct logpage *lp;     /* dst log page */
        caddr_t dst;            /* destination address in log page */
        int dstoffset;          /* end-of-log offset in log page */
        int freespace;          /* free space in log page */
        caddr_t p;              /* src meta-data page */
        caddr_t src;            /* current source address */
        int srclen;             /* bytes of source left to move */
        int nbytes;             /* number of bytes to move */
        int i;
        int len;                /* total data + vector descriptor bytes */
        struct linelock *linelock;
        struct lv *lv;
        struct lvd *lvd;
        int l2linesize;         /* log2 of the line size of the source */

        len = 0;

        /* retrieve destination log page to write */
        bp = (struct lbuf *) log->bp;
        lp = (struct logpage *) bp->l_ldata;
        dstoffset = log->eor;

        /* any log data to write ? */
        if (tlck == NULL)
                goto moveLrd;

        /*
         *      move log record data
         */
        /* retrieve source meta-data page to log */
        if (tlck->flag & tlckPAGELOCK) {
                p = (caddr_t) (tlck->mp->data);
                linelock = (struct linelock *) & tlck->lock;
        }
        /* retrieve source in-memory inode to log */
        else if (tlck->flag & tlckINODELOCK) {
                if (tlck->type & tlckDTREE)
                        p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
                else
                        p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
                linelock = (struct linelock *) & tlck->lock;
        }
        else {
                /* unknown lock kind: nothing is written, caller gets 0 */
                jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
                return 0;       /* Probably should trap */
        }
        /* taken from the first linelock only; reused for chained ones */
        l2linesize = linelock->l2linesize;

        /* re-entered once per linelock chained through linelock->next */
      moveData:
        ASSERT(linelock->index <= linelock->maxcnt);

        lv = linelock->lv;
        for (i = 0; i < linelock->index; i++, lv++) {
                /* skip empty log vectors */
                if (lv->length == 0)
                        continue;

                /* is page full ? */
                if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
                        /* page become full: move on to next page */
                        lmNextPage(log);

                        bp = log->bp;
                        lp = (struct logpage *) bp->l_ldata;
                        dstoffset = LOGPHDRSIZE;
                }

                /*
                 * move log vector data
                 */
                /* lv offset/length are in lines; shift converts to bytes */
                src = (u8 *) p + (lv->offset << l2linesize);
                srclen = lv->length << l2linesize;
                len += srclen;
                while (srclen > 0) {
                        /* copy as much as fits before the page trailer */
                        freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
                        nbytes = min(freespace, srclen);
                        dst = (caddr_t) lp + dstoffset;
                        memcpy(dst, src, nbytes);
                        dstoffset += nbytes;

                        /* is page not full ? */
                        if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
                                break;

                        /* page become full: move on to next page */
                        lmNextPage(log);

                        bp = (struct lbuf *) log->bp;
                        lp = (struct logpage *) bp->l_ldata;
                        dstoffset = LOGPHDRSIZE;

                        srclen -= nbytes;
                        src += nbytes;
                }

                /*
                 * move log vector descriptor
                 *
                 * 4 is presumably sizeof(struct lvd) (two 16-bit fields
                 * are stored below) - TODO confirm.
                 * NOTE(review): no free-space check is made here; this
                 * appears to rely on dstoffset never landing within 4
                 * bytes of the trailer - confirm.
                 */
                len += 4;
                lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
                lvd->offset = cpu_to_le16(lv->offset);
                lvd->length = cpu_to_le16(lv->length);
                dstoffset += 4;
                jfs_info("lmWriteRecord: lv offset:%d length:%d",
                         lv->offset, lv->length);
        }

        /* follow the chain to the next linelock, if any */
        if ((i = linelock->next)) {
                linelock = (struct linelock *) lid_to_tlock(i);
                goto moveData;
        }

        /*
         *      move log record descriptor
         */
      moveLrd:
        /* len is 0 when no data was moved (tlck == NULL) */
        lrd->length = cpu_to_le16(len);

        src = (caddr_t) lrd;
        srclen = LOGRDSIZE;

        while (srclen > 0) {
                freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
                nbytes = min(freespace, srclen);
                dst = (caddr_t) lp + dstoffset;
                memcpy(dst, src, nbytes);

                dstoffset += nbytes;
                srclen -= nbytes;

                /* are there more to move than freespace of page ? */
                if (srclen)
                        goto pageFull;

                /*
                 * end of log record descriptor
                 */

                /* update last log record eor */
                log->eor = dstoffset;
                bp->l_eor = dstoffset;
                lsn = (log->page << L2LOGPSIZE) + dstoffset;

                if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
                        /* remember where this transaction's COMMIT ends */
                        tblk->clsn = lsn;
                        jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
                                 bp->l_eor);

                        INCREMENT(lmStat.commit);       /* # of commit */

                        /*
                         * enqueue tblock for group commit:
                         *
                         * enqueue tblock of non-trivial/synchronous COMMIT
                         * at tail of group commit queue
                         * (trivial/asynchronous COMMITs are ignored by
                         * group commit.)
                         */
                        LOGGC_LOCK(log);

                        /* init tblock gc state */
                        tblk->flag = tblkGC_QUEUE;
                        tblk->bp = log->bp;
                        tblk->pn = log->page;
                        tblk->eor = log->eor;

                        /* enqueue transaction to commit queue */
                        list_add_tail(&tblk->cqueue, &log->cqueue);

                        LOGGC_UNLOCK(log);
                }

                jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
                        le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

                /* page not full ? */
                if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
                        return lsn;

                /*
                 * Reached either by falling through (descriptor complete,
                 * page exactly full: srclen is 0, so the loop ends after
                 * the page switch) or by the goto above (descriptor split
                 * across pages: the loop continues on the new page).
                 */
              pageFull:
                /* page become full: move on to next page */
                lmNextPage(log);

                bp = (struct lbuf *) log->bp;
                lp = (struct logpage *) bp->l_ldata;
                dstoffset = LOGPHDRSIZE;
                src += nbytes;
        }

        return lsn;
}
 539
 540
 541/*
 542 * NAME:        lmNextPage()
 543 *
 544 * FUNCTION:    write current page and allocate next page.
 545 *
 546 * PARAMETER:   log
 547 *
 548 * RETURN:      0
 549 *
 550 * serialization: LOG_LOCK() held on entry/exit
 551 */
 552static int lmNextPage(struct jfs_log * log)
 553{
 554        struct logpage *lp;
 555        int lspn;               /* log sequence page number */
 556        int pn;                 /* current page number */
 557        struct lbuf *bp;
 558        struct lbuf *nextbp;
 559        struct tblock *tblk;
 560
 561        /* get current log page number and log sequence page number */
 562        pn = log->page;
 563        bp = log->bp;
 564        lp = (struct logpage *) bp->l_ldata;
 565        lspn = le32_to_cpu(lp->h.page);
 566
 567        LOGGC_LOCK(log);
 568
 569        /*
 570         *      write or queue the full page at the tail of write queue
 571         */
 572        /* get the tail tblk on commit queue */
 573        if (list_empty(&log->cqueue))
 574                tblk = NULL;
 575        else
 576                tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
 577
 578        /* every tblk who has COMMIT record on the current page,
 579         * and has not been committed, must be on commit queue
 580         * since tblk is queued at commit queueu at the time
 581         * of writing its COMMIT record on the page before
 582         * page becomes full (even though the tblk thread
 583         * who wrote COMMIT record may have been suspended
 584         * currently);
 585         */
 586
 587        /* is page bound with outstanding tail tblk ? */
 588        if (tblk && tblk->pn == pn) {
 589                /* mark tblk for end-of-page */
 590                tblk->flag |= tblkGC_EOP;
 591
 592                if (log->cflag & logGC_PAGEOUT) {
 593                        /* if page is not already on write queue,
 594                         * just enqueue (no lbmWRITE to prevent redrive)
 595                         * buffer to wqueue to ensure correct serial order
 596                         * of the pages since log pages will be added
 597                         * continuously
 598                         */
 599                        if (bp->l_wqnext == NULL)
 600                                lbmWrite(log, bp, 0, 0);
 601                } else {
 602                        /*
 603                         * No current GC leader, initiate group commit
 604                         */
 605                        log->cflag |= logGC_PAGEOUT;
 606                        lmGCwrite(log, 0);
 607                }
 608        }
 609        /* page is not bound with outstanding tblk:
 610         * init write or mark it to be redriven (lbmWRITE)
 611         */
 612        else {
 613                /* finalize the page */
 614                bp->l_ceor = bp->l_eor;
 615                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 616                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
 617        }
 618        LOGGC_UNLOCK(log);
 619
 620        /*
 621         *      allocate/initialize next page
 622         */
 623        /* if log wraps, the first data page of log is 2
 624         * (0 never used, 1 is superblock).
 625         */
 626        log->page = (pn == log->size - 1) ? 2 : pn + 1;
 627        log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
 628
 629        /* allocate/initialize next log page buffer */
 630        nextbp = lbmAllocate(log, log->page);
 631        nextbp->l_eor = log->eor;
 632        log->bp = nextbp;
 633
 634        /* initialize next log page */
 635        lp = (struct logpage *) nextbp->l_ldata;
 636        lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
 637        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
 638
 639        return 0;
 640}
 641
 642
 643/*
 644 * NAME:        lmGroupCommit()
 645 *
 646 * FUNCTION:    group commit
 647 *      initiate pageout of the pages with COMMIT in the order of
 648 *      page number - redrive pageout of the page at the head of
 649 *      pageout queue until full page has been written.
 650 *
 651 * RETURN:
 652 *
 653 * NOTE:
 654 *      LOGGC_LOCK serializes log group commit queue, and
 655 *      transaction blocks on the commit queue.
 656 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 657 */
 658int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 659{
 660        int rc = 0;
 661
 662        LOGGC_LOCK(log);
 663
 664        /* group committed already ? */
 665        if (tblk->flag & tblkGC_COMMITTED) {
 666                if (tblk->flag & tblkGC_ERROR)
 667                        rc = -EIO;
 668
 669                LOGGC_UNLOCK(log);
 670                return rc;
 671        }
 672        jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
 673
 674        if (tblk->xflag & COMMIT_LAZY)
 675                tblk->flag |= tblkGC_LAZY;
 676
 677        if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
 678            (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
 679             || jfs_tlocks_low)) {
 680                /*
 681                 * No pageout in progress
 682                 *
 683                 * start group commit as its group leader.
 684                 */
 685                log->cflag |= logGC_PAGEOUT;
 686
 687                lmGCwrite(log, 0);
 688        }
 689
 690        if (tblk->xflag & COMMIT_LAZY) {
 691                /*
 692                 * Lazy transactions can leave now
 693                 */
 694                LOGGC_UNLOCK(log);
 695                return 0;
 696        }
 697
 698        /* lmGCwrite gives up LOGGC_LOCK, check again */
 699
 700        if (tblk->flag & tblkGC_COMMITTED) {
 701                if (tblk->flag & tblkGC_ERROR)
 702                        rc = -EIO;
 703
 704                LOGGC_UNLOCK(log);
 705                return rc;
 706        }
 707
 708        /* upcount transaction waiting for completion
 709         */
 710        log->gcrtc++;
 711        tblk->flag |= tblkGC_READY;
 712
 713        __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
 714                     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
 715
 716        /* removed from commit queue */
 717        if (tblk->flag & tblkGC_ERROR)
 718                rc = -EIO;
 719
 720        LOGGC_UNLOCK(log);
 721        return rc;
 722}
 723
 724/*
 725 * NAME:        lmGCwrite()
 726 *
 727 * FUNCTION:    group commit write
 728 *      initiate write of log page, building a group of all transactions
 729 *      with commit records on that page.
 730 *
 731 * RETURN:      None
 732 *
 733 * NOTE:
 734 *      LOGGC_LOCK must be held by caller.
 735 *      N.B. LOG_LOCK is NOT held during lmGroupCommit().
 736 */
 737static void lmGCwrite(struct jfs_log * log, int cant_write)
 738{
 739        struct lbuf *bp;
 740        struct logpage *lp;
 741        int gcpn;               /* group commit page number */
 742        struct tblock *tblk;
 743        struct tblock *xtblk = NULL;
 744
 745        /*
 746         * build the commit group of a log page
 747         *
 748         * scan commit queue and make a commit group of all
 749         * transactions with COMMIT records on the same log page.
 750         */
 751        /* get the head tblk on the commit queue */
 752        gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
 753
 754        list_for_each_entry(tblk, &log->cqueue, cqueue) {
 755                if (tblk->pn != gcpn)
 756                        break;
 757
 758                xtblk = tblk;
 759
 760                /* state transition: (QUEUE, READY) -> COMMIT */
 761                tblk->flag |= tblkGC_COMMIT;
 762        }
 763        tblk = xtblk;           /* last tblk of the page */
 764
 765        /*
 766         * pageout to commit transactions on the log page.
 767         */
 768        bp = (struct lbuf *) tblk->bp;
 769        lp = (struct logpage *) bp->l_ldata;
 770        /* is page already full ? */
 771        if (tblk->flag & tblkGC_EOP) {
 772                /* mark page to free at end of group commit of the page */
 773                tblk->flag &= ~tblkGC_EOP;
 774                tblk->flag |= tblkGC_FREE;
 775                bp->l_ceor = bp->l_eor;
 776                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 777                lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 778                         cant_write);
 779                INCREMENT(lmStat.full_page);
 780        }
 781        /* page is not yet full */
 782        else {
 783                bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
 784                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
 785                lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
 786                INCREMENT(lmStat.partial_page);
 787        }
 788}
 789
 790/*
 791 * NAME:        lmPostGC()
 792 *
 793 * FUNCTION:    group commit post-processing
 794 *      Processes transactions after their commit records have been written
 795 *      to disk, redriving log I/O if necessary.
 796 *
 797 * RETURN:      None
 798 *
 799 * NOTE:
 800 *      This routine is called a interrupt time by lbmIODone
 801 */
 802static void lmPostGC(struct lbuf * bp)
 803{
 804        unsigned long flags;
 805        struct jfs_log *log = bp->l_log;
 806        struct logpage *lp;
 807        struct tblock *tblk, *temp;
 808
 809        //LOGGC_LOCK(log);
 810        spin_lock_irqsave(&log->gclock, flags);
 811        /*
 812         * current pageout of group commit completed.
 813         *
 814         * remove/wakeup transactions from commit queue who were
 815         * group committed with the current log page
 816         */
 817        list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
 818                if (!(tblk->flag & tblkGC_COMMIT))
 819                        break;
 820                /* if transaction was marked GC_COMMIT then
 821                 * it has been shipped in the current pageout
 822                 * and made it to disk - it is committed.
 823                 */
 824
 825                if (bp->l_flag & lbmERROR)
 826                        tblk->flag |= tblkGC_ERROR;
 827
 828                /* remove it from the commit queue */
 829                list_del(&tblk->cqueue);
 830                tblk->flag &= ~tblkGC_QUEUE;
 831
 832                if (tblk == log->flush_tblk) {
 833                        /* we can stop flushing the log now */
 834                        clear_bit(log_FLUSH, &log->flag);
 835                        log->flush_tblk = NULL;
 836                }
 837
 838                jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
 839                         tblk->flag);
 840
 841                if (!(tblk->xflag & COMMIT_FORCE))
 842                        /*
 843                         * Hand tblk over to lazy commit thread
 844                         */
 845                        txLazyUnlock(tblk);
 846                else {
 847                        /* state transition: COMMIT -> COMMITTED */
 848                        tblk->flag |= tblkGC_COMMITTED;
 849
 850                        if (tblk->flag & tblkGC_READY)
 851                                log->gcrtc--;
 852
 853                        LOGGC_WAKEUP(tblk);
 854                }
 855
 856                /* was page full before pageout ?
 857                 * (and this is the last tblk bound with the page)
 858                 */
 859                if (tblk->flag & tblkGC_FREE)
 860                        lbmFree(bp);
 861                /* did page become full after pageout ?
 862                 * (and this is the last tblk bound with the page)
 863                 */
 864                else if (tblk->flag & tblkGC_EOP) {
 865                        /* finalize the page */
 866                        lp = (struct logpage *) bp->l_ldata;
 867                        bp->l_ceor = bp->l_eor;
 868                        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 869                        jfs_info("lmPostGC: calling lbmWrite");
 870                        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
 871                                 1);
 872                }
 873
 874        }
 875
 876        /* are there any transactions who have entered lnGroupCommit()
 877         * (whose COMMITs are after that of the last log page written.
 878         * They are waiting for new group commit (above at (SLEEP 1))
 879         * or lazy transactions are on a full (queued) log page,
 880         * select the latest ready transaction as new group leader and
 881         * wake her up to lead her group.
 882         */
 883        if ((!list_empty(&log->cqueue)) &&
 884            ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
 885             test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
 886                /*
 887                 * Call lmGCwrite with new group leader
 888                 */
 889                lmGCwrite(log, 1);
 890
 891        /* no transaction are ready yet (transactions are only just
 892         * queued (GC_QUEUE) and not entered for group commit yet).
 893         * the first transaction entering group commit
 894         * will elect herself as new group leader.
 895         */
 896        else
 897                log->cflag &= ~logGC_PAGEOUT;
 898
 899        //LOGGC_UNLOCK(log);
 900        spin_unlock_irqrestore(&log->gclock, flags);
 901        return;
 902}
 903
 904/*
 905 * NAME:        lmLogSync()
 906 *
 907 * FUNCTION:    write log SYNCPT record for specified log
 908 *      if new sync address is available
 909 *      (normally the case if sync() is executed by back-ground
 910 *      process).
 911 *      calculate new value of i_nextsync which determines when
 912 *      this code is called again.
 913 *
 914 * PARAMETERS:  log     - log structure
 915 *              hard_sync - 1 to force all metadata to be written
 916 *
 917 * RETURN:      0
 918 *
 919 * serialization: LOG_LOCK() held on entry/exit
 920 */
 921static int lmLogSync(struct jfs_log * log, int hard_sync)
 922{
 923        int logsize;
 924        int written;            /* written since last syncpt */
 925        int free;               /* free space left available */
 926        int delta;              /* additional delta to write normally */
 927        int more;               /* additional write granted */
 928        struct lrd lrd;
 929        int lsn;
 930        struct logsyncblk *lp;
 931        unsigned long flags;
 932
 933        /* push dirty metapages out to disk */
 934        if (hard_sync)
 935                write_special_inodes(log, filemap_fdatawrite);
 936        else
 937                write_special_inodes(log, filemap_flush);
 938
 939        /*
 940         *      forward syncpt
 941         */
 942        /* if last sync is same as last syncpt,
 943         * invoke sync point forward processing to update sync.
 944         */
 945
 946        if (log->sync == log->syncpt) {
 947                LOGSYNC_LOCK(log, flags);
 948                if (list_empty(&log->synclist))
 949                        log->sync = log->lsn;
 950                else {
 951                        lp = list_entry(log->synclist.next,
 952                                        struct logsyncblk, synclist);
 953                        log->sync = lp->lsn;
 954                }
 955                LOGSYNC_UNLOCK(log, flags);
 956
 957        }
 958
 959        /* if sync is different from last syncpt,
 960         * write a SYNCPT record with syncpt = sync.
 961         * reset syncpt = sync
 962         */
 963        if (log->sync != log->syncpt) {
 964                lrd.logtid = 0;
 965                lrd.backchain = 0;
 966                lrd.type = cpu_to_le16(LOG_SYNCPT);
 967                lrd.length = 0;
 968                lrd.log.syncpt.sync = cpu_to_le32(log->sync);
 969                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
 970
 971                log->syncpt = log->sync;
 972        } else
 973                lsn = log->lsn;
 974
 975        /*
 976         *      setup next syncpt trigger (SWAG)
 977         */
 978        logsize = log->logsize;
 979
 980        logdiff(written, lsn, log);
 981        free = logsize - written;
 982        delta = LOGSYNC_DELTA(logsize);
 983        more = min(free / 2, delta);
 984        if (more < 2 * LOGPSIZE) {
 985                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
 986                /*
 987                 *      log wrapping
 988                 *
 989                 * option 1 - panic ? No.!
 990                 * option 2 - shutdown file systems
 991                 *            associated with log ?
 992                 * option 3 - extend log ?
 993                 * option 4 - second chance
 994                 *
 995                 * mark log wrapped, and continue.
 996                 * when all active transactions are completed,
 997                 * mark log valid for recovery.
 998                 * if crashed during invalid state, log state
 999                 * implies invalid log, forcing fsck().
1000                 */
1001                /* mark log state log wrap in log superblock */
1002                /* log->state = LOGWRAP; */
1003
1004                /* reset sync point computation */
1005                log->syncpt = log->sync = lsn;
1006                log->nextsync = delta;
1007        } else
1008                /* next syncpt trigger = written + more */
1009                log->nextsync = written + more;
1010
1011        /* if number of bytes written from last sync point is more
1012         * than 1/4 of the log size, stop new transactions from
1013         * starting until all current transactions are completed
1014         * by setting syncbarrier flag.
1015         */
1016        if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1017            (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1018                set_bit(log_SYNCBARRIER, &log->flag);
1019                jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1020                         log->syncpt);
1021                /*
1022                 * We may have to initiate group commit
1023                 */
1024                jfs_flush_journal(log, 0);
1025        }
1026
1027        return lsn;
1028}
1029
1030/*
1031 * NAME:        jfs_syncpt
1032 *
1033 * FUNCTION:    write log SYNCPT record for specified log
1034 *
1035 * PARAMETERS:  log       - log structure
1036 *              hard_sync - set to 1 to force metadata to be written
1037 */
1038void jfs_syncpt(struct jfs_log *log, int hard_sync)
1039{       LOG_LOCK(log);
1040        if (!test_bit(log_QUIESCE, &log->flag))
1041                lmLogSync(log, hard_sync);
1042        LOG_UNLOCK(log);
1043}
1044
1045/*
1046 * NAME:        lmLogOpen()
1047 *
1048 * FUNCTION:    open the log on first open;
1049 *      insert filesystem in the active list of the log.
1050 *
1051 * PARAMETER:   ipmnt   - file system mount inode
1052 *              iplog   - log inode (out)
1053 *
1054 * RETURN:
1055 *
1056 * serialization:
1057 */
1058int lmLogOpen(struct super_block *sb)
1059{
1060        int rc;
1061        struct block_device *bdev;
1062        struct jfs_log *log;
1063        struct jfs_sb_info *sbi = JFS_SBI(sb);
1064
1065        if (sbi->flag & JFS_NOINTEGRITY)
1066                return open_dummy_log(sb);
1067
1068        if (sbi->mntflag & JFS_INLINELOG)
1069                return open_inline_log(sb);
1070
1071        mutex_lock(&jfs_log_mutex);
1072        list_for_each_entry(log, &jfs_external_logs, journal_list) {
1073                if (log->bdev->bd_dev == sbi->logdev) {
1074                        if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1075                                jfs_warn("wrong uuid on JFS journal");
1076                                mutex_unlock(&jfs_log_mutex);
1077                                return -EINVAL;
1078                        }
1079                        /*
1080                         * add file system to log active file system list
1081                         */
1082                        if ((rc = lmLogFileSystem(log, sbi, 1))) {
1083                                mutex_unlock(&jfs_log_mutex);
1084                                return rc;
1085                        }
1086                        goto journal_found;
1087                }
1088        }
1089
1090        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1091                mutex_unlock(&jfs_log_mutex);
1092                return -ENOMEM;
1093        }
1094        INIT_LIST_HEAD(&log->sb_list);
1095        init_waitqueue_head(&log->syncwait);
1096
1097        /*
1098         *      external log as separate logical volume
1099         *
1100         * file systems to log may have n-to-1 relationship;
1101         */
1102
1103        bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1104                                 log);
1105        if (IS_ERR(bdev)) {
1106                rc = PTR_ERR(bdev);
1107                goto free;
1108        }
1109
1110        log->bdev = bdev;
1111        uuid_copy(&log->uuid, &sbi->loguuid);
1112
1113        /*
1114         * initialize log:
1115         */
1116        if ((rc = lmLogInit(log)))
1117                goto close;
1118
1119        list_add(&log->journal_list, &jfs_external_logs);
1120
1121        /*
1122         * add file system to log active file system list
1123         */
1124        if ((rc = lmLogFileSystem(log, sbi, 1)))
1125                goto shutdown;
1126
1127journal_found:
1128        LOG_LOCK(log);
1129        list_add(&sbi->log_list, &log->sb_list);
1130        sbi->log = log;
1131        LOG_UNLOCK(log);
1132
1133        mutex_unlock(&jfs_log_mutex);
1134        return 0;
1135
1136        /*
1137         *      unwind on error
1138         */
1139      shutdown:         /* unwind lbmLogInit() */
1140        list_del(&log->journal_list);
1141        lbmLogShutdown(log);
1142
1143      close:            /* close external log device */
1144        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1145
1146      free:             /* free log descriptor */
1147        mutex_unlock(&jfs_log_mutex);
1148        kfree(log);
1149
1150        jfs_warn("lmLogOpen: exit(%d)", rc);
1151        return rc;
1152}
1153
1154static int open_inline_log(struct super_block *sb)
1155{
1156        struct jfs_log *log;
1157        int rc;
1158
1159        if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1160                return -ENOMEM;
1161        INIT_LIST_HEAD(&log->sb_list);
1162        init_waitqueue_head(&log->syncwait);
1163
1164        set_bit(log_INLINELOG, &log->flag);
1165        log->bdev = sb->s_bdev;
1166        log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1167        log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1168            (L2LOGPSIZE - sb->s_blocksize_bits);
1169        log->l2bsize = sb->s_blocksize_bits;
1170        ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1171
1172        /*
1173         * initialize log.
1174         */
1175        if ((rc = lmLogInit(log))) {
1176                kfree(log);
1177                jfs_warn("lmLogOpen: exit(%d)", rc);
1178                return rc;
1179        }
1180
1181        list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1182        JFS_SBI(sb)->log = log;
1183
1184        return rc;
1185}
1186
1187static int open_dummy_log(struct super_block *sb)
1188{
1189        int rc;
1190
1191        mutex_lock(&jfs_log_mutex);
1192        if (!dummy_log) {
1193                dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1194                if (!dummy_log) {
1195                        mutex_unlock(&jfs_log_mutex);
1196                        return -ENOMEM;
1197                }
1198                INIT_LIST_HEAD(&dummy_log->sb_list);
1199                init_waitqueue_head(&dummy_log->syncwait);
1200                dummy_log->no_integrity = 1;
1201                /* Make up some stuff */
1202                dummy_log->base = 0;
1203                dummy_log->size = 1024;
1204                rc = lmLogInit(dummy_log);
1205                if (rc) {
1206                        kfree(dummy_log);
1207                        dummy_log = NULL;
1208                        mutex_unlock(&jfs_log_mutex);
1209                        return rc;
1210                }
1211        }
1212
1213        LOG_LOCK(dummy_log);
1214        list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1215        JFS_SBI(sb)->log = dummy_log;
1216        LOG_UNLOCK(dummy_log);
1217        mutex_unlock(&jfs_log_mutex);
1218
1219        return 0;
1220}
1221
1222/*
1223 * NAME:        lmLogInit()
1224 *
1225 * FUNCTION:    log initialization at first log open.
1226 *
1227 *      logredo() (or logformat()) should have been run previously.
1228 *      initialize the log from log superblock.
1229 *      set the log state in the superblock to LOGMOUNT and
1230 *      write SYNCPT log record.
1231 *
1232 * PARAMETER:   log     - log structure
1233 *
1234 * RETURN:      0       - if ok
1235 *              -EINVAL - bad log magic number or superblock dirty
1236 *              error returned from logwait()
1237 *
1238 * serialization: single first open thread
1239 */
1240int lmLogInit(struct jfs_log * log)
1241{
1242        int rc = 0;
1243        struct lrd lrd;
1244        struct logsuper *logsuper;
1245        struct lbuf *bpsuper;
1246        struct lbuf *bp;
1247        struct logpage *lp;
1248        int lsn = 0;
1249
1250        jfs_info("lmLogInit: log:0x%p", log);
1251
1252        /* initialize the group commit serialization lock */
1253        LOGGC_LOCK_INIT(log);
1254
1255        /* allocate/initialize the log write serialization lock */
1256        LOG_LOCK_INIT(log);
1257
1258        LOGSYNC_LOCK_INIT(log);
1259
1260        INIT_LIST_HEAD(&log->synclist);
1261
1262        INIT_LIST_HEAD(&log->cqueue);
1263        log->flush_tblk = NULL;
1264
1265        log->count = 0;
1266
1267        /*
1268         * initialize log i/o
1269         */
1270        if ((rc = lbmLogInit(log)))
1271                return rc;
1272
1273        if (!test_bit(log_INLINELOG, &log->flag))
1274                log->l2bsize = L2LOGPSIZE;
1275
1276        /* check for disabled journaling to disk */
1277        if (log->no_integrity) {
1278                /*
1279                 * Journal pages will still be filled.  When the time comes
1280                 * to actually do the I/O, the write is not done, and the
1281                 * endio routine is called directly.
1282                 */
1283                bp = lbmAllocate(log , 0);
1284                log->bp = bp;
1285                bp->l_pn = bp->l_eor = 0;
1286        } else {
1287                /*
1288                 * validate log superblock
1289                 */
1290                if ((rc = lbmRead(log, 1, &bpsuper)))
1291                        goto errout10;
1292
1293                logsuper = (struct logsuper *) bpsuper->l_ldata;
1294
1295                if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1296                        jfs_warn("*** Log Format Error ! ***");
1297                        rc = -EINVAL;
1298                        goto errout20;
1299                }
1300
1301                /* logredo() should have been run successfully. */
1302                if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1303                        jfs_warn("*** Log Is Dirty ! ***");
1304                        rc = -EINVAL;
1305                        goto errout20;
1306                }
1307
1308                /* initialize log from log superblock */
1309                if (test_bit(log_INLINELOG,&log->flag)) {
1310                        if (log->size != le32_to_cpu(logsuper->size)) {
1311                                rc = -EINVAL;
1312                                goto errout20;
1313                        }
1314                        jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1315                                 log, (unsigned long long)log->base, log->size);
1316                } else {
1317                        if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1318                                jfs_warn("wrong uuid on JFS log device");
1319                                rc = -EINVAL;
1320                                goto errout20;
1321                        }
1322                        log->size = le32_to_cpu(logsuper->size);
1323                        log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1324                        jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1325                                 log, (unsigned long long)log->base, log->size);
1326                }
1327
1328                log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1329                log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1330
1331                /*
1332                 * initialize for log append write mode
1333                 */
1334                /* establish current/end-of-log page/buffer */
1335                if ((rc = lbmRead(log, log->page, &bp)))
1336                        goto errout20;
1337
1338                lp = (struct logpage *) bp->l_ldata;
1339
1340                jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1341                         le32_to_cpu(logsuper->end), log->page, log->eor,
1342                         le16_to_cpu(lp->h.eor));
1343
1344                log->bp = bp;
1345                bp->l_pn = log->page;
1346                bp->l_eor = log->eor;
1347
1348                /* if current page is full, move on to next page */
1349                if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1350                        lmNextPage(log);
1351
1352                /*
1353                 * initialize log syncpoint
1354                 */
1355                /*
1356                 * write the first SYNCPT record with syncpoint = 0
1357                 * (i.e., log redo up to HERE !);
1358                 * remove current page from lbm write queue at end of pageout
1359                 * (to write log superblock update), but do not release to
1360                 * freelist;
1361                 */
1362                lrd.logtid = 0;
1363                lrd.backchain = 0;
1364                lrd.type = cpu_to_le16(LOG_SYNCPT);
1365                lrd.length = 0;
1366                lrd.log.syncpt.sync = 0;
1367                lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1368                bp = log->bp;
1369                bp->l_ceor = bp->l_eor;
1370                lp = (struct logpage *) bp->l_ldata;
1371                lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1372                lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1373                if ((rc = lbmIOWait(bp, 0)))
1374                        goto errout30;
1375
1376                /*
1377                 * update/write superblock
1378                 */
1379                logsuper->state = cpu_to_le32(LOGMOUNT);
1380                log->serial = le32_to_cpu(logsuper->serial) + 1;
1381                logsuper->serial = cpu_to_le32(log->serial);
1382                lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1383                if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1384                        goto errout30;
1385        }
1386
1387        /* initialize logsync parameters */
1388        log->logsize = (log->size - 2) << L2LOGPSIZE;
1389        log->lsn = lsn;
1390        log->syncpt = lsn;
1391        log->sync = log->syncpt;
1392        log->nextsync = LOGSYNC_DELTA(log->logsize);
1393
1394        jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1395                 log->lsn, log->syncpt, log->sync);
1396
1397        /*
1398         * initialize for lazy/group commit
1399         */
1400        log->clsn = lsn;
1401
1402        return 0;
1403
1404        /*
1405         *      unwind on error
1406         */
1407      errout30:         /* release log page */
1408        log->wqueue = NULL;
1409        bp->l_wqnext = NULL;
1410        lbmFree(bp);
1411
1412      errout20:         /* release log superblock */
1413        lbmFree(bpsuper);
1414
1415      errout10:         /* unwind lbmLogInit() */
1416        lbmLogShutdown(log);
1417
1418        jfs_warn("lmLogInit: exit(%d)", rc);
1419        return rc;
1420}
1421
1422
1423/*
1424 * NAME:        lmLogClose()
1425 *
1426 * FUNCTION:    remove file system <ipmnt> from active list of log <iplog>
1427 *              and close it on last close.
1428 *
1429 * PARAMETER:   sb      - superblock
1430 *
1431 * RETURN:      errors from subroutines
1432 *
1433 * serialization:
1434 */
1435int lmLogClose(struct super_block *sb)
1436{
1437        struct jfs_sb_info *sbi = JFS_SBI(sb);
1438        struct jfs_log *log = sbi->log;
1439        struct block_device *bdev;
1440        int rc = 0;
1441
1442        jfs_info("lmLogClose: log:0x%p", log);
1443
1444        mutex_lock(&jfs_log_mutex);
1445        LOG_LOCK(log);
1446        list_del(&sbi->log_list);
1447        LOG_UNLOCK(log);
1448        sbi->log = NULL;
1449
1450        /*
1451         * We need to make sure all of the "written" metapages
1452         * actually make it to disk
1453         */
1454        sync_blockdev(sb->s_bdev);
1455
1456        if (test_bit(log_INLINELOG, &log->flag)) {
1457                /*
1458                 *      in-line log in host file system
1459                 */
1460                rc = lmLogShutdown(log);
1461                kfree(log);
1462                goto out;
1463        }
1464
1465        if (!log->no_integrity)
1466                lmLogFileSystem(log, sbi, 0);
1467
1468        if (!list_empty(&log->sb_list))
1469                goto out;
1470
1471        /*
1472         * TODO: ensure that the dummy_log is in a state to allow
1473         * lbmLogShutdown to deallocate all the buffers and call
1474         * kfree against dummy_log.  For now, leave dummy_log & its
1475         * buffers in memory, and resuse if another no-integrity mount
1476         * is requested.
1477         */
1478        if (log->no_integrity)
1479                goto out;
1480
1481        /*
1482         *      external log as separate logical volume
1483         */
1484        list_del(&log->journal_list);
1485        bdev = log->bdev;
1486        rc = lmLogShutdown(log);
1487
1488        blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1489
1490        kfree(log);
1491
1492      out:
1493        mutex_unlock(&jfs_log_mutex);
1494        jfs_info("lmLogClose: exit(%d)", rc);
1495        return rc;
1496}
1497
1498
1499/*
1500 * NAME:        jfs_flush_journal()
1501 *
1502 * FUNCTION:    initiate write of any outstanding transactions to the journal
1503 *              and optionally wait until they are all written to disk
1504 *
1505 *              wait == 0  flush until latest txn is committed, don't wait
1506 *              wait == 1  flush until latest txn is committed, wait
1507 *              wait > 1   flush until all txn's are complete, wait
1508 */
1509void jfs_flush_journal(struct jfs_log *log, int wait)
1510{
1511        int i;
1512        struct tblock *target = NULL;
1513
1514        /* jfs_write_inode may call us during read-only mount */
1515        if (!log)
1516                return;
1517
1518        jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1519
1520        LOGGC_LOCK(log);
1521
1522        if (!list_empty(&log->cqueue)) {
1523                /*
1524                 * This ensures that we will keep writing to the journal as long
1525                 * as there are unwritten commit records
1526                 */
1527                target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1528
1529                if (test_bit(log_FLUSH, &log->flag)) {
1530                        /*
1531                         * We're already flushing.
1532                         * if flush_tblk is NULL, we are flushing everything,
1533                         * so leave it that way.  Otherwise, update it to the
1534                         * latest transaction
1535                         */
1536                        if (log->flush_tblk)
1537                                log->flush_tblk = target;
1538                } else {
1539                        /* Only flush until latest transaction is committed */
1540                        log->flush_tblk = target;
1541                        set_bit(log_FLUSH, &log->flag);
1542
1543                        /*
1544                         * Initiate I/O on outstanding transactions
1545                         */
1546                        if (!(log->cflag & logGC_PAGEOUT)) {
1547                                log->cflag |= logGC_PAGEOUT;
1548                                lmGCwrite(log, 0);
1549                        }
1550                }
1551        }
1552        if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1553                /* Flush until all activity complete */
1554                set_bit(log_FLUSH, &log->flag);
1555                log->flush_tblk = NULL;
1556        }
1557
1558        if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1559                DECLARE_WAITQUEUE(__wait, current);
1560
1561                add_wait_queue(&target->gcwait, &__wait);
1562                set_current_state(TASK_UNINTERRUPTIBLE);
1563                LOGGC_UNLOCK(log);
1564                schedule();
1565                LOGGC_LOCK(log);
1566                remove_wait_queue(&target->gcwait, &__wait);
1567        }
1568        LOGGC_UNLOCK(log);
1569
1570        if (wait < 2)
1571                return;
1572
1573        write_special_inodes(log, filemap_fdatawrite);
1574
1575        /*
1576         * If there was recent activity, we may need to wait
1577         * for the lazycommit thread to catch up
1578         */
1579        if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1580                for (i = 0; i < 200; i++) {     /* Too much? */
1581                        msleep(250);
1582                        write_special_inodes(log, filemap_fdatawrite);
1583                        if (list_empty(&log->cqueue) &&
1584                            list_empty(&log->synclist))
1585                                break;
1586                }
1587        }
1588        assert(list_empty(&log->cqueue));
1589
1590#ifdef CONFIG_JFS_DEBUG
1591        if (!list_empty(&log->synclist)) {
1592                struct logsyncblk *lp;
1593
1594                printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1595                list_for_each_entry(lp, &log->synclist, synclist) {
1596                        if (lp->xflag & COMMIT_PAGE) {
1597                                struct metapage *mp = (struct metapage *)lp;
1598                                print_hex_dump(KERN_ERR, "metapage: ",
1599                                               DUMP_PREFIX_ADDRESS, 16, 4,
1600                                               mp, sizeof(struct metapage), 0);
1601                                print_hex_dump(KERN_ERR, "page: ",
1602                                               DUMP_PREFIX_ADDRESS, 16,
1603                                               sizeof(long), mp->page,
1604                                               sizeof(struct page), 0);
1605                        } else
1606                                print_hex_dump(KERN_ERR, "tblock:",
1607                                               DUMP_PREFIX_ADDRESS, 16, 4,
1608                                               lp, sizeof(struct tblock), 0);
1609                }
1610        }
1611#else
1612        WARN_ON(!list_empty(&log->synclist));
1613#endif
1614        clear_bit(log_FLUSH, &log->flag);
1615}
1616
1617/*
1618 * NAME:        lmLogShutdown()
1619 *
1620 * FUNCTION:    log shutdown at last LogClose().
1621 *
1622 *              write log syncpt record.
1623 *              update super block to set redone flag to 0.
1624 *
1625 * PARAMETER:   log     - log inode
1626 *
1627 * RETURN:      0       - success
1628 *
1629 * serialization: single last close thread
1630 */
1631int lmLogShutdown(struct jfs_log * log)
1632{
1633        int rc;
1634        struct lrd lrd;
1635        int lsn;
1636        struct logsuper *logsuper;
1637        struct lbuf *bpsuper;
1638        struct lbuf *bp;
1639        struct logpage *lp;
1640
1641        jfs_info("lmLogShutdown: log:0x%p", log);
1642
1643        jfs_flush_journal(log, 2);
1644
1645        /*
1646         * write the last SYNCPT record with syncpoint = 0
1647         * (i.e., log redo up to HERE !)
1648         */
1649        lrd.logtid = 0;
1650        lrd.backchain = 0;
1651        lrd.type = cpu_to_le16(LOG_SYNCPT);
1652        lrd.length = 0;
1653        lrd.log.syncpt.sync = 0;
1654
1655        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1656        bp = log->bp;
1657        lp = (struct logpage *) bp->l_ldata;
1658        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1659        lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1660        lbmIOWait(log->bp, lbmFREE);
1661        log->bp = NULL;
1662
1663        /*
1664         * synchronous update log superblock
1665         * mark log state as shutdown cleanly
1666         * (i.e., Log does not need to be replayed).
1667         */
1668        if ((rc = lbmRead(log, 1, &bpsuper)))
1669                goto out;
1670
1671        logsuper = (struct logsuper *) bpsuper->l_ldata;
1672        logsuper->state = cpu_to_le32(LOGREDONE);
1673        logsuper->end = cpu_to_le32(lsn);
1674        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1675        rc = lbmIOWait(bpsuper, lbmFREE);
1676
1677        jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1678                 lsn, log->page, log->eor);
1679
1680      out:
1681        /*
1682         * shutdown per log i/o
1683         */
1684        lbmLogShutdown(log);
1685
1686        if (rc) {
1687                jfs_warn("lmLogShutdown: exit(%d)", rc);
1688        }
1689        return rc;
1690}
1691
1692
1693/*
1694 * NAME:        lmLogFileSystem()
1695 *
1696 * FUNCTION:    insert (<activate> = true)/remove (<activate> = false)
1697 *      file system into/from log active file system list.
1698 *
1699 * PARAMETE:    log     - pointer to logs inode.
1700 *              fsdev   - kdev_t of filesystem.
1701 *              serial  - pointer to returned log serial number
1702 *              activate - insert/remove device from active list.
1703 *
1704 * RETURN:      0       - success
1705 *              errors returned by vms_iowait().
1706 */
1707static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1708                           int activate)
1709{
1710        int rc = 0;
1711        int i;
1712        struct logsuper *logsuper;
1713        struct lbuf *bpsuper;
1714        uuid_t *uuid = &sbi->uuid;
1715
1716        /*
1717         * insert/remove file system device to log active file system list.
1718         */
1719        if ((rc = lbmRead(log, 1, &bpsuper)))
1720                return rc;
1721
1722        logsuper = (struct logsuper *) bpsuper->l_ldata;
1723        if (activate) {
1724                for (i = 0; i < MAX_ACTIVE; i++)
1725                        if (uuid_is_null(&logsuper->active[i].uuid)) {
1726                                uuid_copy(&logsuper->active[i].uuid, uuid);
1727                                sbi->aggregate = i;
1728                                break;
1729                        }
1730                if (i == MAX_ACTIVE) {
1731                        jfs_warn("Too many file systems sharing journal!");
1732                        lbmFree(bpsuper);
1733                        return -EMFILE; /* Is there a better rc? */
1734                }
1735        } else {
1736                for (i = 0; i < MAX_ACTIVE; i++)
1737                        if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1738                                uuid_copy(&logsuper->active[i].uuid,
1739                                          &uuid_null);
1740                                break;
1741                        }
1742                if (i == MAX_ACTIVE) {
1743                        jfs_warn("Somebody stomped on the journal!");
1744                        lbmFree(bpsuper);
1745                        return -EIO;
1746                }
1747
1748        }
1749
1750        /*
1751         * synchronous write log superblock:
1752         *
1753         * write sidestream bypassing write queue:
1754         * at file system mount, log super block is updated for
1755         * activation of the file system before any log record
1756         * (MOUNT record) of the file system, and at file system
1757         * unmount, all meta data for the file system has been
1758         * flushed before log super block is updated for deactivation
1759         * of the file system.
1760         */
1761        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1762        rc = lbmIOWait(bpsuper, lbmFREE);
1763
1764        return rc;
1765}
1766
/*
 *              log buffer manager (lbm)
 *              ------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
 */
1781
1782/*
1783 *      lbmLogInit()
1784 *
1785 * initialize per log I/O setup at lmLogInit()
1786 */
1787static int lbmLogInit(struct jfs_log * log)
1788{                               /* log inode */
1789        int i;
1790        struct lbuf *lbuf;
1791
1792        jfs_info("lbmLogInit: log:0x%p", log);
1793
1794        /* initialize current buffer cursor */
1795        log->bp = NULL;
1796
1797        /* initialize log device write queue */
1798        log->wqueue = NULL;
1799
1800        /*
1801         * Each log has its own buffer pages allocated to it.  These are
1802         * not managed by the page cache.  This ensures that a transaction
1803         * writing to the log does not block trying to allocate a page from
1804         * the page cache (for the log).  This would be bad, since page
1805         * allocation waits on the kswapd thread that may be committing inodes
1806         * which would cause log activity.  Was that clear?  I'm trying to
1807         * avoid deadlock here.
1808         */
1809        init_waitqueue_head(&log->free_wait);
1810
1811        log->lbuf_free = NULL;
1812
1813        for (i = 0; i < LOGPAGES;) {
1814                char *buffer;
1815                uint offset;
1816                struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1817
1818                if (!page)
1819                        goto error;
1820                buffer = page_address(page);
1821                for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1822                        lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1823                        if (lbuf == NULL) {
1824                                if (offset == 0)
1825                                        __free_page(page);
1826                                goto error;
1827                        }
1828                        if (offset) /* we already have one reference */
1829                                get_page(page);
1830                        lbuf->l_offset = offset;
1831                        lbuf->l_ldata = buffer + offset;
1832                        lbuf->l_page = page;
1833                        lbuf->l_log = log;
1834                        init_waitqueue_head(&lbuf->l_ioevent);
1835
1836                        lbuf->l_freelist = log->lbuf_free;
1837                        log->lbuf_free = lbuf;
1838                        i++;
1839                }
1840        }
1841
1842        return (0);
1843
1844      error:
1845        lbmLogShutdown(log);
1846        return -ENOMEM;
1847}
1848
1849
1850/*
1851 *      lbmLogShutdown()
1852 *
1853 * finalize per log I/O setup at lmLogShutdown()
1854 */
1855static void lbmLogShutdown(struct jfs_log * log)
1856{
1857        struct lbuf *lbuf;
1858
1859        jfs_info("lbmLogShutdown: log:0x%p", log);
1860
1861        lbuf = log->lbuf_free;
1862        while (lbuf) {
1863                struct lbuf *next = lbuf->l_freelist;
1864                __free_page(lbuf->l_page);
1865                kfree(lbuf);
1866                lbuf = next;
1867        }
1868}
1869
1870
1871/*
1872 *      lbmAllocate()
1873 *
1874 * allocate an empty log buffer
1875 */
1876static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1877{
1878        struct lbuf *bp;
1879        unsigned long flags;
1880
1881        /*
1882         * recycle from log buffer freelist if any
1883         */
1884        LCACHE_LOCK(flags);
1885        LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1886        log->lbuf_free = bp->l_freelist;
1887        LCACHE_UNLOCK(flags);
1888
1889        bp->l_flag = 0;
1890
1891        bp->l_wqnext = NULL;
1892        bp->l_freelist = NULL;
1893
1894        bp->l_pn = pn;
1895        bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1896        bp->l_ceor = 0;
1897
1898        return bp;
1899}
1900
1901
/*
 *      lbmFree()
 *
 * return a log buffer to the freelist of its log; locking wrapper
 * around lbmfree() for callers that do not hold LCACHE_LOCK.
 */
static void lbmFree(struct lbuf * bp)
{
        unsigned long irqflags;

        LCACHE_LOCK(irqflags);
        lbmfree(bp);
        LCACHE_UNLOCK(irqflags);
}
1917
1918static void lbmfree(struct lbuf * bp)
1919{
1920        struct jfs_log *log = bp->l_log;
1921
1922        assert(bp->l_wqnext == NULL);
1923
1924        /*
1925         * return the buffer to head of freelist
1926         */
1927        bp->l_freelist = log->lbuf_free;
1928        log->lbuf_free = bp;
1929
1930        wake_up(&log->free_wait);
1931        return;
1932}
1933
1934
1935/*
1936 * NAME:        lbmRedrive
1937 *
1938 * FUNCTION:    add a log buffer to the log redrive list
1939 *
1940 * PARAMETER:
1941 *      bp      - log buffer
1942 *
1943 * NOTES:
1944 *      Takes log_redrive_lock.
1945 */
1946static inline void lbmRedrive(struct lbuf *bp)
1947{
1948        unsigned long flags;
1949
1950        spin_lock_irqsave(&log_redrive_lock, flags);
1951        bp->l_redrive_next = log_redrive_list;
1952        log_redrive_list = bp;
1953        spin_unlock_irqrestore(&log_redrive_lock, flags);
1954
1955        wake_up_process(jfsIOthread);
1956}
1957
1958
1959/*
1960 *      lbmRead()
1961 */
1962static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1963{
1964        struct bio *bio;
1965        struct lbuf *bp;
1966
1967        /*
1968         * allocate a log buffer
1969         */
1970        *bpp = bp = lbmAllocate(log, pn);
1971        jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1972
1973        bp->l_flag |= lbmREAD;
1974
1975        bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS);
1976        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1977        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1978        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1979
1980        bio->bi_end_io = lbmIODone;
1981        bio->bi_private = bp;
1982        /*check if journaling to disk has been disabled*/
1983        if (log->no_integrity) {
1984                bio->bi_iter.bi_size = 0;
1985                lbmIODone(bio);
1986        } else {
1987                submit_bio(bio);
1988        }
1989
1990        wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
1991
1992        return 0;
1993}
1994
1995
1996/*
1997 *      lbmWrite()
1998 *
1999 * buffer at head of pageout queue stays after completion of
2000 * partial-page pageout and redriven by explicit initiation of
2001 * pageout by caller until full-page pageout is completed and
2002 * released.
2003 *
2004 * device driver i/o done redrives pageout of new buffer at
2005 * head of pageout queue when current buffer at head of pageout
2006 * queue is released at the completion of its full-page pageout.
2007 *
2008 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2009 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2010 */
2011static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2012                     int cant_block)
2013{
2014        struct lbuf *tail;
2015        unsigned long flags;
2016
2017        jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2018
2019        /* map the logical block address to physical block address */
2020        bp->l_blkno =
2021            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2022
2023        LCACHE_LOCK(flags);             /* disable+lock */
2024
2025        /*
2026         * initialize buffer for device driver
2027         */
2028        bp->l_flag = flag;
2029
2030        /*
2031         *      insert bp at tail of write queue associated with log
2032         *
2033         * (request is either for bp already/currently at head of queue
2034         * or new bp to be inserted at tail)
2035         */
2036        tail = log->wqueue;
2037
2038        /* is buffer not already on write queue ? */
2039        if (bp->l_wqnext == NULL) {
2040                /* insert at tail of wqueue */
2041                if (tail == NULL) {
2042                        log->wqueue = bp;
2043                        bp->l_wqnext = bp;
2044                } else {
2045                        log->wqueue = bp;
2046                        bp->l_wqnext = tail->l_wqnext;
2047                        tail->l_wqnext = bp;
2048                }
2049
2050                tail = bp;
2051        }
2052
2053        /* is buffer at head of wqueue and for write ? */
2054        if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2055                LCACHE_UNLOCK(flags);   /* unlock+enable */
2056                return;
2057        }
2058
2059        LCACHE_UNLOCK(flags);   /* unlock+enable */
2060
2061        if (cant_block)
2062                lbmRedrive(bp);
2063        else if (flag & lbmSYNC)
2064                lbmStartIO(bp);
2065        else {
2066                LOGGC_UNLOCK(log);
2067                lbmStartIO(bp);
2068                LOGGC_LOCK(log);
2069        }
2070}
2071
2072
2073/*
2074 *      lbmDirectWrite()
2075 *
2076 * initiate pageout bypassing write queue for sidestream
2077 * (e.g., log superblock) write;
2078 */
2079static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2080{
2081        jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2082                 bp, flag, bp->l_pn);
2083
2084        /*
2085         * initialize buffer for device driver
2086         */
2087        bp->l_flag = flag | lbmDIRECT;
2088
2089        /* map the logical block address to physical block address */
2090        bp->l_blkno =
2091            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2092
2093        /*
2094         *      initiate pageout of the page
2095         */
2096        lbmStartIO(bp);
2097}
2098
2099
2100/*
2101 * NAME:        lbmStartIO()
2102 *
2103 * FUNCTION:    Interface to DD strategy routine
2104 *
2105 * RETURN:      none
2106 *
2107 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2108 */
2109static void lbmStartIO(struct lbuf * bp)
2110{
2111        struct bio *bio;
2112        struct jfs_log *log = bp->l_log;
2113
2114        jfs_info("lbmStartIO");
2115
2116        bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
2117        bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2118        bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2119        BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2120
2121        bio->bi_end_io = lbmIODone;
2122        bio->bi_private = bp;
2123
2124        /* check if journaling to disk has been disabled */
2125        if (log->no_integrity) {
2126                bio->bi_iter.bi_size = 0;
2127                lbmIODone(bio);
2128        } else {
2129                submit_bio(bio);
2130                INCREMENT(lmStat.submitted);
2131        }
2132}
2133
2134
2135/*
2136 *      lbmIOWait()
2137 */
2138static int lbmIOWait(struct lbuf * bp, int flag)
2139{
2140        unsigned long flags;
2141        int rc = 0;
2142
2143        jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2144
2145        LCACHE_LOCK(flags);             /* disable+lock */
2146
2147        LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2148
2149        rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2150
2151        if (flag & lbmFREE)
2152                lbmfree(bp);
2153
2154        LCACHE_UNLOCK(flags);   /* unlock+enable */
2155
2156        jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2157        return rc;
2158}
2159
2160/*
2161 *      lbmIODone()
2162 *
2163 * executed at INTIODONE level
2164 */
2165static void lbmIODone(struct bio *bio)
2166{
2167        struct lbuf *bp = bio->bi_private;
2168        struct lbuf *nextbp, *tail;
2169        struct jfs_log *log;
2170        unsigned long flags;
2171
2172        /*
2173         * get back jfs buffer bound to the i/o buffer
2174         */
2175        jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2176
2177        LCACHE_LOCK(flags);             /* disable+lock */
2178
2179        bp->l_flag |= lbmDONE;
2180
2181        if (bio->bi_status) {
2182                bp->l_flag |= lbmERROR;
2183
2184                jfs_err("lbmIODone: I/O error in JFS log");
2185        }
2186
2187        bio_put(bio);
2188
2189        /*
2190         *      pagein completion
2191         */
2192        if (bp->l_flag & lbmREAD) {
2193                bp->l_flag &= ~lbmREAD;
2194
2195                LCACHE_UNLOCK(flags);   /* unlock+enable */
2196
2197                /* wakeup I/O initiator */
2198                LCACHE_WAKEUP(&bp->l_ioevent);
2199
2200                return;
2201        }
2202
2203        /*
2204         *      pageout completion
2205         *
2206         * the bp at the head of write queue has completed pageout.
2207         *
2208         * if single-commit/full-page pageout, remove the current buffer
2209         * from head of pageout queue, and redrive pageout with
2210         * the new buffer at head of pageout queue;
2211         * otherwise, the partial-page pageout buffer stays at
2212         * the head of pageout queue to be redriven for pageout
2213         * by lmGroupCommit() until full-page pageout is completed.
2214         */
2215        bp->l_flag &= ~lbmWRITE;
2216        INCREMENT(lmStat.pagedone);
2217
2218        /* update committed lsn */
2219        log = bp->l_log;
2220        log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
2221
2222        if (bp->l_flag & lbmDIRECT) {
2223                LCACHE_WAKEUP(&bp->l_ioevent);
2224                LCACHE_UNLOCK(flags);
2225                return;
2226        }
2227
2228        tail = log->wqueue;
2229
2230        /* single element queue */
2231        if (bp == tail) {
2232                /* remove head buffer of full-page pageout
2233                 * from log device write queue
2234                 */
2235                if (bp->l_flag & lbmRELEASE) {
2236                        log->wqueue = NULL;
2237                        bp->l_wqnext = NULL;
2238                }
2239        }
2240        /* multi element queue */
2241        else {
2242                /* remove head buffer of full-page pageout
2243                 * from log device write queue
2244                 */
2245                if (bp->l_flag & lbmRELEASE) {
2246                        nextbp = tail->l_wqnext = bp->l_wqnext;
2247                        bp->l_wqnext = NULL;
2248
2249                        /*
2250                         * redrive pageout of next page at head of write queue:
2251                         * redrive next page without any bound tblk
2252                         * (i.e., page w/o any COMMIT records), or
2253                         * first page of new group commit which has been
2254                         * queued after current page (subsequent pageout
2255                         * is performed synchronously, except page without
2256                         * any COMMITs) by lmGroupCommit() as indicated
2257                         * by lbmWRITE flag;
2258                         */
2259                        if (nextbp->l_flag & lbmWRITE) {
2260                                /*
2261                                 * We can't do the I/O at interrupt time.
2262                                 * The jfsIO thread can do it
2263                                 */
2264                                lbmRedrive(nextbp);
2265                        }
2266                }
2267        }
2268
2269        /*
2270         *      synchronous pageout:
2271         *
2272         * buffer has not necessarily been removed from write queue
2273         * (e.g., synchronous write of partial-page with COMMIT):
2274         * leave buffer for i/o initiator to dispose
2275         */
2276        if (bp->l_flag & lbmSYNC) {
2277                LCACHE_UNLOCK(flags);   /* unlock+enable */
2278
2279                /* wakeup I/O initiator */
2280                LCACHE_WAKEUP(&bp->l_ioevent);
2281        }
2282
2283        /*
2284         *      Group Commit pageout:
2285         */
2286        else if (bp->l_flag & lbmGC) {
2287                LCACHE_UNLOCK(flags);
2288                lmPostGC(bp);
2289        }
2290
2291        /*
2292         *      asynchronous pageout:
2293         *
2294         * buffer must have been removed from write queue:
2295         * insert buffer at head of freelist where it can be recycled
2296         */
2297        else {
2298                assert(bp->l_flag & lbmRELEASE);
2299                assert(bp->l_flag & lbmFREE);
2300                lbmfree(bp);
2301
2302                LCACHE_UNLOCK(flags);   /* unlock+enable */
2303        }
2304}
2305
2306int jfsIOWait(void *arg)
2307{
2308        struct lbuf *bp;
2309
2310        do {
2311                spin_lock_irq(&log_redrive_lock);
2312                while ((bp = log_redrive_list)) {
2313                        log_redrive_list = bp->l_redrive_next;
2314                        bp->l_redrive_next = NULL;
2315                        spin_unlock_irq(&log_redrive_lock);
2316                        lbmStartIO(bp);
2317                        spin_lock_irq(&log_redrive_lock);
2318                }
2319
2320                if (freezing(current)) {
2321                        spin_unlock_irq(&log_redrive_lock);
2322                        try_to_freeze();
2323                } else {
2324                        set_current_state(TASK_INTERRUPTIBLE);
2325                        spin_unlock_irq(&log_redrive_lock);
2326                        schedule();
2327                }
2328        } while (!kthread_should_stop());
2329
2330        jfs_info("jfsIOWait being killed!");
2331        return 0;
2332}
2333
2334/*
2335 * NAME:        lmLogFormat()/jfs_logform()
2336 *
2337 * FUNCTION:    format file system log
2338 *
2339 * PARAMETERS:
2340 *      log     - volume log
2341 *      logAddress - start address of log space in FS block
2342 *      logSize - length of log space in FS block;
2343 *
2344 * RETURN:      0       - success
2345 *              -EIO    - i/o error
2346 *
2347 * XXX: We're synchronously writing one page at a time.  This needs to
2348 *      be improved by writing multiple pages at once.
2349 */
2350int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2351{
2352        int rc = -EIO;
2353        struct jfs_sb_info *sbi;
2354        struct logsuper *logsuper;
2355        struct logpage *lp;
2356        int lspn;               /* log sequence page number */
2357        struct lrd *lrd_ptr;
2358        int npages = 0;
2359        struct lbuf *bp;
2360
2361        jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2362                 (long long)logAddress, logSize);
2363
2364        sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2365
2366        /* allocate a log buffer */
2367        bp = lbmAllocate(log, 1);
2368
2369        npages = logSize >> sbi->l2nbperpage;
2370
2371        /*
2372         *      log space:
2373         *
2374         * page 0 - reserved;
2375         * page 1 - log superblock;
2376         * page 2 - log data page: A SYNC log record is written
2377         *          into this page at logform time;
2378         * pages 3-N - log data page: set to empty log data pages;
2379         */
2380        /*
2381         *      init log superblock: log page 1
2382         */
2383        logsuper = (struct logsuper *) bp->l_ldata;
2384
2385        logsuper->magic = cpu_to_le32(LOGMAGIC);
2386        logsuper->version = cpu_to_le32(LOGVERSION);
2387        logsuper->state = cpu_to_le32(LOGREDONE);
2388        logsuper->flag = cpu_to_le32(sbi->mntflag);     /* ? */
2389        logsuper->size = cpu_to_le32(npages);
2390        logsuper->bsize = cpu_to_le32(sbi->bsize);
2391        logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2392        logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2393
2394        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2395        bp->l_blkno = logAddress + sbi->nbperpage;
2396        lbmStartIO(bp);
2397        if ((rc = lbmIOWait(bp, 0)))
2398                goto exit;
2399
2400        /*
2401         *      init pages 2 to npages-1 as log data pages:
2402         *
2403         * log page sequence number (lpsn) initialization:
2404         *
2405         * pn:   0     1     2     3                 n-1
2406         *       +-----+-----+=====+=====+===.....===+=====+
2407         * lspn:             N-1   0     1           N-2
2408         *                   <--- N page circular file ---->
2409         *
2410         * the N (= npages-2) data pages of the log is maintained as
2411         * a circular file for the log records;
2412         * lpsn grows by 1 monotonically as each log page is written
2413         * to the circular file of the log;
2414         * and setLogpage() will not reset the page number even if
2415         * the eor is equal to LOGPHDRSIZE. In order for binary search
2416         * still work in find log end process, we have to simulate the
2417         * log wrap situation at the log format time.
2418         * The 1st log page written will have the highest lpsn. Then
2419         * the succeeding log pages will have ascending order of
2420         * the lspn starting from 0, ... (N-2)
2421         */
2422        lp = (struct logpage *) bp->l_ldata;
2423        /*
2424         * initialize 1st log page to be written: lpsn = N - 1,
2425         * write a SYNCPT log record is written to this page
2426         */
2427        lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2428        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2429
2430        lrd_ptr = (struct lrd *) &lp->data;
2431        lrd_ptr->logtid = 0;
2432        lrd_ptr->backchain = 0;
2433        lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2434        lrd_ptr->length = 0;
2435        lrd_ptr->log.syncpt.sync = 0;
2436
2437        bp->l_blkno += sbi->nbperpage;
2438        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2439        lbmStartIO(bp);
2440        if ((rc = lbmIOWait(bp, 0)))
2441                goto exit;
2442
2443        /*
2444         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2445         */
2446        for (lspn = 0; lspn < npages - 3; lspn++) {
2447                lp->h.page = lp->t.page = cpu_to_le32(lspn);
2448                lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2449
2450                bp->l_blkno += sbi->nbperpage;
2451                bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2452                lbmStartIO(bp);
2453                if ((rc = lbmIOWait(bp, 0)))
2454                        goto exit;
2455        }
2456
2457        rc = 0;
2458exit:
2459        /*
2460         *      finalize log
2461         */
2462        /* release the buffer */
2463        lbmFree(bp);
2464
2465        return rc;
2466}
2467
#ifdef CONFIG_JFS_STATISTICS
/*
 * seq_file show routine: print the log manager counters kept in lmStat.
 * <v> is not used.  Always returns 0.
 */
int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
        /* banner */
        seq_printf(m,
                       "JFS Logmgr stats\n"
                       "================\n");

        /* one counter per line */
        seq_printf(m, "commits = %d\n", lmStat.commit);
        seq_printf(m, "writes submitted = %d\n", lmStat.submitted);
        seq_printf(m, "writes completed = %d\n", lmStat.pagedone);
        seq_printf(m, "full pages submitted = %d\n", lmStat.full_page);
        seq_printf(m, "partial pages submitted = %d\n",
                   lmStat.partial_page);

        return 0;
}
#endif /* CONFIG_JFS_STATISTICS */
2487