linux/fs/jfs/jfs_logmgr.h
<<
>>
Prefs
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or
   8 *   (at your option) any later version.
   9 *
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19#ifndef _H_JFS_LOGMGR
  20#define _H_JFS_LOGMGR
  21
  22#include "jfs_filsys.h"
  23#include "jfs_lock.h"
  24
  25/*
  26 *      log manager configuration parameters
  27 */
  28
  29/* log page size */
  30#define LOGPSIZE        4096
  31#define L2LOGPSIZE      12
  32
  33#define LOGPAGES        16      /* Log pages per mounted file system */
  34
  35/*
  36 *      log logical volume
  37 *
  38 * a log is used to make the commit operation on journalled
  39 * files within the same logical volume group atomic.
  40 * a log is implemented with a logical volume.
  41 * there is one log per logical volume group.
  42 *
  43 * block 0 of the log logical volume is not used (ipl etc).
  44 * block 1 contains a log "superblock" and is used by logFormat(),
  45 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
  46 * of the log but is not otherwise used during normal processing.
  47 * blocks 2 - (N-1) are used to contain log records.
  48 *
  49 * when a volume group is varied-on-line, logRedo() must have
  50 * been executed before the file systems (logical volumes) in
  51 * the volume group can be mounted.
  52 */
  53/*
  54 *      log superblock (block 1 of logical volume)
  55 */
  56#define LOGSUPER_B      1
  57#define LOGSTART_B      2
  58
  59#define LOGMAGIC        0x87654321
  60#define LOGVERSION      1
  61
  62#define MAX_ACTIVE      128     /* Max active file systems sharing log */
  63
  64struct logsuper {
  65        __le32 magic;           /* 4: log lv identifier */
  66        __le32 version;         /* 4: version number */
  67        __le32 serial;          /* 4: log open/mount counter */
  68        __le32 size;            /* 4: size in number of LOGPSIZE blocks */
  69        __le32 bsize;           /* 4: logical block size in byte */
  70        __le32 l2bsize;         /* 4: log2 of bsize */
  71
  72        __le32 flag;            /* 4: option */
  73        __le32 state;           /* 4: state - see below */
  74
  75        __le32 end;             /* 4: addr of last log record set by logredo */
  76        char uuid[16];          /* 16: 128-bit journal uuid */
  77        char label[16];         /* 16: journal label */
  78        struct {
  79                char uuid[16];
  80        } active[MAX_ACTIVE];   /* 2048: active file systems list */
  81};
  82
  83#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
  84
  85/* log flag: commit option (see jfs_filsys.h) */
  86
  87/* log state */
  88#define LOGMOUNT        0       /* log mounted by lmLogInit() */
  89#define LOGREDONE       1       /* log shutdown by lmLogShutdown().
  90                                 * log redo completed by logredo().
  91                                 */
  92#define LOGWRAP         2       /* log wrapped */
  93#define LOGREADERR      3       /* log read error detected in logredo() */
  94
  95
  96/*
  97 *      log logical page
  98 *
  99 * (this comment should be rewritten !)
 100 * the header and trailer structures (h,t) will normally have
 101 * the same page and eor value.
 102 * An exception to this occurs when a complete page write is not
 103 * accomplished on a power failure. Since the hardware may "split write"
 104 * sectors in the page, any out of order sequence may occur during powerfail
 105 * and needs to be recognized during log replay.  The xor value is
 106 * an "exclusive or" of all log words in the page up to eor.  This
 107 * 32 bit eor is stored with the top 16 bits in the header and the
 108 * bottom 16 bits in the trailer.  logredo can easily recognize pages
 109 * that were not completed by reconstructing this eor and checking
 110 * the log page.
 111 *
 112 * Previous versions of the operating system did not allow split
 113 * writes and detected partially written records in logredo by
 114 * ordering the updates to the header, trailer, and the move of data
 115 * into the logdata area.  The order: (1) data is moved (2) header
 116 * is updated (3) trailer is updated.  In logredo, when the header
 117 * differed from the trailer, the header and trailer were reconciled
 118 * as follows: if h.page != t.page they were set to the smaller of
 119 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
 120 * h.eor != t.eor they were set to the smaller of their two values.
 121 */
 122struct logpage {
 123        struct {                /* header */
 124                __le32 page;    /* 4: log sequence page number */
 125                __le16 rsrvd;   /* 2: */
 126                __le16 eor;     /* 2: end-of-log offset of lasrt record write */
 127        } h;
 128
 129        __le32 data[LOGPSIZE / 4 - 4];  /* log record area */
 130
 131        struct {                /* trailer */
 132                __le32 page;    /* 4: normally the same as h.page */
 133                __le16 rsrvd;   /* 2: */
 134                __le16 eor;     /* 2: normally the same as h.eor */
 135        } t;
 136};
 137
 138#define LOGPHDRSIZE     8       /* log page header size */
 139#define LOGPTLRSIZE     8       /* log page trailer size */
 140
 141
 142/*
 143 *      log record
 144 *
 145 * (this comment should be rewritten !)
 146 * jfs uses only "after" log records (only a single writer is allowed
  147 * in a page, pages are written to temporary paging space
  148 * if they must be written to disk before commit, and i/o is
 149 * scheduled for modified pages to their home location after
 150 * the log records containing the after values and the commit
 151 * record is written to the log on disk, undo discards the copy
 152 * in main-memory.)
 153 *
 154 * a log record consists of a data area of variable length followed by
 155 * a descriptor of fixed size LOGRDSIZE bytes.
 156 * the data area is rounded up to an integral number of 4-bytes and
 157 * must be no longer than LOGPSIZE.
 158 * the descriptor is of size of multiple of 4-bytes and aligned on a
 159 * 4-byte boundary.
 160 * records are packed one after the other in the data area of log pages.
 161 * (sometimes a DUMMY record is inserted so that at least one record ends
 162 * on every page or the longest record is placed on at most two pages).
 163 * the field eor in page header/trailer points to the byte following
 164 * the last record on a page.
 165 */
 166
 167/* log record types */
 168#define LOG_COMMIT              0x8000
 169#define LOG_SYNCPT              0x4000
 170#define LOG_MOUNT               0x2000
 171#define LOG_REDOPAGE            0x0800
 172#define LOG_NOREDOPAGE          0x0080
 173#define LOG_NOREDOINOEXT        0x0040
 174#define LOG_UPDATEMAP           0x0008
 175#define LOG_NOREDOFILE          0x0001
 176
 177/* REDOPAGE/NOREDOPAGE log record data type */
 178#define LOG_INODE               0x0001
 179#define LOG_XTREE               0x0002
 180#define LOG_DTREE               0x0004
 181#define LOG_BTROOT              0x0010
 182#define LOG_EA                  0x0020
 183#define LOG_ACL                 0x0040
 184#define LOG_DATA                0x0080
 185#define LOG_NEW                 0x0100
 186#define LOG_EXTEND              0x0200
 187#define LOG_RELOCATE            0x0400
 188#define LOG_DIR_XTREE           0x0800  /* Xtree is in directory inode */
 189
 190/* UPDATEMAP log record descriptor type */
 191#define LOG_ALLOCXADLIST        0x0080
 192#define LOG_ALLOCPXDLIST        0x0040
 193#define LOG_ALLOCXAD            0x0020
 194#define LOG_ALLOCPXD            0x0010
 195#define LOG_FREEXADLIST         0x0008
 196#define LOG_FREEPXDLIST         0x0004
 197#define LOG_FREEXAD             0x0002
 198#define LOG_FREEPXD             0x0001
 199
 200
 201struct lrd {
 202        /*
 203         * type independent area
 204         */
 205        __le32 logtid;          /* 4: log transaction identifier */
 206        __le32 backchain;       /* 4: ptr to prev record of same transaction */
 207        __le16 type;            /* 2: record type */
 208        __le16 length;          /* 2: length of data in record (in byte) */
 209        __le32 aggregate;       /* 4: file system lv/aggregate */
 210        /* (16) */
 211
 212        /*
 213         * type dependent area (20)
 214         */
 215        union {
 216
 217                /*
 218                 *      COMMIT: commit
 219                 *
 220                 * transaction commit: no type-dependent information;
 221                 */
 222
 223                /*
 224                 *      REDOPAGE: after-image
 225                 *
 226                 * apply after-image;
 227                 *
 228                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
 229                 */
 230                struct {
 231                        __le32 fileset; /* 4: fileset number */
 232                        __le32 inode;   /* 4: inode number */
 233                        __le16 type;    /* 2: REDOPAGE record type */
 234                        __le16 l2linesize;      /* 2: log2 of line size */
 235                        pxd_t pxd;      /* 8: on-disk page pxd */
 236                } redopage;     /* (20) */
 237
 238                /*
 239                 *      NOREDOPAGE: the page is freed
 240                 *
 241                 * do not apply after-image records which precede this record
 242                 * in the log with the same page block number to this page.
 243                 *
 244                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
 245                 */
 246                struct {
 247                        __le32 fileset; /* 4: fileset number */
 248                        __le32 inode;   /* 4: inode number */
 249                        __le16 type;    /* 2: NOREDOPAGE record type */
 250                        __le16 rsrvd;   /* 2: reserved */
 251                        pxd_t pxd;      /* 8: on-disk page pxd */
 252                } noredopage;   /* (20) */
 253
 254                /*
 255                 *      UPDATEMAP: update block allocation map
 256                 *
 257                 * either in-line PXD,
 258                 * or     out-of-line  XADLIST;
 259                 *
 260                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
 261                 */
 262                struct {
 263                        __le32 fileset; /* 4: fileset number */
 264                        __le32 inode;   /* 4: inode number */
 265                        __le16 type;    /* 2: UPDATEMAP record type */
 266                        __le16 nxd;     /* 2: number of extents */
 267                        pxd_t pxd;      /* 8: pxd */
 268                } updatemap;    /* (20) */
 269
 270                /*
 271                 *      NOREDOINOEXT: the inode extent is freed
 272                 *
 273                 * do not apply after-image records which precede this
 274                 * record in the log with the any of the 4 page block
 275                 * numbers in this inode extent.
 276                 *
 277                 * NOTE: The fileset and pxd fields MUST remain in
 278                 *       the same fields in the REDOPAGE record format.
 279                 *
 280                 */
 281                struct {
 282                        __le32 fileset; /* 4: fileset number */
 283                        __le32 iagnum;  /* 4: IAG number     */
 284                        __le32 inoext_idx;      /* 4: inode extent index */
 285                        pxd_t pxd;      /* 8: on-disk page pxd */
 286                } noredoinoext; /* (20) */
 287
 288                /*
 289                 *      SYNCPT: log sync point
 290                 *
 291                 * replay log upto syncpt address specified;
 292                 */
 293                struct {
 294                        __le32 sync;    /* 4: syncpt address (0 = here) */
 295                } syncpt;
 296
 297                /*
 298                 *      MOUNT: file system mount
 299                 *
 300                 * file system mount: no type-dependent information;
 301                 */
 302
 303                /*
 304                 *      ? FREEXTENT: free specified extent(s)
 305                 *
 306                 * free specified extent(s) from block allocation map
 307                 * N.B.: nextents should be length of data/sizeof(xad_t)
 308                 */
 309                struct {
 310                        __le32 type;    /* 4: FREEXTENT record type */
 311                        __le32 nextent; /* 4: number of extents */
 312
 313                        /* data: PXD or XAD list */
 314                } freextent;
 315
 316                /*
 317                 *      ? NOREDOFILE: this file is freed
 318                 *
 319                 * do not apply records which precede this record in the log
 320                 * with the same inode number.
 321                 *
 322                 * NOREDOFILE must be the first to be written at commit
 323                 * (last to be read in logredo()) - it prevents
 324                 * replay of preceding updates of all preceding generations
 325                 * of the inumber esp. the on-disk inode itself.
 326                 */
 327                struct {
 328                        __le32 fileset; /* 4: fileset number */
 329                        __le32 inode;   /* 4: inode number */
 330                } noredofile;
 331
 332                /*
 333                 *      ? NEWPAGE:
 334                 *
 335                 * metadata type dependent
 336                 */
 337                struct {
 338                        __le32 fileset; /* 4: fileset number */
 339                        __le32 inode;   /* 4: inode number */
 340                        __le32 type;    /* 4: NEWPAGE record type */
 341                        pxd_t pxd;      /* 8: on-disk page pxd */
 342                } newpage;
 343
 344                /*
 345                 *      ? DUMMY: filler
 346                 *
 347                 * no type-dependent information
 348                 */
 349        } log;
 350};                                      /* (36) */
 351
 352#define LOGRDSIZE       (sizeof(struct lrd))
 353
 354/*
 355 *      line vector descriptor
 356 */
 357struct lvd {
 358        __le16 offset;
 359        __le16 length;
 360};
 361
 362
 363/*
 364 *      log logical volume
 365 */
 366struct jfs_log {
 367
 368        struct list_head sb_list;/*  This is used to sync metadata
 369                                 *    before writing syncpt.
 370                                 */
 371        struct list_head journal_list; /* Global list */
 372        struct block_device *bdev; /* 4: log lv pointer */
 373        int serial;             /* 4: log mount serial number */
 374
 375        s64 base;               /* @8: log extent address (inline log ) */
 376        int size;               /* 4: log size in log page (in page) */
 377        int l2bsize;            /* 4: log2 of bsize */
 378
 379        unsigned long flag;     /* 4: flag */
 380
 381        struct lbuf *lbuf_free; /* 4: free lbufs */
 382        wait_queue_head_t free_wait;    /* 4: */
 383
 384        /* log write */
 385        int logtid;             /* 4: log tid */
 386        int page;               /* 4: page number of eol page */
 387        int eor;                /* 4: eor of last record in eol page */
 388        struct lbuf *bp;        /* 4: current log page buffer */
 389
 390        struct mutex loglock;   /* 4: log write serialization lock */
 391
 392        /* syncpt */
 393        int nextsync;           /* 4: bytes to write before next syncpt */
 394        int active;             /* 4: */
 395        wait_queue_head_t syncwait;     /* 4: */
 396
 397        /* commit */
 398        uint cflag;             /* 4: */
 399        struct list_head cqueue; /* FIFO commit queue */
 400        struct tblock *flush_tblk; /* tblk we're waiting on for flush */
 401        int gcrtc;              /* 4: GC_READY transaction count */
 402        struct tblock *gclrt;   /* 4: latest GC_READY transaction */
 403        spinlock_t gclock;      /* 4: group commit lock */
 404        int logsize;            /* 4: log data area size in byte */
 405        int lsn;                /* 4: end-of-log */
 406        int clsn;               /* 4: clsn */
 407        int syncpt;             /* 4: addr of last syncpt record */
 408        int sync;               /* 4: addr from last logsync() */
 409        struct list_head synclist;      /* 8: logsynclist anchor */
 410        spinlock_t synclock;    /* 4: synclist lock */
 411        struct lbuf *wqueue;    /* 4: log pageout queue */
 412        int count;              /* 4: count */
 413        char uuid[16];          /* 16: 128-bit uuid of log device */
 414
 415        int no_integrity;       /* 3: flag to disable journaling to disk */
 416};
 417
 418/*
 419 * Log flag
 420 */
 421#define log_INLINELOG   1
 422#define log_SYNCBARRIER 2
 423#define log_QUIESCE     3
 424#define log_FLUSH       4
 425
 426/*
 427 * group commit flag
 428 */
 429/* jfs_log */
 430#define logGC_PAGEOUT   0x00000001
 431
 432/* tblock/lbuf */
 433#define tblkGC_QUEUE            0x0001
 434#define tblkGC_READY            0x0002
 435#define tblkGC_COMMIT           0x0004
 436#define tblkGC_COMMITTED        0x0008
 437#define tblkGC_EOP              0x0010
 438#define tblkGC_FREE             0x0020
 439#define tblkGC_LEADER           0x0040
 440#define tblkGC_ERROR            0x0080
 441#define tblkGC_LAZY             0x0100  // D230860
 442#define tblkGC_UNLOCKED         0x0200  // D230860
 443
 444/*
 445 *              log cache buffer header
 446 */
 447struct lbuf {
 448        struct jfs_log *l_log;  /* 4: log associated with buffer */
 449
 450        /*
 451         * data buffer base area
 452         */
 453        uint l_flag;            /* 4: pageout control flags */
 454
 455        struct lbuf *l_wqnext;  /* 4: write queue link */
 456        struct lbuf *l_freelist;        /* 4: freelistlink */
 457
 458        int l_pn;               /* 4: log page number */
 459        int l_eor;              /* 4: log record eor */
 460        int l_ceor;             /* 4: committed log record eor */
 461
 462        s64 l_blkno;            /* 8: log page block number */
 463        caddr_t l_ldata;        /* 4: data page */
 464        struct page *l_page;    /* The page itself */
 465        uint l_offset;          /* Offset of l_ldata within the page */
 466
 467        wait_queue_head_t l_ioevent;    /* 4: i/o done event */
 468};
 469
 470/* Reuse l_freelist for redrive list */
 471#define l_redrive_next l_freelist
 472
 473/*
 474 *      logsynclist block
 475 *
 476 * common logsyncblk prefix for jbuf_t and tblock
 477 */
 478struct logsyncblk {
 479        u16 xflag;              /* flags */
 480        u16 flag;               /* only meaninful in tblock */
 481        lid_t lid;              /* lock id */
 482        s32 lsn;                /* log sequence number */
 483        struct list_head synclist;      /* log sync list link */
 484};
 485
 486/*
 487 *      logsynclist serialization (per log)
 488 */
 489
 490#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
 491#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
 492#define LOGSYNC_UNLOCK(log, flags) \
 493        spin_unlock_irqrestore(&(log)->synclock, flags)
 494
 495/* compute the difference in bytes of lsn from sync point */
 496#define logdiff(diff, lsn, log)\
 497{\
 498        diff = (lsn) - (log)->syncpt;\
 499        if (diff < 0)\
 500                diff += (log)->logsize;\
 501}
 502
 503extern int lmLogOpen(struct super_block *sb);
 504extern int lmLogClose(struct super_block *sb);
 505extern int lmLogShutdown(struct jfs_log * log);
 506extern int lmLogInit(struct jfs_log * log);
 507extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
 508extern int lmGroupCommit(struct jfs_log *, struct tblock *);
 509extern int jfsIOWait(void *);
 510extern void jfs_flush_journal(struct jfs_log * log, int wait);
 511extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
 512
 513#endif                          /* _H_JFS_LOGMGR */
 514