linux/fs/jfs/jfs_logmgr.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 *   Copyright (C) International Business Machines Corp., 2000-2004
   4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   5 */
   6#ifndef _H_JFS_LOGMGR
   7#define _H_JFS_LOGMGR
   8
   9#include <linux/uuid.h>
  10
  11#include "jfs_filsys.h"
  12#include "jfs_lock.h"
  13
  14/*
  15 *      log manager configuration parameters
  16 */
  17
   18/* log page size in bytes, and its log2 (1 << L2LOGPSIZE == LOGPSIZE) */
   19#define LOGPSIZE        4096
   20#define L2LOGPSIZE      12
   21
   22#define LOGPAGES        16      /* Log pages per mounted file system */
  23
  24/*
  25 *      log logical volume
  26 *
  27 * a log is used to make the commit operation on journalled
  28 * files within the same logical volume group atomic.
  29 * a log is implemented with a logical volume.
  30 * there is one log per logical volume group.
  31 *
  32 * block 0 of the log logical volume is not used (ipl etc).
  33 * block 1 contains a log "superblock" and is used by logFormat(),
  34 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
  35 * of the log but is not otherwise used during normal processing.
  36 * blocks 2 - (N-1) are used to contain log records.
  37 *
  38 * when a volume group is varied-on-line, logRedo() must have
  39 * been executed before the file systems (logical volumes) in
  40 * the volume group can be mounted.
  41 */
   42/*
   43 *      log superblock (block 1 of logical volume)
      *
      * LOGSUPER_B and LOGSTART_B are block numbers within the log: the
      * superblock lives in block 1 and log records start at block 2.
      * struct logsuper stores every integer field as little-endian (__le32).
   44 */
   45#define LOGSUPER_B      1       /* block number of the log superblock */
   46#define LOGSTART_B      2       /* first block used for log records */
   47
   48#define LOGMAGIC        0x87654321      /* presumably the expected logsuper.magic - confirm */
   49#define LOGVERSION      1               /* presumably the expected logsuper.version - confirm */
   50
   51#define MAX_ACTIVE      128     /* Max active file systems sharing log */
   52
   53struct logsuper {
   54        __le32 magic;           /* 4: log lv identifier */
   55        __le32 version;         /* 4: version number */
   56        __le32 serial;          /* 4: log open/mount counter */
   57        __le32 size;            /* 4: size in number of LOGPSIZE blocks */
   58        __le32 bsize;           /* 4: logical block size in bytes */
   59        __le32 l2bsize;         /* 4: log2 of bsize */
   60
   61        __le32 flag;            /* 4: option */
   62        __le32 state;           /* 4: state - see the LOG* state values below */
   63
   64        __le32 end;             /* 4: addr of last log record set by logredo */
   65        uuid_t uuid;            /* 16: 128-bit journal uuid */
   66        char label[16];         /* 16: journal label */
   67        struct {
   68                uuid_t uuid;
   69        } active[MAX_ACTIVE];   /* 2048: active file systems list (MAX_ACTIVE * 16) */
   70};
  71
   72/* log flag (logsuper.flag): commit option (see jfs_filsys.h) */
   73
   74/* log state (values of logsuper.state) */
   75#define LOGMOUNT        0       /* log mounted by lmLogInit() */
   76#define LOGREDONE       1       /* log shutdown by lmLogShutdown().
   77                                 * log redo completed by logredo().
   78                                 */
   79#define LOGWRAP         2       /* log wrapped */
   80#define LOGREADERR      3       /* log read error detected in logredo() */
  81
  82
  83/*
  84 *      log logical page
  85 *
  86 * (this comment should be rewritten !)
  87 * the header and trailer structures (h,t) will normally have
  88 * the same page and eor value.
  89 * An exception to this occurs when a complete page write is not
  90 * accomplished on a power failure. Since the hardware may "split write"
  91 * sectors in the page, any out of order sequence may occur during powerfail
  92 * and needs to be recognized during log replay.  The xor value is
  93 * an "exclusive or" of all log words in the page up to eor.  This
   94 * 32 bit xor is stored with the top 16 bits in the header and the
   95 * bottom 16 bits in the trailer.  logredo can easily recognize pages
   96 * that were not completed by reconstructing this xor and checking
  97 * the log page.
  98 *
  99 * Previous versions of the operating system did not allow split
 100 * writes and detected partially written records in logredo by
 101 * ordering the updates to the header, trailer, and the move of data
 102 * into the logdata area.  The order: (1) data is moved (2) header
 103 * is updated (3) trailer is updated.  In logredo, when the header
 104 * differed from the trailer, the header and trailer were reconciled
 105 * as follows: if h.page != t.page they were set to the smaller of
 106 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
 107 * h.eor != t.eor they were set to the smaller of their two values.
 108 */
  109struct logpage {
             /*
              * One log page: 8-byte header + (LOGPSIZE - 16)-byte record area +
              * 8-byte trailer == LOGPSIZE bytes.  All fields are little-endian.
              */
  110        struct {                /* header */
  111                __le32 page;    /* 4: log sequence page number */
  112                __le16 rsrvd;   /* 2: reserved */
  113                __le16 eor;     /* 2: end-of-log offset of last record write */
  114        } h;
  115
  116        __le32 data[LOGPSIZE / 4 - 4];  /* log record area (LOGPSIZE - 16 bytes) */
  117
  118        struct {                /* trailer */
  119                __le32 page;    /* 4: normally the same as h.page */
  120                __le16 rsrvd;   /* 2: reserved */
  121                __le16 eor;     /* 2: normally the same as h.eor */
  122        } t;
  123};
  124
  125#define LOGPHDRSIZE     8       /* log page header size in bytes (struct logpage.h) */
  126#define LOGPTLRSIZE     8       /* log page trailer size in bytes (struct logpage.t) */
 127
 128
 129/*
 130 *      log record
 131 *
 132 * (this comment should be rewritten !)
 133 * jfs uses only "after" log records (only a single writer is allowed
 134 * in a page, pages are written to temporary paging space if
 135 * they must be written to disk before commit, and i/o is
 136 * scheduled for modified pages to their home location after
 137 * the log records containing the after values and the commit
 138 * record is written to the log on disk, undo discards the copy
 139 * in main-memory.)
 140 *
 141 * a log record consists of a data area of variable length followed by
 142 * a descriptor of fixed size LOGRDSIZE bytes.
 143 * the data area is rounded up to an integral number of 4-bytes and
 144 * must be no longer than LOGPSIZE.
  145 * the descriptor size is a multiple of 4 bytes and it is aligned on a
  146 * 4-byte boundary.
 147 * records are packed one after the other in the data area of log pages.
 148 * (sometimes a DUMMY record is inserted so that at least one record ends
 149 * on every page or the longest record is placed on at most two pages).
 150 * the field eor in page header/trailer points to the byte following
 151 * the last record on a page.
 152 */
 153
  154/* log record types (lrd.type) */
  155#define LOG_COMMIT              0x8000
  156#define LOG_SYNCPT              0x4000
  157#define LOG_MOUNT               0x2000
  158#define LOG_REDOPAGE            0x0800
  159#define LOG_NOREDOPAGE          0x0080
  160#define LOG_NOREDOINOEXT        0x0040
  161#define LOG_UPDATEMAP           0x0008
  162#define LOG_NOREDOFILE          0x0001
  163
  164/* REDOPAGE/NOREDOPAGE log record data type (lrd.log.redopage.type / lrd.log.noredopage.type) */
  165#define LOG_INODE               0x0001
  166#define LOG_XTREE               0x0002
  167#define LOG_DTREE               0x0004
  168#define LOG_BTROOT              0x0010
  169#define LOG_EA                  0x0020
  170#define LOG_ACL                 0x0040
  171#define LOG_DATA                0x0080
  172#define LOG_NEW                 0x0100
  173#define LOG_EXTEND              0x0200
  174#define LOG_RELOCATE            0x0400
  175#define LOG_DIR_XTREE           0x0800  /* Xtree is in directory inode */
  176
  177/* UPDATEMAP log record descriptor type (lrd.log.updatemap.type) */
  178#define LOG_ALLOCXADLIST        0x0080
  179#define LOG_ALLOCPXDLIST        0x0040
  180#define LOG_ALLOCXAD            0x0020
  181#define LOG_ALLOCPXD            0x0010
  182#define LOG_FREEXADLIST         0x0008
  183#define LOG_FREEPXDLIST         0x0004
  184#define LOG_FREEXAD             0x0002
  185#define LOG_FREEPXD             0x0001
 186
 187
  188struct lrd {
  189        /*
  190         * type independent area
  191         */
  192        __le32 logtid;          /* 4: log transaction identifier */
  193        __le32 backchain;       /* 4: ptr to prev record of same transaction */
  194        __le16 type;            /* 2: record type (LOG_COMMIT, LOG_SYNCPT, ...) */
  195        __le16 length;          /* 2: length of data in record (in bytes) */
  196        __le32 aggregate;       /* 4: file system lv/aggregate */
  197        /* (16) */
  198
  199        /*
  200         * type dependent area (20)
  201         */
  202        union {
  203
  204                /*
  205                 *      COMMIT: commit
  206                 *
  207                 * transaction commit: no type-dependent information;
  208                 */
  209
  210                /*
  211                 *      REDOPAGE: after-image
  212                 *
  213                 * apply after-image;
  214                 *
  215                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  216                 */
  217                struct {
  218                        __le32 fileset; /* 4: fileset number */
  219                        __le32 inode;   /* 4: inode number */
  220                        __le16 type;    /* 2: REDOPAGE record type */
  221                        __le16 l2linesize;      /* 2: log2 of line size */
  222                        pxd_t pxd;      /* 8: on-disk page pxd */
  223                } redopage;     /* (20) */
  224
  225                /*
  226                 *      NOREDOPAGE: the page is freed
  227                 *
  228                 * do not apply after-image records which precede this record
  229                 * in the log with the same page block number to this page.
  230                 *
  231                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  232                 */
  233                struct {
  234                        __le32 fileset; /* 4: fileset number */
  235                        __le32 inode;   /* 4: inode number */
  236                        __le16 type;    /* 2: NOREDOPAGE record type */
  237                        __le16 rsrvd;   /* 2: reserved */
  238                        pxd_t pxd;      /* 8: on-disk page pxd */
  239                } noredopage;   /* (20) */
  240
  241                /*
  242                 *      UPDATEMAP: update block allocation map
  243                 *
  244                 * either in-line PXD,
  245                 * or     out-of-line  XADLIST;
  246                 *
  247                 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
  248                 */
  249                struct {
  250                        __le32 fileset; /* 4: fileset number */
  251                        __le32 inode;   /* 4: inode number */
  252                        __le16 type;    /* 2: UPDATEMAP record type */
  253                        __le16 nxd;     /* 2: number of extents */
  254                        pxd_t pxd;      /* 8: pxd */
  255                } updatemap;    /* (20) */
  256
  257                /*
  258                 *      NOREDOINOEXT: the inode extent is freed
  259                 *
  260                 * do not apply after-image records which precede this
  261                 * record in the log with any of the 4 page block
  262                 * numbers in this inode extent.
  263                 *
  264                 * NOTE: The fileset and pxd fields MUST remain in
  265                 *       the same fields in the REDOPAGE record format.
  266                 *
  267                 */
  268                struct {
  269                        __le32 fileset; /* 4: fileset number */
  270                        __le32 iagnum;  /* 4: IAG number     */
  271                        __le32 inoext_idx;      /* 4: inode extent index */
  272                        pxd_t pxd;      /* 8: on-disk page pxd */
  273                } noredoinoext; /* (20) */
  274
  275                /*
  276                 *      SYNCPT: log sync point
  277                 *
  278                 * replay log up to syncpt address specified;
  279                 */
  280                struct {
  281                        __le32 sync;    /* 4: syncpt address (0 = here) */
  282                } syncpt;
  283
  284                /*
  285                 *      MOUNT: file system mount
  286                 *
  287                 * file system mount: no type-dependent information;
  288                 */
  289
  290                /*
  291                 *      ? FREEXTENT: free specified extent(s)
  292                 *
  293                 * free specified extent(s) from block allocation map
  294                 * N.B.: nextent should be length of data/sizeof(xad_t)
  295                 */
  296                struct {
  297                        __le32 type;    /* 4: FREEXTENT record type */
  298                        __le32 nextent; /* 4: number of extents */
  299
  300                        /* data: PXD or XAD list */
  301                } freextent;
  302
  303                /*
  304                 *      ? NOREDOFILE: this file is freed
  305                 *
  306                 * do not apply records which precede this record in the log
  307                 * with the same inode number.
  308                 *
  309                 * NOREDOFILE must be the first to be written at commit
  310                 * (last to be read in logredo()) - it prevents
  311                 * replay of preceding updates of all preceding generations
  312                 * of the inumber esp. the on-disk inode itself.
  313                 */
  314                struct {
  315                        __le32 fileset; /* 4: fileset number */
  316                        __le32 inode;   /* 4: inode number */
  317                } noredofile;
  318
  319                /*
  320                 *      ? NEWPAGE:
  321                 *
  322                 * metadata type dependent
  323                 */
  324                struct {
  325                        __le32 fileset; /* 4: fileset number */
  326                        __le32 inode;   /* 4: inode number */
  327                        __le32 type;    /* 4: NEWPAGE record type */
  328                        pxd_t pxd;      /* 8: on-disk page pxd */
  329                } newpage;
  330
  331                /*
  332                 *      ? DUMMY: filler
  333                 *
  334                 * no type-dependent information
  335                 */
  336        } log;
  337};                                      /* (36) = 16 type independent + 20 type dependent */
  338
  339#define LOGRDSIZE       (sizeof(struct lrd))    /* size of the fixed log record descriptor */
 340
  341/*
  342 *      line vector descriptor
      *
      * Two little-endian 16-bit fields.
      * NOTE(review): the units of offset/length are not stated in this
      * header - confirm against the code that fills them in.
  343 */
  344struct lvd {
  345        __le16 offset;
  346        __le16 length;
  347};
 348
 349
  350/*
  351 *      log logical volume
      *
      * Unlike the structures above, this one uses native integer types and
      * kernel objects (list heads, locks, wait queues) rather than __le types.
      *
      * NOTE(review): the leading "N:" in the field comments looks like a
      * historical byte count; it may not match the current field types
      * (e.g. unsigned long, pointers, struct mutex) - do not rely on it.
  352 */
  353struct jfs_log {
  354
  355        struct list_head sb_list;/*  This is used to sync metadata
  356                                 *    before writing syncpt.
  357                                 */
  358        struct list_head journal_list; /* Global list */
  359        struct block_device *bdev; /* 4: log lv pointer */
  360        int serial;             /* 4: log mount serial number */
  361
  362        s64 base;               /* @8: log extent address (inline log) */
  363        int size;               /* 4: log size in log page (in page) */
  364        int l2bsize;            /* 4: log2 of bsize */
  365
  366        unsigned long flag;     /* 4: flag (see the log_* values below) */
  367
  368        struct lbuf *lbuf_free; /* 4: free lbufs */
  369        wait_queue_head_t free_wait;    /* 4: */
  370
  371        /* log write */
  372        int logtid;             /* 4: log tid */
  373        int page;               /* 4: page number of eol page */
  374        int eor;                /* 4: eor of last record in eol page */
  375        struct lbuf *bp;        /* 4: current log page buffer */
  376
  377        struct mutex loglock;   /* 4: log write serialization lock */
  378
  379        /* syncpt */
  380        int nextsync;           /* 4: bytes to write before next syncpt */
  381        int active;             /* 4: */
  382        wait_queue_head_t syncwait;     /* 4: */
  383
  384        /* commit */
  385        uint cflag;             /* 4: */
  386        struct list_head cqueue; /* FIFO commit queue */
  387        struct tblock *flush_tblk; /* tblk we're waiting on for flush */
  388        int gcrtc;              /* 4: GC_READY transaction count */
  389        struct tblock *gclrt;   /* 4: latest GC_READY transaction */
  390        spinlock_t gclock;      /* 4: group commit lock */
  391        int logsize;            /* 4: log data area size in bytes */
  392        int lsn;                /* 4: end-of-log */
  393        int clsn;               /* 4: clsn */
  394        int syncpt;             /* 4: addr of last syncpt record */
  395        int sync;               /* 4: addr from last logsync() */
  396        struct list_head synclist;      /* 8: logsynclist anchor */
  397        spinlock_t synclock;    /* 4: synclist lock (see LOGSYNC_LOCK) */
  398        struct lbuf *wqueue;    /* 4: log pageout queue */
  399        int count;              /* 4: count */
  400        uuid_t uuid;            /* 16: 128-bit uuid of log device */
  401
  402        int no_integrity;       /* 4: flag to disable journaling to disk */
  403};
 404
  405/*
  406 * Log flag
      *
      * NOTE(review): the values are sequential (1..4) rather than distinct
      * bit masks - presumably bit numbers within jfs_log.flag; confirm
      * against the code that sets and tests them.
  407 */
  408#define log_INLINELOG   1
  409#define log_SYNCBARRIER 2
  410#define log_QUIESCE     3
  411#define log_FLUSH       4
 412
  413/*
  414 * group commit flag
  415 */
  416/* jfs_log (presumably jfs_log.cflag - confirm) */
  417#define logGC_PAGEOUT   0x00000001
  418
  419/* tblock/lbuf: each value below is a distinct single-bit mask */
  420#define tblkGC_QUEUE            0x0001
  421#define tblkGC_READY            0x0002
  422#define tblkGC_COMMIT           0x0004
  423#define tblkGC_COMMITTED        0x0008
  424#define tblkGC_EOP              0x0010
  425#define tblkGC_FREE             0x0020
  426#define tblkGC_LEADER           0x0040
  427#define tblkGC_ERROR            0x0080
  428#define tblkGC_LAZY             0x0100  // D230860
  429#define tblkGC_UNLOCKED         0x0200  // D230860
 430
  431/*
  432 *              log cache buffer header
      *
      * NOTE(review): the leading "N:" in the field comments looks like a
      * historical byte count and may not match the current field types.
  433 */
  434struct lbuf {
  435        struct jfs_log *l_log;  /* 4: log associated with buffer */
  436
  437        /*
  438         * data buffer base area
  439         */
  440        uint l_flag;            /* 4: pageout control flags */
  441
  442        struct lbuf *l_wqnext;  /* 4: write queue link */
  443        struct lbuf *l_freelist;        /* 4: freelist link (also used as l_redrive_next) */
  444
  445        int l_pn;               /* 4: log page number */
  446        int l_eor;              /* 4: log record eor */
  447        int l_ceor;             /* 4: committed log record eor */
  448
  449        s64 l_blkno;            /* 8: log page block number */
  450        caddr_t l_ldata;        /* 4: data page */
  451        struct page *l_page;    /* The page itself */
  452        uint l_offset;          /* Offset of l_ldata within the page */
  453
  454        wait_queue_head_t l_ioevent;    /* 4: i/o done event */
  455};
  456
  457/* Reuse l_freelist for redrive list: l_redrive_next is an alias for the same member */
  458#define l_redrive_next l_freelist
 459
  460/*
  461 *      logsynclist block
  462 *
  463 * common logsyncblk prefix for jbuf_t and tblock
  464 */
  465struct logsyncblk {
  466        u16 xflag;              /* flags */
  467        u16 flag;               /* only meaningful in tblock */
  468        lid_t lid;              /* lock id */
  469        s32 lsn;                /* log sequence number */
  470        struct list_head synclist;      /* log sync list link */
  471};
 472
  473/*
  474 *      logsynclist serialization (per log)
      *
      * Thin wrappers around the spinlock API operating on (log)->synclock.
      * LOGSYNC_LOCK expands to spin_lock_irqsave() and LOGSYNC_UNLOCK to
      * spin_unlock_irqrestore(); pass the same flags variable to both.
  475 */
  476
  477#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
  478#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
  479#define LOGSYNC_UNLOCK(log, flags) \
  480        spin_unlock_irqrestore(&(log)->synclock, flags)
 481
 482/* compute the difference in bytes of lsn from sync point */
 483#define logdiff(diff, lsn, log)\
 484{\
 485        diff = (lsn) - (log)->syncpt;\
 486        if (diff < 0)\
 487                diff += (log)->logsize;\
 488}
 489
 490extern int lmLogOpen(struct super_block *sb);
 491extern int lmLogClose(struct super_block *sb);
 492extern int lmLogShutdown(struct jfs_log * log);
 493extern int lmLogInit(struct jfs_log * log);
 494extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
 495extern int lmGroupCommit(struct jfs_log *, struct tblock *);
 496extern int jfsIOWait(void *);
 497extern void jfs_flush_journal(struct jfs_log * log, int wait);
 498extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
 499
 500#endif                          /* _H_JFS_LOGMGR */
 501