1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * Copyright (C) International Business Machines Corp., 2000-2004 4 * Portions Copyright (C) Christoph Hellwig, 2001-2002 5 */ 6#ifndef _H_JFS_LOGMGR 7#define _H_JFS_LOGMGR 8 9#include <linux/uuid.h> 10 11#include "jfs_filsys.h" 12#include "jfs_lock.h" 13 14/* 15 * log manager configuration parameters 16 */ 17 18/* log page size */ 19#define LOGPSIZE 4096 20#define L2LOGPSIZE 12 21 22#define LOGPAGES 16 /* Log pages per mounted file system */ 23 24/* 25 * log logical volume 26 * 27 * a log is used to make the commit operation on journalled 28 * files within the same logical volume group atomic. 29 * a log is implemented with a logical volume. 30 * there is one log per logical volume group. 31 * 32 * block 0 of the log logical volume is not used (ipl etc). 33 * block 1 contains a log "superblock" and is used by logFormat(), 34 * lmLogInit(), lmLogShutdown(), and logRedo() to record status 35 * of the log but is not otherwise used during normal processing. 36 * blocks 2 - (N-1) are used to contain log records. 37 * 38 * when a volume group is varied-on-line, logRedo() must have 39 * been executed before the file systems (logical volumes) in 40 * the volume group can be mounted. 41 */ 42/* 43 * log superblock (block 1 of logical volume) 44 */ 45#define LOGSUPER_B 1 46#define LOGSTART_B 2 47 48#define LOGMAGIC 0x87654321 49#define LOGVERSION 1 50 51#define MAX_ACTIVE 128 /* Max active file systems sharing log */ 52 53struct logsuper { 54 __le32 magic; /* 4: log lv identifier */ 55 __le32 version; /* 4: version number */ 56 __le32 serial; /* 4: log open/mount counter */ 57 __le32 size; /* 4: size in number of LOGPSIZE blocks */ 58 __le32 bsize; /* 4: logical block size in byte */ 59 __le32 l2bsize; /* 4: log2 of bsize */ 60 61 __le32 flag; /* 4: option */ 62 __le32 state; /* 4: state - see below */ 63 64 __le32 end; /* 4: addr of last log record set by logredo */ 65 uuid_t uuid; /* 16: 128-bit journal uuid */ 66 char label[16]; /* 16: journal label */ 67 struct { 68 uuid_t uuid; 69 } active[MAX_ACTIVE]; /* 2048: active file systems list */ 70}; 71 72/* log flag: commit option (see jfs_filsys.h) */ 73 74/* log state */ 75#define LOGMOUNT 0 /* log mounted by lmLogInit() */ 76#define LOGREDONE 1 /* log shutdown by lmLogShutdown(). 77 * log redo completed by logredo(). 78 */ 79#define LOGWRAP 2 /* log wrapped */ 80#define LOGREADERR 3 /* log read error detected in logredo() */ 81 82 83/* 84 * log logical page 85 * 86 * (this comment should be rewritten !) 87 * the header and trailer structures (h,t) will normally have 88 * the same page and eor value. 89 * An exception to this occurs when a complete page write is not 90 * accomplished on a power failure. Since the hardware may "split write" 91 * sectors in the page, any out of order sequence may occur during powerfail 92 * and needs to be recognized during log replay. The xor value is 93 * an "exclusive or" of all log words in the page up to eor. This 94 * 32 bit eor is stored with the top 16 bits in the header and the 95 * bottom 16 bits in the trailer. logredo can easily recognize pages 96 * that were not completed by reconstructing this eor and checking 97 * the log page. 98 * 99 * Previous versions of the operating system did not allow split 100 * writes and detected partially written records in logredo by 101 * ordering the updates to the header, trailer, and the move of data 102 * into the logdata area. The order: (1) data is moved (2) header 103 * is updated (3) trailer is updated. In logredo, when the header 104 * differed from the trailer, the header and trailer were reconciled 105 * as follows: if h.page != t.page they were set to the smaller of 106 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) 107 * h.eor != t.eor they were set to the smaller of their two values. 108 */ 109struct logpage { 110 struct { /* header */ 111 __le32 page; /* 4: log sequence page number */ 112 __le16 rsrvd; /* 2: */ 113 __le16 eor; /* 2: end-of-log offset of lasrt record write */ 114 } h; 115 116 __le32 data[LOGPSIZE / 4 - 4]; /* log record area */ 117 118 struct { /* trailer */ 119 __le32 page; /* 4: normally the same as h.page */ 120 __le16 rsrvd; /* 2: */ 121 __le16 eor; /* 2: normally the same as h.eor */ 122 } t; 123}; 124 125#define LOGPHDRSIZE 8 /* log page header size */ 126#define LOGPTLRSIZE 8 /* log page trailer size */ 127 128 129/* 130 * log record 131 * 132 * (this comment should be rewritten !) 133 * jfs uses only "after" log records (only a single writer is allowed 134 * in a page, pages are written to temporary paging space if 135 * if they must be written to disk before commit, and i/o is 136 * scheduled for modified pages to their home location after 137 * the log records containing the after values and the commit 138 * record is written to the log on disk, undo discards the copy 139 * in main-memory.) 140 * 141 * a log record consists of a data area of variable length followed by 142 * a descriptor of fixed size LOGRDSIZE bytes. 143 * the data area is rounded up to an integral number of 4-bytes and 144 * must be no longer than LOGPSIZE. 145 * the descriptor is of size of multiple of 4-bytes and aligned on a 146 * 4-byte boundary. 147 * records are packed one after the other in the data area of log pages. 148 * (sometimes a DUMMY record is inserted so that at least one record ends 149 * on every page or the longest record is placed on at most two pages). 150 * the field eor in page header/trailer points to the byte following 151 * the last record on a page. 152 */ 153 154/* log record types */ 155#define LOG_COMMIT 0x8000 156#define LOG_SYNCPT 0x4000 157#define LOG_MOUNT 0x2000 158#define LOG_REDOPAGE 0x0800 159#define LOG_NOREDOPAGE 0x0080 160#define LOG_NOREDOINOEXT 0x0040 161#define LOG_UPDATEMAP 0x0008 162#define LOG_NOREDOFILE 0x0001 163 164/* REDOPAGE/NOREDOPAGE log record data type */ 165#define LOG_INODE 0x0001 166#define LOG_XTREE 0x0002 167#define LOG_DTREE 0x0004 168#define LOG_BTROOT 0x0010 169#define LOG_EA 0x0020 170#define LOG_ACL 0x0040 171#define LOG_DATA 0x0080 172#define LOG_NEW 0x0100 173#define LOG_EXTEND 0x0200 174#define LOG_RELOCATE 0x0400 175#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ 176 177/* UPDATEMAP log record descriptor type */ 178#define LOG_ALLOCXADLIST 0x0080 179#define LOG_ALLOCPXDLIST 0x0040 180#define LOG_ALLOCXAD 0x0020 181#define LOG_ALLOCPXD 0x0010 182#define LOG_FREEXADLIST 0x0008 183#define LOG_FREEPXDLIST 0x0004 184#define LOG_FREEXAD 0x0002 185#define LOG_FREEPXD 0x0001 186 187 188struct lrd { 189 /* 190 * type independent area 191 */ 192 __le32 logtid; /* 4: log transaction identifier */ 193 __le32 backchain; /* 4: ptr to prev record of same transaction */ 194 __le16 type; /* 2: record type */ 195 __le16 length; /* 2: length of data in record (in byte) */ 196 __le32 aggregate; /* 4: file system lv/aggregate */ 197 /* (16) */ 198 199 /* 200 * type dependent area (20) 201 */ 202 union { 203 204 /* 205 * COMMIT: commit 206 * 207 * transaction commit: no type-dependent information; 208 */ 209 210 /* 211 * REDOPAGE: after-image 212 * 213 * apply after-image; 214 * 215 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 216 */ 217 struct { 218 __le32 fileset; /* 4: fileset number */ 219 __le32 inode; /* 4: inode number */ 220 __le16 type; /* 2: REDOPAGE record type */ 221 __le16 l2linesize; /* 2: log2 of line size */ 222 pxd_t pxd; /* 8: on-disk page pxd */ 223 } redopage; /* (20) */ 224 225 /* 226 * NOREDOPAGE: the page is freed 227 * 228 * do not apply after-image records which precede this record 229 * in the log with the same page block number to this page. 230 * 231 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 232 */ 233 struct { 234 __le32 fileset; /* 4: fileset number */ 235 __le32 inode; /* 4: inode number */ 236 __le16 type; /* 2: NOREDOPAGE record type */ 237 __le16 rsrvd; /* 2: reserved */ 238 pxd_t pxd; /* 8: on-disk page pxd */ 239 } noredopage; /* (20) */ 240 241 /* 242 * UPDATEMAP: update block allocation map 243 * 244 * either in-line PXD, 245 * or out-of-line XADLIST; 246 * 247 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; 248 */ 249 struct { 250 __le32 fileset; /* 4: fileset number */ 251 __le32 inode; /* 4: inode number */ 252 __le16 type; /* 2: UPDATEMAP record type */ 253 __le16 nxd; /* 2: number of extents */ 254 pxd_t pxd; /* 8: pxd */ 255 } updatemap; /* (20) */ 256 257 /* 258 * NOREDOINOEXT: the inode extent is freed 259 * 260 * do not apply after-image records which precede this 261 * record in the log with the any of the 4 page block 262 * numbers in this inode extent. 263 * 264 * NOTE: The fileset and pxd fields MUST remain in 265 * the same fields in the REDOPAGE record format. 266 * 267 */ 268 struct { 269 __le32 fileset; /* 4: fileset number */ 270 __le32 iagnum; /* 4: IAG number */ 271 __le32 inoext_idx; /* 4: inode extent index */ 272 pxd_t pxd; /* 8: on-disk page pxd */ 273 } noredoinoext; /* (20) */ 274 275 /* 276 * SYNCPT: log sync point 277 * 278 * replay log up to syncpt address specified; 279 */ 280 struct { 281 __le32 sync; /* 4: syncpt address (0 = here) */ 282 } syncpt; 283 284 /* 285 * MOUNT: file system mount 286 * 287 * file system mount: no type-dependent information; 288 */ 289 290 /* 291 * ? FREEXTENT: free specified extent(s) 292 * 293 * free specified extent(s) from block allocation map 294 * N.B.: nextents should be length of data/sizeof(xad_t) 295 */ 296 struct { 297 __le32 type; /* 4: FREEXTENT record type */ 298 __le32 nextent; /* 4: number of extents */ 299 300 /* data: PXD or XAD list */ 301 } freextent; 302 303 /* 304 * ? NOREDOFILE: this file is freed 305 * 306 * do not apply records which precede this record in the log 307 * with the same inode number. 308 * 309 * NOREDOFILE must be the first to be written at commit 310 * (last to be read in logredo()) - it prevents 311 * replay of preceding updates of all preceding generations 312 * of the inumber esp. the on-disk inode itself. 313 */ 314 struct { 315 __le32 fileset; /* 4: fileset number */ 316 __le32 inode; /* 4: inode number */ 317 } noredofile; 318 319 /* 320 * ? NEWPAGE: 321 * 322 * metadata type dependent 323 */ 324 struct { 325 __le32 fileset; /* 4: fileset number */ 326 __le32 inode; /* 4: inode number */ 327 __le32 type; /* 4: NEWPAGE record type */ 328 pxd_t pxd; /* 8: on-disk page pxd */ 329 } newpage; 330 331 /* 332 * ? DUMMY: filler 333 * 334 * no type-dependent information 335 */ 336 } log; 337}; /* (36) */ 338 339#define LOGRDSIZE (sizeof(struct lrd)) 340 341/* 342 * line vector descriptor 343 */ 344struct lvd { 345 __le16 offset; 346 __le16 length; 347}; 348 349 350/* 351 * log logical volume 352 */ 353struct jfs_log { 354 355 struct list_head sb_list;/* This is used to sync metadata 356 * before writing syncpt. 357 */ 358 struct list_head journal_list; /* Global list */ 359 struct block_device *bdev; /* 4: log lv pointer */ 360 int serial; /* 4: log mount serial number */ 361 362 s64 base; /* @8: log extent address (inline log ) */ 363 int size; /* 4: log size in log page (in page) */ 364 int l2bsize; /* 4: log2 of bsize */ 365 366 unsigned long flag; /* 4: flag */ 367 368 struct lbuf *lbuf_free; /* 4: free lbufs */ 369 wait_queue_head_t free_wait; /* 4: */ 370 371 /* log write */ 372 int logtid; /* 4: log tid */ 373 int page; /* 4: page number of eol page */ 374 int eor; /* 4: eor of last record in eol page */ 375 struct lbuf *bp; /* 4: current log page buffer */ 376 377 struct mutex loglock; /* 4: log write serialization lock */ 378 379 /* syncpt */ 380 int nextsync; /* 4: bytes to write before next syncpt */ 381 int active; /* 4: */ 382 wait_queue_head_t syncwait; /* 4: */ 383 384 /* commit */ 385 uint cflag; /* 4: */ 386 struct list_head cqueue; /* FIFO commit queue */ 387 struct tblock *flush_tblk; /* tblk we're waiting on for flush */ 388 int gcrtc; /* 4: GC_READY transaction count */ 389 struct tblock *gclrt; /* 4: latest GC_READY transaction */ 390 spinlock_t gclock; /* 4: group commit lock */ 391 int logsize; /* 4: log data area size in byte */ 392 int lsn; /* 4: end-of-log */ 393 int clsn; /* 4: clsn */ 394 int syncpt; /* 4: addr of last syncpt record */ 395 int sync; /* 4: addr from last logsync() */ 396 struct list_head synclist; /* 8: logsynclist anchor */ 397 spinlock_t synclock; /* 4: synclist lock */ 398 struct lbuf *wqueue; /* 4: log pageout queue */ 399 int count; /* 4: count */ 400 uuid_t uuid; /* 16: 128-bit uuid of log device */ 401 402 int no_integrity; /* 3: flag to disable journaling to disk */ 403}; 404 405/* 406 * Log flag 407 */ 408#define log_INLINELOG 1 409#define log_SYNCBARRIER 2 410#define log_QUIESCE 3 411#define log_FLUSH 4 412 413/* 414 * group commit flag 415 */ 416/* jfs_log */ 417#define logGC_PAGEOUT 0x00000001 418 419/* tblock/lbuf */ 420#define tblkGC_QUEUE 0x0001 421#define tblkGC_READY 0x0002 422#define tblkGC_COMMIT 0x0004 423#define tblkGC_COMMITTED 0x0008 424#define tblkGC_EOP 0x0010 425#define tblkGC_FREE 0x0020 426#define tblkGC_LEADER 0x0040 427#define tblkGC_ERROR 0x0080 428#define tblkGC_LAZY 0x0100 // D230860 429#define tblkGC_UNLOCKED 0x0200 // D230860 430 431/* 432 * log cache buffer header 433 */ 434struct lbuf { 435 struct jfs_log *l_log; /* 4: log associated with buffer */ 436 437 /* 438 * data buffer base area 439 */ 440 uint l_flag; /* 4: pageout control flags */ 441 442 struct lbuf *l_wqnext; /* 4: write queue link */ 443 struct lbuf *l_freelist; /* 4: freelistlink */ 444 445 int l_pn; /* 4: log page number */ 446 int l_eor; /* 4: log record eor */ 447 int l_ceor; /* 4: committed log record eor */ 448 449 s64 l_blkno; /* 8: log page block number */ 450 caddr_t l_ldata; /* 4: data page */ 451 struct page *l_page; /* The page itself */ 452 uint l_offset; /* Offset of l_ldata within the page */ 453 454 wait_queue_head_t l_ioevent; /* 4: i/o done event */ 455}; 456 457/* Reuse l_freelist for redrive list */ 458#define l_redrive_next l_freelist 459 460/* 461 * logsynclist block 462 * 463 * common logsyncblk prefix for jbuf_t and tblock 464 */ 465struct logsyncblk { 466 u16 xflag; /* flags */ 467 u16 flag; /* only meaninful in tblock */ 468 lid_t lid; /* lock id */ 469 s32 lsn; /* log sequence number */ 470 struct list_head synclist; /* log sync list link */ 471}; 472 473/* 474 * logsynclist serialization (per log) 475 */ 476 477#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) 478#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) 479#define LOGSYNC_UNLOCK(log, flags) \ 480 spin_unlock_irqrestore(&(log)->synclock, flags) 481 482/* compute the difference in bytes of lsn from sync point */ 483#define logdiff(diff, lsn, log)\ 484{\ 485 diff = (lsn) - (log)->syncpt;\ 486 if (diff < 0)\ 487 diff += (log)->logsize;\ 488} 489 490extern int lmLogOpen(struct super_block *sb); 491extern int lmLogClose(struct super_block *sb); 492extern int lmLogShutdown(struct jfs_log * log); 493extern int lmLogInit(struct jfs_log * log); 494extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); 495extern int lmGroupCommit(struct jfs_log *, struct tblock *); 496extern int jfsIOWait(void *); 497extern void jfs_flush_journal(struct jfs_log * log, int wait); 498extern void jfs_syncpt(struct jfs_log *log, int hard_sync); 499 500#endif /* _H_JFS_LOGMGR */ 501