#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
/*
 * Reader/writer consistent mechanism without starving writers. This type of
 * lock is for data where the reader wants a consistent set of information
 * and is willing to retry if the information changes. There are two types
 * of readers:
 * 1. Sequence readers which never block a writer but they may have to retry
 *    if a writer is in progress by detecting change in sequence number.
 *    Writers do not wait for a sequence reader.
 * 2. Locking readers which will wait if a writer or another locking reader
 *    is in progress. A locking reader in progress will also block a writer
 *    from going forward. Unlike the regular rwlock, the read lock here is
 *    exclusive so that only one locking reader can get it.
 *
 * This is not as cache friendly as brlock. Also, this may not work well
 * for data that contains pointers, because any writer could
 * invalidate a pointer that a reader was following.
 *
 * Expected non-blocking reader usage:
 *	do {
 *		seq = read_seqbegin(&foo);
 *		...
 *	} while (read_seqretry(&foo, seq));
 *
 *
 * On non-SMP the spin locks disappear but the writer still needs
 * to increment the sequence variables because an interrupt routine could
 * change the state of the data.
 *
 * Based on x86_64 vsyscall gettimeofday
 * by Keith Owens and Andrea Arcangeli
 */

#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/compiler.h>
#include <asm/processor.h>

/*
 * Version using sequence counter only.
 * This can be used when code has its own mutex protecting the
 * updating starting before the write_seqcount_begin() and ending
 * after the write_seqcount_end().
 */
typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;

static inline void __seqcount_init(seqcount_t *s, const char *name,
					  struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
		.dep_map = { .name = #lockname } \

# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, 1, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
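/*
 * Initialization example (illustrative sketch only; "foo_seq", "struct bar"
 * and bar_init() are hypothetical): a seqcount_t can be initialized
 * statically with SEQCNT_ZERO() or at run time with seqcount_init().
 *
 *	static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
 *
 *	struct bar {
 *		seqcount_t seq;
 *		u64 value;
 *	};
 *
 *	static void bar_init(struct bar *b)
 *	{
 *		seqcount_init(&b->seq);
 *		b->value = 0;
 *	}
 */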
/**
 * __read_seqcount_begin - begin a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	return ret;
}

/**
 * raw_read_seqcount - Read the raw seqcount
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount without any lockdep checking and without checking or
 * masking the sequence counter LSB. Calling code is responsible for
 * handling that.
 */
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	return ret;
}

/**
 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount_begin opens a read critical section of the given
 * seqcount, but without any lockdep checking. Validity of the critical
 * section is tested by calling read_seqcount_retry().
 */
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_begin(s);
	smp_rmb();
	return ret;
}

/**
 * read_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * read_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_begin(s);
}

/**
 * raw_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 *
 * Unlike read_seqcount_begin(), this function will not wait for the count
 * to stabilize. If a writer is active when we begin, we will fail the
 * read_seqcount_retry() instead of stabilizing at the beginning of the
 * critical section.
 */
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	return ret & ~1;
}

/**
 * __read_seqcount_retry - end a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	return unlikely(s->sequence != start);
}

/**
 * read_seqcount_retry - end a seq-read critical section
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * read_seqcount_retry closes a read critical section of the given seqcount.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 */
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_retry(s, start);
}
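/*
 * Reader-side example (illustrative sketch only; "struct bar" and
 * bar_read_value() are hypothetical, continuing the earlier sketch):
 * loop until a consistent snapshot of the protected data has been read.
 *
 *	static u64 bar_read_value(struct bar *b)
 *	{
 *		unsigned seq;
 *		u64 val;
 *
 *		do {
 *			seq = read_seqcount_begin(&b->seq);
 *			val = b->value;
 *		} while (read_seqcount_retry(&b->seq, seq));
 *
 *		return val;
 *	}
 */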
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	s->sequence++;
	smp_wmb();
}

static inline void raw_write_seqcount_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
}

/**
 * raw_write_seqcount_barrier - do a seq write barrier
 * @s: pointer to seqcount_t
 *
 * This can be used to provide an ordering guarantee instead of the
 * usual consistency guarantee. It is one wmb cheaper, because we can
 * collapse the two back-to-back wmb()s.
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			unsigned s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		Y = true;
 *
 *		raw_write_seqcount_barrier(&seq);
 *
 *		X = false;
 *	}
 */
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
	s->sequence++;
	smp_wmb();
	s->sequence++;
}

static inline int raw_read_seqcount_latch(seqcount_t *s)
{
	return lockless_dereference(s->sequence);
}

/**
 * raw_write_seqcount_latch - redirect readers to even/odd copy
 * @s: pointer to seqcount_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state the
 * latch allows the same for non-atomic updates. The trade-off is doubling the
 * cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like:
 *
 * struct latch_struct {
 *	seqcount_t		seq;
 *	struct data_struct	data[2];
 * };
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following:
 *
 * void latch_modify(struct latch_struct *latch, ...)
 * {
 *	smp_wmb();	<- Ensure that the last data[1] update is visible
 *	latch->seq++;
 *	smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *	modify(latch->data[0], ...);
 *
 *	smp_wmb();	<- Ensure that the data[0] update is visible
 *	latch->seq++;
 *	smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *	modify(latch->data[1], ...);
 * }
 *
 * The query will have a form like:
 *
 * struct entry *latch_query(struct latch_struct *latch, ...)
 * {
 *	struct entry *entry;
 *	unsigned seq, idx;
 *
 *	do {
 *		seq = lockless_dereference(latch->seq);
 *
 *		idx = seq & 0x01;
 *		entry = data_query(latch->data[idx], ...);
 *
 *		smp_rmb();
 *	} while (seq != latch->seq);
 *
 *	return entry;
 * }
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE: The non-requirement for atomic modifications does _NOT_ include
 *       the publishing of new entries in the case where data is a dynamic
 *       data structure.
 *
 *       An iteration might start in data[0] and get suspended long enough
 *       to miss an entire modification sequence, once it resumes it might
 *       observe the new entry.
 *
 * NOTE: When data is a dynamic data structure, one should use regular RCU
 *       patterns to manage the lifetimes of the objects within.
 */
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

/*
 * The sequence-counter-only write-side functions below assume that callers
 * provide their own mutual exclusion between writers.
 */
static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

static inline void write_seqcount_begin(seqcount_t *s)
{
	write_seqcount_begin_nested(s, 0);
}

static inline void write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, 1, _RET_IP_);
	raw_write_seqcount_end(s);
}
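/*
 * Writer-side example (illustrative sketch only; "struct bar", "bar_lock"
 * and bar_set_value() are hypothetical, continuing the earlier sketch):
 * the caller provides its own writer serialization, here a spinlock,
 * around the seqcount write section.
 *
 *	static DEFINE_SPINLOCK(bar_lock);
 *
 *	static void bar_set_value(struct bar *b, u64 val)
 *	{
 *		spin_lock(&bar_lock);
 *		write_seqcount_begin(&b->seq);
 *		b->value = val;
 *		write_seqcount_end(&b->seq);
 *		spin_unlock(&bar_lock);
 *	}
 */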
/**
 * write_seqcount_invalidate - invalidate in-progress read-side seq operations
 * @s: pointer to seqcount_t
 *
 * After write_seqcount_invalidate, no read-side seq operations will complete
 * successfully and see data older than this.
 */
static inline void write_seqcount_invalidate(seqcount_t *s)
{
	smp_wmb();
	s->sequence += 2;
}

typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

/*
 * These macros triggered gcc-3.x compile-time problems. We think these are
 * OK now. Be cautious.
 */
#define __SEQLOCK_UNLOCKED(lockname)			\
	{						\
		.seqcount = SEQCNT_ZERO(lockname),	\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
	}

#define seqlock_init(x)					\
	do {						\
		seqcount_init(&(x)->seqcount);		\
		spin_lock_init(&(x)->lock);		\
	} while (0)

#define DEFINE_SEQLOCK(x) \
		seqlock_t x = __SEQLOCK_UNLOCKED(x)

/*
 * Read side functions for starting and finalizing a read side section.
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	return read_seqcount_begin(&sl->seqcount);
}

static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	return read_seqcount_retry(&sl->seqcount, start);
}

/*
 * Lock out other writers and update the count.
 * Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
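/*
 * seqlock_t usage example (illustrative sketch only; "ts_lock", "cached_ts"
 * and the helpers are hypothetical): the writer side takes the embedded
 * spinlock via write_seqlock(), while readers retry via
 * read_seqbegin()/read_seqretry().
 *
 *	static DEFINE_SEQLOCK(ts_lock);
 *	static u64 cached_ts;
 *
 *	static void ts_update(u64 now)
 *	{
 *		write_seqlock(&ts_lock);
 *		cached_ts = now;
 *		write_sequnlock(&ts_lock);
 *	}
 *
 *	static u64 ts_read(void)
 *	{
 *		unsigned seq;
 *		u64 ts;
 *
 *		do {
 *			seq = read_seqbegin(&ts_lock);
 *			ts = cached_ts;
 *		} while (read_seqretry(&ts_lock, seq));
 *
 *		return ts;
 *	}
 */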
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_begin(&sl->seqcount);
	return flags;
}

#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}

/*
 * A locking reader exclusively locks out other writers and locking readers,
 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq : sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a lockless sequence reader (even) or a locking reader (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
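/*
 * Usage sketch for the optimistic/locking reader pattern (illustrative only;
 * walk_stuff() and do_walk() are hypothetical): the first pass runs as a
 * lockless sequence reader; if it must be retried, the sequence number is
 * forced odd so the second pass becomes a locking reader and is guaranteed
 * to complete.
 *
 *	static void walk_stuff(seqlock_t *lock)
 *	{
 *		int seq = 0;		<- must start even
 *
 *	again:
 *		read_seqbegin_or_lock(lock, &seq);
 *		do_walk();
 *		if (need_seqretry(lock, seq)) {
 *			seq = 1;	<- retry as a locking reader
 *			goto again;
 *		}
 *		done_seqretry(lock, seq);
 *	}
 */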
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */