#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
/*
 * Reader/writer consistent mechanism without starving writers. This type of
 * lock is for data where the reader wants a consistent set of information
 * and is willing to retry if the information changes. There are two types
 * of readers:
 * 1. Sequence readers which never block a writer but they may have to retry
 *    if a writer is in progress by detecting change in sequence number.
 *    Writers do not wait for a sequence reader.
 * 2. Locking readers which will wait if a writer or another locking reader
 *    is in progress. A locking reader in progress will also block a writer
 *    from going forward. Unlike the regular rwlock, the read lock here is
 *    exclusive so that only one locking reader can get it.
 *
 * This is not as cache friendly as brlock. Also, this may not work well
 * for data that contains pointers, because any writer could
 * invalidate a pointer that a reader was following.
 *
 * Expected non-blocking reader usage:
 *	do {
 *		seq = read_seqbegin(&foo);
 *		...
 *	} while (read_seqretry(&foo, seq));
 *
 * On non-SMP the spin locks disappear but the writer still needs
 * to increment the sequence variables because an interrupt routine could
 * change the state of the data.
 *
 * Based on x86_64 vsyscall gettimeofday
 * by Keith Owens and Andrea Arcangeli
 */

#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/compiler.h>
#include <asm/processor.h>

/*
 * Version using sequence counter only.
 * This can be used when code has its own mutex protecting the
 * updating starting before the write_seqcount_begin() and ending
 * after the write_seqcount_end().
 */
typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;

static inline void __seqcount_init(seqcount_t *s, const char *name,
					  struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
		.dep_map = { .name = #lockname } \

# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, 1, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}


/**
 * __read_seqcount_begin - begin a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	return ret;
}
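/*
 * Illustrative sketch (not part of this API): a caller that uses the
 * barrier-less __read_seqcount_begin()/__read_seqcount_retry() pair and
 * supplies the required smp_rmb() ordering itself. The seqcount foo_seq
 * and the variables foo_x/foo_y are hypothetical names used only for this
 * example.
 *
 *	static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
 *	static int foo_x, foo_y;
 *
 *	void foo_read(int *x, int *y)
 *	{
 *		unsigned seq;
 *
 *		do {
 *			seq = __read_seqcount_begin(&foo_seq);
 *			smp_rmb();	<- order the sequence load before the data loads
 *			*x = foo_x;
 *			*y = foo_y;
 *			smp_rmb();	<- order the data loads before the retry check
 *		} while (__read_seqcount_retry(&foo_seq, seq));
 *	}
 *
 * This is exactly the ordering that read_seqcount_begin() and
 * read_seqcount_retry() provide internally.
 */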

/**
 * raw_read_seqcount - Read the raw seqcount
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount without any lockdep checking and without checking or
 * masking the LSB. Calling code is responsible for handling that.
 */
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	return ret;
}

/**
 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount_begin opens a read critical section of the given
 * seqcount, but without any lockdep checking. Validity of the critical
 * section is tested by calling read_seqcount_retry().
 */
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_begin(s);
	smp_rmb();
	return ret;
}

/**
 * read_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * read_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_begin(s);
}

/**
 * raw_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by calling read_seqcount_retry().
 *
 * Unlike read_seqcount_begin(), this function will not wait for the count
 * to stabilize. If a writer is active when we begin, we will fail the
 * read_seqcount_retry() instead of stabilizing at the beginning of the
 * critical section.
 */
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	return ret & ~1;
}
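/*
 * Illustrative sketch (not part of this API): a reader that must not spin
 * waiting for a writer can open the section with raw_seqcount_begin() and
 * simply let read_seqcount_retry() fail if an update was in flight. The
 * names foo, foo_seq, foo_data and foo_try_snapshot below are hypothetical.
 *
 *	bool foo_try_snapshot(struct foo *foo, struct foo_data *snap)
 *	{
 *		unsigned seq = raw_seqcount_begin(&foo->foo_seq);
 *
 *		*snap = foo->foo_data;		<- speculative copy
 *		return !read_seqcount_retry(&foo->foo_seq, seq);
 *	}
 *
 * The caller decides what to do when false is returned (for example, fall
 * back to a locking reader) instead of looping in place.
 */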

/**
 * __read_seqcount_retry - end a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided between the loads of the protected variables and the call to
 * __read_seqcount_retry.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	return unlikely(s->sequence != start);
}

/**
 * read_seqcount_retry - end a seq-read critical section
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * read_seqcount_retry closes a read critical section of the given seqcount.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 */
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_retry(s, start);
}
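/*
 * Illustrative sketch (not part of this API): the usual pairing of a bare
 * sequence counter with the writer's own lock, here a hypothetical foo_lock
 * spinlock protecting foo_x/foo_y. Writers serialize on their lock and bump
 * the count; readers never block and simply retry.
 *
 *	static DEFINE_SPINLOCK(foo_lock);
 *	static seqcount_t foo_seq = SEQCNT_ZERO(foo_seq);
 *	static int foo_x, foo_y;
 *
 *	void foo_update(int x, int y)
 *	{
 *		spin_lock(&foo_lock);
 *		write_seqcount_begin(&foo_seq);
 *		foo_x = x;
 *		foo_y = y;
 *		write_seqcount_end(&foo_seq);
 *		spin_unlock(&foo_lock);
 *	}
 *
 *	void foo_get(int *x, int *y)
 *	{
 *		unsigned seq;
 *
 *		do {
 *			seq = read_seqcount_begin(&foo_seq);
 *			*x = foo_x;
 *			*y = foo_y;
 *		} while (read_seqcount_retry(&foo_seq, seq));
 *	}
 */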

/*
 * raw_write_seqcount_begin/raw_write_seqcount_end open and close a write
 * section without lockdep annotations; the caller must provide its own
 * serialization against other writers.
 */
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	s->sequence++;	/* odd: readers will retry */
	smp_wmb();
}

static inline void raw_write_seqcount_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;	/* even again: readers may complete */
}

/**
 * raw_write_seqcount_barrier - do a seq write barrier
 * @s: pointer to seqcount_t
 *
 * This can be used to provide an ordering guarantee instead of the
 * usual consistency guarantee. It is one wmb cheaper, because we can
 * collapse the two back-to-back wmb()s.
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		Y = true;
 *
 *		raw_write_seqcount_barrier(&seq);
 *
 *		X = false;
 *	}
 */
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
	s->sequence++;
	smp_wmb();
	s->sequence++;
}

static inline int raw_read_seqcount_latch(seqcount_t *s)
{
	int seq = READ_ONCE(s->sequence);
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	smp_read_barrier_depends();
	return seq;
}

/**
 * raw_write_seqcount_latch - redirect readers to even/odd copy
 * @s: pointer to seqcount_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state the
 * latch allows the same for non-atomic updates. The trade-off is doubling the
 * cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like:
 *
 * struct latch_struct {
 *	seqcount_t		seq;
 *	struct data_struct	data[2];
 * };
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following:
 *
 * void latch_modify(struct latch_struct *latch, ...)
 * {
 *	smp_wmb();	<- Ensure that the last data[1] update is visible
 *	latch->seq++;
 *	smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *	modify(latch->data[0], ...);
 *
 *	smp_wmb();	<- Ensure that the data[0] update is visible
 *	latch->seq++;
 *	smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *	modify(latch->data[1], ...);
 * }
 *
 * The query will have a form like:
 *
 * struct entry *latch_query(struct latch_struct *latch, ...)
 * {
 *	struct entry *entry;
 *	unsigned seq, idx;
 *
 *	do {
 *		seq = raw_read_seqcount_latch(&latch->seq);
 *
 *		idx = seq & 0x01;
 *		entry = data_query(latch->data[idx], ...);
 *
 *		smp_rmb();
 *	} while (seq != latch->seq);
 *
 *	return entry;
 * }
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE: The non-requirement for atomic modifications does _NOT_ include
 *       the publishing of new entries in the case where data is a dynamic
 *       data structure.
 *
 *       An iteration might start in data[0] and get suspended long enough
 *       to miss an entire modification sequence, once it resumes it might
 *       observe the new entry.
 *
 * NOTE: When data is a dynamic data structure, one should use regular RCU
 *       patterns to manage the lifetimes of the objects within.
 */
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

/*
 * Sequence counter only version assumes that callers are using their
 * own mutexing.
 */
static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

static inline void write_seqcount_begin(seqcount_t *s)
{
	write_seqcount_begin_nested(s, 0);
}

static inline void write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, 1, _RET_IP_);
	raw_write_seqcount_end(s);
}

/**
 * write_seqcount_invalidate - invalidate in-progress read-side seq operations
 * @s: pointer to seqcount_t
 *
 * After write_seqcount_invalidate, no read-side seq operations will complete
 * successfully and see data older than this.
 */
static inline void write_seqcount_invalidate(seqcount_t *s)
{
	smp_wmb();
	s->sequence += 2;
}

typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

/*
 * These macros triggered gcc-3.x compile-time problems. We think these are
 * OK now. Be cautious.
 */
#define __SEQLOCK_UNLOCKED(lockname)			\
	{						\
		.seqcount = SEQCNT_ZERO(lockname),	\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
	}

#define seqlock_init(x)					\
	do {						\
		seqcount_init(&(x)->seqcount);		\
		spin_lock_init(&(x)->lock);		\
	} while (0)

#define DEFINE_SEQLOCK(x) \
		seqlock_t x = __SEQLOCK_UNLOCKED(x)
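/*
 * Illustrative sketch (not part of this API): a seqlock_t bundles the
 * counter with its spinlock, so writers need no separate lock of their own.
 * The names shadow_lock, shadow_time and the two helpers are hypothetical.
 *
 *	static DEFINE_SEQLOCK(shadow_lock);
 *	static struct timespec64 shadow_time;
 *
 *	void shadow_time_update(const struct timespec64 *now)
 *	{
 *		write_seqlock(&shadow_lock);	<- takes the spinlock, bumps the count
 *		shadow_time = *now;
 *		write_sequnlock(&shadow_lock);
 *	}
 *
 *	struct timespec64 shadow_time_read(void)
 *	{
 *		struct timespec64 ts;
 *		unsigned seq;
 *
 *		do {
 *			seq = read_seqbegin(&shadow_lock);
 *			ts = shadow_time;
 *		} while (read_seqretry(&shadow_lock, seq));
 *
 *		return ts;
 *	}
 */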

/*
 * Read side functions for starting and finalizing a read side section.
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	return read_seqcount_begin(&sl->seqcount);
}

static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	return read_seqcount_retry(&sl->seqcount, start);
}

/*
 * Lock out other writers and update the count.
 * Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}

static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_begin(&sl->seqcount);
	return flags;
}

#define write_seqlock_irqsave(lock, flags)			\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}

/*
 * A locking reader exclusively locks out other writers and locking readers,
 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq: sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a sequence reader (even) or a locking reader (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
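/*
 * Illustrative sketch (not part of this API): the usual shape of a
 * read_seqbegin_or_lock() user. The first pass runs lockless; if
 * need_seqretry() reports a concurrent update, the second pass takes the
 * lock by flipping seq to an odd value. The function foo_walk() and the
 * lock foo_lock are hypothetical.
 *
 *	void foo_walk(seqlock_t *foo_lock)
 *	{
 *		int seq = 0;			<- must start even
 *	again:
 *		read_seqbegin_or_lock(foo_lock, &seq);
 *
 *		... walk the data structure ...
 *
 *		if (need_seqretry(foo_lock, seq)) {
 *			seq = 1;		<- go odd: retry as a locking reader
 *			goto again;
 *		}
 *		done_seqretry(foo_lock, seq);
 *	}
 */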

static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

#define read_seqlock_excl_irqsave(lock, flags)			\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */