linux/arch/blackfin/mach-bf561/atomic.S
/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
        \reg\().l = _corelock;
        \reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
        cli \scratch;
        testset (\addr);
        sti \scratch;
#else
        testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock;
        SSYNC(r2);
        jump .Lretry_corelock
.Ldone_corelock:
        p0 = r1;
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock_noflush:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock_noflush;
        SSYNC(r2);
        jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
        rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
        /* Write-through cache assumed, so no flush needed here. */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;
        SSYNC(r2);
        sti r0;
        rts;
ENDPROC(_put_core_lock)
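
/*
 * For reference, a rough C-level sketch of the two helpers above (not
 * compiled; cli/sti/testset/ssync stand in for the corresponding
 * instructions and __flushinv_line is a made-up helper standing for
 * the flushinv of the cache line holding the atomic data):
 *
 *      unsigned long get_core_lock(void *atomic_data)
 *      {
 *              unsigned long flags = cli();
 *              while (!testset(&_corelock))
 *                      ssync();
 *              __flushinv_line(atomic_data);
 *              ssync();
 *              return flags;   // IRQs stay masked until put_core_lock
 *      }
 *
 *      void put_core_lock(unsigned long flags, void *atomic_data)
 *      {
 *              _corelock = 0;  // write-through cache, no flush needed
 *              ssync();
 *              sti(flags);
 *      }
 */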

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate the current core's mask.
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Set the bits of the other cores in the barrier mask.
         * Don't change the current core's bit.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        r7 = ~r6;
        cc = r5 == 0;
        if cc jump 1f;
        r7 = r7 | r6;
1:
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate the current core's mask.
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Clear the current core's bit in the barrier mask if it is set.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        cc = r5 == 0;
        if cc jump 1f;
        r6 = ~r6;
        r7 = r7 & r6;
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;

        /*
         * Invalidate the entire D-cache of the current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;
        jump 2f;
1:
        call _put_core_lock;
2:
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_check_barrier_asm)
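
/*
 * Rough C outline of the two barrier helpers above (illustrative only,
 * not compiled; cpuid() stands for GET_CPUID and the lock helpers are
 * the sketches shown earlier). Marking sets the bits of all other
 * cores in _barrier_mask; checking clears our own bit and, if it was
 * set, resynchronizes this core's entire D-cache.
 *
 *      void smp_mark_barrier(void)
 *      {
 *              unsigned long flags = get_core_lock_noflush();
 *              unsigned long me = 1UL << cpuid();
 *
 *              if (barrier_mask & me)
 *                      barrier_mask = ~0UL;    // our bit stays set
 *              else
 *                      barrier_mask = ~me;     // set everyone else's bit
 *              put_core_lock(flags, &barrier_mask);
 *      }
 *
 *      void smp_check_barrier(void)
 *      {
 *              unsigned long flags = get_core_lock_noflush();
 *              unsigned long me = 1UL << cpuid();
 *
 *              if (barrier_mask & me) {
 *                      barrier_mask &= ~me;
 *                      put_core_lock(flags, &barrier_mask);
 *                      resync_core_dcache();
 *              } else {
 *                      put_core_lock(flags, &barrier_mask);
 *              }
 *      }
 */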

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

        [--sp] = rets;
        [--sp] = ( r7:6 );
        r7 = r0;
        p1 = r1;

        /*
         * Determine whether the atomic data was previously
         * owned by another CPU (result left in r6).
         */
        GET_CPUID(p0, r2);
        r1 = 1;
        r1 <<= r2;
        r2 = ~r1;

        r1 = [p1];
        r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
        r6 = r1 & r2;
        r1 = [p1];
        r1 <<= 4;
        r1 >>= 4;    /* Clear the fingerprint nibble before storing back. */
        [p1] = r1;

        /*
         * Release the core lock now, but keep IRQs disabled while we are
         * performing the remaining housekeeping chores for the current CPU.
         */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;

        /*
         * If another CPU has owned the same atomic section before us,
         * then our D-cached copy of the shared data protected by the
         * current spin/write_lock may be obsolete.
         */
        cc = r6 == 0;
        if cc jump .Lcache_synced

        /*
         * Invalidate the entire D-cache of the current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;

.Lcache_synced:
        SSYNC(r2);
        sti r7;
        ( r7:6 ) = [sp++];
        rets = [sp++];
        rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

        p1 = r1;
        GET_CPUID(p0, r2);
        r2 += 28;    /* Our fingerprint bit is bit 28 + cpuid. */
        r1 = 1;
        r1 <<= r2;
        r2 = [p1];
        r2 = r1 | r2;
        [p1] = r2;
        r1 = p1;
        jump _put_core_lock;
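
/*
 * The two helpers above keep per-CPU "fingerprints" in the high nibble
 * (bits 31:28) of the lock word: _end_lock_coherent leaves the
 * unlocking core's bit there, and _start_lock_coherent checks whether
 * another core's bit is present, in which case this core's D-cached
 * copy of the data protected by the lock may be stale and the whole
 * D-cache is resynced. Rough C (illustrative only, not compiled):
 *
 *      void start_lock_coherent(unsigned long flags, unsigned long *lock)
 *      {
 *              unsigned long others = (*lock >> 28) & ~(1UL << cpuid());
 *
 *              *lock &= 0x0fffffff;    // drop all fingerprints
 *              _corelock = 0;          // release core lock, IRQs still off
 *              if (others)
 *                      resync_core_dcache();
 *              ssync();
 *              sti(flags);
 *      }
 *
 *      void end_lock_coherent(unsigned long flags, unsigned long *lock)
 *      {
 *              *lock |= 1UL << (28 + cpuid());
 *              put_core_lock(flags, lock);
 *      }
 */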

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r3 = [p1];
        cc = bittst( r3, 0 );
        r3 = cc;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = r3;
        rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
        p1 = r0;
        [--sp] = rets;
.Lretry_spinlock:
        call _get_core_lock;
        r1 = p1;
        r2 = [p1];
        cc = bittst( r2, 0 );
        if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
        r3 = p1;
        bitset ( r2, 0 ); /* Raise the lock bit. */
        [p1] = r2;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lbusy_spinlock:
        /* We don't touch the atomic area if busy, so that the flush
           behaves like a nop in _put_core_lock. */
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = p1;
        r3 = [p1];
        cc = bittst( r3, 0 );
        if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
        bitset ( r3, 0 ); /* Raise the lock bit. */
        [p1] = r3;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        r0 = 1;
        rets = [sp++];
        rts;
.Lfailed_trylock:
        call _put_core_lock;
        r0 = 0;
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        bitclr ( r2, 0 );
        [p1] = r2;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_unlock_asm)
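
/*
 * Taken together, the spinlock entries above reduce to roughly the
 * following under the core lock (illustrative C, not compiled; on
 * __ARCH_SYNC_CORE_DCACHE builds lock/unlock additionally go through
 * _start_lock_coherent/_end_lock_coherent and use bitset/bitclr so
 * the CPU fingerprint nibble is preserved):
 *
 *      void raw_spin_lock(unsigned long *lock)
 *      {
 *              unsigned long flags;
 *
 *              for (;;) {
 *                      flags = get_core_lock(lock);
 *                      if (!(*lock & 1)) {     // bit 0 is the lock bit
 *                              *lock = 1;
 *                              put_core_lock(flags, lock);
 *                              return;
 *                      }
 *                      put_core_lock(flags, lock);
 *              }
 *      }
 *
 *      void raw_spin_unlock(unsigned long *lock)
 *      {
 *              unsigned long flags = get_core_lock(lock);
 *              *lock &= ~1UL;
 *              put_core_lock(flags, lock);
 *      }
 */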

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
.Lrdlock_try:
        r1 = [p1];
        r1 += -1;
        [p1] = r1;
        cc = r1 < 0;
        if cc jump .Lrdlock_failed
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lrdlock_failed:
        r1 += 1;
        [p1] = r1;
.Lrdlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 < 2;
        if cc jump .Lrdlock_wait;
        jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 <= 0;
        if cc jump .Lfailed_tryrdlock;
        r1 += -1;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;
.Lfailed_tryrdlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: processing under a reader lock should not have any side effect
 * on cache issues for the other core, so we just release the core lock
 * and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 += 1;
        [p1] = r1;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        rts;
ENDPROC(___raw_read_unlock_asm)
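
/*
 * The reader entries above use the usual biased counter: the lock word
 * starts at RW_LOCK_BIAS and each reader decrements it, so a
 * non-positive value means a writer owns the lock. Rough C
 * (illustrative only; the real wait loop spins until the counter is at
 * least 2 before retrying the decrement):
 *
 *      void raw_read_lock(int *lock)
 *      {
 *              unsigned long flags = get_core_lock(lock);
 *
 *              while (*lock <= 0) {
 *                      put_core_lock(flags, lock);
 *                      flags = get_core_lock(lock);
 *              }
 *              --*lock;
 *              put_core_lock(flags, lock);
 *      }
 *
 *      void raw_read_unlock(int *lock)
 *      {
 *              unsigned long flags = get_core_lock(lock);
 *              ++*lock;
 *              put_core_lock(flags, lock);
 *      }
 */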

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
.Lwrlock_try:
        r1 = [p1];
        r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r2 = r1;
        r2 <<= 4;
        r2 >>= 4;    /* Ignore the CPU fingerprint nibble. */
        cc = r2 == 0;
#else
        cc = r1 == 0;
#endif
        if !cc jump .Lwrlock_wait
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lwrlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 <<= 4;
        r1 >>= 4;
#endif
        cc = r1 == r3;
        if !cc jump .Lwrlock_wait;
        jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r2.l = lo(RW_LOCK_BIAS);
        r2.h = hi(RW_LOCK_BIAS);
        cc = r1 == r2;
        if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 >>= 28;
        r1 <<= 28;   /* Keep only the CPU fingerprint nibble. */
#else
        r1 = 0;
#endif
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;

.Lfailed_trywrlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 = r1 + r3;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_write_unlock_asm)
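
/*
 * The writer side claims the whole RW_LOCK_BIAS: the lock word equals
 * the bias only while no reader or writer holds the lock. Rough C
 * (illustrative only, not compiled; the __ARCH_SYNC_CORE_DCACHE build
 * additionally masks off the fingerprint nibble, bits 31:28, before
 * the comparisons and routes through the coherency helpers):
 *
 *      void raw_write_lock(unsigned long *lock)
 *      {
 *              unsigned long flags = get_core_lock(lock);
 *
 *              while (*lock != RW_LOCK_BIAS) {
 *                      put_core_lock(flags, lock);
 *                      flags = get_core_lock(lock);
 *              }
 *              *lock -= RW_LOCK_BIAS;  // now 0: readers and writers back off
 *              put_core_lock(flags, lock);
 *      }
 *
 *      void raw_write_unlock(unsigned long *lock)
 *      {
 *              unsigned long flags = get_core_lock(lock);
 *              *lock += RW_LOCK_BIAS;
 *              put_core_lock(flags, lock);
 *      }
 */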

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r3 + r2;
        [p1] = r3;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_update_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
        p1 = r0;
        r3 = ~r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 & r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 | r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 ^ r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_xor_asm)
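
/*
 * The four read-modify-write entries above all follow the same pattern
 * under the core lock; in rough C terms (illustrative only):
 *
 *      int raw_atomic_update(int *ptr, int value)      // returns new value
 *      {
 *              unsigned long flags = get_core_lock(ptr);
 *              int new = *ptr + value;
 *              *ptr = new;
 *              put_core_lock(flags, ptr);
 *              return new;
 *      }
 *
 *      int raw_atomic_clear(int *ptr, int mask)        // returns old value
 *      {
 *              unsigned long flags = get_core_lock(ptr);
 *              int old = *ptr;
 *              *ptr = old & ~mask;
 *              put_core_lock(flags, ptr);
 *              return old;
 *      }
 *
 * ___raw_atomic_set_asm and ___raw_atomic_xor_asm differ from the
 * clear case only in the operation applied (old | mask, old ^ mask).
 */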

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
        p1 = r0;
        r3 = r1;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        r0 = r0 & r3;
        rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst)             \
        p1 = r0;                        \
        r3 = r1;                        \
        [--sp] = rets;                  \
        call _get_core_lock;            \
        r2 = src;                       \
        dst = r3;                       \
        r3 = r2;                        \
        r1 = p1;                        \
        call _put_core_lock;            \
        r0 = r3;                        \
        rets = [sp++];                  \
        rts;

ENTRY(___raw_xchg_1_asm)
        __do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
        __do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
        __do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
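
/*
 * Each __do_xchg expansion above is the 8-, 16- or 32-bit wide version
 * of the following (illustrative C for the 32-bit case, not compiled):
 *
 *      unsigned long raw_xchg_4(volatile unsigned long *ptr,
 *                               unsigned long value)
 *      {
 *              unsigned long flags = get_core_lock((void *)ptr);
 *              unsigned long old = *ptr;
 *
 *              *ptr = value;
 *              put_core_lock(flags, (void *)ptr);
 *              return old;
 *      }
 */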

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst)          \
        [--sp] = rets;                  \
        [--sp] = r4;                    \
        p1 = r0;                        \
        r3 = r1;                        \
        r4 = r2;                        \
        call _get_core_lock;            \
        r2 = src;                       \
        cc = r2 == r4;                  \
        if !cc jump 1f;                 \
        dst = r3;                       \
     1: r3 = r2;                        \
        r1 = p1;                        \
        call _put_core_lock;            \
        r0 = r3;                        \
        r4 = [sp++];                    \
        rets = [sp++];                  \
        rts;

ENTRY(___raw_cmpxchg_1_asm)
        __do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
        __do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
        __do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
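
/*
 * Likewise for __do_cmpxchg; note the argument order (r1 = new,
 * r2 = old). Illustrative C for the 32-bit case, not compiled:
 *
 *      unsigned long raw_cmpxchg_4(volatile unsigned long *ptr,
 *                                  unsigned long new, unsigned long old)
 *      {
 *              unsigned long flags = get_core_lock((void *)ptr);
 *              unsigned long prev = *ptr;
 *
 *              if (prev == old)
 *                      *ptr = new;
 *              put_core_lock(flags, (void *)ptr);
 *              return prev;
 *      }
 */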

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)
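
/*
 * The three helpers above just turn the bit number into a mask and
 * tail-call the matching atomic mask operation, i.e. roughly
 * (illustrative C):
 *
 *      unsigned long raw_bit_set(unsigned long *ptr, int bitnr)
 *      {
 *              return raw_atomic_set(ptr, 1UL << bitnr);  // old word value
 *      }
 */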

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_set_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_clear_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_toggle_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_toggle_asm)
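
/*
 * The three test-and-modify helpers above reduce the old word value
 * returned by the plain bit helpers to the old state of the requested
 * bit, e.g. (illustrative C):
 *
 *      int raw_bit_test_set(unsigned long *ptr, int bitnr)
 *      {
 *              return (raw_bit_set(ptr, bitnr) >> bitnr) & 1;
 *      }
 */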

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
        p1 = r0;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        rts;
ENDPROC(___raw_uncached_fetch_asm)
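
/*
 * ___raw_atomic_test_asm, ___raw_bit_test_asm and the fetch above do
 * not take the core lock at all: they flush and invalidate the cache
 * line holding the word and then load it, so the read observes the
 * other core's latest write-through data. Roughly, in C (the cache
 * helper name is made up for this sketch):
 *
 *      unsigned long raw_uncached_fetch(const unsigned long *ptr)
 *      {
 *              __flushinv_line(ptr);
 *              ssync();
 *              return *ptr;
 *      }
 */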