linux/arch/blackfin/mach-bf561/atomic.S
/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

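/* Load the 32bit address of the shared _corelock word into \reg. */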
.macro coreslot_loadaddr reg:req
        \reg\().l = _corelock;
        \reg\().h = _corelock;
.endm

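/*
 * Issue TESTSET on \addr. When anomaly 05000477 applies, interrupts are
 * masked around the instruction, with \scratch holding the saved mask.
 */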
.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
        cli \scratch;
        testset (\addr);
        sti \scratch;
#else
        testset (\addr);
#endif
.endm

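/*
 * All primitives in this file are serialized across both cores with a
 * single TESTSET-based lock word (_corelock). A rough C sketch of the
 * acquire/release pair implemented by _get_core_lock/_put_core_lock
 * (sketch only, helper names are illustrative):
 *
 *      flags = cli();                  // mask IRQs, keep the old mask
 *      while (!testset(&corelock))     // spin until we own the slot
 *              ssync();
 *      flushinv(cacheline(data));      // drop our stale D-cached copy
 *      ... critical section ...
 *      corelock = 0;                   // release the slot
 *      sti(flags);                     // restore the interrupt mask
 */
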
/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock;
        SSYNC(r2);
        jump .Lretry_corelock
.Ldone_corelock:
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
        cli r0;
        coreslot_loadaddr p0;
.Lretry_corelock_noflush:
        safe_testset p0, r2;
        if cc jump .Ldone_corelock_noflush;
        SSYNC(r2);
        jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
        /*
         * SMP kgdb runs into a dead loop without this NOP when one core
         * single-steps over get_core_lock_noflush and the other executes
         * get_core_lock as a slave node.
         */
        nop;
        CSYNC(r2);
        rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
        /* Write-through cache assumed, so no flush needed here. */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;
        SSYNC(r2);
        sti r0;
        rts;
ENDPROC(_put_core_lock)

#ifdef __ARCH_SYNC_CORE_DCACHE

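/*
 * Barrier bookkeeping for the lazy D-cache coherency scheme. A rough C
 * sketch of the two routines below (sketch only), both running with the
 * core lock held and IRQs masked:
 *
 *      smp_mark_barrier:                       // this core issued a barrier
 *              barrier_mask |= ~(1 << cpu);    // flag every other core
 *
 *      smp_check_barrier:
 *              if (barrier_mask & (1 << cpu)) {
 *                      barrier_mask &= ~(1 << cpu);
 *                      resync_core_dcache();   // our D-cache may be stale
 *              }
 */
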
ENTRY(___raw_smp_mark_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate current core mask
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Set the bits of the other cores in the barrier mask; don't
         * change the current core's bit.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        r7 = ~r6;
        cc = r5 == 0;
        if cc jump 1f;
        r7 = r7 | r6;
1:
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
        [--sp] = rets;
        [--sp] = ( r7:5 );
        [--sp] = r0;
        [--sp] = p1;
        [--sp] = p0;
        call _get_core_lock_noflush;

        /*
         * Calculate current core mask
         */
        GET_CPUID(p1, r7);
        r6 = 1;
        r6 <<= r7;

        /*
         * Clear the current core's bit in the barrier mask if it is set.
         */
        p1.l = _barrier_mask;
        p1.h = _barrier_mask;
        r7 = [p1];
        r5 = r7 & r6;
        cc = r5 == 0;
        if cc jump 1f;
        r6 = ~r6;
        r7 = r7 & r6;
        [p1] = r7;
        SSYNC(r2);

        call _put_core_lock;

        /*
         * Invalidate the entire D-cache of the current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;
        jump 2f;
1:
        call _put_core_lock;
2:
        p0 = [sp++];
        p1 = [sp++];
        r0 = [sp++];
        ( r7:5 ) = [sp++];
        rets = [sp++];
        rts;
ENDPROC(___raw_smp_check_barrier_asm)

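/*
 * Coherent lock entry/exit helpers. The top nibble of a lock word holds
 * per-core "fingerprints": a core sets its bit when it releases the lock,
 * and the next locker resyncs its D-cache if the previous owner was the
 * other core. Rough C sketch (sketch only, helper names are illustrative):
 *
 *      start_lock_coherent(lock):
 *              others = (*lock >> 28) & ~(1 << cpu);
 *              *lock &= 0x0fffffff;            // clear the fingerprints
 *              corelock = 0;                   // drop core lock, IRQs stay off
 *              if (others)
 *                      resync_core_dcache();   // previous owner was the other core
 *              sti(flags);                     // IRQs back on
 *
 *      end_lock_coherent(lock):
 *              *lock |= 1 << (28 + cpu);       // leave our fingerprint
 *              put_core_lock(flags, lock);
 */
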
/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

        [--sp] = rets;
        [--sp] = ( r7:6 );
        r7 = r0;
        p1 = r1;

        /*
         * Determine whether the atomic data was previously
         * owned by another CPU (=r6).
         */
        GET_CPUID(p0, r2);
        r1 = 1;
        r1 <<= r2;
        r2 = ~r1;

        r1 = [p1];
        r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
        r6 = r1 & r2;
        r1 = [p1];
        r1 <<= 4;
        r1 >>= 4;
        [p1] = r1;

        /*
         * Release the core lock now, but keep IRQs disabled while we are
         * performing the remaining housekeeping chores for the current CPU.
         */
        coreslot_loadaddr p0;
        r1 = 0;
        [p0] = r1;

        /*
         * If another CPU has owned the same atomic section before us,
         * then our D-cached copy of the shared data protected by the
         * current spin/write_lock may be obsolete.
         */
        cc = r6 == 0;
        if cc jump .Lcache_synced

        /*
         * Invalidate the entire D-cache of the current core.
         */
        sp += -12;
        call _resync_core_dcache
        sp += 12;

.Lcache_synced:
        SSYNC(r2);
        sti r7;
        ( r7:6 ) = [sp++];
        rets = [sp++];
        rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

        p1 = r1;
        GET_CPUID(p0, r2);
        r2 += 28;
        r1 = 1;
        r1 <<= r2;
        r2 = [p1];
        r2 = r1 | r2;
        [p1] = r2;
        r1 = p1;
        jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r3 = [p1];
        cc = bittst( r3, 0 );
        r3 = cc;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = r3;
        rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
        p1 = r0;
        [--sp] = rets;
.Lretry_spinlock:
        call _get_core_lock;
        r1 = p1;
        r2 = [p1];
        cc = bittst( r2, 0 );
        if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
        r3 = p1;
        bitset ( r2, 0 ); /* Raise the lock bit. */
        [p1] = r2;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lbusy_spinlock:
        /* We don't touch the atomic area if busy, so that flush
           will behave like nop in _put_core_lock. */
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
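/*
 * Rough C equivalent of ___raw_spin_lock_asm above (sketch only):
 *
 *      for (;;) {
 *              flags = get_core_lock(&lock->lock);
 *              if (!(lock->lock & 1)) {        // bit 0 is the lock bit
 *                      lock->lock |= 1;
 *                      // _start_lock_coherent, or _put_core_lock when
 *                      // __ARCH_SYNC_CORE_DCACHE is not defined
 *                      start_lock_coherent(flags, &lock->lock);
 *                      return;
 *              }
 *              put_core_lock(flags, &lock->lock);
 *      }
 */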

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = p1;
        r3 = [p1];
        cc = bittst( r3, 0 );
        if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
        bitset ( r3, 0 ); /* Raise the lock bit. */
        [p1] = r3;
        call _start_lock_coherent
#else
        r2 = 1;
        [p1] = r2;
        call _put_core_lock;
#endif
        r0 = 1;
        rets = [sp++];
        rts;
.Lfailed_trylock:
        call _put_core_lock;
        r0 = 0;
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        bitclr ( r2, 0 );
        [p1] = r2;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_spin_unlock_asm)

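/*
 * The rwlock primitives below use a biased counter: an unlocked rwlock
 * holds RW_LOCK_BIAS (the top nibble again carries the coherency
 * fingerprints). Each reader decrements the counter and backs off if the
 * result goes negative (a writer holds the lock); a writer subtracts the
 * full bias and only proceeds when the remainder is zero (no readers, no
 * writer). Rough C sketch (sketch only):
 *
 *      read_lock:      if (--count < 0) { ++count; wait and retry; }
 *      read_unlock:    ++count;
 *      write_lock:     while (count != RW_LOCK_BIAS) wait; count -= RW_LOCK_BIAS;
 *      write_unlock:   count += RW_LOCK_BIAS;
 */
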
/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
.Lrdlock_try:
        r1 = [p1];
        r1 += -1;
        [p1] = r1;
        cc = r1 < 0;
        if cc jump .Lrdlock_failed
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lrdlock_failed:
        r1 += 1;
        [p1] = r1;
.Lrdlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 < 2;
        if cc jump .Lrdlock_wait;
        jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        cc = r1 <= 0;
        if cc jump .Lfailed_tryrdlock;
        r1 += -1;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;
.Lfailed_tryrdlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: processing done under a reader lock should not modify the
 * shared data, so it cannot leave the other core's D-cache stale;
 * we just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 += 1;
        [p1] = r1;
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
.Lwrlock_try:
        r1 = [p1];
        r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r2 = r1;
        r2 <<= 4;
        r2 >>= 4;
        cc = r2 == 0;
#else
        cc = r1 == 0;
#endif
        if !cc jump .Lwrlock_wait
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;

.Lwrlock_wait:
        r1 = p1;
        call _put_core_lock;
        SSYNC(r2);
        r0 = p1;
        call _get_core_lock;
        r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 <<= 4;
        r1 >>= 4;
#endif
        cc = r1 == r3;
        if !cc jump .Lwrlock_wait;
        jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
        p1 = r0;
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r2.l = lo(RW_LOCK_BIAS);
        r2.h = hi(RW_LOCK_BIAS);
        cc = r1 == r2;
        if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
        r1 >>= 28;
        r1 <<= 28;
#else
        r1 = 0;
#endif
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _start_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        r0 = 1;
        rts;

.Lfailed_trywrlock:
        r1 = p1;
        call _put_core_lock;
        rets = [sp++];
        r0 = 0;
        rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
        p1 = r0;
        r3.l = lo(RW_LOCK_BIAS);
        r3.h = hi(RW_LOCK_BIAS);
        [--sp] = rets;
        call _get_core_lock;
        r1 = [p1];
        r1 = r1 + r3;
        [p1] = r1;
        r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
        call _end_lock_coherent
#else
        call _put_core_lock;
#endif
        rets = [sp++];
        rts;
ENDPROC(___raw_write_unlock_asm)

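/*
 * The ___raw_atomic_*_asm helpers below all follow the same pattern,
 * roughly (sketch only):
 *
 *      flags = get_core_lock(ptr);     // mask IRQs, take corelock, flushinv
 *      old = *ptr;
 *      *ptr = old OP operand;          // +, & ~, |, ^ depending on the helper
 *      put_core_lock(flags, ptr);
 *      return result;                  // new value for update, old value otherwise
 */
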
/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r3 + r2;
        [p1] = r3;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_update_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
        p1 = r0;
        r3 = ~r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 & r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 | r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
        p1 = r0;
        r3 = r1;
        [--sp] = rets;
        call _get_core_lock;
        r2 = [p1];
        r3 = r2 ^ r3;
        [p1] = r3;
        r3 = r2;
        r1 = p1;
        call _put_core_lock;
        r0 = r3;
        rets = [sp++];
        rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
        p1 = r0;
        r3 = r1;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        r0 = r0 & r3;
        rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
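/*
 * Rough C equivalent of the __do_xchg() expansion below (sketch only):
 *
 *      flags = get_core_lock(ptr);
 *      old = *ptr;
 *      *ptr = value;
 *      put_core_lock(flags, ptr);
 *      return old;
 */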
#define __do_xchg(src, dst)             \
        p1 = r0;                        \
        r3 = r1;                        \
        [--sp] = rets;                  \
        call _get_core_lock;            \
        r2 = src;                       \
        dst = r3;                       \
        r3 = r2;                        \
        r1 = p1;                        \
        call _put_core_lock;            \
        r0 = r3;                        \
        rets = [sp++];                  \
        rts;

ENTRY(___raw_xchg_1_asm)
        __do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
        __do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
        __do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
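/*
 * Rough C equivalent of the __do_cmpxchg() expansion below (sketch only):
 *
 *      flags = get_core_lock(ptr);
 *      prev = *ptr;
 *      if (prev == old)
 *              *ptr = new;
 *      put_core_lock(flags, ptr);
 *      return prev;
 */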
#define __do_cmpxchg(src, dst)          \
        [--sp] = rets;                  \
        [--sp] = r4;                    \
        p1 = r0;                        \
        r3 = r1;                        \
        r4 = r2;                        \
        call _get_core_lock;            \
        r2 = src;                       \
        cc = r2 == r4;                  \
        if !cc jump 1f;                 \
        dst = r3;                       \
     1: r3 = r2;                        \
        r1 = p1;                        \
        call _put_core_lock;            \
        r0 = r3;                        \
        r4 = [sp++];                    \
        rets = [sp++];                  \
        rts;

ENTRY(___raw_cmpxchg_1_asm)
        __do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
        __do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
        __do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
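/*
 * Rough C equivalent of the three test-and-modify helpers below (sketch
 * only; ___raw_bit_set/clear/toggle_asm return the old 32bit word):
 *
 *      old = ___raw_bit_set_asm(ptr, bitnr);   // or _clear_/_toggle_
 *      return (old & (1 << bitnr)) ? 1 : 0;
 */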
ENTRY(___raw_bit_test_set_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_set_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_clear_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
        [--sp] = rets;
        [--sp] = r1;
        call ___raw_bit_toggle_asm
        r1 = [sp++];
        r2 = 1;
        r2 <<= r1;
        r0 = r0 & r2;
        cc = r0 == 0;
        if cc jump 1f
        r0 = 1;
1:
        rets = [sp++];
        rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
        r2 = r1;
        r1 = 1;
        r1 <<= r2;
        jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
        p1 = r0;
        r1 = -L1_CACHE_BYTES;
        r1 = r0 & r1;
        p0 = r1;
        /* flush core internal write buffer before invalidate dcache */
        CSYNC(r2);
        flushinv[p0];
        SSYNC(r2);
        r0 = [p1];
        rts;
ENDPROC(___raw_uncached_fetch_asm)