Diffstat (limited to 'arch/blackfin/mach-bf561/atomic.S')
-rw-r--r-- | arch/blackfin/mach-bf561/atomic.S | 945 |
1 file changed, 0 insertions, 945 deletions
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
deleted file mode 100644
index 1e2989c5d6b2..000000000000
--- a/arch/blackfin/mach-bf561/atomic.S
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Copyright 2007-2008 Analog Devices Inc.
- *              Philippe Gerum <rpm@xenomai.org>
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/linkage.h>
-#include <asm/blackfin.h>
-#include <asm/cache.h>
-#include <asm/asm-offsets.h>
-#include <asm/rwlock.h>
-#include <asm/cplb.h>
-
-.text
-
-.macro coreslot_loadaddr reg:req
-	\reg\().l = _corelock;
-	\reg\().h = _corelock;
-.endm
-
-.macro safe_testset addr:req, scratch:req
-#if ANOMALY_05000477
-	cli \scratch;
-	testset (\addr);
-	sti \scratch;
-#else
-	testset (\addr);
-#endif
-.endm
-
-/*
- * r0 = address of atomic data to flush and invalidate (32bit).
- *
- * Clear interrupts and return the old mask.
- * We assume that no atomic data can span cachelines.
- *
- * Clobbers: r2:0, p0
- */
-ENTRY(_get_core_lock)
-	r1 = -L1_CACHE_BYTES;
-	r1 = r0 & r1;
-	cli r0;
-	coreslot_loadaddr p0;
-.Lretry_corelock:
-	safe_testset p0, r2;
-	if cc jump .Ldone_corelock;
-	SSYNC(r2);
-	jump .Lretry_corelock
-.Ldone_corelock:
-	p0 = r1;
-	/* flush core internal write buffer before invalidate dcache */
-	CSYNC(r2);
-	flushinv[p0];
-	SSYNC(r2);
-	rts;
-ENDPROC(_get_core_lock)
-
-/*
- * r0 = address of atomic data in uncacheable memory region (32bit).
- *
- * Clear interrupts and return the old mask.
- *
- * Clobbers: r0, p0
- */
-ENTRY(_get_core_lock_noflush)
-	cli r0;
-	coreslot_loadaddr p0;
-.Lretry_corelock_noflush:
-	safe_testset p0, r2;
-	if cc jump .Ldone_corelock_noflush;
-	SSYNC(r2);
-	jump .Lretry_corelock_noflush
-.Ldone_corelock_noflush:
-	/*
-	 * SMP kgdb runs into dead loop without NOP here, when one core
-	 * single steps over get_core_lock_noflush and the other executes
-	 * get_core_lock as a slave node.
-	 */
-	nop;
-	CSYNC(r2);
-	rts;
-ENDPROC(_get_core_lock_noflush)
-
-/*
- * r0 = interrupt mask to restore.
- * r1 = address of atomic data to flush and invalidate (32bit).
- *
- * Interrupts are masked on entry (see _get_core_lock).
- * Clobbers: r2:0, p0
- */
-ENTRY(_put_core_lock)
-	/* Write-through cache assumed, so no flush needed here. */
-	coreslot_loadaddr p0;
-	r1 = 0;
-	[p0] = r1;
-	SSYNC(r2);
-	sti r0;
-	rts;
-ENDPROC(_put_core_lock)
-
-#ifdef __ARCH_SYNC_CORE_DCACHE
-
-ENTRY(___raw_smp_mark_barrier_asm)
-	[--sp] = rets;
-	[--sp] = ( r7:5 );
-	[--sp] = r0;
-	[--sp] = p1;
-	[--sp] = p0;
-	call _get_core_lock_noflush;
-
-	/*
-	 * Calculate current core mask
-	 */
-	GET_CPUID(p1, r7);
-	r6 = 1;
-	r6 <<= r7;
-
-	/*
-	 * Set bit of other cores in barrier mask. Don't change current core bit.
-	 */
-	p1.l = _barrier_mask;
-	p1.h = _barrier_mask;
-	r7 = [p1];
-	r5 = r7 & r6;
-	r7 = ~r6;
-	cc = r5 == 0;
-	if cc jump 1f;
-	r7 = r7 | r6;
-1:
-	[p1] = r7;
-	SSYNC(r2);
-
-	call _put_core_lock;
-	p0 = [sp++];
-	p1 = [sp++];
-	r0 = [sp++];
-	( r7:5 ) = [sp++];
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_smp_mark_barrier_asm)
-
-ENTRY(___raw_smp_check_barrier_asm)
-	[--sp] = rets;
-	[--sp] = ( r7:5 );
-	[--sp] = r0;
-	[--sp] = p1;
-	[--sp] = p0;
-	call _get_core_lock_noflush;
-
-	/*
-	 * Calculate current core mask
-	 */
-	GET_CPUID(p1, r7);
-	r6 = 1;
-	r6 <<= r7;
-
-	/*
-	 * Clear current core bit in barrier mask if it is set.
-	 */
-	p1.l = _barrier_mask;
-	p1.h = _barrier_mask;
-	r7 = [p1];
-	r5 = r7 & r6;
-	cc = r5 == 0;
-	if cc jump 1f;
-	r6 = ~r6;
-	r7 = r7 & r6;
-	[p1] = r7;
-	SSYNC(r2);
-
-	call _put_core_lock;
-
-	/*
-	 * Invalidate the entire D-cache of current core.
-	 */
-	sp += -12;
-	call _resync_core_dcache
-	sp += 12;
-	jump 2f;
-1:
-	call _put_core_lock;
-2:
-	p0 = [sp++];
-	p1 = [sp++];
-	r0 = [sp++];
-	( r7:5 ) = [sp++];
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_smp_check_barrier_asm)
-
-/*
- * r0 = irqflags
- * r1 = address of atomic data
- *
- * Clobbers: r2:0, p1:0
- */
-_start_lock_coherent:
-
-	[--sp] = rets;
-	[--sp] = ( r7:6 );
-	r7 = r0;
-	p1 = r1;
-
-	/*
-	 * Determine whether the atomic data was previously
-	 * owned by another CPU (=r6).
-	 */
-	GET_CPUID(p0, r2);
-	r1 = 1;
-	r1 <<= r2;
-	r2 = ~r1;
-
-	r1 = [p1];
-	r1 >>= 28;	/* CPU fingerprints are stored in the high nibble. */
-	r6 = r1 & r2;
-	r1 = [p1];
-	r1 <<= 4;
-	r1 >>= 4;
-	[p1] = r1;
-
-	/*
-	 * Release the core lock now, but keep IRQs disabled while we are
-	 * performing the remaining housekeeping chores for the current CPU.
-	 */
-	coreslot_loadaddr p0;
-	r1 = 0;
-	[p0] = r1;
-
-	/*
-	 * If another CPU has owned the same atomic section before us,
-	 * then our D-cached copy of the shared data protected by the
-	 * current spin/write_lock may be obsolete.
-	 */
-	cc = r6 == 0;
-	if cc jump .Lcache_synced
-
-	/*
-	 * Invalidate the entire D-cache of the current core.
-	 */
-	sp += -12;
-	call _resync_core_dcache
-	sp += 12;
-
-.Lcache_synced:
-	SSYNC(r2);
-	sti r7;
-	( r7:6 ) = [sp++];
-	rets = [sp++];
-	rts
-
-/*
- * r0 = irqflags
- * r1 = address of atomic data
- *
- * Clobbers: r2:0, p1:0
- */
-_end_lock_coherent:
-
-	p1 = r1;
-	GET_CPUID(p0, r2);
-	r2 += 28;
-	r1 = 1;
-	r1 <<= r2;
-	r2 = [p1];
-	r2 = r1 | r2;
-	[p1] = r2;
-	r1 = p1;
-	jump _put_core_lock;
-
-#endif /* __ARCH_SYNC_CORE_DCACHE */
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_is_locked_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r3 = [p1];
-	cc = bittst( r3, 0 );
-	r3 = cc;
-	r1 = p1;
-	call _put_core_lock;
-	rets = [sp++];
-	r0 = r3;
-	rts;
-ENDPROC(___raw_spin_is_locked_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_lock_asm)
-	p1 = r0;
-	[--sp] = rets;
-.Lretry_spinlock:
-	call _get_core_lock;
-	r1 = p1;
-	r2 = [p1];
-	cc = bittst( r2, 0 );
-	if cc jump .Lbusy_spinlock
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	r3 = p1;
-	bitset ( r2, 0 ); /* Raise the lock bit. */
-	[p1] = r2;
-	call _start_lock_coherent
-#else
-	r2 = 1;
-	[p1] = r2;
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	rts;
-
-.Lbusy_spinlock:
-	/* We don't touch the atomic area if busy, so that flush
-	   will behave like nop in _put_core_lock. */
-	call _put_core_lock;
-	SSYNC(r2);
-	r0 = p1;
-	jump .Lretry_spinlock
-ENDPROC(___raw_spin_lock_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_spin_trylock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r1 = p1;
-	r3 = [p1];
-	cc = bittst( r3, 0 );
-	if cc jump .Lfailed_trylock
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	bitset ( r3, 0 ); /* Raise the lock bit. */
-	[p1] = r3;
-	call _start_lock_coherent
-#else
-	r2 = 1;
-	[p1] = r2;
-	call _put_core_lock;
-#endif
-	r0 = 1;
-	rets = [sp++];
-	rts;
-.Lfailed_trylock:
-	call _put_core_lock;
-	r0 = 0;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_spin_trylock_asm)
-
-/*
- * r0 = &spinlock->lock
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_spin_unlock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r2 = [p1];
-	bitclr ( r2, 0 );
-	[p1] = r2;
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _end_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_spin_unlock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_read_lock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-.Lrdlock_try:
-	r1 = [p1];
-	r1 += -1;
-	[p1] = r1;
-	cc = r1 < 0;
-	if cc jump .Lrdlock_failed
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _start_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	rts;
-
-.Lrdlock_failed:
-	r1 += 1;
-	[p1] = r1;
-.Lrdlock_wait:
-	r1 = p1;
-	call _put_core_lock;
-	SSYNC(r2);
-	r0 = p1;
-	call _get_core_lock;
-	r1 = [p1];
-	cc = r1 < 2;
-	if cc jump .Lrdlock_wait;
-	jump .Lrdlock_try
-ENDPROC(___raw_read_lock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_read_trylock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r1 = [p1];
-	cc = r1 <= 0;
-	if cc jump .Lfailed_tryrdlock;
-	r1 += -1;
-	[p1] = r1;
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _start_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	r0 = 1;
-	rts;
-.Lfailed_tryrdlock:
-	r1 = p1;
-	call _put_core_lock;
-	rets = [sp++];
-	r0 = 0;
-	rts;
-ENDPROC(___raw_read_trylock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Note: Processing controlled by a reader lock should not have
- * any side-effect on cache issues with the other core, so we
- * just release the core lock and exit (no _end_lock_coherent).
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_read_unlock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r1 = [p1];
-	r1 += 1;
-	[p1] = r1;
-	r1 = p1;
-	call _put_core_lock;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_read_unlock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_lock_asm)
-	p1 = r0;
-	r3.l = lo(RW_LOCK_BIAS);
-	r3.h = hi(RW_LOCK_BIAS);
-	[--sp] = rets;
-	call _get_core_lock;
-.Lwrlock_try:
-	r1 = [p1];
-	r1 = r1 - r3;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	r2 = r1;
-	r2 <<= 4;
-	r2 >>= 4;
-	cc = r2 == 0;
-#else
-	cc = r1 == 0;
-#endif
-	if !cc jump .Lwrlock_wait
-	[p1] = r1;
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _start_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	rts;
-
-.Lwrlock_wait:
-	r1 = p1;
-	call _put_core_lock;
-	SSYNC(r2);
-	r0 = p1;
-	call _get_core_lock;
-	r1 = [p1];
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	r1 <<= 4;
-	r1 >>= 4;
-#endif
-	cc = r1 == r3;
-	if !cc jump .Lwrlock_wait;
-	jump .Lwrlock_try
-ENDPROC(___raw_write_lock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_trylock_asm)
-	p1 = r0;
-	[--sp] = rets;
-	call _get_core_lock;
-	r1 = [p1];
-	r2.l = lo(RW_LOCK_BIAS);
-	r2.h = hi(RW_LOCK_BIAS);
-	cc = r1 == r2;
-	if !cc jump .Lfailed_trywrlock;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	r1 >>= 28;
-	r1 <<= 28;
-#else
-	r1 = 0;
-#endif
-	[p1] = r1;
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _start_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	r0 = 1;
-	rts;
-
-.Lfailed_trywrlock:
-	r1 = p1;
-	call _put_core_lock;
-	rets = [sp++];
-	r0 = 0;
-	rts;
-ENDPROC(___raw_write_trylock_asm)
-
-/*
- * r0 = &rwlock->lock
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_write_unlock_asm)
-	p1 = r0;
-	r3.l = lo(RW_LOCK_BIAS);
-	r3.h = hi(RW_LOCK_BIAS);
-	[--sp] = rets;
-	call _get_core_lock;
-	r1 = [p1];
-	r1 = r1 + r3;
-	[p1] = r1;
-	r1 = p1;
-#ifdef __ARCH_SYNC_CORE_DCACHE
-	call _end_lock_coherent
-#else
-	call _put_core_lock;
-#endif
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_write_unlock_asm)
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * ADD a signed value to a 32bit word and return the new value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_add_asm)
-	p1 = r0;
-	r3 = r1;
-	[--sp] = rets;
-	call _get_core_lock;
-	r2 = [p1];
-	r3 = r3 + r2;
-	[p1] = r3;
-	r1 = p1;
-	call _put_core_lock;
-	r0 = r3;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_atomic_add_asm)
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * ADD a signed value to a 32bit word and return the old value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_xadd_asm)
-	p1 = r0;
-	r3 = r1;
-	[--sp] = rets;
-	call _get_core_lock;
-	r3 = [p1];
-	r2 = r3 + r2;
-	[p1] = r2;
-	r1 = p1;
-	call _put_core_lock;
-	r0 = r3;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_atomic_add_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * AND the mask bits from a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_and_asm)
-	p1 = r0;
-	r3 = r1;
-	[--sp] = rets;
-	call _get_core_lock;
-	r3 = [p1];
-	r2 = r2 & r3;
-	[p1] = r2;
-	r1 = p1;
-	call _put_core_lock;
-	r0 = r3;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_atomic_and_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * OR the mask bits into a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_or_asm)
-	p1 = r0;
-	r3 = r1;
-	[--sp] = rets;
-	call _get_core_lock;
-	r3 = [p1];
-	r2 = r2 | r3;
-	[p1] = r2;
-	r1 = p1;
-	call _put_core_lock;
-	r0 = r3;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_atomic_or_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * XOR the mask bits with a 32bit word and return the old 32bit value
- * atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_xor_asm)
-	p1 = r0;
-	r3 = r1;
-	[--sp] = rets;
-	call _get_core_lock;
-	r3 = [p1];
-	r2 = r2 ^ r3;
-	[p1] = r2;
-	r1 = p1;
-	call _put_core_lock;
-	r0 = r3;
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_atomic_xor_asm)
-
-/*
- * r0 = ptr
- * r1 = mask
- *
- * Perform a logical AND between the mask bits and a 32bit word, and
- * return the masked value. We need this on this architecture in
- * order to invalidate the local cache before testing.
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_atomic_test_asm)
-	p1 = r0;
-	r3 = r1;
-	r1 = -L1_CACHE_BYTES;
-	r1 = r0 & r1;
-	p0 = r1;
-	/* flush core internal write buffer before invalidate dcache */
-	CSYNC(r2);
-	flushinv[p0];
-	SSYNC(r2);
-	r0 = [p1];
-	r0 = r0 & r3;
-	rts;
-ENDPROC(___raw_atomic_test_asm)
-
-/*
- * r0 = ptr
- * r1 = value
- *
- * Swap *ptr with value and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-#define __do_xchg(src, dst)		\
-	p1 = r0;			\
-	r3 = r1;			\
-	[--sp] = rets;			\
-	call _get_core_lock;		\
-	r2 = src;			\
-	dst = r3;			\
-	r3 = r2;			\
-	r1 = p1;			\
-	call _put_core_lock;		\
-	r0 = r3;			\
-	rets = [sp++];			\
-	rts;
-
-ENTRY(___raw_xchg_1_asm)
-	__do_xchg(b[p1] (z), b[p1])
-ENDPROC(___raw_xchg_1_asm)
-
-ENTRY(___raw_xchg_2_asm)
-	__do_xchg(w[p1] (z), w[p1])
-ENDPROC(___raw_xchg_2_asm)
-
-ENTRY(___raw_xchg_4_asm)
-	__do_xchg([p1], [p1])
-ENDPROC(___raw_xchg_4_asm)
-
-/*
- * r0 = ptr
- * r1 = new
- * r2 = old
- *
- * Swap *ptr with new if *ptr == old and return the previous *ptr
- * value atomically.
- *
- * Clobbers: r3:0, p1:0
- */
-#define __do_cmpxchg(src, dst)		\
-	[--sp] = rets;			\
-	[--sp] = r4;			\
-	p1 = r0;			\
-	r3 = r1;			\
-	r4 = r2;			\
-	call _get_core_lock;		\
-	r2 = src;			\
-	cc = r2 == r4;			\
-	if !cc jump 1f;			\
-	dst = r3;			\
-	1: r3 = r2;			\
-	r1 = p1;			\
-	call _put_core_lock;		\
-	r0 = r3;			\
-	r4 = [sp++];			\
-	rets = [sp++];			\
-	rts;
-
-ENTRY(___raw_cmpxchg_1_asm)
-	__do_cmpxchg(b[p1] (z), b[p1])
-ENDPROC(___raw_cmpxchg_1_asm)
-
-ENTRY(___raw_cmpxchg_2_asm)
-	__do_cmpxchg(w[p1] (z), w[p1])
-ENDPROC(___raw_cmpxchg_2_asm)
-
-ENTRY(___raw_cmpxchg_4_asm)
-	__do_cmpxchg([p1], [p1])
-ENDPROC(___raw_cmpxchg_4_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Set a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_set_asm)
-	r2 = r1;
-	r1 = 1;
-	r1 <<= r2;
-	jump ___raw_atomic_or_asm
-ENDPROC(___raw_bit_set_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Clear a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_clear_asm)
-	r2 = 1;
-	r2 <<= r1;
-	r1 = ~r2;
-	jump ___raw_atomic_and_asm
-ENDPROC(___raw_bit_clear_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Toggle a bit in a 32bit word and return the old 32bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_toggle_asm)
-	r2 = r1;
-	r1 = 1;
-	r1 <<= r2;
-	jump ___raw_atomic_xor_asm
-ENDPROC(___raw_bit_toggle_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-set a bit in a 32bit word and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_set_asm)
-	[--sp] = rets;
-	[--sp] = r1;
-	call ___raw_bit_set_asm
-	r1 = [sp++];
-	r2 = 1;
-	r2 <<= r1;
-	r0 = r0 & r2;
-	cc = r0 == 0;
-	if cc jump 1f
-	r0 = 1;
-1:
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_bit_test_set_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_clear_asm)
-	[--sp] = rets;
-	[--sp] = r1;
-	call ___raw_bit_clear_asm
-	r1 = [sp++];
-	r2 = 1;
-	r2 <<= r1;
-	r0 = r0 & r2;
-	cc = r0 == 0;
-	if cc jump 1f
-	r0 = 1;
-1:
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_bit_test_clear_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test-and-toggle a bit in a 32bit word,
- * and return the old bit value atomically.
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_toggle_asm)
-	[--sp] = rets;
-	[--sp] = r1;
-	call ___raw_bit_toggle_asm
-	r1 = [sp++];
-	r2 = 1;
-	r2 <<= r1;
-	r0 = r0 & r2;
-	cc = r0 == 0;
-	if cc jump 1f
-	r0 = 1;
-1:
-	rets = [sp++];
-	rts;
-ENDPROC(___raw_bit_test_toggle_asm)
-
-/*
- * r0 = ptr
- * r1 = bitnr
- *
- * Test a bit in a 32bit word and return its value.
- * We need this on this architecture in order to invalidate
- * the local cache before testing.
- *
- * Clobbers: r3:0, p1:0
- */
-ENTRY(___raw_bit_test_asm)
-	r2 = r1;
-	r1 = 1;
-	r1 <<= r2;
-	jump ___raw_atomic_test_asm
-ENDPROC(___raw_bit_test_asm)
-
-/*
- * r0 = ptr
- *
- * Fetch and return an uncached 32bit value.
- *
- * Clobbers: r2:0, p1:0
- */
-ENTRY(___raw_uncached_fetch_asm)
-	p1 = r0;
-	r1 = -L1_CACHE_BYTES;
-	r1 = r0 & r1;
-	p0 = r1;
-	/* flush core internal write buffer before invalidate dcache */
-	CSYNC(r2);
-	flushinv[p0];
-	SSYNC(r2);
-	r0 = [p1];
-	rts;
-ENDPROC(___raw_uncached_fetch_asm)
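For reference, the locking and read-modify-write routines in the removed file all follow the same pattern: serialize the two BF561 cores through the on-chip _corelock word (acquired with TESTSET while interrupts are masked), flush and invalidate the cache line holding the datum because the two L1 D-caches are not coherent, do a plain read-modify-write, then release the lock word and restore interrupts. The read-only helpers (___raw_atomic_test_asm, ___raw_uncached_fetch_asm) skip the lock and only flush/invalidate. Below is a rough, illustrative C sketch of that protocol, not the actual kernel code; the helper names are hypothetical stand-ins for the Blackfin cli/sti, TESTSET, CSYNC/FLUSHINV and SSYNC primitives.

/* Hypothetical stand-ins so the sketch compiles; on real silicon these
 * would map to cli/sti, TESTSET and the cache-control instructions. */
static unsigned long irq_save(void) { return 0; }
static void irq_restore(unsigned long flags) { (void)flags; }
static int testset_acquire(volatile unsigned long *p)
{
	if (*p)
		return 0;	/* already held by the other core */
	*p = 1;
	return 1;		/* acquired */
}
static void ssync(void) { }
static void flushinv_line(void *addr) { (void)addr; }

static volatile unsigned long corelock;		/* the _corelock word */

/* _get_core_lock: mask IRQs, spin on TESTSET, then flush+invalidate the
 * cache line holding the atomic datum so the other core's store is seen. */
static unsigned long get_core_lock(void *atomic_addr)
{
	unsigned long flags = irq_save();	/* cli r0 */

	while (!testset_acquire(&corelock))	/* safe_testset retry loop */
		ssync();
	flushinv_line(atomic_addr);		/* CSYNC + flushinv + SSYNC */
	return flags;
}

/* _put_core_lock: clear the lock word and unmask IRQs. */
static void put_core_lock(unsigned long flags)
{
	corelock = 0;
	ssync();
	irq_restore(flags);			/* sti r0 */
}

/* ___raw_cmpxchg_4_asm reduces to a read-compare-write under the lock,
 * returning the previous value to the caller. */
static unsigned int raw_cmpxchg_4(unsigned int *ptr, unsigned int new,
				  unsigned int old)
{
	unsigned long flags = get_core_lock(ptr);
	unsigned int prev = *ptr;

	if (prev == old)
		*ptr = new;
	put_core_lock(flags);
	return prev;
}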