linux/arch/x86/xen/spinlock.c
/*
 * Split spinlock implementation out into its own file, so it can be
 * compiled in a FTRACE-compatible way.
 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/gfp.h>

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "debugfs.h"

#ifdef CONFIG_XEN_DEBUG_FS
static struct xen_spinlock_stats
{
        u64 taken;
        u32 taken_slow;
        u32 taken_slow_nested;
        u32 taken_slow_pickup;
        u32 taken_slow_spurious;
        u32 taken_slow_irqenable;

        u64 released;
        u32 released_slow;
        u32 released_slow_kicked;

#define HISTO_BUCKETS   30
        u32 histo_spin_total[HISTO_BUCKETS+1];
        u32 histo_spin_spinning[HISTO_BUCKETS+1];
        u32 histo_spin_blocked[HISTO_BUCKETS+1];

        u64 time_total;
        u64 time_spinning;
        u64 time_blocked;
} spinlock_stats;

static u8 zero_stats;

static unsigned lock_timeout = 1 << 10;
#define TIMEOUT lock_timeout

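/*
 * Stats are cleared lazily: writing a non-zero value to the
 * "zero_stats" debugfs file requests a reset, which is carried out the
 * next time any statistic is updated via ADD_STATS()/check_zero().
 */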
static inline void check_zero(void)
{
        if (unlikely(zero_stats)) {
                memset(&spinlock_stats, 0, sizeof(spinlock_stats));
                zero_stats = 0;
        }
}

#define ADD_STATS(elem, val)                    \
        do { check_zero(); spinlock_stats.elem += (val); } while(0)

static inline u64 spin_time_start(void)
{
        return xen_clocksource_read();
}

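/*
 * Accumulate a time delta into a power-of-two histogram: bucket i
 * counts deltas with ilog2(delta) == i, and everything at or above
 * HISTO_BUCKETS is collapsed into the final bucket.  Deltas are
 * differences of xen_clocksource_read() values, i.e. nanoseconds of
 * Xen system time.
 */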
static void __spin_time_accum(u64 delta, u32 *array)
{
        unsigned index = ilog2(delta);

        check_zero();

        if (index < HISTO_BUCKETS)
                array[index]++;
        else
                array[HISTO_BUCKETS]++;
}

static inline void spin_time_accum_spinning(u64 start)
{
        u32 delta = xen_clocksource_read() - start;

        __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
        spinlock_stats.time_spinning += delta;
}

static inline void spin_time_accum_total(u64 start)
{
        u32 delta = xen_clocksource_read() - start;

        __spin_time_accum(delta, spinlock_stats.histo_spin_total);
        spinlock_stats.time_total += delta;
}

static inline void spin_time_accum_blocked(u64 start)
{
        u32 delta = xen_clocksource_read() - start;

        __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
        spinlock_stats.time_blocked += delta;
}
#else  /* !CONFIG_XEN_DEBUG_FS */
#define TIMEOUT                 (1 << 10)
#define ADD_STATS(elem, val)    do { (void)(val); } while(0)

static inline u64 spin_time_start(void)
{
        return 0;
}

static inline void spin_time_accum_total(u64 start)
{
}
static inline void spin_time_accum_spinning(u64 start)
{
}
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif  /* CONFIG_XEN_DEBUG_FS */

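/*
 * Layout of the paravirtualized byte lock.  The pv_lock_ops hooks below
 * take a struct arch_spinlock * and reinterpret it as this structure,
 * overlaying the generic spinlock storage with a lock byte plus a count
 * of CPUs currently waiting in the blocking slow path.
 */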
struct xen_spinlock {
        unsigned char lock;             /* 0 -> free; 1 -> locked */
        unsigned short spinners;        /* count of waiting cpus */
};

static int xen_spin_is_locked(struct arch_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        return xl->lock != 0;
}

static int xen_spin_is_contended(struct arch_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        /* Not strictly true; this is only the count of contended
           lock-takers entering the slow path. */
        return xl->spinners != 0;
}

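/*
 * Try to take the lock with a single atomic exchange: unconditionally
 * write 1 into the lock byte and examine the previous value.  If it was
 * 0 the lock was free and is now ours; if it was already 1 the store
 * changed nothing and the current owner is unaffected.
 */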
static int xen_spin_trylock(struct arch_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        u8 old = 1;

        asm("xchgb %b0,%1"
            : "+q" (old), "+m" (xl->lock) : : "memory");

        return old == 0;
}

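/*
 * Per-cpu state for the blocking slow path: lock_kicker_irq is the irq
 * bound to this CPU's XEN_SPIN_UNLOCK_VECTOR IPI (kept disabled and only
 * polled on, never delivered as an interrupt), and lock_spinners records
 * which lock this CPU is currently waiting for so an unlocker can find
 * and kick it.
 */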
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

/*
 * Mark a cpu as interested in a lock.  Returns the CPU's previous
 * lock of interest, in case we got preempted by an interrupt.
 */
static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
        struct xen_spinlock *prev;

        prev = __this_cpu_read(lock_spinners);
        __this_cpu_write(lock_spinners, xl);

        wmb();                  /* set lock of interest before count */

        asm(LOCK_PREFIX " incw %0"
            : "+m" (xl->spinners) : : "memory");

        return prev;
}

/*
 * Mark a cpu as no longer interested in a lock.  Restores previous
 * lock of interest (NULL for none).
 */
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
        asm(LOCK_PREFIX " decw %0"
            : "+m" (xl->spinners) : : "memory");
        wmb();                  /* decrement count before restoring lock */
        __this_cpu_write(lock_spinners, prev);
}

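/*
 * Slow path: rather than keep burning cycles, block on this CPU's
 * kicker event until the holder releases the lock.  The ordering is
 * what prevents a lost wakeup: the pending bit is cleared before the
 * final trylock, so a kick arriving after that trylock fails is still
 * seen by xen_poll_irq(), which returns immediately when the event is
 * already pending.
 */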
static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        struct xen_spinlock *prev;
        int irq = __this_cpu_read(lock_kicker_irq);
        int ret;
        u64 start;

        /* If kicker interrupts not initialized yet, just spin */
        if (irq == -1)
                return 0;

        start = spin_time_start();

        /* announce we're spinning */
        prev = spinning_lock(xl);

        ADD_STATS(taken_slow, 1);
        ADD_STATS(taken_slow_nested, prev != NULL);

        do {
                unsigned long flags;

                /* clear pending */
                xen_clear_irq_pending(irq);

                /* check again make sure it didn't become free while
                   we weren't looking  */
                ret = xen_spin_trylock(lock);
                if (ret) {
                        ADD_STATS(taken_slow_pickup, 1);

                        /*
                         * If we interrupted another spinlock while it
                         * was blocking, make sure it doesn't block
                         * without rechecking the lock.
                         */
                        if (prev != NULL)
                                xen_set_irq_pending(irq);
                        goto out;
                }

                flags = arch_local_save_flags();
                if (irq_enable) {
                        ADD_STATS(taken_slow_irqenable, 1);
                        raw_local_irq_enable();
                }

                /*
                 * Block until irq becomes pending.  If we're
                 * interrupted at this point (after the trylock but
                 * before entering the block), then the nested lock
                 * handler guarantees that the irq will be left
                 * pending if there's any chance the lock became free;
                 * xen_poll_irq() returns immediately if the irq is
                 * pending.
                 */
                xen_poll_irq(irq);

                raw_local_irq_restore(flags);

                ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
        } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */

        kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));

out:
        unspinning_lock(xl, prev);
        spin_time_accum_blocked(start);

        return ret;
}

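/*
 * Fast path: spin on the lock byte with xchgb for up to TIMEOUT
 * iterations.  If the lock is still unavailable after that, fall back
 * to the blocking slow path (unless TIMEOUT is ~0, in which case just
 * keep spinning); the outer loop repeats until either the inline spin
 * or the slow path actually acquires the lock.
 */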
static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        unsigned timeout;
        u8 oldval;
        u64 start_spin;

        ADD_STATS(taken, 1);

        start_spin = spin_time_start();

        do {
                u64 start_spin_fast = spin_time_start();

                timeout = TIMEOUT;

                asm("1: xchgb %1,%0\n"
                    "   testb %1,%1\n"
                    "   jz 3f\n"
                    "2: rep;nop\n"
                    "   cmpb $0,%0\n"
                    "   je 1b\n"
                    "   dec %2\n"
                    "   jnz 2b\n"
                    "3:\n"
                    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
                    : "1" (1)
                    : "memory");

                spin_time_accum_spinning(start_spin_fast);

        } while (unlikely(oldval != 0 &&
                          (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));

        spin_time_accum_total(start_spin);
}

static void xen_spin_lock(struct arch_spinlock *lock)
{
        __xen_spin_lock(lock, false);
}

static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
        __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}

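/*
 * Kick one waiter: scan the online CPUs for one whose lock_spinners
 * entry points at this lock and send it the spinlock IPI so its
 * xen_poll_irq() returns.  Only the first match is kicked; any other
 * waiters stay blocked until a later unlock.
 */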
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
        int cpu;

        ADD_STATS(released_slow, 1);

        for_each_online_cpu(cpu) {
                /* XXX should mix up next cpu selection */
                if (per_cpu(lock_spinners, cpu) == xl) {
                        ADD_STATS(released_slow_kicked, 1);
                        xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
                        break;
                }
        }
}

static void xen_spin_unlock(struct arch_spinlock *lock)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;

        ADD_STATS(released, 1);

        smp_wmb();              /* make sure no writes get moved after unlock */
        xl->lock = 0;           /* release lock */

        /*
         * Make sure unlock happens before checking for waiting
         * spinners.  We need a strong barrier to enforce the
         * write-read ordering to different memory locations, as the
         * CPU makes no implied guarantees about their ordering.
         */
        mb();

        if (unlikely(xl->spinners))
                xen_spin_unlock_slow(xl);
}

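/*
 * The spinlock IPI is consumed via xen_poll_irq() and the irq is kept
 * disabled after binding, so this handler should never actually run;
 * hitting it indicates a bug.
 */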
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
        BUG();
        return IRQ_HANDLED;
}

void __cpuinit xen_init_lock_cpu(int cpu)
{
        int irq;
        const char *name;

        name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
        irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
                                     cpu,
                                     dummy_handler,
                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                     name,
                                     NULL);

        if (irq >= 0) {
                disable_irq(irq); /* make sure it's never delivered */
                per_cpu(lock_kicker_irq, cpu) = irq;
        }

        printk("cpu %d spinlock event irq %d\n", cpu, irq);
}

void xen_uninit_lock_cpu(int cpu)
{
        unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
}

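/*
 * Install the Xen implementations into pv_lock_ops, so the generic
 * spinlock entry points route through them when running as a Xen guest.
 */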
void __init xen_init_spinlocks(void)
{
        pv_lock_ops.spin_is_locked = xen_spin_is_locked;
        pv_lock_ops.spin_is_contended = xen_spin_is_contended;
        pv_lock_ops.spin_lock = xen_spin_lock;
        pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
        pv_lock_ops.spin_trylock = xen_spin_trylock;
        pv_lock_ops.spin_unlock = xen_spin_unlock;
}

#ifdef CONFIG_XEN_DEBUG_FS

static struct dentry *d_spin_debug;

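/*
 * Publish the statistics under the Xen debugfs directory, i.e.
 * <debugfs>/xen/spinlocks/ (typically /sys/kernel/debug/xen/spinlocks/).
 * "zero_stats" and "timeout" are writable; the rest are read-only
 * counters and histograms.
 */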
static int __init xen_spinlock_debugfs(void)
{
        struct dentry *d_xen = xen_init_debugfs();

        if (d_xen == NULL)
                return -ENOMEM;

        d_spin_debug = debugfs_create_dir("spinlocks", d_xen);

        debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

        debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);

        debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
        debugfs_create_u32("taken_slow", 0444, d_spin_debug,
                           &spinlock_stats.taken_slow);
        debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
                           &spinlock_stats.taken_slow_nested);
        debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
                           &spinlock_stats.taken_slow_pickup);
        debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
                           &spinlock_stats.taken_slow_spurious);
        debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
                           &spinlock_stats.taken_slow_irqenable);

        debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
        debugfs_create_u32("released_slow", 0444, d_spin_debug,
                           &spinlock_stats.released_slow);
        debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
                           &spinlock_stats.released_slow_kicked);

        debugfs_create_u64("time_spinning", 0444, d_spin_debug,
                           &spinlock_stats.time_spinning);
        debugfs_create_u64("time_blocked", 0444, d_spin_debug,
                           &spinlock_stats.time_blocked);
        debugfs_create_u64("time_total", 0444, d_spin_debug,
                           &spinlock_stats.time_total);

        xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
                                     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
        xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
                                     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
        xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
                                     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);

        return 0;
}
fs_initcall(xen_spinlock_debugfs);

#endif  /* CONFIG_XEN_DEBUG_FS */