linux/arch/metag/kernel/smp.c
/*
 *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
 *
 *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/cache.h>
#include <linux/profile.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
#include <linux/bootmem.h>

#include <asm/cacheflush.h>
#include <asm/cachepart.h>
#include <asm/core_reg.h>
#include <asm/cpu.h>
#include <asm/global_lock.h>
#include <asm/metag_mem.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/hwthread.h>
#include <asm/traps.h>

#define SYSC_DCPART(n)  (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
#define SYSC_ICPART(n)  (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))

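/*
 * Per-CPU pointer to this CPU's TBI structure; set up in
 * smp_prepare_boot_cpu() and secondary_start_kernel() below.
 */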
DECLARE_PER_CPU(PTBI, pTBI);

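/*
 * Stack page of the incoming secondary's idle task, published (with a
 * wmb()) by __cpu_up() and presumably picked up by the low-level
 * secondary_startup entry code that boot_secondary() points the new
 * thread's PC at.
 */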
void *secondary_data_stack;

/*
 * Structures for inter-processor calls
 * - a collection of single-bit IPI messages.
 */
struct ipi_data {
        spinlock_t lock;
        unsigned long ipi_count;
        unsigned long bits;
};

static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
        .lock   = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
};

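/*
 * boot_lock serialises the register pokes in boot_secondary(); cpu_running
 * is completed by secondary_start_kernel() once the new CPU is marked
 * online, and is waited on (with a timeout) in __cpu_up().
 */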
static DEFINE_SPINLOCK(boot_lock);

static DECLARE_COMPLETION(cpu_running);

/*
 * "thread" is assumed to be a valid Meta hardware thread ID.
 */
int boot_secondary(unsigned int thread, struct task_struct *idle)
{
        u32 val;

        /*
         * set synchronisation state between this boot processor
         * and the secondary one
         */
        spin_lock(&boot_lock);

        core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
        core_reg_write(TXUPC_ID, 1, thread, 0);

        /*
         * Give the thread privilege (PSTAT) and clear potentially problematic
         * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
         */
        core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);

        /* Clear the minim enable bit. */
        val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
        core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);

        /*
         * set the ThreadEnable bit (0x1) in the TXENABLE register
         * for the specified thread - off it goes!
         */
        val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
        core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);

        /*
         * now the secondary core is starting up, let it run its
         * calibrations, then wait for it to finish
         */
        spin_unlock(&boot_lock);

        return 0;
}

/**
 * describe_cachepart_change: describe a change to cache partitions.
 * @thread:     Hardware thread number.
 * @label:      Label of cache type, e.g. "dcache" or "icache".
 * @sz:         Total size of the cache.
 * @old:        Old cache partition configuration (*CPART* register).
 * @new:        New cache partition configuration (*CPART* register).
 *
 * If the cache partition has changed, prints a message to the log describing
 * those changes.
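 *
 * The printed values are (or * sz) >> 4 and ((and + 1) * sz) >> 4, so the
 * OR/AND fields select sixteenths of the cache.  For example (hypothetical
 * values), with sz = 0x2000, OR = 0x8 and AND = 0x7 the pair prints as
 * "0x1000+0x1000", i.e. (on a start+size reading) the upper half of the
 * cache.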
 */
static void describe_cachepart_change(unsigned int thread, const char *label,
                                      unsigned int sz, unsigned int old,
                                      unsigned int new)
{
        unsigned int lor1, land1, gor1, gand1;
        unsigned int lor2, land2, gor2, gand2;
        unsigned int diff = old ^ new;

        if (!diff)
                return;

        pr_info("Thread %d: %s partition changed:", thread, label);
        if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
                lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
                lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
                land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
                land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
                pr_cont(" L:%#x+%#x->%#x+%#x",
                        (lor1 * sz) >> 4,
                        ((land1 + 1) * sz) >> 4,
                        (lor2 * sz) >> 4,
                        ((land2 + 1) * sz) >> 4);
        }
        if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
                gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
                gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
                gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
                gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
                pr_cont(" G:%#x+%#x->%#x+%#x",
                        (gor1 * sz) >> 4,
                        ((gand1 + 1) * sz) >> 4,
                        (gor2 * sz) >> 4,
                        ((gand2 + 1) * sz) >> 4);
        }
        if (diff & SYSC_CWRMODE_BIT)
                pr_cont(" %sWR",
                        (new & SYSC_CWRMODE_BIT) ? "+" : "-");
        if (diff & SYSC_DCPART_GCON_BIT)
                pr_cont(" %sGCOn",
                        (new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
        pr_cont("\n");
}

/**
 * setup_smp_cache: ensure cache coherency for new SMP thread.
 * @thread:     New hardware thread number.
 *
 * Ensures that coherency is enabled and that the threads share the same cache
 * partitions.
 */
static void setup_smp_cache(unsigned int thread)
{
        unsigned int this_thread, lflags;
        unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
        unsigned int icsz, icpart_old, icpart_new;

        /*
         * Copy over the current thread's cache partition configuration to the
         * new thread so that they share cache partitions.
         */
        __global_lock2(lflags);
        this_thread = hard_processor_id();
        /* Share dcache partition */
        dcpart_this = metag_in32(SYSC_DCPART(this_thread));
        dcpart_old = metag_in32(SYSC_DCPART(thread));
        dcpart_new = dcpart_this;
#if PAGE_OFFSET < LINGLOBAL_BASE
        /*
         * For the local data cache to be coherent the threads must also have
         * GCOn enabled.
         */
        dcpart_new |= SYSC_DCPART_GCON_BIT;
        metag_out32(dcpart_new, SYSC_DCPART(this_thread));
#endif
        metag_out32(dcpart_new, SYSC_DCPART(thread));
        /* Share icache partition too */
        icpart_new = metag_in32(SYSC_ICPART(this_thread));
        icpart_old = metag_in32(SYSC_ICPART(thread));
        metag_out32(icpart_new, SYSC_ICPART(thread));
        __global_unlock2(lflags);

        /*
         * Log if the cache partitions were altered so the user is aware of any
         * potential unintentional cache wastage.
         */
        dcsz = get_dcache_size();
        icsz = get_icache_size();
        describe_cachepart_change(this_thread, "dcache", dcsz,
                                  dcpart_this, dcpart_new);
        describe_cachepart_change(thread, "dcache", dcsz,
                                  dcpart_old, dcpart_new);
        describe_cachepart_change(thread, "icache", icsz,
                                  icpart_old, icpart_new);
}

int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
        unsigned int thread = cpu_2_hwthread_id[cpu];
        int ret;

        load_pgd(swapper_pg_dir, thread);

        flush_tlb_all();

        setup_smp_cache(thread);

        /*
         * Tell the secondary CPU where to find its idle thread's stack.
         */
        secondary_data_stack = task_stack_page(idle);

        wmb();

        /*
         * Now bring the CPU into our world.
         */
        ret = boot_secondary(thread, idle);
        if (ret == 0) {
                /*
                 * CPU was successfully started, wait for it
                 * to come online or time out.
                 */
                wait_for_completion_timeout(&cpu_running,
                                            msecs_to_jiffies(1000));

                if (!cpu_online(cpu))
                        ret = -EIO;
        }

        secondary_data_stack = NULL;

        if (ret) {
                pr_crit("CPU%u: processor failed to boot\n", cpu);

                /*
                 * FIXME: We need to clean up the new idle thread. --rmk
                 */
        }

        return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
static DECLARE_COMPLETION(cpu_killed);

/*
 * __cpu_disable runs on the processor to be shut down.
 */
int __cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();

        /*
         * Take this CPU offline.  Once we clear this, we can't return,
         * and we must not schedule until we're ready to give up the cpu.
         */
        set_cpu_online(cpu, false);

        /*
         * OK - migrate IRQs away from this CPU
         */
        migrate_irqs();

        /*
         * Flush user cache and TLB mappings, and then remove this CPU
         * from the vm mask set of all processes.
         */
        flush_cache_all();
        local_flush_tlb_all();

        clear_tasks_mm_cpumask(cpu);

        return 0;
}

/*
 * called on the thread which is asking for a CPU to be shut down -
 * waits until shutdown has completed, or the wait times out.
 */
void __cpu_die(unsigned int cpu)
{
        if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
                pr_err("CPU%u: unable to kill\n", cpu);
}

/*
 * Called from the idle thread for the CPU which has been shut down.
 *
 * Note that we do not return from this function. If this cpu is
 * brought online again it will need to run secondary_startup().
 */
void cpu_die(void)
{
        local_irq_disable();
        idle_task_exit();

        complete(&cpu_killed);

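        /*
         * XORing D0Re0 with itself writes zero to TXENABLE, clearing the
         * ThreadEnable bit and stopping this hardware thread (the reverse
         * of what boot_secondary() does to start it).
         */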
        asm ("XOR       TXENABLE, D0Re0,D0Re0\n");
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * Called by both boot and secondaries to move global data into
 * per-processor storage.
 */
void smp_store_cpu_info(unsigned int cpuid)
{
        struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);

        cpu_info->loops_per_jiffy = loops_per_jiffy;
}

/*
 * This is the secondary CPU boot entry.  We're using this CPU's
 * idle thread stack and the global page tables.
 */
asmlinkage void secondary_start_kernel(void)
{
        struct mm_struct *mm = &init_mm;
        unsigned int cpu = smp_processor_id();

        /*
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
        atomic_inc(&mm->mm_users);
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
        enter_lazy_tlb(mm, current);
        local_flush_tlb_all();

        /*
         * TODO: Some day it might be useful for each Linux CPU to
         * have its own TBI structure. That would allow each Linux CPU
         * to run different interrupt handlers for the same IRQ
         * number.
         *
         * For now, simply copying the pointer to the boot CPU's TBI
         * structure is sufficient because we always want to run the
         * same interrupt handler whatever CPU takes the interrupt.
         */
        per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);

        if (!per_cpu(pTBI, cpu))
                panic("No TBI found!");

        per_cpu_trap_init(cpu);

        preempt_disable();

        setup_priv();

        notify_cpu_starting(cpu);

        pr_info("CPU%u (thread %u): Booted secondary processor\n",
                cpu, cpu_2_hwthread_id[cpu]);

        calibrate_delay();
        smp_store_cpu_info(cpu);

        /*
         * OK, now it's safe to let the boot CPU continue
         */
        set_cpu_online(cpu, true);
        complete(&cpu_running);

        /*
         * Enable local interrupts.
         */
        tbi_startup_interrupt(TBID_SIGNUM_TRT);
        local_irq_enable();

        /*
         * OK, it's off to the idle thread for us
         */
        cpu_startup_entry(CPUHP_ONLINE);
}

void __init smp_cpus_done(unsigned int max_cpus)
{
        int cpu;
        unsigned long bogosum = 0;

        for_each_online_cpu(cpu)
                bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;

        pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
                num_online_cpus(),
                bogosum / (500000/HZ),
                (bogosum / (5000/HZ)) % 100);
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned int cpu = smp_processor_id();

        init_new_context(current, &init_mm);
        current_thread_info()->cpu = cpu;

        smp_store_cpu_info(cpu);
        init_cpu_present(cpu_possible_mask);
}

void __init smp_prepare_boot_cpu(void)
{
        unsigned int cpu = smp_processor_id();

        per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);

        if (!per_cpu(pTBI, cpu))
                panic("No TBI found!");
}

static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);

static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
{
        unsigned long flags;
        unsigned int cpu;
        cpumask_t map;

        cpumask_clear(&map);
        local_irq_save(flags);

        for_each_cpu(cpu, mask) {
                struct ipi_data *ipi = &per_cpu(ipi_data, cpu);

                spin_lock(&ipi->lock);

                /*
                 * KICK interrupts are queued in hardware so we'll get
                 * multiple interrupts if we call smp_cross_call()
                 * multiple times for one msg. The problem is that we
                 * only have one bit for each message - we can't queue
                 * them in software.
                 *
                 * The first time through ipi_handler() we'll clear
                 * the msg bit, having done all the work. But when we
                 * return we'll get _another_ interrupt (and another,
                 * and another until we've handled all the queued
                 * KICKs). Running ipi_handler() when there's no work
                 * to do is bad because that's how kick handler
                 * chaining detects who the KICK was intended for.
                 * See arch/metag/kernel/kick.c for more details.
                 *
                 * So only add 'cpu' to 'map' if we haven't already
                 * queued a KICK interrupt for 'msg'.
                 */
                if (!(ipi->bits & (1 << msg))) {
                        ipi->bits |= 1 << msg;
                        cpumask_set_cpu(cpu, &map);
                }

                spin_unlock(&ipi->lock);
        }

        /*
         * Call the platform specific cross-CPU call function.
         */
        smp_cross_call(map, msg);

        local_irq_restore(flags);
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
        send_ipi_message(mask, IPI_CALL_FUNC);
}

void arch_send_call_function_single_ipi(int cpu)
{
        send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}

void show_ipi_list(struct seq_file *p)
{
        unsigned int cpu;

        seq_puts(p, "IPI:");

        for_each_present_cpu(cpu)
                seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);

        seq_putc(p, '\n');
}

static DEFINE_SPINLOCK(stop_lock);

/*
 * Main handler for inter-processor interrupts
 *
 * For Meta, the ipimask now only identifies a single
 * category of IPI (Bit 1 IPIs have been replaced by a
 * different mechanism):
 *
 *  Bit 0 - Inter-processor function call
 */
static int do_IPI(struct pt_regs *regs)
{
        unsigned int cpu = smp_processor_id();
        struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
        struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned long msgs, nextmsg;
        int handled = 0;

        ipi->ipi_count++;

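        /*
         * Claim the lowest-numbered pending message under the lock:
         * "msgs & -msgs" isolates the lowest set bit, which ffz(~nextmsg)
         * below converts back into a message number.
         */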
        spin_lock(&ipi->lock);
        msgs = ipi->bits;
        nextmsg = msgs & -msgs;
        ipi->bits &= ~nextmsg;
        spin_unlock(&ipi->lock);

        if (nextmsg) {
                handled = 1;

                nextmsg = ffz(~nextmsg);
                switch (nextmsg) {
                case IPI_RESCHEDULE:
                        scheduler_ipi();
                        break;

                case IPI_CALL_FUNC:
                        generic_smp_call_function_interrupt();
                        break;

                case IPI_CALL_FUNC_SINGLE:
                        generic_smp_call_function_single_interrupt();
                        break;

                default:
                        pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
                                cpu, nextmsg);
                        break;
                }
        }

        set_irq_regs(old_regs);

        return handled;
}

void smp_send_reschedule(int cpu)
{
        send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}

static void stop_this_cpu(void *data)
{
        unsigned int cpu = smp_processor_id();

        if (system_state == SYSTEM_BOOTING ||
            system_state == SYSTEM_RUNNING) {
                spin_lock(&stop_lock);
                pr_crit("CPU%u: stopping\n", cpu);
                dump_stack();
                spin_unlock(&stop_lock);
        }

        set_cpu_online(cpu, false);

        local_irq_disable();

        hard_processor_halt(HALT_OK);
}

void smp_send_stop(void)
{
        smp_call_function(stop_this_cpu, NULL, 0);
}

/*
 * not supported here
 */
int setup_profiling_timer(unsigned int multiplier)
{
        return -EINVAL;
}

/*
 * We use KICKs for inter-processor interrupts.
 *
 * For every CPU in "callmap" the IPI data must already have been
 * stored in that CPU's "ipi_data" member prior to calling this
 * function.
 */
static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
{
        int cpu;

        for_each_cpu(cpu, &callmap) {
                unsigned int thread;

                thread = cpu_2_hwthread_id[cpu];

                BUG_ON(thread == BAD_HWTHREAD_ID);

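                /*
                 * Write to the target thread's KICK interrupt register
                 * (T0KICKI plus the per-thread stride) to raise a KICK
                 * interrupt on that hardware thread.
                 */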
                metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
        }
}

static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
                   int Inst, PTBI pTBI, int *handled)
{
        *handled = do_IPI((struct pt_regs *)State.Sig.pCtx);

        return State;
}

static struct kick_irq_handler ipi_irq = {
        .func = ipi_handler,
};

static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
{
        kick_raise_softirq(callmap, 1);
}

static inline unsigned int get_core_count(void)
{
        int i;
        unsigned int ret = 0;

        for (i = 0; i < CONFIG_NR_CPUS; i++) {
                if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
                        ret++;
        }

        return ret;
}

/*
 * Initialise the CPU possible map early - this describes the CPUs
 * which may be present or become present in the system.
 */
void __init smp_init_cpus(void)
{
        unsigned int i, ncores = get_core_count();

        /* If no hwthread_map early param was set use default mapping */
        for (i = 0; i < NR_CPUS; i++)
                if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
                        cpu_2_hwthread_id[i] = i;
                        hwthread_id_2_cpu[i] = i;
                }

        for (i = 0; i < ncores; i++)
                set_cpu_possible(i, true);

        kick_register_func(&ipi_irq);
}