linux/arch/x86/xen/smp.c
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

static cpumask_t cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
        void (*func) (void *info);
        void *info;
        atomic_t started;
        atomic_t finished;
        int wait;
};

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);

static struct call_data_struct *call_data;

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static __cpuinit void cpu_bringup_and_idle(void)
{
        int cpu = smp_processor_id();

        cpu_init();

        preempt_disable();
        per_cpu(cpu_state, cpu) = CPU_ONLINE;

        xen_setup_cpu_clockevents();

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();

        wmb();                  /* make sure everything is out */
        cpu_idle();
}
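/*
 * Bind this CPU's reschedule and call-function IPIs to Xen event
 * channels and remember the resulting irqs, so they can be unbound
 * again if either binding fails.
 */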
static int xen_smp_intr_init(unsigned int cpu)
{
        int rc;
        const char *resched_name, *callfunc_name;

        per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
                                    cpu,
                                    xen_reschedule_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    resched_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(resched_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                    cpu,
                                    xen_call_function_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(callfunc_irq, cpu) = rc;

        return 0;

 fail:
        if (per_cpu(resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        if (per_cpu(callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
        return rc;
}
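/*
 * Ask the hypervisor about every VCPU id up to NR_CPUS; any id it
 * recognises (VCPUOP_is_up returns >= 0) is marked as a possible CPU.
 */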
void __init xen_fill_possible_map(void)
{
        int i, rc;

        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0)
                        cpu_set(i, cpu_possible_map);
        }
}

void __init xen_smp_prepare_boot_cpu(void)
{
        int cpu;

        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        /* We've switched to the "real" per-cpu gdt, so make sure the
           old memory can be recycled */
        make_lowmem_page_readwrite(&per_cpu__gdt_page);

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpus_clear(per_cpu(cpu_sibling_map, cpu));
                /*
                 * cpu_core_map lives in a per cpu area that is cleared
                 * when the per cpu array is allocated.
                 *
                 * cpus_clear(per_cpu(cpu_core_map, cpu));
                 */
        }

        xen_setup_vcpu_info_placement();
}

void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpus_clear(per_cpu(cpu_sibling_map, cpu));
                /*
                 * cpu_core_map will be zeroed when the per
                 * cpu area is allocated.
                 *
                 * cpus_clear(per_cpu(cpu_core_map, cpu));
                 */
        }

        smp_store_cpu_info(0);
        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        cpu_initialized_map = cpumask_of_cpu(0);

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
                        continue;
                cpu_clear(cpu, cpu_possible_map);
        }

        for_each_possible_cpu (cpu) {
                struct task_struct *idle;

                if (cpu == 0)
                        continue;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

                cpu_set(cpu, cpu_present_map);
        }

        //init_xenbus_allowed_cpumask();
}
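/*
 * Describe the new VCPU's initial state (registers, trap table, GDT
 * frame and page tables) in a vcpu_guest_context and hand it to the
 * hypervisor with VCPUOP_initialise.  The entry point is
 * cpu_bringup_and_idle, running on the idle task's stack.
 */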
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct gdt_page *gdt = &per_cpu(gdt_page, cpu);

        if (cpu_test_and_set(cpu, cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL)
                return -ENOMEM;

        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.fs = __KERNEL_PERCPU;
        ctxt->user_regs.gs = 0;
        ctxt->user_regs.ss = __KERNEL_DS;
        ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

        memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

        xen_copy_trap_info(ctxt->trap_ctxt);

        ctxt->ldt_ents = 0;

        BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
        make_lowmem_page_readonly(gdt->gdt);

        ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
        ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);

        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = idle->thread.esp0;

        ctxt->event_callback_cs     = __KERNEL_CS;
        ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
        ctxt->failsafe_callback_cs  = __KERNEL_CS;
        ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}
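/*
 * Bring a secondary CPU up: set up its GDT, timer and IPI event
 * channels, load its initial context into the hypervisor, mark it
 * online and finally kick it with VCPUOP_up.
 */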
int __cpuinit xen_cpu_up(unsigned int cpu)
{
        struct task_struct *idle = idle_task(cpu);
        int rc;

#if 0
        rc = cpu_up_check(cpu);
        if (rc)
                return rc;
#endif

        init_gdt(cpu);
        per_cpu(current_task, cpu) = idle;
        irq_ctx_init(cpu);
        xen_setup_timer(cpu);

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;

        smp_store_cpu_info(cpu);
        set_cpu_sibling_map(cpu);
        /* This must be done before setting cpu_online_map */
        wmb();

        cpu_set(cpu, cpu_online_map);

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        return 0;
}

void xen_smp_cpus_done(unsigned int max_cpus)
{
}

static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
        BUG();
}

void xen_smp_send_stop(void)
{
        smp_call_function(stop_self, NULL, 0, 0);
}

void xen_smp_send_reschedule(int cpu)
{
        xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
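/* Deliver the given IPI vector to every online CPU in the mask. */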
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
        unsigned cpu;

        cpus_and(mask, mask, cpu_online_map);

        for_each_cpu_mask(cpu, mask)
                xen_send_IPI_one(cpu, vector);
}
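/*
 * Handler for XEN_CALL_FUNCTION_VECTOR on a target CPU: run the
 * function published through call_data and signal progress back to
 * the initiator via the started/finished counters.
 */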
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;

        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
         */
        mb();
        atomic_inc(&call_data->started);
        /*
         * At this point the info structure may be out of scope unless wait==1
         */
        irq_enter();
        (*func)(info);
        __get_cpu_var(irq_stat).irq_call_count++;
        irq_exit();

        if (wait) {
                mb();           /* commit everything before setting finished */
                atomic_inc(&call_data->finished);
        }

        return IRQ_HANDLED;
}
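/*
 * Cross-call implementation: publish func/info in call_data under
 * call_lock, IPI the target CPUs and spin until they have all picked
 * the call up (and, if wait is set, finished executing it).  If any
 * target VCPU has been preempted by the hypervisor, yield first so it
 * gets a chance to respond.
 */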
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
                               void *info, int wait)
{
        struct call_data_struct data;
        int cpus, cpu;
        bool yield;

        /* Holding any lock stops cpus from going down. */
        spin_lock(&call_lock);

        cpu_clear(smp_processor_id(), mask);

        cpus = cpus_weight(mask);
        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();                   /* write everything before IPI */

        /* Send a message to other CPUs and wait for them to respond */
        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

        /* Make sure other vcpus get a chance to run if they need to. */
        yield = false;
        for_each_cpu_mask(cpu, mask)
                if (xen_vcpu_stolen(cpu))
                        yield = true;

        if (yield)
                HYPERVISOR_sched_op(SCHEDOP_yield, 0);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus ||
               (wait && atomic_read(&data.finished) != cpus))
                cpu_relax();

        spin_unlock(&call_lock);

        return 0;
}