linux/arch/x86/kernel/apic/numaq_32.c
<<
>>
Prefs
   1/*
   2 * Written by: Patricia Gaughen, IBM Corporation
   3 *
   4 * Copyright (C) 2002, IBM Corp.
   5 * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
   6 *
   7 * All rights reserved.
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of the GNU General Public License as published by
  11 * the Free Software Foundation; either version 2 of the License, or
  12 * (at your option) any later version.
  13 *
  14 * This program is distributed in the hope that it will be useful, but
  15 * WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  17 * NON INFRINGEMENT.  See the GNU General Public License for more
  18 * details.
  19 *
  20 * You should have received a copy of the GNU General Public License
  21 * along with this program; if not, write to the Free Software
  22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23 *
  24 * Send feedback to <gone@us.ibm.com>
  25 */
  26#include <linux/nodemask.h>
  27#include <linux/topology.h>
  28#include <linux/bootmem.h>
  29#include <linux/memblock.h>
  30#include <linux/threads.h>
  31#include <linux/cpumask.h>
  32#include <linux/kernel.h>
  33#include <linux/mmzone.h>
  34#include <linux/module.h>
  35#include <linux/string.h>
  36#include <linux/init.h>
  37#include <linux/numa.h>
  38#include <linux/smp.h>
  39#include <linux/io.h>
  40#include <linux/mm.h>
  41
  42#include <asm/processor.h>
  43#include <asm/fixmap.h>
  44#include <asm/mpspec.h>
  45#include <asm/numaq.h>
  46#include <asm/setup.h>
  47#include <asm/apic.h>
  48#include <asm/e820.h>
  49#include <asm/ipi.h>
  50
/*
 * Non-zero once numaq_mps_oem_check() has matched the "IBM NUMA" OEM id.
 * Most NUMA-Q specific setup in this file is gated on it.
 */
int found_numaq;
  52
/*
 * Have to match translation table entries to main table entries by counter
 * hence the mpc_record variable .... can't see a less disgusting way of
 * doing this ....
 *
 * struct mpc_trans is one entry of the OEM MPC table.  smp_read_mpc_oem()
 * casts raw table bytes to this type, so the field order and sizes must
 * mirror the firmware layout exactly.
 */
struct mpc_trans {
        /* entry type tag; smp_read_mpc_oem() switches on this first byte */
        unsigned char                   mpc_type;
        /* not read anywhere in the visible part of this file */
        unsigned char                   trans_len;
        /* only printed by MP_translation_info() */
        unsigned char                   trans_type;
        /* quad (node) the matching main-table record belongs to */
        unsigned char                   trans_quad;
        /* only printed by MP_translation_info() */
        unsigned char                   trans_global;
        /* quad-local number, used as the local bus index for bus records */
        unsigned char                   trans_local;
        /* not read anywhere in the visible part of this file */
        unsigned short                  trans_reserved;
};
  67
/* Index of the MPC record currently being processed; see numaq_mpc_record(). */
static int                              mpc_record;

/* OEM translation entries stashed by MP_translation_info(), indexed by mpc_record. */
static struct mpc_trans                 *translation_table[MAX_MPC_ENTRY];

/* MP bus id -> quad and MP bus id -> quad-local number; filled by mpc_oem_bus_info(). */
int                                     mp_bus_id_to_node[MAX_MP_BUSSES];
int                                     mp_bus_id_to_local[MAX_MP_BUSSES];
/* [quad][quad-local number] -> MP bus id; filled by mpc_oem_pci_bus(). */
int                                     quad_local_to_mp_bus_id[NR_CPUS/4][4];
  75
  76
  77static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
  78{
  79        struct eachquadmem *eq = scd->eq + node;
  80        u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
  81        u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
  82        int ret;
  83
  84        node_set(node, numa_nodes_parsed);
  85        ret = numa_add_memblk(node, start, end);
  86        BUG_ON(ret < 0);
  87}
  88
  89/*
  90 * Function: smp_dump_qct()
  91 *
  92 * Description: gets memory layout from the quad config table.  This
  93 * function also updates numa_nodes_parsed with the nodes (quads) present.
  94 */
  95static void __init smp_dump_qct(void)
  96{
  97        struct sys_cfg_data *scd;
  98        int node;
  99
 100        scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
 101
 102        for_each_node(node) {
 103                if (scd->quads_present31_0 & (1 << node))
 104                        numaq_register_node(node, scd);
 105        }
 106}
 107
 108void __cpuinit numaq_tsc_disable(void)
 109{
 110        if (!found_numaq)
 111                return;
 112
 113        if (num_online_nodes() > 1) {
 114                printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
 115                setup_clear_cpu_cap(X86_FEATURE_TSC);
 116        }
 117}
 118
 119static void __init numaq_tsc_init(void)
 120{
 121        numaq_tsc_disable();
 122}
 123
 124static inline int generate_logical_apicid(int quad, int phys_apicid)
 125{
 126        return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
 127}
 128
 129/* x86_quirks member */
 130static int mpc_apic_id(struct mpc_cpu *m)
 131{
 132        int quad = translation_table[mpc_record]->trans_quad;
 133        int logical_apicid = generate_logical_apicid(quad, m->apicid);
 134
 135        printk(KERN_DEBUG
 136                "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
 137                 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
 138                (m->cpufeature & CPU_MODEL_MASK) >> 4,
 139                 m->apicver, quad, logical_apicid);
 140
 141        return logical_apicid;
 142}
 143
 144/* x86_quirks member */
 145static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
 146{
 147        int quad = translation_table[mpc_record]->trans_quad;
 148        int local = translation_table[mpc_record]->trans_local;
 149
 150        mp_bus_id_to_node[m->busid] = quad;
 151        mp_bus_id_to_local[m->busid] = local;
 152
 153        printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
 154}
 155
 156/* x86_quirks member */
 157static void mpc_oem_pci_bus(struct mpc_bus *m)
 158{
 159        int quad = translation_table[mpc_record]->trans_quad;
 160        int local = translation_table[mpc_record]->trans_local;
 161
 162        quad_local_to_mp_bus_id[quad][local] = m->busid;
 163}
 164
 165/*
 166 * Called from mpparse code.
 167 * mode = 0: prescan
 168 * mode = 1: one mpc entry scanned
 169 */
 170static void numaq_mpc_record(unsigned int mode)
 171{
 172        if (!mode)
 173                mpc_record = 0;
 174        else
 175                mpc_record++;
 176}
 177
 178static void __init MP_translation_info(struct mpc_trans *m)
 179{
 180        printk(KERN_INFO
 181            "Translation: record %d, type %d, quad %d, global %d, local %d\n",
 182               mpc_record, m->trans_type, m->trans_quad, m->trans_global,
 183               m->trans_local);
 184
 185        if (mpc_record >= MAX_MPC_ENTRY)
 186                printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
 187        else
 188                translation_table[mpc_record] = m; /* stash this for later */
 189
 190        if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
 191                node_set_online(m->trans_quad);
 192}
 193
 194static int __init mpf_checksum(unsigned char *mp, int len)
 195{
 196        int sum = 0;
 197
 198        while (len--)
 199                sum += *mp++;
 200
 201        return sum & 0xFF;
 202}
 203
 204/*
 205 * Read/parse the MPC oem tables
 206 */
 207static void __init smp_read_mpc_oem(struct mpc_table *mpc)
 208{
 209        struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
 210        int count = sizeof(*oemtable);  /* the header size */
 211        unsigned char *oemptr = ((unsigned char *)oemtable) + count;
 212
 213        mpc_record = 0;
 214        printk(KERN_INFO
 215                "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
 216
 217        if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
 218                printk(KERN_WARNING
 219                       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
 220                       oemtable->signature[0], oemtable->signature[1],
 221                       oemtable->signature[2], oemtable->signature[3]);
 222                return;
 223        }
 224
 225        if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
 226                printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
 227                return;
 228        }
 229
 230        while (count < oemtable->length) {
 231                switch (*oemptr) {
 232                case MP_TRANSLATION:
 233                        {
 234                                struct mpc_trans *m = (void *)oemptr;
 235
 236                                MP_translation_info(m);
 237                                oemptr += sizeof(*m);
 238                                count += sizeof(*m);
 239                                ++mpc_record;
 240                                break;
 241                        }
 242                default:
 243                        printk(KERN_WARNING
 244                               "Unrecognised OEM table entry type! - %d\n",
 245                               (int)*oemptr);
 246                        return;
 247                }
 248        }
 249}
 250
 251static __init void early_check_numaq(void)
 252{
 253        /*
 254         * get boot-time SMP configuration:
 255         */
 256        if (smp_found_config)
 257                early_get_smp_config();
 258
 259        if (found_numaq) {
 260                x86_init.mpparse.mpc_record = numaq_mpc_record;
 261                x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
 262                x86_init.mpparse.mpc_apic_id = mpc_apic_id;
 263                x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
 264                x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
 265                x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
 266                x86_init.timers.tsc_pre_init = numaq_tsc_init;
 267                x86_init.pci.init = pci_numaq_init;
 268        }
 269}
 270
 271int __init numaq_numa_init(void)
 272{
 273        early_check_numaq();
 274        if (!found_numaq)
 275                return -ENOENT;
 276        smp_dump_qct();
 277
 278        return 0;
 279}
 280
/*
 * DFR value for NUMA-Q, an alias of APIC_DFR_CLUSTER.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define NUMAQ_APIC_DFR_VALUE    (APIC_DFR_CLUSTER)
 282
 283static inline unsigned int numaq_get_apic_id(unsigned long x)
 284{
 285        return (x >> 24) & 0x0F;
 286}
 287
 288static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
 289{
 290        default_send_IPI_mask_sequence_logical(mask, vector);
 291}
 292
 293static inline void numaq_send_IPI_allbutself(int vector)
 294{
 295        default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
 296}
 297
 298static inline void numaq_send_IPI_all(int vector)
 299{
 300        numaq_send_IPI_mask(cpu_online_mask, vector);
 301}
 302
/* Values for apic_numaq.trampoline_phys_low / .trampoline_phys_high. */
#define NUMAQ_TRAMPOLINE_PHYS_LOW       (0x8)
#define NUMAQ_TRAMPOLINE_PHYS_HIGH      (0xa)
 305
 306/*
 307 * Because we use NMIs rather than the INIT-STARTUP sequence to
 308 * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
 309 */
 310static inline void numaq_smp_callin_clear_local_apic(void)
 311{
 312        clear_local_APIC();
 313}
 314
 315static inline const struct cpumask *numaq_target_cpus(void)
 316{
 317        return cpu_all_mask;
 318}
 319
 320static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
 321{
 322        return physid_isset(apicid, *map);
 323}
 324
 325static inline unsigned long numaq_check_apicid_present(int bit)
 326{
 327        return physid_isset(bit, phys_cpu_present_map);
 328}
 329
 330static inline int numaq_apic_id_registered(void)
 331{
 332        return 1;
 333}
 334
 335static inline void numaq_init_apic_ldr(void)
 336{
 337        /* Already done in NUMA-Q firmware */
 338}
 339
 340static inline void numaq_setup_apic_routing(void)
 341{
 342        printk(KERN_INFO
 343                "Enabling APIC mode:  NUMA-Q.  Using %d I/O APICs\n",
 344                nr_ioapics);
 345}
 346
 347/*
 348 * Skip adding the timer int on secondary nodes, which causes
 349 * a small but painful rift in the time-space continuum.
 350 */
 351static inline int numaq_multi_timer_check(int apic, int irq)
 352{
 353        return apic != 0 && irq == 0;
 354}
 355
 356static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 357{
 358        /* We don't have a good way to do this yet - hack */
 359        return physids_promote(0xFUL, retmap);
 360}
 361
 362/*
 363 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
 364 * cpu to APIC ID relation to properly interact with the intelligent
 365 * mode of the cluster controller.
 366 */
 367static inline int numaq_cpu_present_to_apicid(int mps_cpu)
 368{
 369        if (mps_cpu < 60)
 370                return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
 371        else
 372                return BAD_APICID;
 373}
 374
 375static inline int numaq_apicid_to_node(int logical_apicid)
 376{
 377        return logical_apicid >> 4;
 378}
 379
 380static int numaq_numa_cpu_node(int cpu)
 381{
 382        int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 383
 384        if (logical_apicid != BAD_APICID)
 385                return numaq_apicid_to_node(logical_apicid);
 386        return NUMA_NO_NODE;
 387}
 388
 389static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
 390{
 391        int node = numaq_apicid_to_node(logical_apicid);
 392        int cpu = __ffs(logical_apicid & 0xf);
 393
 394        physid_set_mask_of_physid(cpu + 4*node, retmap);
 395}
 396
/*
 * Where the IO area was mapped on multiquad, always 0 otherwise.
 * Assigned by numaq_setup_portio_remap() when more than one node is online.
 */
void *xquad_portio;
 399
 400static inline int numaq_check_phys_apicid_present(int phys_apicid)
 401{
 402        return 1;
 403}
 404
 405/*
 406 * We use physical apicids here, not logical, so just return the default
 407 * physical broadcast to stop people from breaking us
 408 */
 409static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
 410{
 411        return 0x0F;
 412}
 413
 414static inline unsigned int
 415numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 416                             const struct cpumask *andmask)
 417{
 418        return 0x0F;
 419}
 420
 421/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
 422static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
 423{
 424        return cpuid_apic >> index_msb;
 425}
 426
 427static int
 428numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 429{
 430        if (strncmp(oem, "IBM NUMA", 8))
 431                printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
 432        else
 433                found_numaq = 1;
 434
 435        return found_numaq;
 436}
 437
 438static int probe_numaq(void)
 439{
 440        /* already know from get_memcfg_numaq() */
 441        return found_numaq;
 442}
 443
 444static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
 445{
 446        /* Careful. Some cpus do not strictly honor the set of cpus
 447         * specified in the interrupt destination when using lowest
 448         * priority interrupt delivery mode.
 449         *
 450         * In particular there was a hyperthreading cpu observed to
 451         * deliver interrupts to the wrong hyperthread when only one
 452         * hyperthread was specified in the interrupt desitination.
 453         */
 454        cpumask_clear(retmask);
 455        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 456}
 457
 458static void numaq_setup_portio_remap(void)
 459{
 460        int num_quads = num_online_nodes();
 461
 462        if (num_quads <= 1)
 463                return;
 464
 465        printk(KERN_INFO
 466                "Remapping cross-quad port I/O for %d quads\n", num_quads);
 467
 468        xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
 469
 470        printk(KERN_INFO
 471                "xquad_portio vaddr 0x%08lx, len %08lx\n",
 472                (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
 473}
 474
/*
 * APIC driver descriptor for NUMA-Q.  It wires the numaq_* helpers defined
 * above into the struct apic callbacks; hooks this driver does not provide
 * are set to NULL explicitly.  The descriptor is handed to the
 * apic_driver() macro below.
 */
/* Use __refdata to keep false positive warning calm.  */
static struct apic __refdata apic_numaq = {

        .name                           = "NUMAQ",
        .probe                          = probe_numaq,
        .acpi_madt_oem_check            = NULL,
        .apic_id_valid                  = default_apic_id_valid,
        .apic_id_registered             = numaq_apic_id_registered,

        .irq_delivery_mode              = dest_LowestPrio,
        /* physical delivery on LOCAL quad: */
        .irq_dest_mode                  = 0,

        .target_cpus                    = numaq_target_cpus,
        .disable_esr                    = 1,
        .dest_logical                   = APIC_DEST_LOGICAL,
        .check_apicid_used              = numaq_check_apicid_used,
        .check_apicid_present           = numaq_check_apicid_present,

        .vector_allocation_domain       = numaq_vector_allocation_domain,
        /* empty hook: the firmware has already programmed the LDR */
        .init_apic_ldr                  = numaq_init_apic_ldr,

        .ioapic_phys_id_map             = numaq_ioapic_phys_id_map,
        .setup_apic_routing             = numaq_setup_apic_routing,
        .multi_timer_check              = numaq_multi_timer_check,
        .cpu_present_to_apicid          = numaq_cpu_present_to_apicid,
        .apicid_to_cpu_present          = numaq_apicid_to_cpu_present,
        .setup_portio_remap             = numaq_setup_portio_remap,
        .check_phys_apicid_present      = numaq_check_phys_apicid_present,
        .enable_apic_mode               = NULL,
        .phys_pkg_id                    = numaq_phys_pkg_id,
        .mps_oem_check                  = numaq_mps_oem_check,

        /* APIC id is the 4-bit field at bits 24..27, see numaq_get_apic_id() */
        .get_apic_id                    = numaq_get_apic_id,
        .set_apic_id                    = NULL,
        .apic_id_mask                   = 0x0F << 24,

        .cpu_mask_to_apicid             = numaq_cpu_mask_to_apicid,
        .cpu_mask_to_apicid_and         = numaq_cpu_mask_to_apicid_and,

        .send_IPI_mask                  = numaq_send_IPI_mask,
        .send_IPI_mask_allbutself       = NULL,
        .send_IPI_allbutself            = numaq_send_IPI_allbutself,
        .send_IPI_all                   = numaq_send_IPI_all,
        .send_IPI_self                  = default_send_IPI_self,

        .wakeup_secondary_cpu           = wakeup_secondary_cpu_via_nmi,
        .trampoline_phys_low            = NUMAQ_TRAMPOLINE_PHYS_LOW,
        .trampoline_phys_high           = NUMAQ_TRAMPOLINE_PHYS_HIGH,

        /* We don't do anything here because we use NMI's to boot instead */
        .wait_for_init_deassert         = NULL,

        .smp_callin_clear_local_apic    = numaq_smp_callin_clear_local_apic,
        .inquire_remote_apic            = NULL,

        /* register access goes through the native memory-mapped helpers */
        .read                           = native_apic_mem_read,
        .write                          = native_apic_mem_write,
        .eoi_write                      = native_apic_mem_write,
        .icr_read                       = native_apic_icr_read,
        .icr_write                      = native_apic_icr_write,
        .wait_icr_idle                  = native_apic_wait_icr_idle,
        .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,

        .x86_32_early_logical_apicid    = noop_x86_32_early_logical_apicid,
        .x86_32_numa_cpu_node           = numaq_numa_cpu_node,
};

apic_driver(apic_numaq);
 544