/* linux/arch/x86/kernel/apic/numaq_32.c */
   1/*
   2 * Written by: Patricia Gaughen, IBM Corporation
   3 *
   4 * Copyright (C) 2002, IBM Corp.
   5 * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
   6 *
   7 * All rights reserved.
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of the GNU General Public License as published by
  11 * the Free Software Foundation; either version 2 of the License, or
  12 * (at your option) any later version.
  13 *
  14 * This program is distributed in the hope that it will be useful, but
  15 * WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  17 * NON INFRINGEMENT.  See the GNU General Public License for more
  18 * details.
  19 *
  20 * You should have received a copy of the GNU General Public License
  21 * along with this program; if not, write to the Free Software
  22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23 *
  24 * Send feedback to <gone@us.ibm.com>
  25 */
  26#include <linux/nodemask.h>
  27#include <linux/topology.h>
  28#include <linux/bootmem.h>
  29#include <linux/threads.h>
  30#include <linux/cpumask.h>
  31#include <linux/kernel.h>
  32#include <linux/mmzone.h>
  33#include <linux/module.h>
  34#include <linux/string.h>
  35#include <linux/init.h>
  36#include <linux/numa.h>
  37#include <linux/smp.h>
  38#include <linux/io.h>
  39#include <linux/mm.h>
  40
  41#include <asm/processor.h>
  42#include <asm/fixmap.h>
  43#include <asm/mpspec.h>
  44#include <asm/numaq.h>
  45#include <asm/setup.h>
  46#include <asm/apic.h>
  47#include <asm/e820.h>
  48#include <asm/ipi.h>
  49
  50#define MB_TO_PAGES(addr)               ((addr) << (20 - PAGE_SHIFT))
  51
  52int found_numaq;
  53
  54/*
  55 * Have to match translation table entries to main table entries by counter
  56 * hence the mpc_record variable .... can't see a less disgusting way of
  57 * doing this ....
  58 */
  59struct mpc_trans {
  60        unsigned char                   mpc_type;
  61        unsigned char                   trans_len;
  62        unsigned char                   trans_type;
  63        unsigned char                   trans_quad;
  64        unsigned char                   trans_global;
  65        unsigned char                   trans_local;
  66        unsigned short                  trans_reserved;
  67};
  68
  69static int                              mpc_record;
  70
  71static struct mpc_trans                 *translation_table[MAX_MPC_ENTRY];
  72
  73int                                     mp_bus_id_to_node[MAX_MP_BUSSES];
  74int                                     mp_bus_id_to_local[MAX_MP_BUSSES];
  75int                                     quad_local_to_mp_bus_id[NR_CPUS/4][4];
  76
  77
  78static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
  79{
  80        struct eachquadmem *eq = scd->eq + node;
  81
  82        node_set_online(node);
  83
  84        /* Convert to pages */
  85        node_start_pfn[node] =
  86                 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
  87
  88        node_end_pfn[node] =
  89                 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
  90
  91        e820_register_active_regions(node, node_start_pfn[node],
  92                                                node_end_pfn[node]);
  93
  94        memory_present(node, node_start_pfn[node], node_end_pfn[node]);
  95
  96        node_remap_size[node] = node_memmap_size_bytes(node,
  97                                        node_start_pfn[node],
  98                                        node_end_pfn[node]);
  99}
 100
 101/*
 102 * Function: smp_dump_qct()
 103 *
 104 * Description: gets memory layout from the quad config table.  This
 105 * function also updates node_online_map with the nodes (quads) present.
 106 */
 107static void __init smp_dump_qct(void)
 108{
 109        struct sys_cfg_data *scd;
 110        int node;
 111
 112        scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
 113
 114        nodes_clear(node_online_map);
 115        for_each_node(node) {
 116                if (scd->quads_present31_0 & (1 << node))
 117                        numaq_register_node(node, scd);
 118        }
 119}
 120
 121void __cpuinit numaq_tsc_disable(void)
 122{
 123        if (!found_numaq)
 124                return;
 125
 126        if (num_online_nodes() > 1) {
 127                printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
 128                setup_clear_cpu_cap(X86_FEATURE_TSC);
 129        }
 130}
 131
 132static void __init numaq_tsc_init(void)
 133{
 134        numaq_tsc_disable();
 135}
 136
 137static inline int generate_logical_apicid(int quad, int phys_apicid)
 138{
 139        return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
 140}
 141
 142/* x86_quirks member */
 143static int mpc_apic_id(struct mpc_cpu *m)
 144{
 145        int quad = translation_table[mpc_record]->trans_quad;
 146        int logical_apicid = generate_logical_apicid(quad, m->apicid);
 147
 148        printk(KERN_DEBUG
 149                "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
 150                 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
 151                (m->cpufeature & CPU_MODEL_MASK) >> 4,
 152                 m->apicver, quad, logical_apicid);
 153
 154        return logical_apicid;
 155}
 156
 157/* x86_quirks member */
 158static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
 159{
 160        int quad = translation_table[mpc_record]->trans_quad;
 161        int local = translation_table[mpc_record]->trans_local;
 162
 163        mp_bus_id_to_node[m->busid] = quad;
 164        mp_bus_id_to_local[m->busid] = local;
 165
 166        printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
 167}
 168
 169/* x86_quirks member */
 170static void mpc_oem_pci_bus(struct mpc_bus *m)
 171{
 172        int quad = translation_table[mpc_record]->trans_quad;
 173        int local = translation_table[mpc_record]->trans_local;
 174
 175        quad_local_to_mp_bus_id[quad][local] = m->busid;
 176}
 177
 178/*
 179 * Called from mpparse code.
 180 * mode = 0: prescan
 181 * mode = 1: one mpc entry scanned
 182 */
 183static void numaq_mpc_record(unsigned int mode)
 184{
 185        if (!mode)
 186                mpc_record = 0;
 187        else
 188                mpc_record++;
 189}
 190
 191static void __init MP_translation_info(struct mpc_trans *m)
 192{
 193        printk(KERN_INFO
 194            "Translation: record %d, type %d, quad %d, global %d, local %d\n",
 195               mpc_record, m->trans_type, m->trans_quad, m->trans_global,
 196               m->trans_local);
 197
 198        if (mpc_record >= MAX_MPC_ENTRY)
 199                printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
 200        else
 201                translation_table[mpc_record] = m; /* stash this for later */
 202
 203        if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
 204                node_set_online(m->trans_quad);
 205}
 206
 207static int __init mpf_checksum(unsigned char *mp, int len)
 208{
 209        int sum = 0;
 210
 211        while (len--)
 212                sum += *mp++;
 213
 214        return sum & 0xFF;
 215}
 216
 217/*
 218 * Read/parse the MPC oem tables
 219 */
 220static void __init smp_read_mpc_oem(struct mpc_table *mpc)
 221{
 222        struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
 223        int count = sizeof(*oemtable);  /* the header size */
 224        unsigned char *oemptr = ((unsigned char *)oemtable) + count;
 225
 226        mpc_record = 0;
 227        printk(KERN_INFO
 228                "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
 229
 230        if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
 231                printk(KERN_WARNING
 232                       "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
 233                       oemtable->signature[0], oemtable->signature[1],
 234                       oemtable->signature[2], oemtable->signature[3]);
 235                return;
 236        }
 237
 238        if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
 239                printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
 240                return;
 241        }
 242
 243        while (count < oemtable->length) {
 244                switch (*oemptr) {
 245                case MP_TRANSLATION:
 246                        {
 247                                struct mpc_trans *m = (void *)oemptr;
 248
 249                                MP_translation_info(m);
 250                                oemptr += sizeof(*m);
 251                                count += sizeof(*m);
 252                                ++mpc_record;
 253                                break;
 254                        }
 255                default:
 256                        printk(KERN_WARNING
 257                               "Unrecognised OEM table entry type! - %d\n",
 258                               (int)*oemptr);
 259                        return;
 260                }
 261        }
 262}
 263
 264static __init void early_check_numaq(void)
 265{
 266        /*
 267         * Find possible boot-time SMP configuration:
 268         */
 269        early_find_smp_config();
 270
 271        /*
 272         * get boot-time SMP configuration:
 273         */
 274        if (smp_found_config)
 275                early_get_smp_config();
 276
 277        if (found_numaq) {
 278                x86_init.mpparse.mpc_record = numaq_mpc_record;
 279                x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
 280                x86_init.mpparse.mpc_apic_id = mpc_apic_id;
 281                x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
 282                x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
 283                x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
 284                x86_init.timers.tsc_pre_init = numaq_tsc_init;
 285        }
 286}
 287
 288int __init get_memcfg_numaq(void)
 289{
 290        early_check_numaq();
 291        if (!found_numaq)
 292                return 0;
 293        smp_dump_qct();
 294
 295        return 1;
 296}
 297
 298#define NUMAQ_APIC_DFR_VALUE    (APIC_DFR_CLUSTER)
 299
 300static inline unsigned int numaq_get_apic_id(unsigned long x)
 301{
 302        return (x >> 24) & 0x0F;
 303}
 304
 305static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
 306{
 307        default_send_IPI_mask_sequence_logical(mask, vector);
 308}
 309
 310static inline void numaq_send_IPI_allbutself(int vector)
 311{
 312        default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
 313}
 314
 315static inline void numaq_send_IPI_all(int vector)
 316{
 317        numaq_send_IPI_mask(cpu_online_mask, vector);
 318}
 319
 320#define NUMAQ_TRAMPOLINE_PHYS_LOW       (0x8)
 321#define NUMAQ_TRAMPOLINE_PHYS_HIGH      (0xa)
 322
 323/*
 324 * Because we use NMIs rather than the INIT-STARTUP sequence to
 325 * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
 326 */
 327static inline void numaq_smp_callin_clear_local_apic(void)
 328{
 329        clear_local_APIC();
 330}
 331
 332static inline const struct cpumask *numaq_target_cpus(void)
 333{
 334        return cpu_all_mask;
 335}
 336
 337static inline unsigned long
 338numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
 339{
 340        return physid_isset(apicid, bitmap);
 341}
 342
 343static inline unsigned long numaq_check_apicid_present(int bit)
 344{
 345        return physid_isset(bit, phys_cpu_present_map);
 346}
 347
 348static inline int numaq_apic_id_registered(void)
 349{
 350        return 1;
 351}
 352
 353static inline void numaq_init_apic_ldr(void)
 354{
 355        /* Already done in NUMA-Q firmware */
 356}
 357
 358static inline void numaq_setup_apic_routing(void)
 359{
 360        printk(KERN_INFO
 361                "Enabling APIC mode:  NUMA-Q.  Using %d I/O APICs\n",
 362                nr_ioapics);
 363}
 364
 365/*
 366 * Skip adding the timer int on secondary nodes, which causes
 367 * a small but painful rift in the time-space continuum.
 368 */
 369static inline int numaq_multi_timer_check(int apic, int irq)
 370{
 371        return apic != 0 && irq == 0;
 372}
 373
 374static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
 375{
 376        /* We don't have a good way to do this yet - hack */
 377        return physids_promote(0xFUL);
 378}
 379
 380static inline int numaq_cpu_to_logical_apicid(int cpu)
 381{
 382        if (cpu >= nr_cpu_ids)
 383                return BAD_APICID;
 384        return cpu_2_logical_apicid[cpu];
 385}
 386
 387/*
 388 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
 389 * cpu to APIC ID relation to properly interact with the intelligent
 390 * mode of the cluster controller.
 391 */
 392static inline int numaq_cpu_present_to_apicid(int mps_cpu)
 393{
 394        if (mps_cpu < 60)
 395                return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
 396        else
 397                return BAD_APICID;
 398}
 399
 400static inline int numaq_apicid_to_node(int logical_apicid)
 401{
 402        return logical_apicid >> 4;
 403}
 404
 405static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
 406{
 407        int node = numaq_apicid_to_node(logical_apicid);
 408        int cpu = __ffs(logical_apicid & 0xf);
 409
 410        return physid_mask_of_physid(cpu + 4*node);
 411}
 412
 413/* Where the IO area was mapped on multiquad, always 0 otherwise */
 414void *xquad_portio;
 415
 416static inline int numaq_check_phys_apicid_present(int phys_apicid)
 417{
 418        return 1;
 419}
 420
 421/*
 422 * We use physical apicids here, not logical, so just return the default
 423 * physical broadcast to stop people from breaking us
 424 */
 425static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
 426{
 427        return 0x0F;
 428}
 429
 430static inline unsigned int
 431numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 432                             const struct cpumask *andmask)
 433{
 434        return 0x0F;
 435}
 436
 437/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
 438static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
 439{
 440        return cpuid_apic >> index_msb;
 441}
 442
 443static int
 444numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
 445{
 446        if (strncmp(oem, "IBM NUMA", 8))
 447                printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
 448        else
 449                found_numaq = 1;
 450
 451        return found_numaq;
 452}
 453
 454static int probe_numaq(void)
 455{
 456        /* already know from get_memcfg_numaq() */
 457        return found_numaq;
 458}
 459
 460static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
 461{
 462        /* Careful. Some cpus do not strictly honor the set of cpus
 463         * specified in the interrupt destination when using lowest
 464         * priority interrupt delivery mode.
 465         *
 466         * In particular there was a hyperthreading cpu observed to
 467         * deliver interrupts to the wrong hyperthread when only one
 468         * hyperthread was specified in the interrupt desitination.
 469         */
 470        cpumask_clear(retmask);
 471        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 472}
 473
 474static void numaq_setup_portio_remap(void)
 475{
 476        int num_quads = num_online_nodes();
 477
 478        if (num_quads <= 1)
 479                return;
 480
 481        printk(KERN_INFO
 482                "Remapping cross-quad port I/O for %d quads\n", num_quads);
 483
 484        xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
 485
 486        printk(KERN_INFO
 487                "xquad_portio vaddr 0x%08lx, len %08lx\n",
 488                (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
 489}
 490
 491/* Use __refdata to keep false positive warning calm.   */
 492struct apic __refdata apic_numaq = {
 493
 494        .name                           = "NUMAQ",
 495        .probe                          = probe_numaq,
 496        .acpi_madt_oem_check            = NULL,
 497        .apic_id_registered             = numaq_apic_id_registered,
 498
 499        .irq_delivery_mode              = dest_LowestPrio,
 500        /* physical delivery on LOCAL quad: */
 501        .irq_dest_mode                  = 0,
 502
 503        .target_cpus                    = numaq_target_cpus,
 504        .disable_esr                    = 1,
 505        .dest_logical                   = APIC_DEST_LOGICAL,
 506        .check_apicid_used              = numaq_check_apicid_used,
 507        .check_apicid_present           = numaq_check_apicid_present,
 508
 509        .vector_allocation_domain       = numaq_vector_allocation_domain,
 510        .init_apic_ldr                  = numaq_init_apic_ldr,
 511
 512        .ioapic_phys_id_map             = numaq_ioapic_phys_id_map,
 513        .setup_apic_routing             = numaq_setup_apic_routing,
 514        .multi_timer_check              = numaq_multi_timer_check,
 515        .apicid_to_node                 = numaq_apicid_to_node,
 516        .cpu_to_logical_apicid          = numaq_cpu_to_logical_apicid,
 517        .cpu_present_to_apicid          = numaq_cpu_present_to_apicid,
 518        .apicid_to_cpu_present          = numaq_apicid_to_cpu_present,
 519        .setup_portio_remap             = numaq_setup_portio_remap,
 520        .check_phys_apicid_present      = numaq_check_phys_apicid_present,
 521        .enable_apic_mode               = NULL,
 522        .phys_pkg_id                    = numaq_phys_pkg_id,
 523        .mps_oem_check                  = numaq_mps_oem_check,
 524
 525        .get_apic_id                    = numaq_get_apic_id,
 526        .set_apic_id                    = NULL,
 527        .apic_id_mask                   = 0x0F << 24,
 528
 529        .cpu_mask_to_apicid             = numaq_cpu_mask_to_apicid,
 530        .cpu_mask_to_apicid_and         = numaq_cpu_mask_to_apicid_and,
 531
 532        .send_IPI_mask                  = numaq_send_IPI_mask,
 533        .send_IPI_mask_allbutself       = NULL,
 534        .send_IPI_allbutself            = numaq_send_IPI_allbutself,
 535        .send_IPI_all                   = numaq_send_IPI_all,
 536        .send_IPI_self                  = default_send_IPI_self,
 537
 538        .wakeup_secondary_cpu           = wakeup_secondary_cpu_via_nmi,
 539        .trampoline_phys_low            = NUMAQ_TRAMPOLINE_PHYS_LOW,
 540        .trampoline_phys_high           = NUMAQ_TRAMPOLINE_PHYS_HIGH,
 541
 542        /* We don't do anything here because we use NMI's to boot instead */
 543        .wait_for_init_deassert         = NULL,
 544
 545        .smp_callin_clear_local_apic    = numaq_smp_callin_clear_local_apic,
 546        .inquire_remote_apic            = NULL,
 547
 548        .read                           = native_apic_mem_read,
 549        .write                          = native_apic_mem_write,
 550        .icr_read                       = native_apic_icr_read,
 551        .icr_write                      = native_apic_icr_write,
 552        .wait_icr_idle                  = native_apic_wait_icr_idle,
 553        .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
 554};
 555