linux/arch/x86/kernel/apic/vector.c
   1/*
   2 * Local APIC related interfaces to support IOAPIC, MSI, etc.
   3 *
   4 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
   5 *      Moved from arch/x86/kernel/apic/io_apic.c.
   6 * Jiang Liu <jiang.liu@linux.intel.com>
   7 *      Enable support of hierarchical irqdomains
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of the GNU General Public License version 2 as
  11 * published by the Free Software Foundation.
  12 */
  13#include <linux/interrupt.h>
  14#include <linux/irq.h>
  15#include <linux/seq_file.h>
  16#include <linux/init.h>
  17#include <linux/compiler.h>
  18#include <linux/slab.h>
  19#include <asm/irqdomain.h>
  20#include <asm/hw_irq.h>
  21#include <asm/traps.h>
  22#include <asm/apic.h>
  23#include <asm/i8259.h>
  24#include <asm/desc.h>
  25#include <asm/irq_remapping.h>
  26
  27#include <asm/trace/irq_vectors.h>
  28
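/*
 * Per interrupt state maintained by the vector domain:
 * @vector/@cpu is the current assignment, @prev_vector/@prev_cpu hold a
 * not yet cleaned up previous assignment while a move is in progress,
 * and @clist links the entry into the per CPU cleanup_list. The bit
 * fields track managed affinity and the vector reservation mode.
 */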
  29struct apic_chip_data {
  30        struct irq_cfg          hw_irq_cfg;
  31        unsigned int            vector;
  32        unsigned int            prev_vector;
  33        unsigned int            cpu;
  34        unsigned int            prev_cpu;
  35        unsigned int            irq;
  36        struct hlist_node       clist;
  37        unsigned int            move_in_progress        : 1,
  38                                is_managed              : 1,
  39                                can_reserve             : 1,
  40                                has_reserved            : 1;
  41};
  42
  43struct irq_domain *x86_vector_domain;
  44EXPORT_SYMBOL_GPL(x86_vector_domain);
  45static DEFINE_RAW_SPINLOCK(vector_lock);
  46static cpumask_var_t vector_searchmask;
  47static struct irq_chip lapic_controller;
  48static struct irq_matrix *vector_matrix;
  49#ifdef CONFIG_SMP
  50static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
  51#endif
  52
  53void lock_vector_lock(void)
  54{
   55        /* Used so that the online set of CPUs does not change
   56         * during assign_irq_vector().
   57         */
  58        raw_spin_lock(&vector_lock);
  59}
  60
  61void unlock_vector_lock(void)
  62{
  63        raw_spin_unlock(&vector_lock);
  64}
  65
  66void init_irq_alloc_info(struct irq_alloc_info *info,
  67                         const struct cpumask *mask)
  68{
  69        memset(info, 0, sizeof(*info));
  70        info->mask = mask;
  71}
  72
  73void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
  74{
  75        if (src)
  76                *dst = *src;
  77        else
  78                memset(dst, 0, sizeof(*dst));
  79}
  80
  81static struct apic_chip_data *apic_chip_data(struct irq_data *irqd)
  82{
  83        if (!irqd)
  84                return NULL;
  85
  86        while (irqd->parent_data)
  87                irqd = irqd->parent_data;
  88
  89        return irqd->chip_data;
  90}
  91
  92struct irq_cfg *irqd_cfg(struct irq_data *irqd)
  93{
  94        struct apic_chip_data *apicd = apic_chip_data(irqd);
  95
  96        return apicd ? &apicd->hw_irq_cfg : NULL;
  97}
  98EXPORT_SYMBOL_GPL(irqd_cfg);
  99
 100struct irq_cfg *irq_cfg(unsigned int irq)
 101{
 102        return irqd_cfg(irq_get_irq_data(irq));
 103}
 104
 105static struct apic_chip_data *alloc_apic_chip_data(int node)
 106{
 107        struct apic_chip_data *apicd;
 108
 109        apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node);
 110        if (apicd)
 111                INIT_HLIST_NODE(&apicd->clist);
 112        return apicd;
 113}
 114
 115static void free_apic_chip_data(struct apic_chip_data *apicd)
 116{
 117        kfree(apicd);
 118}
 119
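/*
 * Write @vector and the APIC destination id of @cpu into the cached
 * hardware configuration and set the effective affinity to @cpu.
 * Must be called with vector_lock held.
 */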
 120static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
 121                                unsigned int cpu)
 122{
 123        struct apic_chip_data *apicd = apic_chip_data(irqd);
 124
 125        lockdep_assert_held(&vector_lock);
 126
 127        apicd->hw_irq_cfg.vector = vector;
 128        apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
 129        irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
 130        trace_vector_config(irqd->irq, vector, cpu,
 131                            apicd->hw_irq_cfg.dest_apicid);
 132}
 133
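/*
 * Install @newvec on @newcpu in the chip data and vector_irq. A previously
 * assigned vector is either marked for cleanup when its target CPU is
 * still online or freed in the matrix allocator right away when it is not.
 */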
 134static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
 135                               unsigned int newcpu)
 136{
 137        struct apic_chip_data *apicd = apic_chip_data(irqd);
 138        struct irq_desc *desc = irq_data_to_desc(irqd);
 139        bool managed = irqd_affinity_is_managed(irqd);
 140
 141        lockdep_assert_held(&vector_lock);
 142
 143        trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
 144                            apicd->cpu);
 145
 146        /*
 147         * If there is no vector associated or if the associated vector is
  148         * the shutdown vector, which is assigned so that PCI/MSI
  149         * shutdown mode works, then there is nothing to release. Clear out
 150         * prev_vector for this and the offlined target case.
 151         */
 152        apicd->prev_vector = 0;
 153        if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
 154                goto setnew;
 155        /*
 156         * If the target CPU of the previous vector is online, then mark
 157         * the vector as move in progress and store it for cleanup when the
 158         * first interrupt on the new vector arrives. If the target CPU is
 159         * offline then the regular release mechanism via the cleanup
 160         * vector is not possible and the vector can be immediately freed
 161         * in the underlying matrix allocator.
 162         */
 163        if (cpu_online(apicd->cpu)) {
 164                apicd->move_in_progress = true;
 165                apicd->prev_vector = apicd->vector;
 166                apicd->prev_cpu = apicd->cpu;
 167        } else {
 168                irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
 169                                managed);
 170        }
 171
 172setnew:
 173        apicd->vector = newvec;
 174        apicd->cpu = newcpu;
 175        BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
 176        per_cpu(vector_irq, newcpu)[newvec] = desc;
 177}
 178
 179static void vector_assign_managed_shutdown(struct irq_data *irqd)
 180{
 181        unsigned int cpu = cpumask_first(cpu_online_mask);
 182
 183        apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu);
 184}
 185
 186static int reserve_managed_vector(struct irq_data *irqd)
 187{
 188        const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
 189        struct apic_chip_data *apicd = apic_chip_data(irqd);
 190        unsigned long flags;
 191        int ret;
 192
 193        raw_spin_lock_irqsave(&vector_lock, flags);
 194        apicd->is_managed = true;
 195        ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
 196        raw_spin_unlock_irqrestore(&vector_lock, flags);
 197        trace_vector_reserve_managed(irqd->irq, ret);
 198        return ret;
 199}
 200
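/*
 * Put the interrupt into reservation mode: account a global reservation in
 * the matrix and park the interrupt on the shutdown vector until it is
 * activated and gets a real vector assigned.
 */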
 201static void reserve_irq_vector_locked(struct irq_data *irqd)
 202{
 203        struct apic_chip_data *apicd = apic_chip_data(irqd);
 204
 205        irq_matrix_reserve(vector_matrix);
 206        apicd->can_reserve = true;
 207        apicd->has_reserved = true;
 208        irqd_set_can_reserve(irqd);
 209        trace_vector_reserve(irqd->irq, 0);
 210        vector_assign_managed_shutdown(irqd);
 211}
 212
 213static int reserve_irq_vector(struct irq_data *irqd)
 214{
 215        unsigned long flags;
 216
 217        raw_spin_lock_irqsave(&vector_lock, flags);
 218        reserve_irq_vector_locked(irqd);
 219        raw_spin_unlock_irqrestore(&vector_lock, flags);
 220        return 0;
 221}
 222
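/*
 * Allocate a vector from the matrix for a CPU in @dest and program it into
 * the chip data and the hardware configuration. The current assignment is
 * kept when its target CPU is online and part of @dest.
 */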
 223static int
 224assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
 225{
 226        struct apic_chip_data *apicd = apic_chip_data(irqd);
 227        bool resvd = apicd->has_reserved;
 228        unsigned int cpu = apicd->cpu;
 229        int vector = apicd->vector;
 230
 231        lockdep_assert_held(&vector_lock);
 232
 233        /*
 234         * If the current target CPU is online and in the new requested
 235         * affinity mask, there is no point in moving the interrupt from
 236         * one CPU to another.
 237         */
 238        if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
 239                return 0;
 240
 241        /*
 242         * Careful here. @apicd might either have move_in_progress set or
 243         * be enqueued for cleanup. Assigning a new vector would either
 244         * leave a stale vector on some CPU around or in case of a pending
 245         * cleanup corrupt the hlist.
 246         */
 247        if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
 248                return -EBUSY;
 249
 250        vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
 251        trace_vector_alloc(irqd->irq, vector, resvd, vector);
 252        if (vector < 0)
 253                return vector;
 254        apic_update_vector(irqd, vector, cpu);
 255        apic_update_irq_cfg(irqd, vector, cpu);
 256
 257        return 0;
 258}
 259
 260static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest)
 261{
 262        unsigned long flags;
 263        int ret;
 264
 265        raw_spin_lock_irqsave(&vector_lock, flags);
 266        cpumask_and(vector_searchmask, dest, cpu_online_mask);
 267        ret = assign_vector_locked(irqd, vector_searchmask);
 268        raw_spin_unlock_irqrestore(&vector_lock, flags);
 269        return ret;
 270}
 271
 272static int assign_irq_vector_any_locked(struct irq_data *irqd)
 273{
 274        /* Get the affinity mask - either irq_default_affinity or (user) set */
 275        const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
 276        int node = irq_data_get_node(irqd);
 277
 278        if (node == NUMA_NO_NODE)
 279                goto all;
 280        /* Try the intersection of @affmsk and node mask */
 281        cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
 282        if (!assign_vector_locked(irqd, vector_searchmask))
 283                return 0;
 284        /* Try the node mask */
 285        if (!assign_vector_locked(irqd, cpumask_of_node(node)))
 286                return 0;
 287all:
 288        /* Try the full affinity mask */
 289        cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
 290        if (!assign_vector_locked(irqd, vector_searchmask))
 291                return 0;
 292        /* Try the full online mask */
 293        return assign_vector_locked(irqd, cpu_online_mask);
 294}
 295
 296static int
 297assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
 298{
 299        if (irqd_affinity_is_managed(irqd))
 300                return reserve_managed_vector(irqd);
 301        if (info->mask)
 302                return assign_irq_vector(irqd, info->mask);
 303        /*
 304         * Make only a global reservation with no guarantee. A real vector
 305         * is associated at activation time.
 306         */
 307        return reserve_irq_vector(irqd);
 308}
 309
 310static int
 311assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
 312{
 313        const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
 314        struct apic_chip_data *apicd = apic_chip_data(irqd);
 315        int vector, cpu;
 316
 317        cpumask_and(vector_searchmask, dest, affmsk);
 318
  319        /* set_affinity might call here even though nothing needs to change */
 320        if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
 321                return 0;
 322        vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
 323                                          &cpu);
 324        trace_vector_alloc_managed(irqd->irq, vector, vector);
 325        if (vector < 0)
 326                return vector;
 327        apic_update_vector(irqd, vector, cpu);
 328        apic_update_irq_cfg(irqd, vector, cpu);
 329        return 0;
 330}
 331
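/*
 * Release the current vector and, if a move is pending, the previous one as
 * well: clear the vector_irq entries, free the vectors in the matrix and
 * remove the entry from the cleanup list. Must be called with vector_lock
 * held.
 */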
 332static void clear_irq_vector(struct irq_data *irqd)
 333{
 334        struct apic_chip_data *apicd = apic_chip_data(irqd);
 335        bool managed = irqd_affinity_is_managed(irqd);
 336        unsigned int vector = apicd->vector;
 337
 338        lockdep_assert_held(&vector_lock);
 339
 340        if (!vector)
 341                return;
 342
 343        trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
 344                           apicd->prev_cpu);
 345
 346        per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
 347        irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
 348        apicd->vector = 0;
 349
 350        /* Clean up move in progress */
 351        vector = apicd->prev_vector;
 352        if (!vector)
 353                return;
 354
 355        per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
 356        irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
 357        apicd->prev_vector = 0;
 358        apicd->move_in_progress = 0;
 359        hlist_del_init(&apicd->clist);
 360}
 361
 362static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
 363{
 364        struct apic_chip_data *apicd = apic_chip_data(irqd);
 365        unsigned long flags;
 366
 367        trace_vector_deactivate(irqd->irq, apicd->is_managed,
 368                                apicd->can_reserve, false);
 369
 370        /* Regular fixed assigned interrupt */
 371        if (!apicd->is_managed && !apicd->can_reserve)
 372                return;
 373        /* If the interrupt has a global reservation, nothing to do */
 374        if (apicd->has_reserved)
 375                return;
 376
 377        raw_spin_lock_irqsave(&vector_lock, flags);
 378        clear_irq_vector(irqd);
 379        if (apicd->can_reserve)
 380                reserve_irq_vector_locked(irqd);
 381        else
 382                vector_assign_managed_shutdown(irqd);
 383        raw_spin_unlock_irqrestore(&vector_lock, flags);
 384}
 385
 386static int activate_reserved(struct irq_data *irqd)
 387{
 388        struct apic_chip_data *apicd = apic_chip_data(irqd);
 389        int ret;
 390
 391        ret = assign_irq_vector_any_locked(irqd);
 392        if (!ret) {
 393                apicd->has_reserved = false;
 394                /*
 395                 * Core might have disabled reservation mode after
 396                 * allocating the irq descriptor. Ideally this should
 397                 * happen before allocation time, but that would require
 398                 * completely convoluted ways of transporting that
 399                 * information.
 400                 */
 401                if (!irqd_can_reserve(irqd))
 402                        apicd->can_reserve = false;
 403        }
 404        return ret;
 405}
 406
 407static int activate_managed(struct irq_data *irqd)
 408{
 409        const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
 410        int ret;
 411
 412        cpumask_and(vector_searchmask, dest, cpu_online_mask);
 413        if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
 414                /* Something in the core code broke! Survive gracefully */
 415                pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
 416                return -EINVAL;
 417        }
 418
 419        ret = assign_managed_vector(irqd, vector_searchmask);
 420        /*
 421         * This should not happen. The vector reservation got buggered.  Handle
 422         * it gracefully.
 423         */
 424        if (WARN_ON_ONCE(ret < 0)) {
 425                pr_err("Managed startup irq %u, no vector available\n",
 426                       irqd->irq);
 427        }
 428        return ret;
 429}
 430
 431static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
 432                               bool reserve)
 433{
 434        struct apic_chip_data *apicd = apic_chip_data(irqd);
 435        unsigned long flags;
 436        int ret = 0;
 437
 438        trace_vector_activate(irqd->irq, apicd->is_managed,
 439                              apicd->can_reserve, reserve);
 440
 441        /* Nothing to do for fixed assigned vectors */
 442        if (!apicd->can_reserve && !apicd->is_managed)
 443                return 0;
 444
 445        raw_spin_lock_irqsave(&vector_lock, flags);
 446        if (reserve || irqd_is_managed_and_shutdown(irqd))
 447                vector_assign_managed_shutdown(irqd);
 448        else if (apicd->is_managed)
 449                ret = activate_managed(irqd);
 450        else if (apicd->has_reserved)
 451                ret = activate_reserved(irqd);
 452        raw_spin_unlock_irqrestore(&vector_lock, flags);
 453        return ret;
 454}
 455
 456static void vector_free_reserved_and_managed(struct irq_data *irqd)
 457{
 458        const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
 459        struct apic_chip_data *apicd = apic_chip_data(irqd);
 460
 461        trace_vector_teardown(irqd->irq, apicd->is_managed,
 462                              apicd->has_reserved);
 463
 464        if (apicd->has_reserved)
 465                irq_matrix_remove_reserved(vector_matrix);
 466        if (apicd->is_managed)
 467                irq_matrix_remove_managed(vector_matrix, dest);
 468}
 469
 470static void x86_vector_free_irqs(struct irq_domain *domain,
 471                                 unsigned int virq, unsigned int nr_irqs)
 472{
 473        struct apic_chip_data *apicd;
 474        struct irq_data *irqd;
 475        unsigned long flags;
 476        int i;
 477
 478        for (i = 0; i < nr_irqs; i++) {
 479                irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 480                if (irqd && irqd->chip_data) {
 481                        raw_spin_lock_irqsave(&vector_lock, flags);
 482                        clear_irq_vector(irqd);
 483                        vector_free_reserved_and_managed(irqd);
 484                        apicd = irqd->chip_data;
 485                        irq_domain_reset_irq_data(irqd);
 486                        raw_spin_unlock_irqrestore(&vector_lock, flags);
 487                        free_apic_chip_data(apicd);
 488                }
 489        }
 490}
 491
 492static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
 493                                    struct apic_chip_data *apicd)
 494{
 495        unsigned long flags;
 496        bool realloc = false;
 497
 498        apicd->vector = ISA_IRQ_VECTOR(virq);
 499        apicd->cpu = 0;
 500
 501        raw_spin_lock_irqsave(&vector_lock, flags);
 502        /*
 503         * If the interrupt is activated, then it must stay at this vector
 504         * position. That's usually the timer interrupt (0).
 505         */
 506        if (irqd_is_activated(irqd)) {
 507                trace_vector_setup(virq, true, 0);
 508                apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
 509        } else {
 510                /* Release the vector */
 511                apicd->can_reserve = true;
 512                irqd_set_can_reserve(irqd);
 513                clear_irq_vector(irqd);
 514                realloc = true;
 515        }
 516        raw_spin_unlock_irqrestore(&vector_lock, flags);
 517        return realloc;
 518}
 519
 520static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 521                                 unsigned int nr_irqs, void *arg)
 522{
 523        struct irq_alloc_info *info = arg;
 524        struct apic_chip_data *apicd;
 525        struct irq_data *irqd;
 526        int i, err, node;
 527
 528        if (disable_apic)
 529                return -ENXIO;
 530
 531        /* Currently vector allocator can't guarantee contiguous allocations */
 532        if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
 533                return -ENOSYS;
 534
 535        for (i = 0; i < nr_irqs; i++) {
 536                irqd = irq_domain_get_irq_data(domain, virq + i);
 537                BUG_ON(!irqd);
 538                node = irq_data_get_node(irqd);
 539                WARN_ON_ONCE(irqd->chip_data);
 540                apicd = alloc_apic_chip_data(node);
 541                if (!apicd) {
 542                        err = -ENOMEM;
 543                        goto error;
 544                }
 545
 546                apicd->irq = virq + i;
 547                irqd->chip = &lapic_controller;
 548                irqd->chip_data = apicd;
 549                irqd->hwirq = virq + i;
 550                irqd_set_single_target(irqd);
 551                /*
 552                 * Legacy vectors are already assigned when the IOAPIC
 553                 * takes them over. They stay on the same vector. This is
 554                 * required for check_timer() to work correctly as it might
 555                 * switch back to legacy mode. Only update the hardware
 556                 * config.
 557                 */
 558                if (info->flags & X86_IRQ_ALLOC_LEGACY) {
 559                        if (!vector_configure_legacy(virq + i, irqd, apicd))
 560                                continue;
 561                }
 562
 563                err = assign_irq_vector_policy(irqd, info);
 564                trace_vector_setup(virq + i, false, err);
 565                if (err) {
 566                        irqd->chip_data = NULL;
 567                        free_apic_chip_data(apicd);
 568                        goto error;
 569                }
 570        }
 571
 572        return 0;
 573
 574error:
 575        x86_vector_free_irqs(domain, virq, i);
 576        return err;
 577}
 578
 579#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
 580static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
 581                                  struct irq_data *irqd, int ind)
 582{
 583        struct apic_chip_data apicd;
 584        unsigned long flags;
 585        int irq;
 586
 587        if (!irqd) {
 588                irq_matrix_debug_show(m, vector_matrix, ind);
 589                return;
 590        }
 591
 592        irq = irqd->irq;
 593        if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) {
 594                seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq));
 595                seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, "");
 596                return;
 597        }
 598
 599        if (!irqd->chip_data) {
 600                seq_printf(m, "%*sVector: Not assigned\n", ind, "");
 601                return;
 602        }
 603
 604        raw_spin_lock_irqsave(&vector_lock, flags);
 605        memcpy(&apicd, irqd->chip_data, sizeof(apicd));
 606        raw_spin_unlock_irqrestore(&vector_lock, flags);
 607
 608        seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector);
 609        seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu);
 610        if (apicd.prev_vector) {
 611                seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector);
 612                seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu);
 613        }
 614        seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0);
 615        seq_printf(m, "%*sis_managed:       %u\n", ind, "", apicd.is_managed ? 1 : 0);
 616        seq_printf(m, "%*scan_reserve:      %u\n", ind, "", apicd.can_reserve ? 1 : 0);
 617        seq_printf(m, "%*shas_reserved:     %u\n", ind, "", apicd.has_reserved ? 1 : 0);
 618        seq_printf(m, "%*scleanup_pending:  %u\n", ind, "", !hlist_unhashed(&apicd.clist));
 619}
 620#endif
 621
 622static const struct irq_domain_ops x86_vector_domain_ops = {
 623        .alloc          = x86_vector_alloc_irqs,
 624        .free           = x86_vector_free_irqs,
 625        .activate       = x86_vector_activate,
 626        .deactivate     = x86_vector_deactivate,
 627#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
 628        .debug_show     = x86_vector_debug_show,
 629#endif
 630};
 631
 632int __init arch_probe_nr_irqs(void)
 633{
 634        int nr;
 635
 636        if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
 637                nr_irqs = NR_VECTORS * nr_cpu_ids;
 638
 639        nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
 640#if defined(CONFIG_PCI_MSI)
 641        /*
 642         * for MSI and HT dyn irq
 643         */
 644        if (gsi_top <= NR_IRQS_LEGACY)
 645                nr +=  8 * nr_cpu_ids;
 646        else
 647                nr += gsi_top * 16;
 648#endif
 649        if (nr < nr_irqs)
 650                nr_irqs = nr;
 651
 652        /*
 653         * We don't know if PIC is present at this point so we need to do
 654         * probe() to get the right number of legacy IRQs.
 655         */
 656        return legacy_pic->probe();
 657}
 658
 659void lapic_assign_legacy_vector(unsigned int irq, bool replace)
 660{
 661        /*
  662         * Use assign system here so it won't get accounted as allocated
  663         * and movable in the cpu hotplug check and it prevents managed
 664         * irq reservation from touching it.
 665         */
 666        irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
 667}
 668
 669void __init lapic_assign_system_vectors(void)
 670{
 671        unsigned int i, vector = 0;
 672
 673        for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
 674                irq_matrix_assign_system(vector_matrix, vector, false);
 675
 676        if (nr_legacy_irqs() > 1)
 677                lapic_assign_legacy_vector(PIC_CASCADE_IR, false);
 678
  679        /* System vectors are reserved, bring the vector matrix online */
 680        irq_matrix_online(vector_matrix);
 681
 682        /* Mark the preallocated legacy interrupts */
 683        for (i = 0; i < nr_legacy_irqs(); i++) {
 684                if (i != PIC_CASCADE_IR)
 685                        irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
 686        }
 687}
 688
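/*
 * Create the "VECTOR" irqdomain, make it the default host and allocate the
 * matrix allocator which manages the vectors between FIRST_EXTERNAL_VECTOR
 * and FIRST_SYSTEM_VECTOR on all CPUs.
 */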
 689int __init arch_early_irq_init(void)
 690{
 691        struct fwnode_handle *fn;
 692
 693        fn = irq_domain_alloc_named_fwnode("VECTOR");
 694        BUG_ON(!fn);
 695        x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
 696                                                   NULL);
 697        BUG_ON(x86_vector_domain == NULL);
 698        irq_domain_free_fwnode(fn);
 699        irq_set_default_host(x86_vector_domain);
 700
 701        arch_init_msi_domain(x86_vector_domain);
 702
 703        BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 704
 705        /*
 706         * Allocate the vector matrix allocator data structure and limit the
 707         * search area.
 708         */
 709        vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR,
 710                                         FIRST_SYSTEM_VECTOR);
 711        BUG_ON(!vector_matrix);
 712
 713        return arch_early_ioapic_init();
 714}
 715
 716#ifdef CONFIG_SMP
 717
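/*
 * Return the irq descriptor which has to be installed in vector_irq[] of an
 * upcoming CPU for @vector. Only legacy PIC handled interrupts are
 * replicated to every CPU; all other vectors start out as VECTOR_UNUSED.
 */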
 718static struct irq_desc *__setup_vector_irq(int vector)
 719{
 720        int isairq = vector - ISA_IRQ_VECTOR(0);
 721
 722        /* Check whether the irq is in the legacy space */
 723        if (isairq < 0 || isairq >= nr_legacy_irqs())
 724                return VECTOR_UNUSED;
 725        /* Check whether the irq is handled by the IOAPIC */
 726        if (test_bit(isairq, &io_apic_irqs))
 727                return VECTOR_UNUSED;
 728        return irq_to_desc(isairq);
 729}
 730
 731/* Online the local APIC infrastructure and initialize the vectors */
 732void lapic_online(void)
 733{
 734        unsigned int vector;
 735
 736        lockdep_assert_held(&vector_lock);
 737
 738        /* Online the vector matrix array for this CPU */
 739        irq_matrix_online(vector_matrix);
 740
 741        /*
 742         * The interrupt affinity logic never targets interrupts to offline
  743         * CPUs. The exceptions are the legacy PIC interrupts. In general
 744         * they are only targeted to CPU0, but depending on the platform
 745         * they can be distributed to any online CPU in hardware. The
 746         * kernel has no influence on that. So all active legacy vectors
  747         * must be installed on all CPUs. All non-legacy interrupts can be
 748         * cleared.
 749         */
 750        for (vector = 0; vector < NR_VECTORS; vector++)
 751                this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
 752}
 753
 754void lapic_offline(void)
 755{
 756        lock_vector_lock();
 757        irq_matrix_offline(vector_matrix);
 758        unlock_vector_lock();
 759}
 760
 761static int apic_set_affinity(struct irq_data *irqd,
 762                             const struct cpumask *dest, bool force)
 763{
 764        struct apic_chip_data *apicd = apic_chip_data(irqd);
 765        int err;
 766
 767        /*
 768         * Core code can call here for inactive interrupts. For inactive
 769         * interrupts which use managed or reservation mode there is no
 770         * point in going through the vector assignment right now as the
 771         * activation will assign a vector which fits the destination
 772         * cpumask. Let the core code store the destination mask and be
 773         * done with it.
 774         */
 775        if (!irqd_is_activated(irqd) &&
 776            (apicd->is_managed || apicd->can_reserve))
 777                return IRQ_SET_MASK_OK;
 778
 779        raw_spin_lock(&vector_lock);
 780        cpumask_and(vector_searchmask, dest, cpu_online_mask);
 781        if (irqd_affinity_is_managed(irqd))
 782                err = assign_managed_vector(irqd, vector_searchmask);
 783        else
 784                err = assign_vector_locked(irqd, vector_searchmask);
 785        raw_spin_unlock(&vector_lock);
 786        return err ? err : IRQ_SET_MASK_OK;
 787}
 788
 789#else
 790# define apic_set_affinity      NULL
 791#endif
 792
 793static int apic_retrigger_irq(struct irq_data *irqd)
 794{
 795        struct apic_chip_data *apicd = apic_chip_data(irqd);
 796        unsigned long flags;
 797
 798        raw_spin_lock_irqsave(&vector_lock, flags);
 799        apic->send_IPI(apicd->cpu, apicd->vector);
 800        raw_spin_unlock_irqrestore(&vector_lock, flags);
 801
 802        return 1;
 803}
 804
 805void apic_ack_irq(struct irq_data *irqd)
 806{
 807        irq_move_irq(irqd);
 808        ack_APIC_irq();
 809}
 810
 811void apic_ack_edge(struct irq_data *irqd)
 812{
 813        irq_complete_move(irqd_cfg(irqd));
 814        apic_ack_irq(irqd);
 815}
 816
 817static struct irq_chip lapic_controller = {
 818        .name                   = "APIC",
 819        .irq_ack                = apic_ack_edge,
 820        .irq_set_affinity       = apic_set_affinity,
 821        .irq_retrigger          = apic_retrigger_irq,
 822};
 823
 824#ifdef CONFIG_SMP
 825
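/*
 * Release the previous vector of a completed move: free it in the matrix,
 * clear the vector_irq entry on the old target CPU and unhash the cleanup
 * list entry.
 */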
 826static void free_moved_vector(struct apic_chip_data *apicd)
 827{
 828        unsigned int vector = apicd->prev_vector;
 829        unsigned int cpu = apicd->prev_cpu;
 830        bool managed = apicd->is_managed;
 831
 832        /*
 833         * This should never happen. Managed interrupts are not
 834         * migrated except on CPU down, which does not involve the
 835         * cleanup vector. But try to keep the accounting correct
 836         * nevertheless.
 837         */
 838        WARN_ON_ONCE(managed);
 839
 840        trace_vector_free_moved(apicd->irq, cpu, vector, managed);
 841        irq_matrix_free(vector_matrix, cpu, vector, managed);
 842        per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
 843        hlist_del_init(&apicd->clist);
 844        apicd->prev_vector = 0;
 845        apicd->move_in_progress = 0;
 846}
 847
 848asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
 849{
 850        struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
 851        struct apic_chip_data *apicd;
 852        struct hlist_node *tmp;
 853
 854        entering_ack_irq();
 855        /* Prevent vectors vanishing under us */
 856        raw_spin_lock(&vector_lock);
 857
 858        hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
 859                unsigned int irr, vector = apicd->prev_vector;
 860
 861                /*
 862                 * Paranoia: Check if the vector that needs to be cleaned
  863                 * up is still pending in the APIC's IRR. If so, then this is
 864                 * not the best time to clean it up. Clean it up in the
 865                 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
 866                 * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
 867                 * priority external vector, so on return from this
 868                 * interrupt the device interrupt will happen first.
 869                 */
 870                irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
 871                if (irr & (1U << (vector % 32))) {
 872                        apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
 873                        continue;
 874                }
 875                free_moved_vector(apicd);
 876        }
 877
 878        raw_spin_unlock(&vector_lock);
 879        exiting_irq();
 880}
 881
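/*
 * Queue the interrupt on the cleanup list of the previous target CPU and
 * kick that CPU with IRQ_MOVE_CLEANUP_VECTOR. If the CPU went offline in
 * the meantime the previous vector is simply forgotten.
 */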
 882static void __send_cleanup_vector(struct apic_chip_data *apicd)
 883{
 884        unsigned int cpu;
 885
 886        raw_spin_lock(&vector_lock);
 887        apicd->move_in_progress = 0;
 888        cpu = apicd->prev_cpu;
 889        if (cpu_online(cpu)) {
 890                hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
 891                apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
 892        } else {
 893                apicd->prev_vector = 0;
 894        }
 895        raw_spin_unlock(&vector_lock);
 896}
 897
 898void send_cleanup_vector(struct irq_cfg *cfg)
 899{
 900        struct apic_chip_data *apicd;
 901
 902        apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
 903        if (apicd->move_in_progress)
 904                __send_cleanup_vector(apicd);
 905}
 906
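/*
 * Invoked from the interrupt ack path with the vector the interrupt
 * arrived on. Once the first interrupt shows up on the new vector on the
 * new target CPU, the previous vector can be cleaned up.
 */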
 907static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
 908{
 909        struct apic_chip_data *apicd;
 910
 911        apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
 912        if (likely(!apicd->move_in_progress))
 913                return;
 914
 915        if (vector == apicd->vector && apicd->cpu == smp_processor_id())
 916                __send_cleanup_vector(apicd);
 917}
 918
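/*
 * The low level interrupt entry code stores the ones complement of the
 * vector number in pt_regs->orig_ax, so ~orig_ax recovers the vector on
 * which this interrupt arrived.
 */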
 919void irq_complete_move(struct irq_cfg *cfg)
 920{
 921        __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 922}
 923
 924/*
 925 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 926 */
 927void irq_force_complete_move(struct irq_desc *desc)
 928{
 929        struct apic_chip_data *apicd;
 930        struct irq_data *irqd;
 931        unsigned int vector;
 932
 933        /*
 934         * The function is called for all descriptors regardless of which
 935         * irqdomain they belong to. For example if an IRQ is provided by
 936         * an irq_chip as part of a GPIO driver, the chip data for that
 937         * descriptor is specific to the irq_chip in question.
 938         *
 939         * Check first that the chip_data is what we expect
 940         * (apic_chip_data) before touching it any further.
 941         */
 942        irqd = irq_domain_get_irq_data(x86_vector_domain,
 943                                       irq_desc_get_irq(desc));
 944        if (!irqd)
 945                return;
 946
 947        raw_spin_lock(&vector_lock);
 948        apicd = apic_chip_data(irqd);
 949        if (!apicd)
 950                goto unlock;
 951
 952        /*
 953         * If prev_vector is empty, no action required.
 954         */
 955        vector = apicd->prev_vector;
 956        if (!vector)
 957                goto unlock;
 958
 959        /*
 960         * This is tricky. If the cleanup of the old vector has not been
 961         * done yet, then the following setaffinity call will fail with
 962         * -EBUSY. This can leave the interrupt in a stale state.
 963         *
 964         * All CPUs are stuck in stop machine with interrupts disabled so
 965         * calling __irq_complete_move() would be completely pointless.
 966         *
 967         * 1) The interrupt is in move_in_progress state. That means that we
 968         *    have not seen an interrupt since the io_apic was reprogrammed to
 969         *    the new vector.
 970         *
 971         * 2) The interrupt has fired on the new vector, but the cleanup IPIs
 972         *    have not been processed yet.
 973         */
 974        if (apicd->move_in_progress) {
 975                /*
 976                 * In theory there is a race:
 977                 *
 978                 * set_ioapic(new_vector) <-- Interrupt is raised before update
 979                 *                            is effective, i.e. it's raised on
 980                 *                            the old vector.
 981                 *
 982                 * So if the target cpu cannot handle that interrupt before
 983                 * the old vector is cleaned up, we get a spurious interrupt
 984                 * and in the worst case the ioapic irq line becomes stale.
 985                 *
  986                 * But in case of cpu hotplug this should be a non-issue
  987                 * because if the affinity update happens right before all
  988                 * cpus rendezvous in stop machine, there is no way that the
  989                 * interrupt can be blocked on the target cpu because all cpus
  990                 * loop first with interrupts enabled in stop machine, so the
  991                 * old vector is not yet cleaned up when the interrupt fires.
 992                 *
 993                 * So the only way to run into this issue is if the delivery
 994                 * of the interrupt on the apic/system bus would be delayed
 995                 * beyond the point where the target cpu disables interrupts
 996                 * in stop machine. I doubt that it can happen, but at least
  997                 * there is a theoretical chance. Virtualization might be
 998                 * able to expose this, but AFAICT the IOAPIC emulation is not
 999                 * as stupid as the real hardware.
1000                 *
1001                 * Anyway, there is nothing we can do about that at this point
 1002                 * w/o refactoring the whole fixup_irqs() business completely.
1003                 * We print at least the irq number and the old vector number,
1004                 * so we have the necessary information when a problem in that
1005                 * area arises.
1006                 */
1007                pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
1008                        irqd->irq, vector);
1009        }
1010        free_moved_vector(apicd);
1011unlock:
1012        raw_spin_unlock(&vector_lock);
1013}
1014
1015#ifdef CONFIG_HOTPLUG_CPU
1016/*
 1017 * Note, this is not accurate accounting, but it is good enough to
 1018 * prevent the actual interrupt move from running out of vectors.
1019 */
1020int lapic_can_unplug_cpu(void)
1021{
1022        unsigned int rsvd, avl, tomove, cpu = smp_processor_id();
1023        int ret = 0;
1024
1025        raw_spin_lock(&vector_lock);
1026        tomove = irq_matrix_allocated(vector_matrix);
1027        avl = irq_matrix_available(vector_matrix, true);
1028        if (avl < tomove) {
1029                pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n",
1030                        cpu, tomove, avl);
1031                ret = -ENOSPC;
1032                goto out;
1033        }
1034        rsvd = irq_matrix_reserved(vector_matrix);
1035        if (avl < rsvd) {
1036                pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n",
1037                        rsvd, avl);
1038        }
1039out:
1040        raw_spin_unlock(&vector_lock);
1041        return ret;
1042}
1043#endif /* HOTPLUG_CPU */
1044#endif /* SMP */
1045
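/*
 * Dump one 256 bit APIC register field (ISR, TMR or IRR) as eight
 * consecutive 32 bit words.
 */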
1046static void __init print_APIC_field(int base)
1047{
1048        int i;
1049
1050        printk(KERN_DEBUG);
1051
1052        for (i = 0; i < 8; i++)
1053                pr_cont("%08x", apic_read(base + i*0x10));
1054
1055        pr_cont("\n");
1056}
1057
1058static void __init print_local_APIC(void *dummy)
1059{
1060        unsigned int i, v, ver, maxlvt;
1061        u64 icr;
1062
1063        pr_debug("printing local APIC contents on CPU#%d/%d:\n",
1064                 smp_processor_id(), hard_smp_processor_id());
1065        v = apic_read(APIC_ID);
1066        pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
1067        v = apic_read(APIC_LVR);
1068        pr_info("... APIC VERSION: %08x\n", v);
1069        ver = GET_APIC_VERSION(v);
1070        maxlvt = lapic_get_maxlvt();
1071
1072        v = apic_read(APIC_TASKPRI);
1073        pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1074
1075        /* !82489DX */
1076        if (APIC_INTEGRATED(ver)) {
1077                if (!APIC_XAPIC(ver)) {
1078                        v = apic_read(APIC_ARBPRI);
1079                        pr_debug("... APIC ARBPRI: %08x (%02x)\n",
1080                                 v, v & APIC_ARBPRI_MASK);
1081                }
1082                v = apic_read(APIC_PROCPRI);
1083                pr_debug("... APIC PROCPRI: %08x\n", v);
1084        }
1085
1086        /*
1087         * Remote read supported only in the 82489DX and local APIC for
1088         * Pentium processors.
1089         */
1090        if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1091                v = apic_read(APIC_RRR);
1092                pr_debug("... APIC RRR: %08x\n", v);
1093        }
1094
1095        v = apic_read(APIC_LDR);
1096        pr_debug("... APIC LDR: %08x\n", v);
1097        if (!x2apic_enabled()) {
1098                v = apic_read(APIC_DFR);
1099                pr_debug("... APIC DFR: %08x\n", v);
1100        }
1101        v = apic_read(APIC_SPIV);
1102        pr_debug("... APIC SPIV: %08x\n", v);
1103
1104        pr_debug("... APIC ISR field:\n");
1105        print_APIC_field(APIC_ISR);
1106        pr_debug("... APIC TMR field:\n");
1107        print_APIC_field(APIC_TMR);
1108        pr_debug("... APIC IRR field:\n");
1109        print_APIC_field(APIC_IRR);
1110
1111        /* !82489DX */
1112        if (APIC_INTEGRATED(ver)) {
1113                /* Due to the Pentium erratum 3AP. */
1114                if (maxlvt > 3)
1115                        apic_write(APIC_ESR, 0);
1116
1117                v = apic_read(APIC_ESR);
1118                pr_debug("... APIC ESR: %08x\n", v);
1119        }
1120
1121        icr = apic_icr_read();
1122        pr_debug("... APIC ICR: %08x\n", (u32)icr);
1123        pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
1124
1125        v = apic_read(APIC_LVTT);
1126        pr_debug("... APIC LVTT: %08x\n", v);
1127
1128        if (maxlvt > 3) {
1129                /* PC is LVT#4. */
1130                v = apic_read(APIC_LVTPC);
1131                pr_debug("... APIC LVTPC: %08x\n", v);
1132        }
1133        v = apic_read(APIC_LVT0);
1134        pr_debug("... APIC LVT0: %08x\n", v);
1135        v = apic_read(APIC_LVT1);
1136        pr_debug("... APIC LVT1: %08x\n", v);
1137
1138        if (maxlvt > 2) {
1139                /* ERR is LVT#3. */
1140                v = apic_read(APIC_LVTERR);
1141                pr_debug("... APIC LVTERR: %08x\n", v);
1142        }
1143
1144        v = apic_read(APIC_TMICT);
1145        pr_debug("... APIC TMICT: %08x\n", v);
1146        v = apic_read(APIC_TMCCT);
1147        pr_debug("... APIC TMCCT: %08x\n", v);
1148        v = apic_read(APIC_TDCR);
1149        pr_debug("... APIC TDCR: %08x\n", v);
1150
1151        if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
1152                v = apic_read(APIC_EFEAT);
1153                maxlvt = (v >> 16) & 0xff;
1154                pr_debug("... APIC EFEAT: %08x\n", v);
1155                v = apic_read(APIC_ECTRL);
1156                pr_debug("... APIC ECTRL: %08x\n", v);
1157                for (i = 0; i < maxlvt; i++) {
1158                        v = apic_read(APIC_EILVTn(i));
1159                        pr_debug("... APIC EILVT%d: %08x\n", i, v);
1160                }
1161        }
1162        pr_cont("\n");
1163}
1164
1165static void __init print_local_APICs(int maxcpu)
1166{
1167        int cpu;
1168
1169        if (!maxcpu)
1170                return;
1171
1172        preempt_disable();
1173        for_each_online_cpu(cpu) {
1174                if (cpu >= maxcpu)
1175                        break;
1176                smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1177        }
1178        preempt_enable();
1179}
1180
1181static void __init print_PIC(void)
1182{
1183        unsigned int v;
1184        unsigned long flags;
1185
1186        if (!nr_legacy_irqs())
1187                return;
1188
1189        pr_debug("\nprinting PIC contents\n");
1190
1191        raw_spin_lock_irqsave(&i8259A_lock, flags);
1192
1193        v = inb(0xa1) << 8 | inb(0x21);
1194        pr_debug("... PIC  IMR: %04x\n", v);
1195
1196        v = inb(0xa0) << 8 | inb(0x20);
1197        pr_debug("... PIC  IRR: %04x\n", v);
1198
1199        outb(0x0b, 0xa0);
1200        outb(0x0b, 0x20);
1201        v = inb(0xa0) << 8 | inb(0x20);
1202        outb(0x0a, 0xa0);
1203        outb(0x0a, 0x20);
1204
1205        raw_spin_unlock_irqrestore(&i8259A_lock, flags);
1206
1207        pr_debug("... PIC  ISR: %04x\n", v);
1208
1209        v = inb(0x4d1) << 8 | inb(0x4d0);
1210        pr_debug("... PIC ELCR: %04x\n", v);
1211}
1212
1213static int show_lapic __initdata = 1;
1214static __init int setup_show_lapic(char *arg)
1215{
1216        int num = -1;
1217
1218        if (strcmp(arg, "all") == 0) {
1219                show_lapic = CONFIG_NR_CPUS;
1220        } else {
1221                get_option(&arg, &num);
1222                if (num >= 0)
1223                        show_lapic = num;
1224        }
1225
1226        return 1;
1227}
1228__setup("show_lapic=", setup_show_lapic);
1229
1230static int __init print_ICs(void)
1231{
1232        if (apic_verbosity == APIC_QUIET)
1233                return 0;
1234
1235        print_PIC();
1236
1237        /* don't print out if apic is not there */
1238        if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1239                return 0;
1240
1241        print_local_APICs(show_lapic);
1242        print_IO_APICs();
1243
1244        return 0;
1245}
1246
1247late_initcall(print_ICs);
1248