linux/arch/x86/kernel/tlb_uv.c
/*
 *      SGI UltraViolet TLB flush routines.
 *
 *      (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
 *
 *      This code is released under the GNU General Public License version 2 or
 *      later.
 */
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>

#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>

static struct bau_control       **uv_bau_table_bases __read_mostly;
static int                      uv_bau_retry_limit __read_mostly;

/* base pnode in this partition */
static int                      uv_partition_base_pnode __read_mostly;

static unsigned long            uv_mmask __read_mostly;

static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);

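/*
 * uv_bau_table_bases[] is indexed by blade number.  Each entry is set up
 * by uv_table_bases_init(); uv_table_bases_finish() then points every
 * cpu's per-cpu bau_control at its blade's shared tables (payload queue,
 * message statuses and activation descriptors).
 */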
/*
 * Determine the first node on a blade.
 */
static int __init blade_to_first_node(int blade)
{
        int node, b;

        for_each_online_node(node) {
                b = uv_node_to_blade_id(node);
                if (blade == b)
                        return node;
        }
        return -1; /* shouldn't happen */
}

/*
 * Determine the apicid of the first cpu on a blade.
 */
static int __init blade_to_first_apicid(int blade)
{
        int cpu;

        for_each_present_cpu(cpu)
                if (blade == uv_cpu_to_blade_id(cpu))
                        return per_cpu(x86_cpu_to_apicid, cpu);
        return -1;
}

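/*
 * uv_reply_to_message() below writes both the Pending bit (bit 'resource')
 * and the corresponding Timeout bit (bit 'resource + UV_SW_ACK_NPENDING')
 * to the SOFTWARE_ACKNOWLEDGE_ALIAS register, so the resource is freed
 * whether or not it had already timed out.
 */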
/*
 * Free a software acknowledge hardware resource by clearing its Pending
 * bit. This will return a reply to the sender.
 * If the message has timed out, a reply has already been sent by the
 * hardware but the resource has not been released. In that case our
 * clear of the Timeout bit (as well) will free the resource. No reply will
 * be sent (the hardware will only do one reply per message).
 */
static void uv_reply_to_message(int resource,
                                struct bau_payload_queue_entry *msg,
                                struct bau_msg_status *msp)
{
        unsigned long dw;

        dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
        msg->replied_to = 1;
        msg->sw_ack_vector = 0;
        if (msp)
                msp->seen_by.bits = 0;
        uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
}

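/*
 * Every cpu on the destination blade runs uv_bau_process_message() for a
 * given message.  Each one marks itself in msp->seen_by (so it flushes at
 * most once per message) and bumps msg->acknowledge_count; the cpu whose
 * increment reaches number_of_cpus frees the software-ack resource via
 * uv_reply_to_message().
 */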
/*
 * Do all the things a cpu should do for a TLB shootdown message.
 * Other cpu's may come here at the same time for this message.
 */
static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
                                   int msg_slot, int sw_ack_slot)
{
        unsigned long this_cpu_mask;
        struct bau_msg_status *msp;
        int cpu;

        msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
        cpu = uv_blade_processor_id();
        msg->number_of_cpus =
                uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
        this_cpu_mask = 1UL << cpu;
        if (msp->seen_by.bits & this_cpu_mask)
                return;
        atomic_or_long(&msp->seen_by.bits, this_cpu_mask);

        if (msg->replied_to == 1)
                return;

        if (msg->address == TLB_FLUSH_ALL) {
                local_flush_tlb();
                __get_cpu_var(ptcstats).alltlb++;
        } else {
                __flush_tlb_one(msg->address);
                __get_cpu_var(ptcstats).onetlb++;
        }

        __get_cpu_var(ptcstats).requestee++;

        atomic_inc_short(&msg->acknowledge_count);
        if (msg->number_of_cpus == msg->acknowledge_count)
                uv_reply_to_message(sw_ack_slot, msg, msp);
}

/*
 * Examine the payload queue on one distribution node to see
 * which messages have not been seen, and which cpu(s) have not seen them.
 *
 * Returns the number of cpu's that have not responded.
 */
static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
{
        struct bau_payload_queue_entry *msg;
        struct bau_msg_status *msp;
        int count = 0;
        int i;
        int j;

        for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
             msg++, i++) {
                if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
                        msp = bau_tablesp->msg_statuses + i;
                        printk(KERN_DEBUG
                               "blade %d: address:%#lx %d of %d, not cpu(s): ",
                               i, msg->address, msg->acknowledge_count,
                               msg->number_of_cpus);
                        for (j = 0; j < msg->number_of_cpus; j++) {
                                if (!((1L << j) & msp->seen_by.bits)) {
                                        count++;
                                        printk("%d ", j);
                                }
                        }
                        printk("\n");
                }
        }
        return count;
}

/*
 * Examine the payload queue on all the distribution nodes to see
 * which messages have not been seen, and which cpu(s) have not seen them.
 *
 * Returns the number of cpu's that have not responded.
 */
static int uv_examine_destinations(struct bau_target_nodemask *distribution)
{
        int sender;
        int i;
        int count = 0;

        sender = smp_processor_id();
        for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) {
                if (!bau_node_isset(i, distribution))
                        continue;
                count += uv_examine_destination(uv_bau_table_bases[i], sender);
        }
        return count;
}

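/*
 * uv_wait_completion() polls this cpu's UV_ACT_STATUS_SIZE-bit field of
 * the activation status MMR (selected by mmr_offset/right_shift) until
 * the hardware reports DESC_STATUS_IDLE; a SOURCE_TIMEOUT or a persistent
 * DESTINATION_TIMEOUT instead causes an early return of FLUSH_RETRY or,
 * after uv_bau_retry_limit examinations, FLUSH_GIVEUP.
 */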
/*
 * wait for completion of a broadcast message
 *
 * return COMPLETE, RETRY or GIVEUP
 */
static int uv_wait_completion(struct bau_desc *bau_desc,
                              unsigned long mmr_offset, int right_shift)
{
        int exams = 0;
        long destination_timeouts = 0;
        long source_timeouts = 0;
        unsigned long descriptor_status;

        while ((descriptor_status = (((unsigned long)
                uv_read_local_mmr(mmr_offset) >>
                        right_shift) & UV_ACT_STATUS_MASK)) !=
                        DESC_STATUS_IDLE) {
                if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
                        source_timeouts++;
                        if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
                                source_timeouts = 0;
                        __get_cpu_var(ptcstats).s_retry++;
                        return FLUSH_RETRY;
                }
                /*
                 * spin here looking for progress at the destinations
                 */
                if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
                        destination_timeouts++;
                        if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
                                /*
                                 * returns number of cpus not responding
                                 */
                                if (uv_examine_destinations
                                    (&bau_desc->distribution) == 0) {
                                        __get_cpu_var(ptcstats).d_retry++;
                                        return FLUSH_RETRY;
                                }
                                exams++;
                                if (exams >= uv_bau_retry_limit) {
                                        printk(KERN_DEBUG
                                               "uv_flush_tlb_others");
                                        printk("giving up on cpu %d\n",
                                               smp_processor_id());
                                        return FLUSH_GIVEUP;
                                }
                                /*
                                 * delays can hang the simulator
                                   udelay(1000);
                                 */
                                destination_timeouts = 0;
                        }
                }
                cpu_relax();
        }
        return FLUSH_COMPLETE;
}

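/*
 * The broadcast itself is started by writing this cpu's index, with the
 * PUSH bit set, to UVH_LB_BAU_SB_ACTIVATION_CONTROL; the write is repeated
 * for as long as uv_wait_completion() asks for FLUSH_RETRY.
 */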
/**
 * uv_flush_send_and_wait
 *
 * Send a broadcast and wait for a broadcast message to complete.
 *
 * The flush_mask contains the cpus the broadcast was sent to.
 *
 * Returns NULL if all remote flushing was done. The mask is zeroed.
 * Returns @flush_mask if some remote flushing remains to be done. The
 * mask will have some bits still set.
 */
const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
                                             struct bau_desc *bau_desc,
                                             struct cpumask *flush_mask)
{
        int completion_status = 0;
        int right_shift;
        int tries = 0;
        int pnode;
        int bit;
        unsigned long mmr_offset;
        unsigned long index;
        cycles_t time1;
        cycles_t time2;

        if (cpu < UV_CPUS_PER_ACT_STATUS) {
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
                right_shift = cpu * UV_ACT_STATUS_SIZE;
        } else {
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
                right_shift =
                    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
        }
        time1 = get_cycles();
        do {
                tries++;
                index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
                        cpu;
                uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
                completion_status = uv_wait_completion(bau_desc, mmr_offset,
                                        right_shift);
        } while (completion_status == FLUSH_RETRY);
        time2 = get_cycles();
        __get_cpu_var(ptcstats).sflush += (time2 - time1);
        if (tries > 1)
                __get_cpu_var(ptcstats).retriesok++;

        if (completion_status == FLUSH_GIVEUP) {
                /*
                 * Cause the caller to do an IPI-style TLB shootdown on
                 * the cpu's, all of which are still in the mask.
                 */
                __get_cpu_var(ptcstats).ptc_i++;
                return flush_mask;
        }

        /*
         * Success, so clear the remote cpu's from the mask so we don't
         * use the IPI method of shootdown on them.
         */
        for_each_cpu(bit, flush_mask) {
                pnode = uv_cpu_to_pnode(bit);
                if (pnode == this_pnode)
                        continue;
                cpumask_clear_cpu(bit, flush_mask);
        }
        if (!cpumask_empty(flush_mask))
                return flush_mask;
        return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);

/**
 * uv_flush_tlb_others - globally purge translation cache of a virtual
 * address or all TLB's
 * @cpumask: mask of all cpu's in which the address is to be removed
 * @mm: mm_struct containing virtual address range
 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
 * @cpu: the current cpu
 *
 * This is the entry point for initiating any UV global TLB shootdown.
 *
 * Purges the translation caches of all specified processors of the given
 * virtual address, or purges all TLB's on specified processors.
 *
 * The caller has derived the cpumask from the mm_struct.  This function
 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
 *
 * The cpumask is converted into a nodemask of the nodes containing
 * the cpus.
 *
 * Note that this function should be called with preemption disabled.
 *
 * Returns NULL if all remote flushing was done.
 * Returns pointer to cpumask if some remote flushing remains to be
 * done.  The returned pointer is valid till preemption is re-enabled.
 */
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
                                          struct mm_struct *mm,
                                          unsigned long va, unsigned int cpu)
{
        struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
        int i;
        int bit;
        int pnode;
        int uv_cpu;
        int this_pnode;
        int locals = 0;
        struct bau_desc *bau_desc;

        cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));

        uv_cpu = uv_blade_processor_id();
        this_pnode = uv_hub_info->pnode;
        bau_desc = __get_cpu_var(bau_control).descriptor_base;
        bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;

        bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);

        i = 0;
        for_each_cpu(bit, flush_mask) {
                pnode = uv_cpu_to_pnode(bit);
                BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
                if (pnode == this_pnode) {
                        locals++;
                        continue;
                }
                bau_node_set(pnode - uv_partition_base_pnode,
                                &bau_desc->distribution);
                i++;
        }
        if (i == 0) {
                /*
                 * no off_node flushing; return status for local node
                 */
                if (locals)
                        return flush_mask;
                else
                        return NULL;
        }
        __get_cpu_var(ptcstats).requestor++;
        __get_cpu_var(ptcstats).ntargeted += i;

        bau_desc->payload.address = va;
        bau_desc->payload.sending_cpu = cpu;

        return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}

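/*
 * The payload queue is circular: the interrupt handler below walks it
 * from bau_msg_head, handling every entry whose sw_ack_vector is
 * non-zero, and wraps from va_queue_last back to va_queue_first.
 */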
/*
 * The BAU message interrupt comes here. (registered by set_intr_gate)
 * See entry_64.S
 *
 * We received a broadcast assist message.
 *
 * Interrupts may have been disabled; this interrupt could represent
 * the receipt of several messages.
 *
 * All cores/threads on this node get this interrupt.
 * The last one to see it does the s/w ack.
 * (the resource will not be freed until noninterruptible cpus see this
 *  interrupt; hardware will time out the s/w ack and reply ERROR)
 */
void uv_bau_message_interrupt(struct pt_regs *regs)
{
        struct bau_payload_queue_entry *va_queue_first;
        struct bau_payload_queue_entry *va_queue_last;
        struct bau_payload_queue_entry *msg;
        struct pt_regs *old_regs = set_irq_regs(regs);
        cycles_t time1;
        cycles_t time2;
        int msg_slot;
        int sw_ack_slot;
        int fw;
        int count = 0;
        unsigned long local_pnode;

        ack_APIC_irq();
        exit_idle();
        irq_enter();

        time1 = get_cycles();

        local_pnode = uv_blade_to_pnode(uv_numa_blade_id());

        va_queue_first = __get_cpu_var(bau_control).va_queue_first;
        va_queue_last = __get_cpu_var(bau_control).va_queue_last;

        msg = __get_cpu_var(bau_control).bau_msg_head;
        while (msg->sw_ack_vector) {
                count++;
                fw = msg->sw_ack_vector;
                msg_slot = msg - va_queue_first;
                sw_ack_slot = ffs(fw) - 1;

                uv_bau_process_message(msg, msg_slot, sw_ack_slot);

                msg++;
                if (msg > va_queue_last)
                        msg = va_queue_first;
                __get_cpu_var(bau_control).bau_msg_head = msg;
        }
        if (!count)
                __get_cpu_var(ptcstats).nomsg++;
        else if (count > 1)
                __get_cpu_var(ptcstats).multmsg++;

        time2 = get_cycles();
        __get_cpu_var(ptcstats).dflush += (time2 - time1);

        irq_exit();
        set_irq_regs(old_regs);
}

/*
 * uv_enable_timeouts
 *
 * Each target blade (i.e. blades that have cpu's) needs to have
 * shootdown message timeouts enabled.  The timeout does not cause
 * an interrupt, but causes an error message to be returned to
 * the sender.
 */
static void uv_enable_timeouts(void)
{
        int blade;
        int nblades;
        int pnode;
        unsigned long mmr_image;

        nblades = uv_num_possible_blades();

        for (blade = 0; blade < nblades; blade++) {
                if (!uv_blade_nr_possible_cpus(blade))
                        continue;

                pnode = uv_blade_to_pnode(blade);
                mmr_image =
                    uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
                /*
                 * Set the timeout period and then lock it in, in three
                 * steps: the period can only be programmed while
                 * SOFT_ACK_MODE is off, so clear the mode, write the
                 * 4-bit period, then set the mode again to capture and
                 * lock it in.
                 */
                mmr_image &= ~((unsigned long)1 <<
                               UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
                uv_write_global_mmr64
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
                /*
                 * Set the 4-bit period.
                 */
                mmr_image &= ~((unsigned long)0xf <<
                        UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
                mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
                             UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
                uv_write_global_mmr64
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
                /*
                 * Subsequent reversals of the timebase bit (3) cause an
                 * immediate timeout of one or all INTD resources as
                 * indicated in bits 2:0 (7 causes all of them to time out).
                 */
                mmr_image |= ((unsigned long)1 <<
                              UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
                uv_write_global_mmr64
                    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
        }
}

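/*
 * seq_file iterator for the statistics file created by uv_ptc_init():
 * positions 0 .. num_possible_cpus()-1 each produce one record, printed
 * by uv_ptc_seq_show() for the corresponding online cpu.
 */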
static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
{
        if (*offset < num_possible_cpus())
                return offset;
        return NULL;
}

static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
{
        (*offset)++;
        if (*offset < num_possible_cpus())
                return offset;
        return NULL;
}

static void uv_ptc_seq_stop(struct seq_file *file, void *data)
{
}

/*
 * Display the statistics through /proc
 * data points to the cpu number
 */
static int uv_ptc_seq_show(struct seq_file *file, void *data)
{
        struct ptc_stats *stat;
        int cpu;

        cpu = *(loff_t *)data;

        if (!cpu) {
                seq_printf(file,
                "# cpu requestor requestee one all sretry dretry ptc_i ");
                seq_printf(file,
                "sw_ack sflush dflush sok dnomsg dmult starget\n");
        }
        if (cpu < num_possible_cpus() && cpu_online(cpu)) {
                stat = &per_cpu(ptcstats, cpu);
                seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
                           cpu, stat->requestor,
                           stat->requestee, stat->onetlb, stat->alltlb,
                           stat->s_retry, stat->d_retry, stat->ptc_i);
                seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
                           uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
                                        UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
                           stat->sflush, stat->dflush,
                           stat->retriesok, stat->nomsg,
                           stat->multmsg, stat->ntargeted);
        }

        return 0;
}

/*
 *  0: display meaning of the statistics
 * >0: retry limit
 */
static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
                                 size_t count, loff_t *data)
{
        long newmode;
        char optstr[64];

        if (count == 0 || count > sizeof(optstr))
                return -EINVAL;
        if (copy_from_user(optstr, user, count))
                return -EFAULT;
        optstr[count - 1] = '\0';
        if (strict_strtoul(optstr, 10, &newmode) < 0) {
                printk(KERN_DEBUG "%s is invalid\n", optstr);
                return -EINVAL;
        }

        if (newmode == 0) {
                printk(KERN_DEBUG "# cpu:      cpu number\n");
                printk(KERN_DEBUG
                "requestor:  times this cpu was the flush requestor\n");
                printk(KERN_DEBUG
                "requestee:  times this cpu was requested to flush its TLBs\n");
                printk(KERN_DEBUG
                "one:        times requested to flush a single address\n");
                printk(KERN_DEBUG
                "all:        times requested to flush all TLB's\n");
                printk(KERN_DEBUG
                "sretry:     number of retries of source-side timeouts\n");
                printk(KERN_DEBUG
                "dretry:     number of retries of destination-side timeouts\n");
                printk(KERN_DEBUG
                "ptc_i:      times UV fell through to IPI-style flushes\n");
                printk(KERN_DEBUG
                "sw_ack:     image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
                printk(KERN_DEBUG
                "sflush_us:  cycles spent in uv_flush_tlb_others()\n");
                printk(KERN_DEBUG
                "dflush_us:  cycles spent in handling flush requests\n");
                printk(KERN_DEBUG "sok:        successes on retry\n");
                printk(KERN_DEBUG "dnomsg:     interrupts with no message\n");
                printk(KERN_DEBUG
                "dmult:      interrupts with multiple messages\n");
                printk(KERN_DEBUG "starget:    nodes targeted\n");
        } else {
                uv_bau_retry_limit = newmode;
                printk(KERN_DEBUG "timeout retry limit:%d\n",
                       uv_bau_retry_limit);
        }

        return count;
}

static const struct seq_operations uv_ptc_seq_ops = {
        .start          = uv_ptc_seq_start,
        .next           = uv_ptc_seq_next,
        .stop           = uv_ptc_seq_stop,
        .show           = uv_ptc_seq_show
};

static int uv_ptc_proc_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &uv_ptc_seq_ops);
}

static const struct file_operations proc_uv_ptc_operations = {
        .open           = uv_ptc_proc_open,
        .read           = seq_read,
        .write          = uv_ptc_proc_write,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

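/*
 * uv_ptc_init() creates the UV_PTC_BASENAME /proc entry: reads go through
 * the seq_file operations above, writes through uv_ptc_proc_write() to
 * print the legend or set uv_bau_retry_limit.
 */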
static int __init uv_ptc_init(void)
{
        struct proc_dir_entry *proc_uv_ptc;

        if (!is_uv_system())
                return 0;

        proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
                                  &proc_uv_ptc_operations);
        if (!proc_uv_ptc) {
                printk(KERN_ERR "unable to create %s proc entry\n",
                       UV_PTC_BASENAME);
                return -EINVAL;
        }
        return 0;
}

/*
 * begin the initialization of the per-blade control structures
 */
static struct bau_control * __init uv_table_bases_init(int blade, int node)
{
        int i;
        struct bau_msg_status *msp;
        struct bau_control *bau_tabp;

        bau_tabp =
            kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
        BUG_ON(!bau_tabp);

        bau_tabp->msg_statuses =
            kmalloc_node(sizeof(struct bau_msg_status) *
                         DEST_Q_SIZE, GFP_KERNEL, node);
        BUG_ON(!bau_tabp->msg_statuses);

        for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
                bau_cpubits_clear(&msp->seen_by, (int)
                                  uv_blade_nr_possible_cpus(blade));

        uv_bau_table_bases[blade] = bau_tabp;

        return bau_tabp;
}

/*
 * finish the initialization of the per-blade control structures
 */
static void __init
uv_table_bases_finish(int blade,
                      struct bau_control *bau_tablesp,
                      struct bau_desc *adp)
{
        struct bau_control *bcp;
        int cpu;

        for_each_present_cpu(cpu) {
                if (blade != uv_cpu_to_blade_id(cpu))
                        continue;

                bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
                bcp->bau_msg_head       = bau_tablesp->va_queue_first;
                bcp->va_queue_first     = bau_tablesp->va_queue_first;
                bcp->va_queue_last      = bau_tablesp->va_queue_last;
                bcp->msg_statuses       = bau_tablesp->msg_statuses;
                bcp->descriptor_base    = adp;
        }
}

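/*
 * The descriptor base programmed into UVH_LB_BAU_SB_DESCRIPTOR_BASE is
 * split into the pnode of the memory actually allocated (n, taken from
 * the global physical address) and its node-relative offset
 * (m = pa & uv_mmask).
 */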
/*
 * initialize the sending side's sending buffers
 */
static struct bau_desc * __init
uv_activation_descriptor_init(int node, int pnode)
{
        int i;
        unsigned long pa;
        unsigned long m;
        unsigned long n;
        struct bau_desc *adp;
        struct bau_desc *ad2;

        /*
         * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
         * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
         */
        adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
                UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
        BUG_ON(!adp);

        pa = uv_gpa(adp); /* need the real nasid */
        n = uv_gpa_to_pnode(pa);
        m = pa & uv_mmask;

        uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
                              (n << UV_DESC_BASE_PNODE_SHIFT | m));

        /*
         * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
         * cpu even though we only use the first one; one descriptor can
         * describe a broadcast to 256 nodes.
         */
        for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
                i++, ad2++) {
                memset(ad2, 0, sizeof(struct bau_desc));
                ad2->header.sw_ack_flag = 1;
                /*
                 * base_dest_nodeid is the first node in the partition, so
                 * the bit map will indicate partition-relative node numbers.
                 * note that base_dest_nodeid is actually a nasid.
                 */
                ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
                ad2->header.dest_subnodeid = 0x10; /* the LB */
                ad2->header.command = UV_NET_ENDPOINT_INTD;
                ad2->header.int_both = 1;
                /*
                 * all others need to be set to zero:
                 *   fairness chaining multilevel count replied_to
                 */
        }
        return adp;
}

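/*
 * The payload queue is over-allocated by one entry so that its start can
 * be rounded up to a 32-byte boundary before the queue FIRST/TAIL/LAST
 * MMRs are programmed below.
 */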
/*
 * initialize the destination side's receiving buffers
 */
static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
        struct bau_payload_queue_entry *pqp;
        unsigned long pa;
        int pn;
        char *cp;

        pqp = (struct bau_payload_queue_entry *) kmalloc_node(
                (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
                GFP_KERNEL, node);
        BUG_ON(!pqp);

        cp = (char *)pqp + 31;
        pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
        bau_tablesp->va_queue_first = pqp;
        /*
         * need the pnode of where the memory was really allocated
         */
        pa = uv_gpa(pqp);
        pn = uv_gpa_to_pnode(pa);
        uv_write_global_mmr64(pnode,
                              UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
                              ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
                              uv_physnodeaddr(pqp));
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
                              uv_physnodeaddr(pqp));
        bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
                              (unsigned long)
                              uv_physnodeaddr(bau_tablesp->va_queue_last));
        memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);

        return pqp;
}

/*
 * Initialization of each UV blade's structures
 */
static int __init uv_init_blade(int blade)
{
        int node;
        int pnode;
        unsigned long pa;
        unsigned long apicid;
        struct bau_desc *adp;
        struct bau_payload_queue_entry *pqp;
        struct bau_control *bau_tablesp;

        node = blade_to_first_node(blade);
        bau_tablesp = uv_table_bases_init(blade, node);
        pnode = uv_blade_to_pnode(blade);
        adp = uv_activation_descriptor_init(node, pnode);
        pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
        uv_table_bases_finish(blade, bau_tablesp, adp);
        /*
         * the below initialization can't be in firmware because the
         * messaging IRQ will be determined by the OS
         */
        apicid = blade_to_first_apicid(blade);
        pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
        if ((pa & 0xff) != UV_BAU_MESSAGE) {
                uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
                                      ((apicid << 32) | UV_BAU_MESSAGE));
        }
        return 0;
}

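/*
 * uv_partition_base_pnode is the smallest pnode of any blade that has
 * cpus; uv_flush_tlb_others() subtracts it so that the distribution
 * bitmap holds partition-relative node numbers (see the comment in
 * uv_activation_descriptor_init()).
 */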
/*
 * Initialization of BAU-related structures
 */
static int __init uv_bau_init(void)
{
        int blade;
        int nblades;
        int cur_cpu;

        if (!is_uv_system())
                return 0;

        for_each_possible_cpu(cur_cpu)
                zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
                                       GFP_KERNEL, cpu_to_node(cur_cpu));

        uv_bau_retry_limit = 1;
        uv_mmask = (1UL << uv_hub_info->m_val) - 1;
        nblades = uv_num_possible_blades();

        uv_bau_table_bases = (struct bau_control **)
            kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
        BUG_ON(!uv_bau_table_bases);

        uv_partition_base_pnode = 0x7fffffff;
        for (blade = 0; blade < nblades; blade++)
                if (uv_blade_nr_possible_cpus(blade) &&
                        (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
                        uv_partition_base_pnode = uv_blade_to_pnode(blade);
        for (blade = 0; blade < nblades; blade++)
                if (uv_blade_nr_possible_cpus(blade))
                        uv_init_blade(blade);

        alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
        uv_enable_timeouts();

        return 0;
}
__initcall(uv_bau_init);
__initcall(uv_ptc_init);