linux/drivers/misc/sgi-xp/xpc_partition.c
<<
>>
Prefs
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
   7 */
   8
   9/*
  10 * Cross Partition Communication (XPC) partition support.
  11 *
  12 *      This is the part of XPC that detects the presence/absence of
  13 *      other partitions. It provides a heartbeat and monitors the
  14 *      heartbeats of other partitions.
  15 *
  16 */
  17
  18#include <linux/device.h>
  19#include <linux/hardirq.h>
  20#include "xpc.h"
  21
  22/* XPC is exiting flag */
  23int xpc_exiting;
  24
  25/* this partition's reserved page pointers */
  26struct xpc_rsvd_page *xpc_rsvd_page;
  27static unsigned long *xpc_part_nasids;
  28unsigned long *xpc_mach_nasids;
  29
  30static int xpc_nasid_mask_nbytes;       /* #of bytes in nasid mask */
  31int xpc_nasid_mask_nlongs;      /* #of longs in nasid mask */
  32
  33struct xpc_partition *xpc_partitions;
  34
  35/*
  36 * Guarantee that the kmalloc'd memory is cacheline aligned.
  37 */
  38void *
  39xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  40{
  41        /* see if kmalloc will give us cachline aligned memory by default */
  42        *base = kmalloc(size, flags);
  43        if (*base == NULL)
  44                return NULL;
  45
  46        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
  47                return *base;
  48
  49        kfree(*base);
  50
  51        /* nope, we'll have to do it ourselves */
  52        *base = kmalloc(size + L1_CACHE_BYTES, flags);
  53        if (*base == NULL)
  54                return NULL;
  55
  56        return (void *)L1_CACHE_ALIGN((u64)*base);
  57}
  58
  59/*
  60 * Given a nasid, get the physical address of the  partition's reserved page
  61 * for that nasid. This function returns 0 on any error.
  62 */
  63static unsigned long
  64xpc_get_rsvd_page_pa(int nasid)
  65{
  66        enum xp_retval ret;
  67        u64 cookie = 0;
  68        unsigned long rp_pa = nasid;    /* seed with nasid */
  69        size_t len = 0;
  70        size_t buf_len = 0;
  71        void *buf = buf;
  72        void *buf_base = NULL;
  73        enum xp_retval (*get_partition_rsvd_page_pa)
  74                (void *, u64 *, unsigned long *, size_t *) =
  75                xpc_arch_ops.get_partition_rsvd_page_pa;
  76
  77        while (1) {
  78
  79                /* !!! rp_pa will need to be _gpa on UV.
  80                 * ??? So do we save it into the architecture specific parts
  81                 * ??? of the xpc_partition structure? Do we rename this
  82                 * ??? function or have two versions? Rename rp_pa for UV to
  83                 * ??? rp_gpa?
  84                 */
  85                ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
  86
  87                dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
  88                        "address=0x%016lx, len=0x%016lx\n", ret,
  89                        (unsigned long)cookie, rp_pa, len);
  90
  91                if (ret != xpNeedMoreInfo)
  92                        break;
  93
  94                /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
  95                if (L1_CACHE_ALIGN(len) > buf_len) {
  96                        kfree(buf_base);
  97                        buf_len = L1_CACHE_ALIGN(len);
  98                        buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
  99                                                            &buf_base);
 100                        if (buf_base == NULL) {
 101                                dev_err(xpc_part, "unable to kmalloc "
 102                                        "len=0x%016lx\n", buf_len);
 103                                ret = xpNoMemory;
 104                                break;
 105                        }
 106                }
 107
 108                ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len);
 109                if (ret != xpSuccess) {
 110                        dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
 111                        break;
 112                }
 113        }
 114
 115        kfree(buf_base);
 116
 117        if (ret != xpSuccess)
 118                rp_pa = 0;
 119
 120        dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
 121        return rp_pa;
 122}
 123
 124/*
 125 * Fill the partition reserved page with the information needed by
 126 * other partitions to discover we are alive and establish initial
 127 * communications.
 128 */
 129int
 130xpc_setup_rsvd_page(void)
 131{
 132        int ret;
 133        struct xpc_rsvd_page *rp;
 134        unsigned long rp_pa;
 135        unsigned long new_ts_jiffies;
 136
 137        /* get the local reserved page's address */
 138
 139        preempt_disable();
 140        rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
 141        preempt_enable();
 142        if (rp_pa == 0) {
 143                dev_err(xpc_part, "SAL failed to locate the reserved page\n");
 144                return -ESRCH;
 145        }
 146        rp = (struct xpc_rsvd_page *)__va(rp_pa);
 147
 148        if (rp->SAL_version < 3) {
 149                /* SAL_versions < 3 had a SAL_partid defined as a u8 */
 150                rp->SAL_partid &= 0xff;
 151        }
 152        BUG_ON(rp->SAL_partid != xp_partition_id);
 153
 154        if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
 155                dev_err(xpc_part, "the reserved page's partid of %d is outside "
 156                        "supported range (< 0 || >= %d)\n", rp->SAL_partid,
 157                        xp_max_npartitions);
 158                return -EINVAL;
 159        }
 160
 161        rp->version = XPC_RP_VERSION;
 162        rp->max_npartitions = xp_max_npartitions;
 163
 164        /* establish the actual sizes of the nasid masks */
 165        if (rp->SAL_version == 1) {
 166                /* SAL_version 1 didn't set the nasids_size field */
 167                rp->SAL_nasids_size = 128;
 168        }
 169        xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
 170        xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
 171                                              BITS_PER_BYTE);
 172
 173        /* setup the pointers to the various items in the reserved page */
 174        xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
 175        xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
 176
 177        ret = xpc_arch_ops.setup_rsvd_page(rp);
 178        if (ret != 0)
 179                return ret;
 180
 181        /*
 182         * Set timestamp of when reserved page was setup by XPC.
 183         * This signifies to the remote partition that our reserved
 184         * page is initialized.
 185         */
 186        new_ts_jiffies = jiffies;
 187        if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
 188                new_ts_jiffies++;
 189        rp->ts_jiffies = new_ts_jiffies;
 190
 191        xpc_rsvd_page = rp;
 192        return 0;
 193}
 194
 195void
 196xpc_teardown_rsvd_page(void)
 197{
 198        /* a zero timestamp indicates our rsvd page is not initialized */
 199        xpc_rsvd_page->ts_jiffies = 0;
 200}
 201
 202/*
 203 * Get a copy of a portion of the remote partition's rsvd page.
 204 *
 205 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 206 * is large enough to contain a copy of their reserved page header and
 207 * part_nasids mask.
 208 */
 209enum xp_retval
 210xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 211                  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
 212{
 213        int l;
 214        enum xp_retval ret;
 215
 216        /* get the reserved page's physical address */
 217
 218        *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
 219        if (*remote_rp_pa == 0)
 220                return xpNoRsvdPageAddr;
 221
 222        /* pull over the reserved page header and part_nasids mask */
 223        ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
 224                               XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
 225        if (ret != xpSuccess)
 226                return ret;
 227
 228        if (discovered_nasids != NULL) {
 229                unsigned long *remote_part_nasids =
 230                    XPC_RP_PART_NASIDS(remote_rp);
 231
 232                for (l = 0; l < xpc_nasid_mask_nlongs; l++)
 233                        discovered_nasids[l] |= remote_part_nasids[l];
 234        }
 235
 236        /* zero timestamp indicates the reserved page has not been setup */
 237        if (remote_rp->ts_jiffies == 0)
 238                return xpRsvdPageNotSet;
 239
 240        if (XPC_VERSION_MAJOR(remote_rp->version) !=
 241            XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
 242                return xpBadVersion;
 243        }
 244
 245        /* check that both remote and local partids are valid for each side */
 246        if (remote_rp->SAL_partid < 0 ||
 247            remote_rp->SAL_partid >= xp_max_npartitions ||
 248            remote_rp->max_npartitions <= xp_partition_id) {
 249                return xpInvalidPartid;
 250        }
 251
 252        if (remote_rp->SAL_partid == xp_partition_id)
 253                return xpLocalPartid;
 254
 255        return xpSuccess;
 256}
 257
 258/*
 259 * See if the other side has responded to a partition deactivate request
 260 * from us. Though we requested the remote partition to deactivate with regard
 261 * to us, we really only need to wait for the other side to disengage from us.
 262 */
 263int
 264xpc_partition_disengaged(struct xpc_partition *part)
 265{
 266        short partid = XPC_PARTID(part);
 267        int disengaged;
 268
 269        disengaged = !xpc_arch_ops.partition_engaged(partid);
 270        if (part->disengage_timeout) {
 271                if (!disengaged) {
 272                        if (time_is_after_jiffies(part->disengage_timeout)) {
 273                                /* timelimit hasn't been reached yet */
 274                                return 0;
 275                        }
 276
 277                        /*
 278                         * Other side hasn't responded to our deactivate
 279                         * request in a timely fashion, so assume it's dead.
 280                         */
 281
 282                        dev_info(xpc_part, "deactivate request to remote "
 283                                 "partition %d timed out\n", partid);
 284                        xpc_disengage_timedout = 1;
 285                        xpc_arch_ops.assume_partition_disengaged(partid);
 286                        disengaged = 1;
 287                }
 288                part->disengage_timeout = 0;
 289
 290                /* cancel the timer function, provided it's not us */
 291                if (!in_interrupt())
 292                        del_singleshot_timer_sync(&part->disengage_timer);
 293
 294                DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
 295                        part->act_state != XPC_P_AS_INACTIVE);
 296                if (part->act_state != XPC_P_AS_INACTIVE)
 297                        xpc_wakeup_channel_mgr(part);
 298
 299                xpc_arch_ops.cancel_partition_deactivation_request(part);
 300        }
 301        return disengaged;
 302}
 303
 304/*
 305 * Mark specified partition as active.
 306 */
 307enum xp_retval
 308xpc_mark_partition_active(struct xpc_partition *part)
 309{
 310        unsigned long irq_flags;
 311        enum xp_retval ret;
 312
 313        dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
 314
 315        spin_lock_irqsave(&part->act_lock, irq_flags);
 316        if (part->act_state == XPC_P_AS_ACTIVATING) {
 317                part->act_state = XPC_P_AS_ACTIVE;
 318                ret = xpSuccess;
 319        } else {
 320                DBUG_ON(part->reason == xpSuccess);
 321                ret = part->reason;
 322        }
 323        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 324
 325        return ret;
 326}
 327
 328/*
 329 * Start the process of deactivating the specified partition.
 330 */
 331void
 332xpc_deactivate_partition(const int line, struct xpc_partition *part,
 333                         enum xp_retval reason)
 334{
 335        unsigned long irq_flags;
 336
 337        spin_lock_irqsave(&part->act_lock, irq_flags);
 338
 339        if (part->act_state == XPC_P_AS_INACTIVE) {
 340                XPC_SET_REASON(part, reason, line);
 341                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 342                if (reason == xpReactivating) {
 343                        /* we interrupt ourselves to reactivate partition */
 344                        xpc_arch_ops.request_partition_reactivation(part);
 345                }
 346                return;
 347        }
 348        if (part->act_state == XPC_P_AS_DEACTIVATING) {
 349                if ((part->reason == xpUnloading && reason != xpUnloading) ||
 350                    reason == xpReactivating) {
 351                        XPC_SET_REASON(part, reason, line);
 352                }
 353                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 354                return;
 355        }
 356
 357        part->act_state = XPC_P_AS_DEACTIVATING;
 358        XPC_SET_REASON(part, reason, line);
 359
 360        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 361
 362        /* ask remote partition to deactivate with regard to us */
 363        xpc_arch_ops.request_partition_deactivation(part);
 364
 365        /* set a timelimit on the disengage phase of the deactivation request */
 366        part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
 367        part->disengage_timer.expires = part->disengage_timeout;
 368        add_timer(&part->disengage_timer);
 369
 370        dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
 371                XPC_PARTID(part), reason);
 372
 373        xpc_partition_going_down(part, reason);
 374}
 375
 376/*
 377 * Mark specified partition as inactive.
 378 */
 379void
 380xpc_mark_partition_inactive(struct xpc_partition *part)
 381{
 382        unsigned long irq_flags;
 383
 384        dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
 385                XPC_PARTID(part));
 386
 387        spin_lock_irqsave(&part->act_lock, irq_flags);
 388        part->act_state = XPC_P_AS_INACTIVE;
 389        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 390        part->remote_rp_pa = 0;
 391}
 392
 393/*
 394 * SAL has provided a partition and machine mask.  The partition mask
 395 * contains a bit for each even nasid in our partition.  The machine
 396 * mask contains a bit for each even nasid in the entire machine.
 397 *
 398 * Using those two bit arrays, we can determine which nasids are
 399 * known in the machine.  Each should also have a reserved page
 400 * initialized if they are available for partitioning.
 401 */
 402void
 403xpc_discovery(void)
 404{
 405        void *remote_rp_base;
 406        struct xpc_rsvd_page *remote_rp;
 407        unsigned long remote_rp_pa;
 408        int region;
 409        int region_size;
 410        int max_regions;
 411        int nasid;
 412        struct xpc_rsvd_page *rp;
 413        unsigned long *discovered_nasids;
 414        enum xp_retval ret;
 415
 416        remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
 417                                                  xpc_nasid_mask_nbytes,
 418                                                  GFP_KERNEL, &remote_rp_base);
 419        if (remote_rp == NULL)
 420                return;
 421
 422        discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
 423                                    GFP_KERNEL);
 424        if (discovered_nasids == NULL) {
 425                kfree(remote_rp_base);
 426                return;
 427        }
 428
 429        rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
 430
 431        /*
 432         * The term 'region' in this context refers to the minimum number of
 433         * nodes that can comprise an access protection grouping. The access
 434         * protection is in regards to memory, IOI and IPI.
 435         */
 436        max_regions = 64;
 437        region_size = xp_region_size;
 438
 439        switch (region_size) {
 440        case 128:
 441                max_regions *= 2;
 442        case 64:
 443                max_regions *= 2;
 444        case 32:
 445                max_regions *= 2;
 446                region_size = 16;
 447                DBUG_ON(!is_shub2());
 448        }
 449
 450        for (region = 0; region < max_regions; region++) {
 451
 452                if (xpc_exiting)
 453                        break;
 454
 455                dev_dbg(xpc_part, "searching region %d\n", region);
 456
 457                for (nasid = (region * region_size * 2);
 458                     nasid < ((region + 1) * region_size * 2); nasid += 2) {
 459
 460                        if (xpc_exiting)
 461                                break;
 462
 463                        dev_dbg(xpc_part, "checking nasid %d\n", nasid);
 464
 465                        if (test_bit(nasid / 2, xpc_part_nasids)) {
 466                                dev_dbg(xpc_part, "PROM indicates Nasid %d is "
 467                                        "part of the local partition; skipping "
 468                                        "region\n", nasid);
 469                                break;
 470                        }
 471
 472                        if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
 473                                dev_dbg(xpc_part, "PROM indicates Nasid %d was "
 474                                        "not on Numa-Link network at reset\n",
 475                                        nasid);
 476                                continue;
 477                        }
 478
 479                        if (test_bit(nasid / 2, discovered_nasids)) {
 480                                dev_dbg(xpc_part, "Nasid %d is part of a "
 481                                        "partition which was previously "
 482                                        "discovered\n", nasid);
 483                                continue;
 484                        }
 485
 486                        /* pull over the rsvd page header & part_nasids mask */
 487
 488                        ret = xpc_get_remote_rp(nasid, discovered_nasids,
 489                                                remote_rp, &remote_rp_pa);
 490                        if (ret != xpSuccess) {
 491                                dev_dbg(xpc_part, "unable to get reserved page "
 492                                        "from nasid %d, reason=%d\n", nasid,
 493                                        ret);
 494
 495                                if (ret == xpLocalPartid)
 496                                        break;
 497
 498                                continue;
 499                        }
 500
 501                        xpc_arch_ops.request_partition_activation(remote_rp,
 502                                                         remote_rp_pa, nasid);
 503                }
 504        }
 505
 506        kfree(discovered_nasids);
 507        kfree(remote_rp_base);
 508}
 509
 510/*
 511 * Given a partid, get the nasids owned by that partition from the
 512 * remote partition's reserved page.
 513 */
 514enum xp_retval
 515xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 516{
 517        struct xpc_partition *part;
 518        unsigned long part_nasid_pa;
 519
 520        part = &xpc_partitions[partid];
 521        if (part->remote_rp_pa == 0)
 522                return xpPartitionDown;
 523
 524        memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
 525
 526        part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 527
 528        return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
 529                                xpc_nasid_mask_nbytes);
 530}
 531