linux/drivers/misc/sgi-xp/xpc_partition.c
<<
>>
Prefs
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
   7 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
   8 */
   9
  10/*
  11 * Cross Partition Communication (XPC) partition support.
  12 *
  13 *      This is the part of XPC that detects the presence/absence of
  14 *      other partitions. It provides a heartbeat and monitors the
  15 *      heartbeats of other partitions.
  16 *
  17 */
  18
  19#include <linux/device.h>
  20#include <linux/hardirq.h>
  21#include <linux/slab.h>
  22#include "xpc.h"
  23#include <asm/uv/uv_hub.h>
  24
  25/* XPC is exiting flag */
  26int xpc_exiting;
  27
  28/* this partition's reserved page pointers */
  29struct xpc_rsvd_page *xpc_rsvd_page;
  30static unsigned long *xpc_part_nasids;
  31unsigned long *xpc_mach_nasids;
  32
  33static int xpc_nasid_mask_nbytes;       /* #of bytes in nasid mask */
  34int xpc_nasid_mask_nlongs;      /* #of longs in nasid mask */
  35
  36struct xpc_partition *xpc_partitions;
  37
  38/*
  39 * Guarantee that the kmalloc'd memory is cacheline aligned.
  40 */
  41void *
  42xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  43{
  44        /* see if kmalloc will give us cachline aligned memory by default */
  45        *base = kmalloc(size, flags);
  46        if (*base == NULL)
  47                return NULL;
  48
  49        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
  50                return *base;
  51
  52        kfree(*base);
  53
  54        /* nope, we'll have to do it ourselves */
  55        *base = kmalloc(size + L1_CACHE_BYTES, flags);
  56        if (*base == NULL)
  57                return NULL;
  58
  59        return (void *)L1_CACHE_ALIGN((u64)*base);
  60}
  61
  62/*
  63 * Given a nasid, get the physical address of the  partition's reserved page
  64 * for that nasid. This function returns 0 on any error.
  65 */
  66static unsigned long
  67xpc_get_rsvd_page_pa(int nasid)
  68{
  69        enum xp_retval ret;
  70        u64 cookie = 0;
  71        unsigned long rp_pa = nasid;    /* seed with nasid */
  72        size_t len = 0;
  73        size_t buf_len = 0;
  74        void *buf = NULL;
  75        void *buf_base = NULL;
  76        enum xp_retval (*get_partition_rsvd_page_pa)
  77                (void *, u64 *, unsigned long *, size_t *) =
  78                xpc_arch_ops.get_partition_rsvd_page_pa;
  79
  80        while (1) {
  81
  82                /* !!! rp_pa will need to be _gpa on UV.
  83                 * ??? So do we save it into the architecture specific parts
  84                 * ??? of the xpc_partition structure? Do we rename this
  85                 * ??? function or have two versions? Rename rp_pa for UV to
  86                 * ??? rp_gpa?
  87                 */
  88                ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
  89
  90                dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
  91                        "address=0x%016lx, len=0x%016lx\n", ret,
  92                        (unsigned long)cookie, rp_pa, len);
  93
  94                if (ret != xpNeedMoreInfo)
  95                        break;
  96
  97                if (len > buf_len) {
  98                        kfree(buf_base);
  99                        buf_len = L1_CACHE_ALIGN(len);
 100                        buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
 101                                                            &buf_base);
 102                        if (buf_base == NULL) {
 103                                dev_err(xpc_part, "unable to kmalloc "
 104                                        "len=0x%016lx\n", buf_len);
 105                                ret = xpNoMemory;
 106                                break;
 107                        }
 108                }
 109
 110                ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
 111                if (ret != xpSuccess) {
 112                        dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
 113                        break;
 114                }
 115        }
 116
 117        kfree(buf_base);
 118
 119        if (ret != xpSuccess)
 120                rp_pa = 0;
 121
 122        dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
 123        return rp_pa;
 124}
 125
 126/*
 127 * Fill the partition reserved page with the information needed by
 128 * other partitions to discover we are alive and establish initial
 129 * communications.
 130 */
 131int
 132xpc_setup_rsvd_page(void)
 133{
 134        int ret;
 135        struct xpc_rsvd_page *rp;
 136        unsigned long rp_pa;
 137        unsigned long new_ts_jiffies;
 138
 139        /* get the local reserved page's address */
 140
 141        preempt_disable();
 142        rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
 143        preempt_enable();
 144        if (rp_pa == 0) {
 145                dev_err(xpc_part, "SAL failed to locate the reserved page\n");
 146                return -ESRCH;
 147        }
 148        rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
 149
 150        if (rp->SAL_version < 3) {
 151                /* SAL_versions < 3 had a SAL_partid defined as a u8 */
 152                rp->SAL_partid &= 0xff;
 153        }
 154        BUG_ON(rp->SAL_partid != xp_partition_id);
 155
 156        if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
 157                dev_err(xpc_part, "the reserved page's partid of %d is outside "
 158                        "supported range (< 0 || >= %d)\n", rp->SAL_partid,
 159                        xp_max_npartitions);
 160                return -EINVAL;
 161        }
 162
 163        rp->version = XPC_RP_VERSION;
 164        rp->max_npartitions = xp_max_npartitions;
 165
 166        /* establish the actual sizes of the nasid masks */
 167        if (rp->SAL_version == 1) {
 168                /* SAL_version 1 didn't set the nasids_size field */
 169                rp->SAL_nasids_size = 128;
 170        }
 171        xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
 172        xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
 173                                              BITS_PER_BYTE);
 174
 175        /* setup the pointers to the various items in the reserved page */
 176        xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
 177        xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
 178
 179        ret = xpc_arch_ops.setup_rsvd_page(rp);
 180        if (ret != 0)
 181                return ret;
 182
 183        /*
 184         * Set timestamp of when reserved page was setup by XPC.
 185         * This signifies to the remote partition that our reserved
 186         * page is initialized.
 187         */
 188        new_ts_jiffies = jiffies;
 189        if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
 190                new_ts_jiffies++;
 191        rp->ts_jiffies = new_ts_jiffies;
 192
 193        xpc_rsvd_page = rp;
 194        return 0;
 195}
 196
 197void
 198xpc_teardown_rsvd_page(void)
 199{
 200        /* a zero timestamp indicates our rsvd page is not initialized */
 201        xpc_rsvd_page->ts_jiffies = 0;
 202}
 203
 204/*
 205 * Get a copy of a portion of the remote partition's rsvd page.
 206 *
 207 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 208 * is large enough to contain a copy of their reserved page header and
 209 * part_nasids mask.
 210 */
 211enum xp_retval
 212xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
 213                  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
 214{
 215        int l;
 216        enum xp_retval ret;
 217
 218        /* get the reserved page's physical address */
 219
 220        *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
 221        if (*remote_rp_pa == 0)
 222                return xpNoRsvdPageAddr;
 223
 224        /* pull over the reserved page header and part_nasids mask */
 225        ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
 226                               XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
 227        if (ret != xpSuccess)
 228                return ret;
 229
 230        if (discovered_nasids != NULL) {
 231                unsigned long *remote_part_nasids =
 232                    XPC_RP_PART_NASIDS(remote_rp);
 233
 234                for (l = 0; l < xpc_nasid_mask_nlongs; l++)
 235                        discovered_nasids[l] |= remote_part_nasids[l];
 236        }
 237
 238        /* zero timestamp indicates the reserved page has not been setup */
 239        if (remote_rp->ts_jiffies == 0)
 240                return xpRsvdPageNotSet;
 241
 242        if (XPC_VERSION_MAJOR(remote_rp->version) !=
 243            XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
 244                return xpBadVersion;
 245        }
 246
 247        /* check that both remote and local partids are valid for each side */
 248        if (remote_rp->SAL_partid < 0 ||
 249            remote_rp->SAL_partid >= xp_max_npartitions ||
 250            remote_rp->max_npartitions <= xp_partition_id) {
 251                return xpInvalidPartid;
 252        }
 253
 254        if (remote_rp->SAL_partid == xp_partition_id)
 255                return xpLocalPartid;
 256
 257        return xpSuccess;
 258}
 259
 260/*
 261 * See if the other side has responded to a partition deactivate request
 262 * from us. Though we requested the remote partition to deactivate with regard
 263 * to us, we really only need to wait for the other side to disengage from us.
 264 */
 265static int __xpc_partition_disengaged(struct xpc_partition *part,
 266                                      bool from_timer)
 267{
 268        short partid = XPC_PARTID(part);
 269        int disengaged;
 270
 271        disengaged = !xpc_arch_ops.partition_engaged(partid);
 272        if (part->disengage_timeout) {
 273                if (!disengaged) {
 274                        if (time_is_after_jiffies(part->disengage_timeout)) {
 275                                /* timelimit hasn't been reached yet */
 276                                return 0;
 277                        }
 278
 279                        /*
 280                         * Other side hasn't responded to our deactivate
 281                         * request in a timely fashion, so assume it's dead.
 282                         */
 283
 284                        dev_info(xpc_part, "deactivate request to remote "
 285                                 "partition %d timed out\n", partid);
 286                        xpc_disengage_timedout = 1;
 287                        xpc_arch_ops.assume_partition_disengaged(partid);
 288                        disengaged = 1;
 289                }
 290                part->disengage_timeout = 0;
 291
 292                /* Cancel the timer function if not called from it */
 293                if (!from_timer)
 294                        del_timer_sync(&part->disengage_timer);
 295
 296                DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
 297                        part->act_state != XPC_P_AS_INACTIVE);
 298                if (part->act_state != XPC_P_AS_INACTIVE)
 299                        xpc_wakeup_channel_mgr(part);
 300
 301                xpc_arch_ops.cancel_partition_deactivation_request(part);
 302        }
 303        return disengaged;
 304}
 305
 306int xpc_partition_disengaged(struct xpc_partition *part)
 307{
 308        return __xpc_partition_disengaged(part, false);
 309}
 310
 311int xpc_partition_disengaged_from_timer(struct xpc_partition *part)
 312{
 313        return __xpc_partition_disengaged(part, true);
 314}
 315
 316/*
 317 * Mark specified partition as active.
 318 */
 319enum xp_retval
 320xpc_mark_partition_active(struct xpc_partition *part)
 321{
 322        unsigned long irq_flags;
 323        enum xp_retval ret;
 324
 325        dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
 326
 327        spin_lock_irqsave(&part->act_lock, irq_flags);
 328        if (part->act_state == XPC_P_AS_ACTIVATING) {
 329                part->act_state = XPC_P_AS_ACTIVE;
 330                ret = xpSuccess;
 331        } else {
 332                DBUG_ON(part->reason == xpSuccess);
 333                ret = part->reason;
 334        }
 335        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 336
 337        return ret;
 338}
 339
 340/*
 341 * Start the process of deactivating the specified partition.
 342 */
 343void
 344xpc_deactivate_partition(const int line, struct xpc_partition *part,
 345                         enum xp_retval reason)
 346{
 347        unsigned long irq_flags;
 348
 349        spin_lock_irqsave(&part->act_lock, irq_flags);
 350
 351        if (part->act_state == XPC_P_AS_INACTIVE) {
 352                XPC_SET_REASON(part, reason, line);
 353                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 354                if (reason == xpReactivating) {
 355                        /* we interrupt ourselves to reactivate partition */
 356                        xpc_arch_ops.request_partition_reactivation(part);
 357                }
 358                return;
 359        }
 360        if (part->act_state == XPC_P_AS_DEACTIVATING) {
 361                if ((part->reason == xpUnloading && reason != xpUnloading) ||
 362                    reason == xpReactivating) {
 363                        XPC_SET_REASON(part, reason, line);
 364                }
 365                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 366                return;
 367        }
 368
 369        part->act_state = XPC_P_AS_DEACTIVATING;
 370        XPC_SET_REASON(part, reason, line);
 371
 372        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 373
 374        /* ask remote partition to deactivate with regard to us */
 375        xpc_arch_ops.request_partition_deactivation(part);
 376
 377        /* set a timelimit on the disengage phase of the deactivation request */
 378        part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
 379        part->disengage_timer.expires = part->disengage_timeout;
 380        add_timer(&part->disengage_timer);
 381
 382        dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
 383                XPC_PARTID(part), reason);
 384
 385        xpc_partition_going_down(part, reason);
 386}
 387
 388/*
 389 * Mark specified partition as inactive.
 390 */
 391void
 392xpc_mark_partition_inactive(struct xpc_partition *part)
 393{
 394        unsigned long irq_flags;
 395
 396        dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
 397                XPC_PARTID(part));
 398
 399        spin_lock_irqsave(&part->act_lock, irq_flags);
 400        part->act_state = XPC_P_AS_INACTIVE;
 401        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 402        part->remote_rp_pa = 0;
 403}
 404
 405/*
 406 * SAL has provided a partition and machine mask.  The partition mask
 407 * contains a bit for each even nasid in our partition.  The machine
 408 * mask contains a bit for each even nasid in the entire machine.
 409 *
 410 * Using those two bit arrays, we can determine which nasids are
 411 * known in the machine.  Each should also have a reserved page
 412 * initialized if they are available for partitioning.
 413 */
 414void
 415xpc_discovery(void)
 416{
 417        void *remote_rp_base;
 418        struct xpc_rsvd_page *remote_rp;
 419        unsigned long remote_rp_pa;
 420        int region;
 421        int region_size;
 422        int max_regions;
 423        int nasid;
 424        unsigned long *discovered_nasids;
 425        enum xp_retval ret;
 426
 427        remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
 428                                                  xpc_nasid_mask_nbytes,
 429                                                  GFP_KERNEL, &remote_rp_base);
 430        if (remote_rp == NULL)
 431                return;
 432
 433        discovered_nasids = kcalloc(xpc_nasid_mask_nlongs, sizeof(long),
 434                                    GFP_KERNEL);
 435        if (discovered_nasids == NULL) {
 436                kfree(remote_rp_base);
 437                return;
 438        }
 439
 440        /*
 441         * The term 'region' in this context refers to the minimum number of
 442         * nodes that can comprise an access protection grouping. The access
 443         * protection is in regards to memory, IOI and IPI.
 444         */
 445        region_size = xp_region_size;
 446
 447        if (is_uv_system())
 448                max_regions = 256;
 449        else {
 450                max_regions = 64;
 451
 452                switch (region_size) {
 453                case 128:
 454                        max_regions *= 2;
 455                        fallthrough;
 456                case 64:
 457                        max_regions *= 2;
 458                        fallthrough;
 459                case 32:
 460                        max_regions *= 2;
 461                        region_size = 16;
 462                }
 463        }
 464
 465        for (region = 0; region < max_regions; region++) {
 466
 467                if (xpc_exiting)
 468                        break;
 469
 470                dev_dbg(xpc_part, "searching region %d\n", region);
 471
 472                for (nasid = (region * region_size * 2);
 473                     nasid < ((region + 1) * region_size * 2); nasid += 2) {
 474
 475                        if (xpc_exiting)
 476                                break;
 477
 478                        dev_dbg(xpc_part, "checking nasid %d\n", nasid);
 479
 480                        if (test_bit(nasid / 2, xpc_part_nasids)) {
 481                                dev_dbg(xpc_part, "PROM indicates Nasid %d is "
 482                                        "part of the local partition; skipping "
 483                                        "region\n", nasid);
 484                                break;
 485                        }
 486
 487                        if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
 488                                dev_dbg(xpc_part, "PROM indicates Nasid %d was "
 489                                        "not on Numa-Link network at reset\n",
 490                                        nasid);
 491                                continue;
 492                        }
 493
 494                        if (test_bit(nasid / 2, discovered_nasids)) {
 495                                dev_dbg(xpc_part, "Nasid %d is part of a "
 496                                        "partition which was previously "
 497                                        "discovered\n", nasid);
 498                                continue;
 499                        }
 500
 501                        /* pull over the rsvd page header & part_nasids mask */
 502
 503                        ret = xpc_get_remote_rp(nasid, discovered_nasids,
 504                                                remote_rp, &remote_rp_pa);
 505                        if (ret != xpSuccess) {
 506                                dev_dbg(xpc_part, "unable to get reserved page "
 507                                        "from nasid %d, reason=%d\n", nasid,
 508                                        ret);
 509
 510                                if (ret == xpLocalPartid)
 511                                        break;
 512
 513                                continue;
 514                        }
 515
 516                        xpc_arch_ops.request_partition_activation(remote_rp,
 517                                                         remote_rp_pa, nasid);
 518                }
 519        }
 520
 521        kfree(discovered_nasids);
 522        kfree(remote_rp_base);
 523}
 524
 525/*
 526 * Given a partid, get the nasids owned by that partition from the
 527 * remote partition's reserved page.
 528 */
 529enum xp_retval
 530xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
 531{
 532        struct xpc_partition *part;
 533        unsigned long part_nasid_pa;
 534
 535        part = &xpc_partitions[partid];
 536        if (part->remote_rp_pa == 0)
 537                return xpPartitionDown;
 538
 539        memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
 540
 541        part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
 542
 543        return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
 544                                xpc_nasid_mask_nbytes);
 545}
 546