linux/arch/powerpc/platforms/pseries/mobility.c
<<
>>
Prefs
   1/*
   2 * Support for Partition Mobility/Migration
   3 *
   4 * Copyright (C) 2010 Nathan Fontenot
   5 * Copyright (C) 2010 IBM Corporation
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version
   9 * 2 as published by the Free Software Foundation.
  10 */
  11
  12#include <linux/cpu.h>
  13#include <linux/kernel.h>
  14#include <linux/kobject.h>
  15#include <linux/sched.h>
  16#include <linux/smp.h>
  17#include <linux/stat.h>
  18#include <linux/completion.h>
  19#include <linux/device.h>
  20#include <linux/delay.h>
  21#include <linux/slab.h>
  22
  23#include <asm/machdep.h>
  24#include <asm/rtas.h>
  25#include "pseries.h"
  26#include "../../kernel/cacheinfo.h"
  27
  28static struct kobject *mobility_kobj;
  29
  30struct update_props_workarea {
  31        __be32 phandle;
  32        __be32 state;
  33        __be64 reserved;
  34        __be32 nprops;
  35} __packed;
  36
  37#define NODE_ACTION_MASK        0xff000000
  38#define NODE_COUNT_MASK         0x00ffffff
  39
  40#define DELETE_DT_NODE  0x01000000
  41#define UPDATE_DT_NODE  0x02000000
  42#define ADD_DT_NODE     0x03000000
  43
  44#define MIGRATION_SCOPE (1)
  45#define PRRN_SCOPE -2
  46
  47static int mobility_rtas_call(int token, char *buf, s32 scope)
  48{
  49        int rc;
  50
  51        spin_lock(&rtas_data_buf_lock);
  52
  53        memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
  54        rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
  55        memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
  56
  57        spin_unlock(&rtas_data_buf_lock);
  58        return rc;
  59}
  60
  61static int delete_dt_node(__be32 phandle)
  62{
  63        struct device_node *dn;
  64
  65        dn = of_find_node_by_phandle(be32_to_cpu(phandle));
  66        if (!dn)
  67                return -ENOENT;
  68
  69        dlpar_detach_node(dn);
  70        of_node_put(dn);
  71        return 0;
  72}
  73
  74static int update_dt_property(struct device_node *dn, struct property **prop,
  75                              const char *name, u32 vd, char *value)
  76{
  77        struct property *new_prop = *prop;
  78        int more = 0;
  79
  80        /* A negative 'vd' value indicates that only part of the new property
  81         * value is contained in the buffer and we need to call
  82         * ibm,update-properties again to get the rest of the value.
  83         *
  84         * A negative value is also the two's compliment of the actual value.
  85         */
  86        if (vd & 0x80000000) {
  87                vd = ~vd + 1;
  88                more = 1;
  89        }
  90
  91        if (new_prop) {
  92                /* partial property fixup */
  93                char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL);
  94                if (!new_data)
  95                        return -ENOMEM;
  96
  97                memcpy(new_data, new_prop->value, new_prop->length);
  98                memcpy(new_data + new_prop->length, value, vd);
  99
 100                kfree(new_prop->value);
 101                new_prop->value = new_data;
 102                new_prop->length += vd;
 103        } else {
 104                new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
 105                if (!new_prop)
 106                        return -ENOMEM;
 107
 108                new_prop->name = kstrdup(name, GFP_KERNEL);
 109                if (!new_prop->name) {
 110                        kfree(new_prop);
 111                        return -ENOMEM;
 112                }
 113
 114                new_prop->length = vd;
 115                new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
 116                if (!new_prop->value) {
 117                        kfree(new_prop->name);
 118                        kfree(new_prop);
 119                        return -ENOMEM;
 120                }
 121
 122                memcpy(new_prop->value, value, vd);
 123                *prop = new_prop;
 124        }
 125
 126        if (!more) {
 127                of_update_property(dn, new_prop);
 128                *prop = NULL;
 129        }
 130
 131        return 0;
 132}
 133
 134static int update_dt_node(__be32 phandle, s32 scope)
 135{
 136        struct update_props_workarea *upwa;
 137        struct device_node *dn;
 138        struct property *prop = NULL;
 139        int i, rc, rtas_rc;
 140        char *prop_data;
 141        char *rtas_buf;
 142        int update_properties_token;
 143        u32 nprops;
 144        u32 vd;
 145
 146        update_properties_token = rtas_token("ibm,update-properties");
 147        if (update_properties_token == RTAS_UNKNOWN_SERVICE)
 148                return -EINVAL;
 149
 150        rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
 151        if (!rtas_buf)
 152                return -ENOMEM;
 153
 154        dn = of_find_node_by_phandle(be32_to_cpu(phandle));
 155        if (!dn) {
 156                kfree(rtas_buf);
 157                return -ENOENT;
 158        }
 159
 160        upwa = (struct update_props_workarea *)&rtas_buf[0];
 161        upwa->phandle = phandle;
 162
 163        do {
 164                rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
 165                                        scope);
 166                if (rtas_rc < 0)
 167                        break;
 168
 169                prop_data = rtas_buf + sizeof(*upwa);
 170                nprops = be32_to_cpu(upwa->nprops);
 171
 172                /* On the first call to ibm,update-properties for a node the
 173                 * the first property value descriptor contains an empty
 174                 * property name, the property value length encoded as u32,
 175                 * and the property value is the node path being updated.
 176                 */
 177                if (*prop_data == 0) {
 178                        prop_data++;
 179                        vd = be32_to_cpu(*(__be32 *)prop_data);
 180                        prop_data += vd + sizeof(vd);
 181                        nprops--;
 182                }
 183
 184                for (i = 0; i < nprops; i++) {
 185                        char *prop_name;
 186
 187                        prop_name = prop_data;
 188                        prop_data += strlen(prop_name) + 1;
 189                        vd = be32_to_cpu(*(__be32 *)prop_data);
 190                        prop_data += sizeof(vd);
 191
 192                        switch (vd) {
 193                        case 0x00000000:
 194                                /* name only property, nothing to do */
 195                                break;
 196
 197                        case 0x80000000:
 198                                of_remove_property(dn, of_find_property(dn,
 199                                                        prop_name, NULL));
 200                                prop = NULL;
 201                                break;
 202
 203                        default:
 204                                rc = update_dt_property(dn, &prop, prop_name,
 205                                                        vd, prop_data);
 206                                if (rc) {
 207                                        printk(KERN_ERR "Could not update %s"
 208                                               " property\n", prop_name);
 209                                }
 210
 211                                prop_data += vd;
 212                        }
 213
 214                        cond_resched();
 215                }
 216
 217                cond_resched();
 218        } while (rtas_rc == 1);
 219
 220        of_node_put(dn);
 221        kfree(rtas_buf);
 222        return 0;
 223}
 224
 225static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 226{
 227        struct device_node *dn;
 228        struct device_node *parent_dn;
 229        int rc;
 230
 231        parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
 232        if (!parent_dn)
 233                return -ENOENT;
 234
 235        dn = dlpar_configure_connector(drc_index, parent_dn);
 236        if (!dn) {
 237                of_node_put(parent_dn);
 238                return -ENOENT;
 239        }
 240
 241        rc = dlpar_attach_node(dn, parent_dn);
 242        if (rc)
 243                dlpar_free_cc_nodes(dn);
 244
 245        of_node_put(parent_dn);
 246        return rc;
 247}
 248
 249static void prrn_update_node(__be32 phandle)
 250{
 251        struct pseries_hp_errorlog hp_elog;
 252        struct device_node *dn;
 253
 254        /*
 255         * If a node is found from a the given phandle, the phandle does not
 256         * represent the drc index of an LMB and we can ignore.
 257         */
 258        dn = of_find_node_by_phandle(be32_to_cpu(phandle));
 259        if (dn) {
 260                of_node_put(dn);
 261                return;
 262        }
 263
 264        hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM;
 265        hp_elog.action = PSERIES_HP_ELOG_ACTION_READD;
 266        hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
 267        hp_elog._drc_u.drc_index = phandle;
 268
 269        handle_dlpar_errorlog(&hp_elog);
 270}
 271
 272int pseries_devicetree_update(s32 scope)
 273{
 274        char *rtas_buf;
 275        __be32 *data;
 276        int update_nodes_token;
 277        int rc;
 278
 279        update_nodes_token = rtas_token("ibm,update-nodes");
 280        if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
 281                return -EINVAL;
 282
 283        rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
 284        if (!rtas_buf)
 285                return -ENOMEM;
 286
 287        do {
 288                rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
 289                if (rc && rc != 1)
 290                        break;
 291
 292                data = (__be32 *)rtas_buf + 4;
 293                while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
 294                        int i;
 295                        u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
 296                        u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
 297
 298                        data++;
 299
 300                        for (i = 0; i < node_count; i++) {
 301                                __be32 phandle = *data++;
 302                                __be32 drc_index;
 303
 304                                switch (action) {
 305                                case DELETE_DT_NODE:
 306                                        delete_dt_node(phandle);
 307                                        break;
 308                                case UPDATE_DT_NODE:
 309                                        update_dt_node(phandle, scope);
 310
 311                                        if (scope == PRRN_SCOPE)
 312                                                prrn_update_node(phandle);
 313
 314                                        break;
 315                                case ADD_DT_NODE:
 316                                        drc_index = *data++;
 317                                        add_dt_node(phandle, drc_index);
 318                                        break;
 319                                }
 320
 321                                cond_resched();
 322                        }
 323                }
 324
 325                cond_resched();
 326        } while (rc == 1);
 327
 328        kfree(rtas_buf);
 329        return rc;
 330}
 331
 332void post_mobility_fixup(void)
 333{
 334        int rc;
 335        int activate_fw_token;
 336
 337        activate_fw_token = rtas_token("ibm,activate-firmware");
 338        if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
 339                printk(KERN_ERR "Could not make post-mobility "
 340                       "activate-fw call.\n");
 341                return;
 342        }
 343
 344        do {
 345                rc = rtas_call(activate_fw_token, 0, 1, NULL);
 346        } while (rtas_busy_delay(rc));
 347
 348        if (rc)
 349                printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
 350
 351        /*
 352         * We don't want CPUs to go online/offline while the device
 353         * tree is being updated.
 354         */
 355        cpus_read_lock();
 356
 357        /*
 358         * It's common for the destination firmware to replace cache
 359         * nodes.  Release all of the cacheinfo hierarchy's references
 360         * before updating the device tree.
 361         */
 362        cacheinfo_teardown();
 363
 364        rc = pseries_devicetree_update(MIGRATION_SCOPE);
 365        if (rc)
 366                printk(KERN_ERR "Post-mobility device tree update "
 367                        "failed: %d\n", rc);
 368
 369        cacheinfo_rebuild();
 370
 371        cpus_read_unlock();
 372
 373        /* Possibly switch to a new RFI flush type */
 374        pseries_setup_rfi_flush();
 375
 376        return;
 377}
 378
 379static ssize_t migration_store(struct class *class,
 380                               struct class_attribute *attr, const char *buf,
 381                               size_t count)
 382{
 383        u64 streamid;
 384        int rc;
 385
 386        rc = kstrtou64(buf, 0, &streamid);
 387        if (rc)
 388                return rc;
 389
 390        stop_topology_update();
 391
 392        do {
 393                rc = rtas_ibm_suspend_me(streamid);
 394                if (rc == -EAGAIN)
 395                        ssleep(1);
 396        } while (rc == -EAGAIN);
 397
 398        if (rc)
 399                return rc;
 400
 401        post_mobility_fixup();
 402
 403        start_topology_update();
 404
 405        return count;
 406}
 407
 408/*
 409 * Used by drmgr to determine the kernel behavior of the migration interface.
 410 *
 411 * Version 1: Performs all PAPR requirements for migration including
 412 *      firmware activation and device tree update.
 413 */
 414#define MIGRATION_API_VERSION   1
 415
 416static CLASS_ATTR_WO(migration);
 417static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
 418
 419static int __init mobility_sysfs_init(void)
 420{
 421        int rc;
 422
 423        mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
 424        if (!mobility_kobj)
 425                return -ENOMEM;
 426
 427        rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
 428        if (rc)
 429                pr_err("mobility: unable to create migration sysfs file (%d)\n", rc);
 430
 431        rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
 432        if (rc)
 433                pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc);
 434
 435        return 0;
 436}
 437machine_device_initcall(pseries, mobility_sysfs_init);
 438