linux/arch/ia64/sn/kernel/sn2/sn_hwperf.c
<<
>>
Prefs
   1/* 
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
   7 *
   8 * SGI Altix topology and hardware performance monitoring API.
   9 * Mark Goodwin <markgw@sgi.com>. 
  10 *
  11 * Creates /proc/sgi_sn/sn_topology (read-only) to export
  12 * info about Altix nodes, routers, CPUs and NumaLink
  13 * interconnection/topology.
  14 *
  15 * Also creates a dynamic misc device named "sn_hwperf"
  16 * that supports an ioctl interface to call down into SAL
  17 * to discover hw objects, topology and to read/write
  18 * memory mapped registers, e.g. for performance monitoring.
  19 * The "sn_hwperf" device is registered only after the procfs
  20 * file is first opened, i.e. only if/when it's needed. 
  21 *
  22 * This API is used by SGI Performance Co-Pilot and other
  23 * tools, see http://oss.sgi.com/projects/pcp
  24 */
  25
  26#include <linux/fs.h>
  27#include <linux/slab.h>
  28#include <linux/vmalloc.h>
  29#include <linux/seq_file.h>
  30#include <linux/miscdevice.h>
  31#include <linux/utsname.h>
  32#include <linux/cpumask.h>
  33#include <linux/nodemask.h>
  34#include <linux/smp.h>
  35#include <linux/mutex.h>
  36
  37#include <asm/processor.h>
  38#include <asm/topology.h>
  39#include <asm/uaccess.h>
  40#include <asm/sal.h>
  41#include <asm/sn/io.h>
  42#include <asm/sn/sn_sal.h>
  43#include <asm/sn/module.h>
  44#include <asm/sn/geo.h>
  45#include <asm/sn/sn2/sn_hwperf.h>
  46#include <asm/sn/addrs.h>
  47
  48static void *sn_hwperf_salheap = NULL;
  49static int sn_hwperf_obj_cnt = 0;
  50static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
  51static int sn_hwperf_init(void);
  52static DEFINE_MUTEX(sn_hwperf_init_mutex);
  53
  54#define cnode_possible(n)       ((n) < num_cnodes)
  55
  56static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
  57{
  58        int e;
  59        u64 sz;
  60        struct sn_hwperf_object_info *objbuf = NULL;
  61
  62        if ((e = sn_hwperf_init()) < 0) {
  63                printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e);
  64                goto out;
  65        }
  66
  67        sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
  68        objbuf = vmalloc(sz);
  69        if (objbuf == NULL) {
  70                printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
  71                e = -ENOMEM;
  72                goto out;
  73        }
  74
  75        e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
  76                0, sz, (u64) objbuf, 0, 0, NULL);
  77        if (e != SN_HWPERF_OP_OK) {
  78                e = -EINVAL;
  79                vfree(objbuf);
  80        }
  81
  82out:
  83        *nobj = sn_hwperf_obj_cnt;
  84        *ret = objbuf;
  85        return e;
  86}
  87
  88static int sn_hwperf_location_to_bpos(char *location,
  89        int *rack, int *bay, int *slot, int *slab)
  90{
  91        char type;
  92
  93        /* first scan for an old style geoid string */
  94        if (sscanf(location, "%03d%c%02d#%d",
  95                rack, &type, bay, slab) == 4)
  96                *slot = 0; 
  97        else /* scan for a new bladed geoid string */
  98        if (sscanf(location, "%03d%c%02d^%02d#%d",
  99                rack, &type, bay, slot, slab) != 5)
 100                return -1; 
 101        /* success */
 102        return 0;
 103}
 104
 105static int sn_hwperf_geoid_to_cnode(char *location)
 106{
 107        int cnode;
 108        geoid_t geoid;
 109        moduleid_t module_id;
 110        int rack, bay, slot, slab;
 111        int this_rack, this_bay, this_slot, this_slab;
 112
 113        if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab))
 114                return -1;
 115
 116        /*
 117         * FIXME: replace with cleaner for_each_XXX macro which addresses
 118         * both compute and IO nodes once ACPI3.0 is available.
 119         */
 120        for (cnode = 0; cnode < num_cnodes; cnode++) {
 121                geoid = cnodeid_get_geoid(cnode);
 122                module_id = geo_module(geoid);
 123                this_rack = MODULE_GET_RACK(module_id);
 124                this_bay = MODULE_GET_BPOS(module_id);
 125                this_slot = geo_slot(geoid);
 126                this_slab = geo_slab(geoid);
 127                if (rack == this_rack && bay == this_bay &&
 128                        slot == this_slot && slab == this_slab) {
 129                        break;
 130                }
 131        }
 132
 133        return cnode_possible(cnode) ? cnode : -1;
 134}
 135
 136static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
 137{
 138        if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
 139                BUG();
 140        if (SN_HWPERF_FOREIGN(obj))
 141                return -1;
 142        return sn_hwperf_geoid_to_cnode(obj->location);
 143}
 144
 145static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
 146                                struct sn_hwperf_object_info *objs)
 147{
 148        int ordinal;
 149        struct sn_hwperf_object_info *p;
 150
 151        for (ordinal=0, p=objs; p != obj; p++) {
 152                if (SN_HWPERF_FOREIGN(p))
 153                        continue;
 154                if (SN_HWPERF_SAME_OBJTYPE(p, obj))
 155                        ordinal++;
 156        }
 157
 158        return ordinal;
 159}
 160
 161static const char *slabname_node =      "node"; /* SHub asic */
 162static const char *slabname_ionode =    "ionode"; /* TIO asic */
 163static const char *slabname_router =    "router"; /* NL3R or NL4R */
 164static const char *slabname_other =     "other"; /* unknown asic */
 165
 166static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
 167                        struct sn_hwperf_object_info *objs, int *ordinal)
 168{
 169        int isnode;
 170        const char *slabname = slabname_other;
 171
 172        if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) {
 173                slabname = isnode ? slabname_node : slabname_ionode;
 174                *ordinal = sn_hwperf_obj_to_cnode(obj);
 175        }
 176        else {
 177                *ordinal = sn_hwperf_generic_ordinal(obj, objs);
 178                if (SN_HWPERF_IS_ROUTER(obj))
 179                        slabname = slabname_router;
 180        }
 181
 182        return slabname;
 183}
 184
 185static void print_pci_topology(struct seq_file *s)
 186{
 187        char *p;
 188        size_t sz;
 189        int e;
 190
 191        for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) {
 192                if (!(p = kmalloc(sz, GFP_KERNEL)))
 193                        break;
 194                e = ia64_sn_ioif_get_pci_topology(__pa(p), sz);
 195                if (e == SALRET_OK)
 196                        seq_puts(s, p);
 197                kfree(p);
 198                if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED)
 199                        break;
 200        }
 201}
 202
 203static inline int sn_hwperf_has_cpus(cnodeid_t node)
 204{
 205        return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node);
 206}
 207
 208static inline int sn_hwperf_has_mem(cnodeid_t node)
 209{
 210        return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages;
 211}
 212
 213static struct sn_hwperf_object_info *
 214sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf,
 215        int nobj, int id)
 216{
 217        int i;
 218        struct sn_hwperf_object_info *p = objbuf;
 219
 220        for (i=0; i < nobj; i++, p++) {
 221                if (p->id == id)
 222                        return p;
 223        }
 224
 225        return NULL;
 226
 227}
 228
 229static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf,
 230        int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
 231{
 232        int e;
 233        struct sn_hwperf_object_info *nodeobj = NULL;
 234        struct sn_hwperf_object_info *op;
 235        struct sn_hwperf_object_info *dest;
 236        struct sn_hwperf_object_info *router;
 237        struct sn_hwperf_port_info ptdata[16];
 238        int sz, i, j;
 239        cnodeid_t c;
 240        int found_mem = 0;
 241        int found_cpu = 0;
 242
 243        if (!cnode_possible(node))
 244                return -EINVAL;
 245
 246        if (sn_hwperf_has_cpus(node)) {
 247                if (near_cpu_node)
 248                        *near_cpu_node = node;
 249                found_cpu++;
 250        }
 251
 252        if (sn_hwperf_has_mem(node)) {
 253                if (near_mem_node)
 254                        *near_mem_node = node;
 255                found_mem++;
 256        }
 257
 258        if (found_cpu && found_mem)
 259                return 0; /* trivially successful */
 260
 261        /* find the argument node object */
 262        for (i=0, op=objbuf; i < nobj; i++, op++) {
 263                if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op))
 264                        continue;
 265                if (node == sn_hwperf_obj_to_cnode(op)) {
 266                        nodeobj = op;
 267                        break;
 268                }
 269        }
 270        if (!nodeobj) {
 271                e = -ENOENT;
 272                goto err;
 273        }
 274
 275        /* get it's interconnect topology */
 276        sz = op->ports * sizeof(struct sn_hwperf_port_info);
 277        BUG_ON(sz > sizeof(ptdata));
 278        e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 279                              SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
 280                              (u64)&ptdata, 0, 0, NULL);
 281        if (e != SN_HWPERF_OP_OK) {
 282                e = -EINVAL;
 283                goto err;
 284        }
 285
 286        /* find nearest node with cpus and nearest memory */
 287        for (router=NULL, j=0; j < op->ports; j++) {
 288                dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id);
 289                if (dest && SN_HWPERF_IS_ROUTER(dest))
 290                        router = dest;
 291                if (!dest || SN_HWPERF_FOREIGN(dest) ||
 292                    !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) {
 293                        continue;
 294                }
 295                c = sn_hwperf_obj_to_cnode(dest);
 296                if (!found_cpu && sn_hwperf_has_cpus(c)) {
 297                        if (near_cpu_node)
 298                                *near_cpu_node = c;
 299                        found_cpu++;
 300                }
 301                if (!found_mem && sn_hwperf_has_mem(c)) {
 302                        if (near_mem_node)
 303                                *near_mem_node = c;
 304                        found_mem++;
 305                }
 306        }
 307
 308        if (router && (!found_cpu || !found_mem)) {
 309                /* search for a node connected to the same router */
 310                sz = router->ports * sizeof(struct sn_hwperf_port_info);
 311                BUG_ON(sz > sizeof(ptdata));
 312                e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 313                                      SN_HWPERF_ENUM_PORTS, router->id, sz,
 314                                      (u64)&ptdata, 0, 0, NULL);
 315                if (e != SN_HWPERF_OP_OK) {
 316                        e = -EINVAL;
 317                        goto err;
 318                }
 319                for (j=0; j < router->ports; j++) {
 320                        dest = sn_hwperf_findobj_id(objbuf, nobj,
 321                                ptdata[j].conn_id);
 322                        if (!dest || dest->id == node ||
 323                            SN_HWPERF_FOREIGN(dest) ||
 324                            !SN_HWPERF_IS_NODE(dest) ||
 325                            SN_HWPERF_IS_IONODE(dest)) {
 326                                continue;
 327                        }
 328                        c = sn_hwperf_obj_to_cnode(dest);
 329                        if (!found_cpu && sn_hwperf_has_cpus(c)) {
 330                                if (near_cpu_node)
 331                                        *near_cpu_node = c;
 332                                found_cpu++;
 333                        }
 334                        if (!found_mem && sn_hwperf_has_mem(c)) {
 335                                if (near_mem_node)
 336                                        *near_mem_node = c;
 337                                found_mem++;
 338                        }
 339                        if (found_cpu && found_mem)
 340                                break;
 341                }
 342        }
 343
 344        if (!found_cpu || !found_mem) {
 345                /* resort to _any_ node with CPUs and memory */
 346                for (i=0, op=objbuf; i < nobj; i++, op++) {
 347                        if (SN_HWPERF_FOREIGN(op) ||
 348                            SN_HWPERF_IS_IONODE(op) ||
 349                            !SN_HWPERF_IS_NODE(op)) {
 350                                continue;
 351                        }
 352                        c = sn_hwperf_obj_to_cnode(op);
 353                        if (!found_cpu && sn_hwperf_has_cpus(c)) {
 354                                if (near_cpu_node)
 355                                        *near_cpu_node = c;
 356                                found_cpu++;
 357                        }
 358                        if (!found_mem && sn_hwperf_has_mem(c)) {
 359                                if (near_mem_node)
 360                                        *near_mem_node = c;
 361                                found_mem++;
 362                        }
 363                        if (found_cpu && found_mem)
 364                                break;
 365                }
 366        }
 367
 368        if (!found_cpu || !found_mem)
 369                e = -ENODATA;
 370
 371err:
 372        return e;
 373}
 374
 375
 376static int sn_topology_show(struct seq_file *s, void *d)
 377{
 378        int sz;
 379        int pt;
 380        int e = 0;
 381        int i;
 382        int j;
 383        const char *slabname;
 384        int ordinal;
 385        char slice;
 386        struct cpuinfo_ia64 *c;
 387        struct sn_hwperf_port_info *ptdata;
 388        struct sn_hwperf_object_info *p;
 389        struct sn_hwperf_object_info *obj = d;  /* this object */
 390        struct sn_hwperf_object_info *objs = s->private; /* all objects */
 391        u8 shubtype;
 392        u8 system_size;
 393        u8 sharing_size;
 394        u8 partid;
 395        u8 coher;
 396        u8 nasid_shift;
 397        u8 region_size;
 398        u16 nasid_mask;
 399        int nasid_msb;
 400
 401        if (obj == objs) {
 402                seq_printf(s, "# sn_topology version 2\n");
 403                seq_printf(s, "# objtype ordinal location partition"
 404                        " [attribute value [, ...]]\n");
 405
 406                if (ia64_sn_get_sn_info(0,
 407                        &shubtype, &nasid_mask, &nasid_shift, &system_size,
 408                        &sharing_size, &partid, &coher, &region_size))
 409                        BUG();
 410                for (nasid_msb=63; nasid_msb > 0; nasid_msb--) {
 411                        if (((u64)nasid_mask << nasid_shift) & (1ULL << nasid_msb))
 412                                break;
 413                }
 414                seq_printf(s, "partition %u %s local "
 415                        "shubtype %s, "
 416                        "nasid_mask 0x%016llx, "
 417                        "nasid_bits %d:%d, "
 418                        "system_size %d, "
 419                        "sharing_size %d, "
 420                        "coherency_domain %d, "
 421                        "region_size %d\n",
 422
 423                        partid, utsname()->nodename,
 424                        shubtype ? "shub2" : "shub1", 
 425                        (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
 426                        system_size, sharing_size, coher, region_size);
 427
 428                print_pci_topology(s);
 429        }
 430
 431        if (SN_HWPERF_FOREIGN(obj)) {
 432                /* private in another partition: not interesting */
 433                return 0;
 434        }
 435
 436        for (i = 0; i < SN_HWPERF_MAXSTRING && obj->name[i]; i++) {
 437                if (obj->name[i] == ' ')
 438                        obj->name[i] = '_';
 439        }
 440
 441        slabname = sn_hwperf_get_slabname(obj, objs, &ordinal);
 442        seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
 443                obj->sn_hwp_this_part ? "local" : "shared", obj->name);
 444
 445        if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
 446                seq_putc(s, '\n');
 447        else {
 448                cnodeid_t near_mem = -1;
 449                cnodeid_t near_cpu = -1;
 450
 451                seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
 452
 453                if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt,
 454                        ordinal, &near_mem, &near_cpu) == 0) {
 455                        seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d",
 456                                near_mem, near_cpu);
 457                }
 458
 459                if (!SN_HWPERF_IS_IONODE(obj)) {
 460                        for_each_online_node(i) {
 461                                seq_printf(s, i ? ":%d" : ", dist %d",
 462                                        node_distance(ordinal, i));
 463                        }
 464                }
 465
 466                seq_putc(s, '\n');
 467
 468                /*
 469                 * CPUs on this node, if any
 470                 */
 471                if (!SN_HWPERF_IS_IONODE(obj)) {
 472                        for_each_cpu_and(i, cpu_online_mask,
 473                                         cpumask_of_node(ordinal)) {
 474                                slice = 'a' + cpuid_to_slice(i);
 475                                c = cpu_data(i);
 476                                seq_printf(s, "cpu %d %s%c local"
 477                                           " freq %luMHz, arch ia64",
 478                                           i, obj->location, slice,
 479                                           c->proc_freq / 1000000);
 480                                for_each_online_cpu(j) {
 481                                        seq_printf(s, j ? ":%d" : ", dist %d",
 482                                                   node_distance(
 483                                                        cpu_to_node(i),
 484                                                        cpu_to_node(j)));
 485                                }
 486                                seq_putc(s, '\n');
 487                        }
 488                }
 489        }
 490
 491        if (obj->ports) {
 492                /*
 493                 * numalink ports
 494                 */
 495                sz = obj->ports * sizeof(struct sn_hwperf_port_info);
 496                if ((ptdata = kmalloc(sz, GFP_KERNEL)) == NULL)
 497                        return -ENOMEM;
 498                e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 499                                      SN_HWPERF_ENUM_PORTS, obj->id, sz,
 500                                      (u64) ptdata, 0, 0, NULL);
 501                if (e != SN_HWPERF_OP_OK)
 502                        return -EINVAL;
 503                for (ordinal=0, p=objs; p != obj; p++) {
 504                        if (!SN_HWPERF_FOREIGN(p))
 505                                ordinal += p->ports;
 506                }
 507                for (pt = 0; pt < obj->ports; pt++) {
 508                        for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
 509                                if (ptdata[pt].conn_id == p->id) {
 510                                        break;
 511                                }
 512                        }
 513                        seq_printf(s, "numalink %d %s-%d",
 514                            ordinal+pt, obj->location, ptdata[pt].port);
 515
 516                        if (i >= sn_hwperf_obj_cnt) {
 517                                /* no connection */
 518                                seq_puts(s, " local endpoint disconnected"
 519                                            ", protocol unknown\n");
 520                                continue;
 521                        }
 522
 523                        if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
 524                                /* both ends local to this partition */
 525                                seq_puts(s, " local");
 526                        else if (SN_HWPERF_FOREIGN(p))
 527                                /* both ends of the link in foreign partiton */
 528                                seq_puts(s, " foreign");
 529                        else
 530                                /* link straddles a partition */
 531                                seq_puts(s, " shared");
 532
 533                        /*
 534                         * Unlikely, but strictly should query the LLP config
 535                         * registers because an NL4R can be configured to run
 536                         * NL3 protocol, even when not talking to an NL3 router.
 537                         * Ditto for node-node.
 538                         */
 539                        seq_printf(s, " endpoint %s-%d, protocol %s\n",
 540                                p->location, ptdata[pt].conn_port,
 541                                (SN_HWPERF_IS_NL3ROUTER(obj) ||
 542                                SN_HWPERF_IS_NL3ROUTER(p)) ?  "LLP3" : "LLP4");
 543                }
 544                kfree(ptdata);
 545        }
 546
 547        return 0;
 548}
 549
 550static void *sn_topology_start(struct seq_file *s, loff_t * pos)
 551{
 552        struct sn_hwperf_object_info *objs = s->private;
 553
 554        if (*pos < sn_hwperf_obj_cnt)
 555                return (void *)(objs + *pos);
 556
 557        return NULL;
 558}
 559
 560static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
 561{
 562        ++*pos;
 563        return sn_topology_start(s, pos);
 564}
 565
 566static void sn_topology_stop(struct seq_file *m, void *v)
 567{
 568        return;
 569}
 570
 571/*
 572 * /proc/sgi_sn/sn_topology, read-only using seq_file
 573 */
 574static const struct seq_operations sn_topology_seq_ops = {
 575        .start = sn_topology_start,
 576        .next = sn_topology_next,
 577        .stop = sn_topology_stop,
 578        .show = sn_topology_show
 579};
 580
 581struct sn_hwperf_op_info {
 582        u64 op;
 583        struct sn_hwperf_ioctl_args *a;
 584        void *p;
 585        int *v0;
 586        int ret;
 587};
 588
 589static void sn_hwperf_call_sal(void *info)
 590{
 591        struct sn_hwperf_op_info *op_info = info;
 592        int r;
 593
 594        r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op,
 595                      op_info->a->arg, op_info->a->sz,
 596                      (u64) op_info->p, 0, 0, op_info->v0);
 597        op_info->ret = r;
 598}
 599
 600static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
 601{
 602        u32 cpu;
 603        u32 use_ipi;
 604        int r = 0;
 605        cpumask_t save_allowed;
 606        
 607        cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
 608        use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
 609        op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
 610
 611        if (cpu != SN_HWPERF_ARG_ANY_CPU) {
 612                if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 613                        r = -EINVAL;
 614                        goto out;
 615                }
 616        }
 617
 618        if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
 619                /* don't care, or already on correct cpu */
 620                sn_hwperf_call_sal(op_info);
 621        }
 622        else {
 623                if (use_ipi) {
 624                        /* use an interprocessor interrupt to call SAL */
 625                        smp_call_function_single(cpu, sn_hwperf_call_sal,
 626                                op_info, 1);
 627                }
 628                else {
 629                        /* migrate the task before calling SAL */ 
 630                        save_allowed = current->cpus_allowed;
 631                        set_cpus_allowed_ptr(current, cpumask_of(cpu));
 632                        sn_hwperf_call_sal(op_info);
 633                        set_cpus_allowed_ptr(current, &save_allowed);
 634                }
 635        }
 636        r = op_info->ret;
 637
 638out:
 639        return r;
 640}
 641
 642/* map SAL hwperf error code to system error code */
 643static int sn_hwperf_map_err(int hwperf_err)
 644{
 645        int e;
 646
 647        switch(hwperf_err) {
 648        case SN_HWPERF_OP_OK:
 649                e = 0;
 650                break;
 651
 652        case SN_HWPERF_OP_NOMEM:
 653                e = -ENOMEM;
 654                break;
 655
 656        case SN_HWPERF_OP_NO_PERM:
 657                e = -EPERM;
 658                break;
 659
 660        case SN_HWPERF_OP_IO_ERROR:
 661                e = -EIO;
 662                break;
 663
 664        case SN_HWPERF_OP_BUSY:
 665                e = -EBUSY;
 666                break;
 667
 668        case SN_HWPERF_OP_RECONFIGURE:
 669                e = -EAGAIN;
 670                break;
 671
 672        case SN_HWPERF_OP_INVAL:
 673        default:
 674                e = -EINVAL;
 675                break;
 676        }
 677
 678        return e;
 679}
 680
 681/*
 682 * ioctl for "sn_hwperf" misc device
 683 */
 684static long sn_hwperf_ioctl(struct file *fp, u32 op, unsigned long arg)
 685{
 686        struct sn_hwperf_ioctl_args a;
 687        struct cpuinfo_ia64 *cdata;
 688        struct sn_hwperf_object_info *objs;
 689        struct sn_hwperf_object_info *cpuobj;
 690        struct sn_hwperf_op_info op_info;
 691        void *p = NULL;
 692        int nobj;
 693        char slice;
 694        int node;
 695        int r;
 696        int v0;
 697        int i;
 698        int j;
 699
 700        /* only user requests are allowed here */
 701        if ((op & SN_HWPERF_OP_MASK) < 10) {
 702                r = -EINVAL;
 703                goto error;
 704        }
 705        r = copy_from_user(&a, (const void __user *)arg,
 706                sizeof(struct sn_hwperf_ioctl_args));
 707        if (r != 0) {
 708                r = -EFAULT;
 709                goto error;
 710        }
 711
 712        /*
 713         * Allocate memory to hold a kernel copy of the user buffer. The
 714         * buffer contents are either copied in or out (or both) of user
 715         * space depending on the flags encoded in the requested operation.
 716         */
 717        if (a.ptr) {
 718                p = vmalloc(a.sz);
 719                if (!p) {
 720                        r = -ENOMEM;
 721                        goto error;
 722                }
 723        }
 724
 725        if (op & SN_HWPERF_OP_MEM_COPYIN) {
 726                r = copy_from_user(p, (const void __user *)a.ptr, a.sz);
 727                if (r != 0) {
 728                        r = -EFAULT;
 729                        goto error;
 730                }
 731        }
 732
 733        switch (op) {
 734        case SN_HWPERF_GET_CPU_INFO:
 735                if (a.sz == sizeof(u64)) {
 736                        /* special case to get size needed */
 737                        *(u64 *) p = (u64) num_online_cpus() *
 738                                sizeof(struct sn_hwperf_object_info);
 739                } else
 740                if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
 741                        r = -ENOMEM;
 742                        goto error;
 743                } else
 744                if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
 745                        int cpuobj_index = 0;
 746
 747                        memset(p, 0, a.sz);
 748                        for (i = 0; i < nobj; i++) {
 749                                if (!SN_HWPERF_IS_NODE(objs + i))
 750                                        continue;
 751                                node = sn_hwperf_obj_to_cnode(objs + i);
 752                                for_each_online_cpu(j) {
 753                                        if (node != cpu_to_node(j))
 754                                                continue;
 755                                        cpuobj = (struct sn_hwperf_object_info *) p + cpuobj_index++;
 756                                        slice = 'a' + cpuid_to_slice(j);
 757                                        cdata = cpu_data(j);
 758                                        cpuobj->id = j;
 759                                        snprintf(cpuobj->name,
 760                                                 sizeof(cpuobj->name),
 761                                                 "CPU %luMHz %s",
 762                                                 cdata->proc_freq / 1000000,
 763                                                 cdata->vendor);
 764                                        snprintf(cpuobj->location,
 765                                                 sizeof(cpuobj->location),
 766                                                 "%s%c", objs[i].location,
 767                                                 slice);
 768                                }
 769                        }
 770
 771                        vfree(objs);
 772                }
 773                break;
 774
 775        case SN_HWPERF_GET_NODE_NASID:
 776                if (a.sz != sizeof(u64) ||
 777                   (node = a.arg) < 0 || !cnode_possible(node)) {
 778                        r = -EINVAL;
 779                        goto error;
 780                }
 781                *(u64 *)p = (u64)cnodeid_to_nasid(node);
 782                break;
 783
 784        case SN_HWPERF_GET_OBJ_NODE:
 785                i = a.arg;
 786                if (a.sz != sizeof(u64) || i < 0) {
 787                        r = -EINVAL;
 788                        goto error;
 789                }
 790                if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
 791                        if (i >= nobj) {
 792                                r = -EINVAL;
 793                                vfree(objs);
 794                                goto error;
 795                        }
 796                        if (objs[i].id != a.arg) {
 797                                for (i = 0; i < nobj; i++) {
 798                                        if (objs[i].id == a.arg)
 799                                                break;
 800                                }
 801                        }
 802                        if (i == nobj) {
 803                                r = -EINVAL;
 804                                vfree(objs);
 805                                goto error;
 806                        }
 807
 808                        if (!SN_HWPERF_IS_NODE(objs + i) &&
 809                            !SN_HWPERF_IS_IONODE(objs + i)) {
 810                                r = -ENOENT;
 811                                vfree(objs);
 812                                goto error;
 813                        }
 814
 815                        *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
 816                        vfree(objs);
 817                }
 818                break;
 819
 820        case SN_HWPERF_GET_MMRS:
 821        case SN_HWPERF_SET_MMRS:
 822        case SN_HWPERF_OBJECT_DISTANCE:
 823                op_info.p = p;
 824                op_info.a = &a;
 825                op_info.v0 = &v0;
 826                op_info.op = op;
 827                r = sn_hwperf_op_cpu(&op_info);
 828                if (r) {
 829                        r = sn_hwperf_map_err(r);
 830                        a.v0 = v0;
 831                        goto error;
 832                }
 833                break;
 834
 835        default:
 836                /* all other ops are a direct SAL call */
 837                r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
 838                              a.arg, a.sz, (u64) p, 0, 0, &v0);
 839                if (r) {
 840                        r = sn_hwperf_map_err(r);
 841                        goto error;
 842                }
 843                a.v0 = v0;
 844                break;
 845        }
 846
 847        if (op & SN_HWPERF_OP_MEM_COPYOUT) {
 848                r = copy_to_user((void __user *)a.ptr, p, a.sz);
 849                if (r != 0) {
 850                        r = -EFAULT;
 851                        goto error;
 852                }
 853        }
 854
 855error:
 856        vfree(p);
 857
 858        return r;
 859}
 860
 861static const struct file_operations sn_hwperf_fops = {
 862        .unlocked_ioctl = sn_hwperf_ioctl,
 863        .llseek = noop_llseek,
 864};
 865
 866static struct miscdevice sn_hwperf_dev = {
 867        MISC_DYNAMIC_MINOR,
 868        "sn_hwperf",
 869        &sn_hwperf_fops
 870};
 871
 872static int sn_hwperf_init(void)
 873{
 874        u64 v;
 875        int salr;
 876        int e = 0;
 877
 878        /* single threaded, once-only initialization */
 879        mutex_lock(&sn_hwperf_init_mutex);
 880
 881        if (sn_hwperf_salheap) {
 882                mutex_unlock(&sn_hwperf_init_mutex);
 883                return e;
 884        }
 885
 886        /*
 887         * The PROM code needs a fixed reference node. For convenience the
 888         * same node as the console I/O is used.
 889         */
 890        sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
 891
 892        /*
 893         * Request the needed size and install the PROM scratch area.
 894         * The PROM keeps various tracking bits in this memory area.
 895         */
 896        salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 897                                 (u64) SN_HWPERF_GET_HEAPSIZE, 0,
 898                                 (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
 899        if (salr != SN_HWPERF_OP_OK) {
 900                e = -EINVAL;
 901                goto out;
 902        }
 903
 904        if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
 905                e = -ENOMEM;
 906                goto out;
 907        }
 908        salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 909                                 SN_HWPERF_INSTALL_HEAP, 0, v,
 910                                 (u64) sn_hwperf_salheap, 0, 0, NULL);
 911        if (salr != SN_HWPERF_OP_OK) {
 912                e = -EINVAL;
 913                goto out;
 914        }
 915
 916        salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
 917                                 SN_HWPERF_OBJECT_COUNT, 0,
 918                                 sizeof(u64), (u64) &v, 0, 0, NULL);
 919        if (salr != SN_HWPERF_OP_OK) {
 920                e = -EINVAL;
 921                goto out;
 922        }
 923        sn_hwperf_obj_cnt = (int)v;
 924
 925out:
 926        if (e < 0 && sn_hwperf_salheap) {
 927                vfree(sn_hwperf_salheap);
 928                sn_hwperf_salheap = NULL;
 929                sn_hwperf_obj_cnt = 0;
 930        }
 931        mutex_unlock(&sn_hwperf_init_mutex);
 932        return e;
 933}
 934
 935int sn_topology_open(struct inode *inode, struct file *file)
 936{
 937        int e;
 938        struct seq_file *seq;
 939        struct sn_hwperf_object_info *objbuf;
 940        int nobj;
 941
 942        if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
 943                e = seq_open(file, &sn_topology_seq_ops);
 944                seq = file->private_data;
 945                seq->private = objbuf;
 946        }
 947
 948        return e;
 949}
 950
 951int sn_topology_release(struct inode *inode, struct file *file)
 952{
 953        struct seq_file *seq = file->private_data;
 954
 955        vfree(seq->private);
 956        return seq_release(inode, file);
 957}
 958
 959int sn_hwperf_get_nearest_node(cnodeid_t node,
 960        cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node)
 961{
 962        int e;
 963        int nobj;
 964        struct sn_hwperf_object_info *objbuf;
 965
 966        if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
 967                e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj,
 968                        node, near_mem_node, near_cpu_node);
 969                vfree(objbuf);
 970        }
 971
 972        return e;
 973}
 974
 975static int __devinit sn_hwperf_misc_register_init(void)
 976{
 977        int e;
 978
 979        if (!ia64_platform_is("sn2"))
 980                return 0;
 981
 982        sn_hwperf_init();
 983
 984        /*
 985         * Register a dynamic misc device for hwperf ioctls. Platforms
 986         * supporting hotplug will create /dev/sn_hwperf, else user
 987         * can to look up the minor number in /proc/misc.
 988         */
 989        if ((e = misc_register(&sn_hwperf_dev)) != 0) {
 990                printk(KERN_ERR "sn_hwperf_misc_register_init: failed to "
 991                "register misc device for \"%s\"\n", sn_hwperf_dev.name);
 992        }
 993
 994        return e;
 995}
 996
 997device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */
 998EXPORT_SYMBOL(sn_hwperf_get_nearest_node);
 999