linux/arch/s390/kernel/sthyi.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * store hypervisor information instruction emulation functions.
   4 *
   5 * Copyright IBM Corp. 2016
   6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
   7 */
   8#include <linux/errno.h>
   9#include <linux/pagemap.h>
  10#include <linux/vmalloc.h>
  11#include <linux/syscalls.h>
  12#include <linux/mutex.h>
  13#include <asm/asm-offsets.h>
  14#include <asm/sclp.h>
  15#include <asm/diag.h>
  16#include <asm/sysinfo.h>
  17#include <asm/ebcdic.h>
  18#include <asm/facility.h>
  19#include <asm/sthyi.h>
  20#include "entry.h"
  21
  22#define DED_WEIGHT 0xffff
  23/*
  24 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
  25 * as they are justified with spaces.
  26 */
  27#define CP  0xc3d7404040404040UL
  28#define IFL 0xc9c6d34040404040UL
  29
/*
 * Flag bits reported in the header section's infhflg1 byte.  Only
 * HDR_PERF_UNAV is set by this file (in fill_diag(), when no global
 * performance data is available); the others are defined for
 * completeness of the response layout.
 */
enum hdr_flags {
	HDR_NOT_LPAR   = 0x10,
	HDR_STACK_INCM = 0x20,
	HDR_STSI_UNAV  = 0x40,
	HDR_PERF_UNAV  = 0x80,
};
  36
/*
 * Validity bits for the machine section's infmval1 byte; each bit
 * marks a group of mac_sctn fields as filled in (see fill_stsi_mac()
 * and fill_diag_mac()).
 */
enum mac_validity {
	MAC_NAME_VLD = 0x20,
	MAC_ID_VLD   = 0x40,
	MAC_CNT_VLD  = 0x80,
};
  42
/*
 * Flag bits for the partition section's infpflg1 byte.  PAR_MT_EN is
 * set in fill_diag() when the caller's partition block reports a
 * non-zero mtid (multithreading enabled).
 */
enum par_flag {
	PAR_MT_EN = 0x80,
};
  46
/*
 * Validity bits for the partition section's infpval1 byte; each bit
 * marks a group of par_sctn fields as filled in (see fill_stsi_par()
 * and fill_diag()).
 */
enum par_validity {
	PAR_GRP_VLD  = 0x08,
	PAR_ID_VLD   = 0x10,
	PAR_ABS_VLD  = 0x20,
	PAR_WGHT_VLD = 0x40,
	PAR_PCNT_VLD  = 0x80,
};
  54
/*
 * Header section of the STHYI response buffer.  It describes where the
 * machine and partition sections live inside the buffer; the offsets
 * and lengths are filled in by fill_hdr().
 */
struct hdr_sctn {
	u8 infhflg1;	/* flags, see enum hdr_flags */
	u8 infhflg2; /* reserved */
	u8 infhval1; /* reserved */
	u8 infhval2; /* reserved */
	u8 reserved[3];
	u8 infhygct;	/* not written by this emulation */
	u16 infhtotl;	/* total length of the response */
	u16 infhdln;	/* length of this header section */
	u16 infmoff;	/* offset of the machine section */
	u16 infmlen;	/* length of the machine section */
	u16 infpoff;	/* offset of the partition section */
	u16 infplen;	/* length of the partition section */
	/*
	 * The hypervisor/guest section descriptors below are part of the
	 * architected layout but are never filled in by this file.
	 */
	u16 infhoff1;
	u16 infhlen1;
	u16 infgoff1;
	u16 infglen1;
	u16 infhoff2;
	u16 infhlen2;
	u16 infgoff2;
	u16 infglen2;
	u16 infhoff3;
	u16 infhlen3;
	u16 infgoff3;
	u16 infglen3;
	u8 reserved2[4];
} __packed;
  82
/*
 * Machine section of the STHYI response buffer: machine-wide cpu
 * counts (from diag204, see fill_diag_mac()) and machine identity
 * (from stsi 1.1.1, see fill_stsi_mac()).
 */
struct mac_sctn {
	u8 infmflg1; /* reserved */
	u8 infmflg2; /* reserved */
	u8 infmval1;	/* validity bits, see enum mac_validity */
	u8 infmval2; /* reserved */
	u16 infmscps;	/* shared CPs */
	u16 infmdcps;	/* dedicated CPs */
	u16 infmsifl;	/* shared IFLs */
	u16 infmdifl;	/* dedicated IFLs */
	char infmname[8];	/* CPC name (EBCDIC, from SCLP) */
	char infmtype[4];	/* machine type */
	char infmmanu[16];	/* manufacturer */
	char infmseq[16];	/* sequence code */
	char infmpman[4];	/* plant of manufacture */
	u8 reserved[4];
} __packed;
  99
/*
 * Partition section of the STHYI response buffer: identity, cpu
 * counts, weights and caps of the calling lpar (filled in by
 * fill_stsi_par() and fill_diag()).
 */
struct par_sctn {
	u8 infpflg1;	/* flags, see enum par_flag */
	u8 infpflg2; /* reserved */
	u8 infpval1;	/* validity bits, see enum par_validity */
	u8 infpval2; /* reserved */
	u16 infppnum;	/* partition number */
	u16 infpscps;	/* shared CPs of this lpar */
	u16 infpdcps;	/* dedicated CPs of this lpar */
	u16 infpsifl;	/* shared IFLs of this lpar */
	u16 infpdifl;	/* dedicated IFLs of this lpar */
	u16 reserved;
	char infppnam[8];	/* partition name (from stsi 2.2.2) */
	u32 infpwbcp;	/* weight-based CP capacity */
	u32 infpabcp;	/* absolute CP capping */
	u32 infpwbif;	/* weight-based IFL capacity */
	u32 infpabif;	/* absolute IFL capping */
	char infplgnm[8];	/* hardware group name */
	u32 infplgcp;	/* group CP capping */
	u32 infplgif;	/* group IFL capping */
} __packed;
 120
/* Complete emulated STHYI response: header, machine and partition sections. */
struct sthyi_sctns {
	struct hdr_sctn hdr;
	struct mac_sctn mac;
	struct par_sctn par;
} __packed;
 126
/*
 * Accumulated diag204 data for one cpu type (CP or IFL) while walking
 * the partition blocks in lpar_cpu_inf().
 */
struct cpu_inf {
	u64 lpar_cap;		/* absolute cap of the calling lpar */
	u64 lpar_grp_cap;	/* group cap of the calling lpar */
	u64 lpar_weight;	/* weight of the calling lpar (only if capped) */
	u64 all_weight;		/* sum of the weights of all lpars */
	int cpu_num_ded;	/* dedicated cpus of the calling lpar */
	int cpu_num_shd;	/* shared cpus of the calling lpar */
};
 135
/* Per-cpu-type accumulators, one for CPs and one for IFLs. */
struct lpar_cpu_inf {
	struct cpu_inf cp;
	struct cpu_inf ifl;
};
 140
 141/*
 142 * STHYI requires extensive locking in the higher hypervisors
 143 * and is very computational/memory expensive. Therefore we
 144 * cache the retrieved data whose valid period is 1s.
 145 */
 146#define CACHE_VALID_JIFFIES     HZ
 147
/* Single-page response cache, guarded by sthyi_mutex. */
struct sthyi_info {
	void *info;		/* zeroed page holding the response buffer */
	unsigned long end;	/* jiffies value at which the data expires */
};

static DEFINE_MUTEX(sthyi_mutex);
static struct sthyi_info sthyi_cache;
 155
 156static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
 157{
 158        return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
 159}
 160
 161/*
 162 * Scales the cpu capping from the lpar range to the one expected in
 163 * sthyi data.
 164 *
 165 * diag204 reports a cap in hundredths of processor units.
 166 * z/VM's range for one core is 0 - 0x10000.
 167 */
 168static u32 scale_cap(u32 in)
 169{
 170        return (0x10000 * in) / 100;
 171}
 172
 173static void fill_hdr(struct sthyi_sctns *sctns)
 174{
 175        sctns->hdr.infhdln = sizeof(sctns->hdr);
 176        sctns->hdr.infmoff = sizeof(sctns->hdr);
 177        sctns->hdr.infmlen = sizeof(sctns->mac);
 178        sctns->hdr.infplen = sizeof(sctns->par);
 179        sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
 180        sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
 181}
 182
 183static void fill_stsi_mac(struct sthyi_sctns *sctns,
 184                          struct sysinfo_1_1_1 *sysinfo)
 185{
 186        sclp_ocf_cpc_name_copy(sctns->mac.infmname);
 187        if (*(u64 *)sctns->mac.infmname != 0)
 188                sctns->mac.infmval1 |= MAC_NAME_VLD;
 189
 190        if (stsi(sysinfo, 1, 1, 1))
 191                return;
 192
 193        memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
 194        memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
 195        memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
 196        memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
 197
 198        sctns->mac.infmval1 |= MAC_ID_VLD;
 199}
 200
 201static void fill_stsi_par(struct sthyi_sctns *sctns,
 202                          struct sysinfo_2_2_2 *sysinfo)
 203{
 204        if (stsi(sysinfo, 2, 2, 2))
 205                return;
 206
 207        sctns->par.infppnum = sysinfo->lpar_number;
 208        memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
 209
 210        sctns->par.infpval1 |= PAR_ID_VLD;
 211}
 212
 213static void fill_stsi(struct sthyi_sctns *sctns)
 214{
 215        void *sysinfo;
 216
 217        /* Errors are handled through the validity bits in the response. */
 218        sysinfo = (void *)__get_free_page(GFP_KERNEL);
 219        if (!sysinfo)
 220                return;
 221
 222        fill_stsi_mac(sctns, sysinfo);
 223        fill_stsi_par(sctns, sysinfo);
 224
 225        free_pages((unsigned long)sysinfo, 0);
 226}
 227
 228static void fill_diag_mac(struct sthyi_sctns *sctns,
 229                          struct diag204_x_phys_block *block,
 230                          void *diag224_buf)
 231{
 232        int i;
 233
 234        for (i = 0; i < block->hdr.cpus; i++) {
 235                switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
 236                case CP:
 237                        if (block->cpus[i].weight == DED_WEIGHT)
 238                                sctns->mac.infmdcps++;
 239                        else
 240                                sctns->mac.infmscps++;
 241                        break;
 242                case IFL:
 243                        if (block->cpus[i].weight == DED_WEIGHT)
 244                                sctns->mac.infmdifl++;
 245                        else
 246                                sctns->mac.infmsifl++;
 247                        break;
 248                }
 249        }
 250        sctns->mac.infmval1 |= MAC_CNT_VLD;
 251}
 252
/*
 * Accumulate the cpu information of one diag204 partition block into
 * *part_inf and return a pointer to the next partition block.
 *
 * Weights of every partition are summed into all_weight; caps, cpu
 * counts and (if capped) the lpar's own weight are only collected for
 * the calling lpar (@this_lpar).
 */
static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
						 bool this_lpar,
						 void *diag224_buf,
						 struct diag204_x_part_block *block)
{
	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
	struct cpu_inf *cpu_inf;

	for (i = 0; i < block->hdr.rcpus; i++) {
		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
			continue;

		/*
		 * OR-ing collects the shared weight/caps without
		 * special-casing the first online cpu; assumes all shared
		 * cpus of one type report the same value — NOTE(review):
		 * confirm against diag204 field semantics.
		 */
		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
		case CP:
			cpu_inf = &part_inf->cp;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_cp |= block->cpus[i].cur_weight;
			break;
		case IFL:
			cpu_inf = &part_inf->ifl;
			if (block->cpus[i].cur_weight < DED_WEIGHT)
				weight_ifl |= block->cpus[i].cur_weight;
			break;
		default:
			/* Other cpu types do not contribute to the response. */
			continue;
		}

		if (!this_lpar)
			continue;

		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;

		if (block->cpus[i].weight == DED_WEIGHT)
			cpu_inf->cpu_num_ded += 1;
		else
			cpu_inf->cpu_num_shd += 1;
	}

	/* The lpar's own weight is only reported when it is capped. */
	if (this_lpar && capped) {
		part_inf->cp.lpar_weight = weight_cp;
		part_inf->ifl.lpar_weight = weight_ifl;
	}
	part_inf->cp.all_weight += weight_cp;
	part_inf->ifl.all_weight += weight_ifl;
	/* The cpu array is the last member, so one past it is the next block. */
	return (struct diag204_x_part_block *)&block->cpus[i];
}
 302
/*
 * Fill all diag204/diag224-based response fields: partition caps,
 * weights, cpu counts and the machine cpu counts.  Errors are handled
 * through the validity bits in the response, so this function has no
 * return value.
 */
static void fill_diag(struct sthyi_sctns *sctns)
{
	int i, r, pages;
	bool this_lpar;
	void *diag204_buf;
	void *diag224_buf = NULL;
	struct diag204_x_info_blk_hdr *ti_hdr;
	struct diag204_x_part_block *part_block;
	struct diag204_x_phys_block *phys_block;
	struct lpar_cpu_inf lpar_inf = {};

	/* Errors are handled through the validity bits in the response. */
	/* Subcode RSI queries how many pages the STIB7 data will need. */
	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
	if (pages <= 0)
		return;

	diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
	if (!diag204_buf)
		return;

	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
		    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
	if (r < 0)
		goto out;

	/* diag224 provides the cpu type name table used by cpu_id(). */
	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
	if (!diag224_buf || diag224(diag224_buf))
		goto out;

	ti_hdr = diag204_buf;
	part_block = diag204_buf + sizeof(*ti_hdr);

	for (i = 0; i < ti_hdr->npar; i++) {
		/*
		 * For the calling lpar we also need to get the cpu
		 * caps and weights. The time information block header
		 * specifies the offset to the partition block of the
		 * caller lpar, so we know when we process its data.
		 */
		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
					  part_block);
	}

	/* After the last partition block comes the physical block. */
	phys_block = (struct diag204_x_phys_block *)part_block;
	/* Point back at the caller's own block for the MT flag and group name. */
	part_block = diag204_buf + ti_hdr->this_part;
	if (part_block->hdr.mtid)
		sctns->par.infpflg1 = PAR_MT_EN;

	sctns->par.infpval1 |= PAR_GRP_VLD;
	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
	       sizeof(sctns->par.infplgnm));

	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
	sctns->par.infpval1 |= PAR_PCNT_VLD;

	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
	sctns->par.infpval1 |= PAR_ABS_VLD;

	/*
	 * Everything below needs global performance data to be
	 * meaningful.
	 */
	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
		goto out;
	}

	fill_diag_mac(sctns, phys_block, diag224_buf);

	/* Weight-based capacity: share of the shared cpus scaled to 0x10000. */
	if (lpar_inf.cp.lpar_weight) {
		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
	}

	if (lpar_inf.ifl.lpar_weight) {
		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
	}
	sctns->par.infpval1 |= PAR_WGHT_VLD;

out:
	free_page((unsigned long)diag224_buf);
	vfree(diag204_buf);
}
 395
/*
 * Execute the real STHYI instruction (opcode 0xb256) with function
 * code 0, storing the response at @vaddr.  Returns the condition code;
 * the return code from register R2+1 is stored in *rc.
 */
static int sthyi(u64 vaddr, u64 *rc)
{
	union register_pair r1 = { .even = 0, }; /* subcode */
	union register_pair r2 = { .even = vaddr, };
	int cc;

	asm volatile(
		".insn   rre,0xB2560000,%[r1],%[r2]\n"
		"ipm     %[cc]\n"	/* extract the condition code... */
		"srl     %[cc],28\n"	/* ...into the low bits of cc */
		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
		: [r1] "d" (r1.pair)
		: "memory", "cc");
	*rc = r2.odd;
	return cc;
}
 412
 413static int fill_dst(void *dst, u64 *rc)
 414{
 415        struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
 416
 417        /*
 418         * If the facility is on, we don't want to emulate the instruction.
 419         * We ask the hypervisor to provide the data.
 420         */
 421        if (test_facility(74))
 422                return sthyi((u64)dst, rc);
 423
 424        fill_hdr(sctns);
 425        fill_stsi(sctns);
 426        fill_diag(sctns);
 427        *rc = 0;
 428        return 0;
 429}
 430
 431static int sthyi_init_cache(void)
 432{
 433        if (sthyi_cache.info)
 434                return 0;
 435        sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
 436        if (!sthyi_cache.info)
 437                return -ENOMEM;
 438        sthyi_cache.end = jiffies - 1; /* expired */
 439        return 0;
 440}
 441
 442static int sthyi_update_cache(u64 *rc)
 443{
 444        int r;
 445
 446        memset(sthyi_cache.info, 0, PAGE_SIZE);
 447        r = fill_dst(sthyi_cache.info, rc);
 448        if (r)
 449                return r;
 450        sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
 451        return r;
 452}
 453
/*
 * sthyi_fill - Fill page with data returned by the STHYI instruction
 *
 * @dst: Pointer to zeroed page
 * @rc:  Pointer for storing the return code of the instruction
 *
 * Fills the destination with system information returned by the STHYI
 * instruction. The data is generated by emulation or execution of STHYI,
 * if available. The return value is the condition code that would be
 * returned, the rc parameter is the return code which is passed in
 * register R2 + 1.
 *
 * The data is served from a one-page cache (valid for
 * CACHE_VALID_JIFFIES) protected by sthyi_mutex; a successfully served
 * response always reports *rc == 0, while on a non-zero condition code
 * *rc keeps the value set by the instruction/emulation.
 */
int sthyi_fill(void *dst, u64 *rc)
{
	int r;

	mutex_lock(&sthyi_mutex);
	r = sthyi_init_cache();
	if (r)
		goto out;

	if (time_is_before_jiffies(sthyi_cache.end)) {
		/* cache expired */
		r = sthyi_update_cache(rc);
		if (r)
			goto out;
	}
	*rc = 0;
	memcpy(dst, sthyi_cache.info, PAGE_SIZE);
out:
	mutex_unlock(&sthyi_mutex);
	return r;
}
EXPORT_SYMBOL_GPL(sthyi_fill);
 488
/*
 * System call exposing STHYI data to user space.
 *
 * @function_code: must be STHYI_FC_CP_IFL_CAP, the only supported code
 * @buffer:        user buffer that receives one page of response data
 * @return_code:   optional user pointer receiving the STHYI return code
 * @flags:         must be 0
 *
 * Returns the STHYI condition code on success, a negative errno on
 * failure.
 */
SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
		u64 __user *, return_code, unsigned long, flags)
{
	u64 sthyi_rc;
	void *info;
	int r;

	if (flags)
		return -EINVAL;
	if (function_code != STHYI_FC_CP_IFL_CAP)
		return -EOPNOTSUPP;
	/* Stage the response in a kernel page before copying it out. */
	info = (void *)get_zeroed_page(GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	r = sthyi_fill(info, &sthyi_rc);
	if (r < 0)
		goto out;
	/* return_code is optional; only fault if the user provided one. */
	if (return_code && put_user(sthyi_rc, return_code)) {
		r = -EFAULT;
		goto out;
	}
	if (copy_to_user(buffer, info, PAGE_SIZE))
		r = -EFAULT;
out:
	free_page((unsigned long)info);
	return r;
}
 516