linux/arch/s390/kvm/kvm-s390.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2020
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11 *               Jason J. Herne <jjherne@us.ibm.com>
  12 */
  13
  14#define KMSG_COMPONENT "kvm-s390"
  15#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17#include <linux/compiler.h>
  18#include <linux/err.h>
  19#include <linux/fs.h>
  20#include <linux/hrtimer.h>
  21#include <linux/init.h>
  22#include <linux/kvm.h>
  23#include <linux/kvm_host.h>
  24#include <linux/mman.h>
  25#include <linux/module.h>
  26#include <linux/moduleparam.h>
  27#include <linux/random.h>
  28#include <linux/slab.h>
  29#include <linux/timer.h>
  30#include <linux/vmalloc.h>
  31#include <linux/bitmap.h>
  32#include <linux/sched/signal.h>
  33#include <linux/string.h>
  34#include <linux/pgtable.h>
  35
  36#include <asm/asm-offsets.h>
  37#include <asm/lowcore.h>
  38#include <asm/stp.h>
  39#include <asm/gmap.h>
  40#include <asm/nmi.h>
  41#include <asm/switch_to.h>
  42#include <asm/isc.h>
  43#include <asm/sclp.h>
  44#include <asm/cpacf.h>
  45#include <asm/timex.h>
  46#include <asm/ap.h>
  47#include <asm/uv.h>
  48#include "kvm-s390.h"
  49#include "gaccess.h"
  50
  51#define CREATE_TRACE_POINTS
  52#include "trace.h"
  53#include "trace-s390.h"
  54
  55#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  56#define LOCAL_IRQS 32
  57#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  58                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  59
  60struct kvm_stats_debugfs_item debugfs_entries[] = {
  61        VCPU_STAT("userspace_handled", exit_userspace),
  62        VCPU_STAT("exit_null", exit_null),
  63        VCPU_STAT("exit_validity", exit_validity),
  64        VCPU_STAT("exit_stop_request", exit_stop_request),
  65        VCPU_STAT("exit_external_request", exit_external_request),
  66        VCPU_STAT("exit_io_request", exit_io_request),
  67        VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
  68        VCPU_STAT("exit_instruction", exit_instruction),
  69        VCPU_STAT("exit_pei", exit_pei),
  70        VCPU_STAT("exit_program_interruption", exit_program_interruption),
  71        VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
  72        VCPU_STAT("exit_operation_exception", exit_operation_exception),
  73        VCPU_STAT("halt_successful_poll", halt_successful_poll),
  74        VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
  75        VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
  76        VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
  77        VCPU_STAT("halt_wakeup", halt_wakeup),
  78        VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
  79        VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
  80        VCPU_STAT("instruction_lctlg", instruction_lctlg),
  81        VCPU_STAT("instruction_lctl", instruction_lctl),
  82        VCPU_STAT("instruction_stctl", instruction_stctl),
  83        VCPU_STAT("instruction_stctg", instruction_stctg),
  84        VCPU_STAT("deliver_ckc", deliver_ckc),
  85        VCPU_STAT("deliver_cputm", deliver_cputm),
  86        VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
  87        VCPU_STAT("deliver_external_call", deliver_external_call),
  88        VCPU_STAT("deliver_service_signal", deliver_service_signal),
  89        VCPU_STAT("deliver_virtio", deliver_virtio),
  90        VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
  91        VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
  92        VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
  93        VCPU_STAT("deliver_program", deliver_program),
  94        VCPU_STAT("deliver_io", deliver_io),
  95        VCPU_STAT("deliver_machine_check", deliver_machine_check),
  96        VCPU_STAT("exit_wait_state", exit_wait_state),
  97        VCPU_STAT("inject_ckc", inject_ckc),
  98        VCPU_STAT("inject_cputm", inject_cputm),
  99        VCPU_STAT("inject_external_call", inject_external_call),
 100        VM_STAT("inject_float_mchk", inject_float_mchk),
 101        VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
 102        VM_STAT("inject_io", inject_io),
 103        VCPU_STAT("inject_mchk", inject_mchk),
 104        VM_STAT("inject_pfault_done", inject_pfault_done),
 105        VCPU_STAT("inject_program", inject_program),
 106        VCPU_STAT("inject_restart", inject_restart),
 107        VM_STAT("inject_service_signal", inject_service_signal),
 108        VCPU_STAT("inject_set_prefix", inject_set_prefix),
 109        VCPU_STAT("inject_stop_signal", inject_stop_signal),
 110        VCPU_STAT("inject_pfault_init", inject_pfault_init),
 111        VM_STAT("inject_virtio", inject_virtio),
 112        VCPU_STAT("instruction_epsw", instruction_epsw),
 113        VCPU_STAT("instruction_gs", instruction_gs),
 114        VCPU_STAT("instruction_io_other", instruction_io_other),
 115        VCPU_STAT("instruction_lpsw", instruction_lpsw),
 116        VCPU_STAT("instruction_lpswe", instruction_lpswe),
 117        VCPU_STAT("instruction_pfmf", instruction_pfmf),
 118        VCPU_STAT("instruction_ptff", instruction_ptff),
 119        VCPU_STAT("instruction_stidp", instruction_stidp),
 120        VCPU_STAT("instruction_sck", instruction_sck),
 121        VCPU_STAT("instruction_sckpf", instruction_sckpf),
 122        VCPU_STAT("instruction_spx", instruction_spx),
 123        VCPU_STAT("instruction_stpx", instruction_stpx),
 124        VCPU_STAT("instruction_stap", instruction_stap),
 125        VCPU_STAT("instruction_iske", instruction_iske),
 126        VCPU_STAT("instruction_ri", instruction_ri),
 127        VCPU_STAT("instruction_rrbe", instruction_rrbe),
 128        VCPU_STAT("instruction_sske", instruction_sske),
 129        VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
 130        VCPU_STAT("instruction_essa", instruction_essa),
 131        VCPU_STAT("instruction_stsi", instruction_stsi),
 132        VCPU_STAT("instruction_stfl", instruction_stfl),
 133        VCPU_STAT("instruction_tb", instruction_tb),
 134        VCPU_STAT("instruction_tpi", instruction_tpi),
 135        VCPU_STAT("instruction_tprot", instruction_tprot),
 136        VCPU_STAT("instruction_tsch", instruction_tsch),
 137        VCPU_STAT("instruction_sthyi", instruction_sthyi),
 138        VCPU_STAT("instruction_sie", instruction_sie),
 139        VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
 140        VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
 141        VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
 142        VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
 143        VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
 144        VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
 145        VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
 146        VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
 147        VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
 148        VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
 149        VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
 150        VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
 151        VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
 152        VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
 153        VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
 154        VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
 155        VCPU_STAT("instruction_diag_10", diagnose_10),
 156        VCPU_STAT("instruction_diag_44", diagnose_44),
 157        VCPU_STAT("instruction_diag_9c", diagnose_9c),
 158        VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
 159        VCPU_STAT("instruction_diag_258", diagnose_258),
 160        VCPU_STAT("instruction_diag_308", diagnose_308),
 161        VCPU_STAT("instruction_diag_500", diagnose_500),
 162        VCPU_STAT("instruction_diag_other", diagnose_other),
 163        { NULL }
 164};
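/*
 * Note (assumption, not part of the original file): each entry above is
 * exported by common KVM code as a separate counter file under the kvm
 * debugfs directory, typically /sys/kernel/debug/kvm/.
 */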
 165
 166struct kvm_s390_tod_clock_ext {
 167        __u8 epoch_idx;
 168        __u64 tod;
 169        __u8 reserved[7];
 170} __packed;
 171
 172/* allow nested virtualization in KVM (if enabled by user space) */
 173static int nested;
 174module_param(nested, int, S_IRUGO);
 175MODULE_PARM_DESC(nested, "Nested virtualization support");
 176
 177/* allow 1m huge page guest backing, if !nested */
 178static int hpage;
 179module_param(hpage, int, 0444);
 180MODULE_PARM_DESC(hpage, "1m huge page backing support");
 181
 182/* maximum percentage of steal time for polling.  >100 is treated like 100 */
 183static u8 halt_poll_max_steal = 10;
 184module_param(halt_poll_max_steal, byte, 0644);
 185MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 186
 187/* if set to true, the GISA will be initialized and used if available */
 188static bool use_gisa  = true;
 189module_param(use_gisa, bool, 0644);
 190MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
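/*
 * Usage sketch (assumption, not part of the original file): the 0444
 * parameters (nested, hpage) can only be set at module load time, e.g.
 * "modprobe kvm nested=1", while the 0644 ones (halt_poll_max_steal,
 * use_gisa) can also be changed at runtime via /sys/module/kvm/parameters/.
 */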
 191
 192/*
 193 * For now we handle at most 16 double words as this is what the s390 base
 194 * kernel handles and stores in the prefix page. If we ever need to go beyond
 195 * this, this requires changes to code, but the external uapi can stay.
 196 */
 197#define SIZE_INTERNAL 16
 198
 199/*
 200 * Base feature mask that defines default mask for facilities. Consists of the
 201 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 202 */
 203static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 204/*
 205 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 206 * and defines the facilities that can be enabled via a cpu model.
 207 */
 208static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 209
 210static unsigned long kvm_s390_fac_size(void)
 211{
 212        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 213        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 214        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 215                sizeof(S390_lowcore.stfle_fac_list));
 216
 217        return SIZE_INTERNAL;
 218}
 219
 220/* available cpu features supported by kvm */
 221static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 222/* available subfunctions indicated via query / "test bit" */
 223static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 224
 225static struct gmap_notifier gmap_notifier;
 226static struct gmap_notifier vsie_gmap_notifier;
 227debug_info_t *kvm_s390_dbf;
 228debug_info_t *kvm_s390_dbf_uv;
 229
 230/* Section: not file related */
 231int kvm_arch_hardware_enable(void)
 232{
 233        /* every s390 is virtualization enabled ;-) */
 234        return 0;
 235}
 236
 237int kvm_arch_check_processor_compat(void *opaque)
 238{
 239        return 0;
 240}
 241
 242/* forward declarations */
 243static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 244                              unsigned long end);
 245static int sca_switch_to_extended(struct kvm *kvm);
 246
 247static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 248{
 249        u8 delta_idx = 0;
 250
 251        /*
 252         * The TOD jumps by delta, we have to compensate this by adding
 253         * -delta to the epoch.
 254         */
 255        delta = -delta;
 256
 257        /* sign-extension - we're adding to signed values below */
 258        if ((s64)delta < 0)
 259                delta_idx = -1;
 260
 261        scb->epoch += delta;
 262        if (scb->ecd & ECD_MEF) {
 263                scb->epdx += delta_idx;
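                /* carry out of the 64-bit epoch addition goes into the epoch index */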
 264                if (scb->epoch < delta)
 265                        scb->epdx += 1;
 266        }
 267}
 268
 269/*
 270 * This callback is executed during stop_machine(). All CPUs are therefore
 271 * temporarily stopped. In order not to change guest behavior, we have to
 272 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 273 * so a CPU won't be stopped while calculating with the epoch.
 274 */
 275static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 276                          void *v)
 277{
 278        struct kvm *kvm;
 279        struct kvm_vcpu *vcpu;
 280        int i;
 281        unsigned long long *delta = v;
 282
 283        list_for_each_entry(kvm, &vm_list, vm_list) {
 284                kvm_for_each_vcpu(i, vcpu, kvm) {
 285                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 286                        if (i == 0) {
 287                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 288                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 289                        }
 290                        if (vcpu->arch.cputm_enabled)
 291                                vcpu->arch.cputm_start += *delta;
 292                        if (vcpu->arch.vsie_block)
 293                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
 294                                                   *delta);
 295                }
 296        }
 297        return NOTIFY_OK;
 298}
 299
 300static struct notifier_block kvm_clock_notifier = {
 301        .notifier_call = kvm_clock_sync,
 302};
 303
 304int kvm_arch_hardware_setup(void *opaque)
 305{
 306        gmap_notifier.notifier_call = kvm_gmap_notifier;
 307        gmap_register_pte_notifier(&gmap_notifier);
 308        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 309        gmap_register_pte_notifier(&vsie_gmap_notifier);
 310        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 311                                       &kvm_clock_notifier);
 312        return 0;
 313}
 314
 315void kvm_arch_hardware_unsetup(void)
 316{
 317        gmap_unregister_pte_notifier(&gmap_notifier);
 318        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 319        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 320                                         &kvm_clock_notifier);
 321}
 322
 323static void allow_cpu_feat(unsigned long nr)
 324{
 325        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 326}
 327
 328static inline int plo_test_bit(unsigned char nr)
 329{
 330        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 331        int cc;
 332
 333        asm volatile(
 334                /* Parameter registers are ignored for "test bit" */
 335                "       plo     0,0,0,0(0)\n"
 336                "       ipm     %0\n"
 337                "       srl     %0,28\n"
 338                : "=d" (cc)
 339                : "d" (r0)
 340                : "cc");
 341        return cc == 0;
 342}
 343
 344static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 345{
 346        register unsigned long r0 asm("0") = 0; /* query function */
 347        register unsigned long r1 asm("1") = (unsigned long) query;
 348
 349        asm volatile(
 350                /* Parameter regs are ignored */
 351                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 352                :
 353                : "d" (r0), "a" (r1), [opc] "i" (opcode)
 354                : "cc", "memory");
 355}
 356
 357#define INSN_SORTL 0xb938
 358#define INSN_DFLTCC 0xb939
 359
 360static void kvm_s390_cpu_feat_init(void)
 361{
 362        int i;
 363
 364        for (i = 0; i < 256; ++i) {
 365                if (plo_test_bit(i))
 366                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 367        }
 368
 369        if (test_facility(28)) /* TOD-clock steering */
 370                ptff(kvm_s390_available_subfunc.ptff,
 371                     sizeof(kvm_s390_available_subfunc.ptff),
 372                     PTFF_QAF);
 373
 374        if (test_facility(17)) { /* MSA */
 375                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 376                              kvm_s390_available_subfunc.kmac);
 377                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 378                              kvm_s390_available_subfunc.kmc);
 379                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 380                              kvm_s390_available_subfunc.km);
 381                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 382                              kvm_s390_available_subfunc.kimd);
 383                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 384                              kvm_s390_available_subfunc.klmd);
 385        }
 386        if (test_facility(76)) /* MSA3 */
 387                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 388                              kvm_s390_available_subfunc.pckmo);
 389        if (test_facility(77)) { /* MSA4 */
 390                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 391                              kvm_s390_available_subfunc.kmctr);
 392                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 393                              kvm_s390_available_subfunc.kmf);
 394                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 395                              kvm_s390_available_subfunc.kmo);
 396                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 397                              kvm_s390_available_subfunc.pcc);
 398        }
 399        if (test_facility(57)) /* MSA5 */
 400                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 401                              kvm_s390_available_subfunc.ppno);
 402
 403        if (test_facility(146)) /* MSA8 */
 404                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 405                              kvm_s390_available_subfunc.kma);
 406
 407        if (test_facility(155)) /* MSA9 */
 408                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 409                              kvm_s390_available_subfunc.kdsa);
 410
 411        if (test_facility(150)) /* SORTL */
 412                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 413
 414        if (test_facility(151)) /* DFLTCC */
 415                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 416
 417        if (MACHINE_HAS_ESOP)
 418                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 419        /*
 420         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 421         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 422         */
 423        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 424            !test_facility(3) || !nested)
 425                return;
 426        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 427        if (sclp.has_64bscao)
 428                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 429        if (sclp.has_siif)
 430                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 431        if (sclp.has_gpere)
 432                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 433        if (sclp.has_gsls)
 434                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 435        if (sclp.has_ib)
 436                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 437        if (sclp.has_cei)
 438                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 439        if (sclp.has_ibs)
 440                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 441        if (sclp.has_kss)
 442                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 443        /*
 444         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 445         * all skey handling functions read/set the skey from the PGSTE
 446         * instead of the real storage key.
 447         *
 448         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 449         * pages being detected as preserved although they are resident.
 450         *
 451         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 452         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 453         *
 454         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 455         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 456         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 457         *
 458         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 459         * cannot easily shadow the SCA because of the ipte lock.
 460         */
 461}
 462
 463int kvm_arch_init(void *opaque)
 464{
 465        int rc = -ENOMEM;
 466
 467        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 468        if (!kvm_s390_dbf)
 469                return -ENOMEM;
 470
 471        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 472        if (!kvm_s390_dbf_uv)
 473                goto out;
 474
 475        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 476            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 477                goto out;
 478
 479        kvm_s390_cpu_feat_init();
 480
 481        /* Register floating interrupt controller interface. */
 482        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 483        if (rc) {
 484                pr_err("A FLIC registration call failed with rc=%d\n", rc);
 485                goto out;
 486        }
 487
 488        rc = kvm_s390_gib_init(GAL_ISC);
 489        if (rc)
 490                goto out;
 491
 492        return 0;
 493
 494out:
 495        kvm_arch_exit();
 496        return rc;
 497}
 498
 499void kvm_arch_exit(void)
 500{
 501        kvm_s390_gib_destroy();
 502        debug_unregister(kvm_s390_dbf);
 503        debug_unregister(kvm_s390_dbf_uv);
 504}
 505
 506/* Section: device related */
 507long kvm_arch_dev_ioctl(struct file *filp,
 508                        unsigned int ioctl, unsigned long arg)
 509{
 510        if (ioctl == KVM_S390_ENABLE_SIE)
 511                return s390_enable_sie();
 512        return -EINVAL;
 513}
 514
 515int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 516{
 517        int r;
 518
 519        switch (ext) {
 520        case KVM_CAP_S390_PSW:
 521        case KVM_CAP_S390_GMAP:
 522        case KVM_CAP_SYNC_MMU:
 523#ifdef CONFIG_KVM_S390_UCONTROL
 524        case KVM_CAP_S390_UCONTROL:
 525#endif
 526        case KVM_CAP_ASYNC_PF:
 527        case KVM_CAP_SYNC_REGS:
 528        case KVM_CAP_ONE_REG:
 529        case KVM_CAP_ENABLE_CAP:
 530        case KVM_CAP_S390_CSS_SUPPORT:
 531        case KVM_CAP_IOEVENTFD:
 532        case KVM_CAP_DEVICE_CTRL:
 533        case KVM_CAP_S390_IRQCHIP:
 534        case KVM_CAP_VM_ATTRIBUTES:
 535        case KVM_CAP_MP_STATE:
 536        case KVM_CAP_IMMEDIATE_EXIT:
 537        case KVM_CAP_S390_INJECT_IRQ:
 538        case KVM_CAP_S390_USER_SIGP:
 539        case KVM_CAP_S390_USER_STSI:
 540        case KVM_CAP_S390_SKEYS:
 541        case KVM_CAP_S390_IRQ_STATE:
 542        case KVM_CAP_S390_USER_INSTR0:
 543        case KVM_CAP_S390_CMMA_MIGRATION:
 544        case KVM_CAP_S390_AIS:
 545        case KVM_CAP_S390_AIS_MIGRATION:
 546        case KVM_CAP_S390_VCPU_RESETS:
 547        case KVM_CAP_SET_GUEST_DEBUG:
 548        case KVM_CAP_S390_DIAG318:
 549                r = 1;
 550                break;
 551        case KVM_CAP_S390_HPAGE_1M:
 552                r = 0;
 553                if (hpage && !kvm_is_ucontrol(kvm))
 554                        r = 1;
 555                break;
 556        case KVM_CAP_S390_MEM_OP:
 557                r = MEM_OP_MAX_SIZE;
 558                break;
 559        case KVM_CAP_NR_VCPUS:
 560        case KVM_CAP_MAX_VCPUS:
 561        case KVM_CAP_MAX_VCPU_ID:
 562                r = KVM_S390_BSCA_CPU_SLOTS;
 563                if (!kvm_s390_use_sca_entries())
 564                        r = KVM_MAX_VCPUS;
 565                else if (sclp.has_esca && sclp.has_64bscao)
 566                        r = KVM_S390_ESCA_CPU_SLOTS;
 567                break;
 568        case KVM_CAP_S390_COW:
 569                r = MACHINE_HAS_ESOP;
 570                break;
 571        case KVM_CAP_S390_VECTOR_REGISTERS:
 572                r = MACHINE_HAS_VX;
 573                break;
 574        case KVM_CAP_S390_RI:
 575                r = test_facility(64);
 576                break;
 577        case KVM_CAP_S390_GS:
 578                r = test_facility(133);
 579                break;
 580        case KVM_CAP_S390_BPB:
 581                r = test_facility(82);
 582                break;
 583        case KVM_CAP_S390_PROTECTED:
 584                r = is_prot_virt_host();
 585                break;
 586        default:
 587                r = 0;
 588        }
 589        return r;
 590}
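/*
 * Userspace usage sketch (assumption, not part of the original file): the
 * values returned above are queried with the KVM_CHECK_EXTENSION ioctl on
 * the VM file descriptor, e.g.
 *
 *     int max_size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *     // > 0 means the memory op is supported and the value is the maximum
 *     // transfer size (MEM_OP_MAX_SIZE above)
 */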
 591
 592void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 593{
 594        int i;
 595        gfn_t cur_gfn, last_gfn;
 596        unsigned long gaddr, vmaddr;
 597        struct gmap *gmap = kvm->arch.gmap;
 598        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 599
 600        /* Loop over all guest segments */
 601        cur_gfn = memslot->base_gfn;
 602        last_gfn = memslot->base_gfn + memslot->npages;
 603        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 604                gaddr = gfn_to_gpa(cur_gfn);
 605                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 606                if (kvm_is_error_hva(vmaddr))
 607                        continue;
 608
 609                bitmap_zero(bitmap, _PAGE_ENTRIES);
 610                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 611                for (i = 0; i < _PAGE_ENTRIES; i++) {
 612                        if (test_bit(i, bitmap))
 613                                mark_page_dirty(kvm, cur_gfn + i);
 614                }
 615
 616                if (fatal_signal_pending(current))
 617                        return;
 618                cond_resched();
 619        }
 620}
 621
 622/* Section: vm related */
 623static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 624
 625/*
 626 * Get (and clear) the dirty memory log for a memory slot.
 627 */
 628int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 629                               struct kvm_dirty_log *log)
 630{
 631        int r;
 632        unsigned long n;
 633        struct kvm_memory_slot *memslot;
 634        int is_dirty;
 635
 636        if (kvm_is_ucontrol(kvm))
 637                return -EINVAL;
 638
 639        mutex_lock(&kvm->slots_lock);
 640
 641        r = -EINVAL;
 642        if (log->slot >= KVM_USER_MEM_SLOTS)
 643                goto out;
 644
 645        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 646        if (r)
 647                goto out;
 648
 649        /* Clear the dirty log */
 650        if (is_dirty) {
 651                n = kvm_dirty_bitmap_bytes(memslot);
 652                memset(memslot->dirty_bitmap, 0, n);
 653        }
 654        r = 0;
 655out:
 656        mutex_unlock(&kvm->slots_lock);
 657        return r;
 658}
 659
 660static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 661{
 662        unsigned int i;
 663        struct kvm_vcpu *vcpu;
 664
 665        kvm_for_each_vcpu(i, vcpu, kvm) {
 666                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 667        }
 668}
 669
 670int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 671{
 672        int r;
 673
 674        if (cap->flags)
 675                return -EINVAL;
 676
 677        switch (cap->cap) {
 678        case KVM_CAP_S390_IRQCHIP:
 679                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 680                kvm->arch.use_irqchip = 1;
 681                r = 0;
 682                break;
 683        case KVM_CAP_S390_USER_SIGP:
 684                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 685                kvm->arch.user_sigp = 1;
 686                r = 0;
 687                break;
 688        case KVM_CAP_S390_VECTOR_REGISTERS:
 689                mutex_lock(&kvm->lock);
 690                if (kvm->created_vcpus) {
 691                        r = -EBUSY;
 692                } else if (MACHINE_HAS_VX) {
 693                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 694                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 695                        if (test_facility(134)) {
 696                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 697                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 698                        }
 699                        if (test_facility(135)) {
 700                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 701                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 702                        }
 703                        if (test_facility(148)) {
 704                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
 705                                set_kvm_facility(kvm->arch.model.fac_list, 148);
 706                        }
 707                        if (test_facility(152)) {
 708                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
 709                                set_kvm_facility(kvm->arch.model.fac_list, 152);
 710                        }
 711                        r = 0;
 712                } else
 713                        r = -EINVAL;
 714                mutex_unlock(&kvm->lock);
 715                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 716                         r ? "(not available)" : "(success)");
 717                break;
 718        case KVM_CAP_S390_RI:
 719                r = -EINVAL;
 720                mutex_lock(&kvm->lock);
 721                if (kvm->created_vcpus) {
 722                        r = -EBUSY;
 723                } else if (test_facility(64)) {
 724                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 725                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 726                        r = 0;
 727                }
 728                mutex_unlock(&kvm->lock);
 729                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 730                         r ? "(not available)" : "(success)");
 731                break;
 732        case KVM_CAP_S390_AIS:
 733                mutex_lock(&kvm->lock);
 734                if (kvm->created_vcpus) {
 735                        r = -EBUSY;
 736                } else {
 737                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
 738                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 739                        r = 0;
 740                }
 741                mutex_unlock(&kvm->lock);
 742                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 743                         r ? "(not available)" : "(success)");
 744                break;
 745        case KVM_CAP_S390_GS:
 746                r = -EINVAL;
 747                mutex_lock(&kvm->lock);
 748                if (kvm->created_vcpus) {
 749                        r = -EBUSY;
 750                } else if (test_facility(133)) {
 751                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
 752                        set_kvm_facility(kvm->arch.model.fac_list, 133);
 753                        r = 0;
 754                }
 755                mutex_unlock(&kvm->lock);
 756                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 757                         r ? "(not available)" : "(success)");
 758                break;
 759        case KVM_CAP_S390_HPAGE_1M:
 760                mutex_lock(&kvm->lock);
 761                if (kvm->created_vcpus)
 762                        r = -EBUSY;
 763                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 764                        r = -EINVAL;
 765                else {
 766                        r = 0;
 767                        mmap_write_lock(kvm->mm);
 768                        kvm->mm->context.allow_gmap_hpage_1m = 1;
 769                        mmap_write_unlock(kvm->mm);
 770                        /*
 771                         * We might have to create fake 4k page
 772                         * tables. To avoid that the hardware works on
 773                         * stale PGSTEs, we emulate these instructions.
 774                         */
 775                        kvm->arch.use_skf = 0;
 776                        kvm->arch.use_pfmfi = 0;
 777                }
 778                mutex_unlock(&kvm->lock);
 779                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 780                         r ? "(not available)" : "(success)");
 781                break;
 782        case KVM_CAP_S390_USER_STSI:
 783                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 784                kvm->arch.user_stsi = 1;
 785                r = 0;
 786                break;
 787        case KVM_CAP_S390_USER_INSTR0:
 788                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 789                kvm->arch.user_instr0 = 1;
 790                icpt_operexc_on_all_vcpus(kvm);
 791                r = 0;
 792                break;
 793        default:
 794                r = -EINVAL;
 795                break;
 796        }
 797        return r;
 798}
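/*
 * Userspace usage sketch (assumption, not part of the original file): the
 * VM-wide capabilities handled above are enabled with KVM_ENABLE_CAP on the
 * VM file descriptor, e.g.
 *
 *     struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *     ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */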
 799
 800static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 801{
 802        int ret;
 803
 804        switch (attr->attr) {
 805        case KVM_S390_VM_MEM_LIMIT_SIZE:
 806                ret = 0;
 807                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 808                         kvm->arch.mem_limit);
 809                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 810                        ret = -EFAULT;
 811                break;
 812        default:
 813                ret = -ENXIO;
 814                break;
 815        }
 816        return ret;
 817}
 818
 819static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 820{
 821        int ret;
 822        unsigned int idx;
 823        switch (attr->attr) {
 824        case KVM_S390_VM_MEM_ENABLE_CMMA:
 825                ret = -ENXIO;
 826                if (!sclp.has_cmma)
 827                        break;
 828
 829                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 830                mutex_lock(&kvm->lock);
 831                if (kvm->created_vcpus)
 832                        ret = -EBUSY;
 833                else if (kvm->mm->context.allow_gmap_hpage_1m)
 834                        ret = -EINVAL;
 835                else {
 836                        kvm->arch.use_cmma = 1;
 837                        /* Not compatible with cmma. */
 838                        kvm->arch.use_pfmfi = 0;
 839                        ret = 0;
 840                }
 841                mutex_unlock(&kvm->lock);
 842                break;
 843        case KVM_S390_VM_MEM_CLR_CMMA:
 844                ret = -ENXIO;
 845                if (!sclp.has_cmma)
 846                        break;
 847                ret = -EINVAL;
 848                if (!kvm->arch.use_cmma)
 849                        break;
 850
 851                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 852                mutex_lock(&kvm->lock);
 853                idx = srcu_read_lock(&kvm->srcu);
 854                s390_reset_cmma(kvm->arch.gmap->mm);
 855                srcu_read_unlock(&kvm->srcu, idx);
 856                mutex_unlock(&kvm->lock);
 857                ret = 0;
 858                break;
 859        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 860                unsigned long new_limit;
 861
 862                if (kvm_is_ucontrol(kvm))
 863                        return -EINVAL;
 864
 865                if (get_user(new_limit, (u64 __user *)attr->addr))
 866                        return -EFAULT;
 867
 868                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 869                    new_limit > kvm->arch.mem_limit)
 870                        return -E2BIG;
 871
 872                if (!new_limit)
 873                        return -EINVAL;
 874
 875                /* gmap_create takes last usable address */
 876                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 877                        new_limit -= 1;
 878
 879                ret = -EBUSY;
 880                mutex_lock(&kvm->lock);
 881                if (!kvm->created_vcpus) {
 882                        /* gmap_create will round the limit up */
 883                        struct gmap *new = gmap_create(current->mm, new_limit);
 884
 885                        if (!new) {
 886                                ret = -ENOMEM;
 887                        } else {
 888                                gmap_remove(kvm->arch.gmap);
 889                                new->private = kvm;
 890                                kvm->arch.gmap = new;
 891                                ret = 0;
 892                        }
 893                }
 894                mutex_unlock(&kvm->lock);
 895                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 896                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 897                         (void *) kvm->arch.gmap->asce);
 898                break;
 899        }
 900        default:
 901                ret = -ENXIO;
 902                break;
 903        }
 904        return ret;
 905}
 906
 907static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 908
 909void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 910{
 911        struct kvm_vcpu *vcpu;
 912        int i;
 913
 914        kvm_s390_vcpu_block_all(kvm);
 915
 916        kvm_for_each_vcpu(i, vcpu, kvm) {
 917                kvm_s390_vcpu_crypto_setup(vcpu);
 918                /* recreate the shadow crycb by leaving the VSIE handler */
 919                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 920        }
 921
 922        kvm_s390_vcpu_unblock_all(kvm);
 923}
 924
 925static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 926{
 927        mutex_lock(&kvm->lock);
 928        switch (attr->attr) {
 929        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 930                if (!test_kvm_facility(kvm, 76)) {
 931                        mutex_unlock(&kvm->lock);
 932                        return -EINVAL;
 933                }
 934                get_random_bytes(
 935                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 936                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 937                kvm->arch.crypto.aes_kw = 1;
 938                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 939                break;
 940        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 941                if (!test_kvm_facility(kvm, 76)) {
 942                        mutex_unlock(&kvm->lock);
 943                        return -EINVAL;
 944                }
 945                get_random_bytes(
 946                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 947                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 948                kvm->arch.crypto.dea_kw = 1;
 949                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 950                break;
 951        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 952                if (!test_kvm_facility(kvm, 76)) {
 953                        mutex_unlock(&kvm->lock);
 954                        return -EINVAL;
 955                }
 956                kvm->arch.crypto.aes_kw = 0;
 957                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 958                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 959                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 960                break;
 961        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 962                if (!test_kvm_facility(kvm, 76)) {
 963                        mutex_unlock(&kvm->lock);
 964                        return -EINVAL;
 965                }
 966                kvm->arch.crypto.dea_kw = 0;
 967                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 968                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 969                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 970                break;
 971        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 972                if (!ap_instructions_available()) {
 973                        mutex_unlock(&kvm->lock);
 974                        return -EOPNOTSUPP;
 975                }
 976                kvm->arch.crypto.apie = 1;
 977                break;
 978        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 979                if (!ap_instructions_available()) {
 980                        mutex_unlock(&kvm->lock);
 981                        return -EOPNOTSUPP;
 982                }
 983                kvm->arch.crypto.apie = 0;
 984                break;
 985        default:
 986                mutex_unlock(&kvm->lock);
 987                return -ENXIO;
 988        }
 989
 990        kvm_s390_vcpu_crypto_reset_all(kvm);
 991        mutex_unlock(&kvm->lock);
 992        return 0;
 993}
 994
 995static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 996{
 997        int cx;
 998        struct kvm_vcpu *vcpu;
 999
1000        kvm_for_each_vcpu(cx, vcpu, kvm)
1001                kvm_s390_sync_request(req, vcpu);
1002}
1003
1004/*
1005 * Must be called with kvm->srcu held to avoid races on memslots, and with
1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007 */
1008static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009{
1010        struct kvm_memory_slot *ms;
1011        struct kvm_memslots *slots;
1012        unsigned long ram_pages = 0;
1013        int slotnr;
1014
1015        /* migration mode already enabled */
1016        if (kvm->arch.migration_mode)
1017                return 0;
1018        slots = kvm_memslots(kvm);
1019        if (!slots || !slots->used_slots)
1020                return -EINVAL;
1021
1022        if (!kvm->arch.use_cmma) {
1023                kvm->arch.migration_mode = 1;
1024                return 0;
1025        }
1026        /* mark all the pages in active slots as dirty */
1027        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028                ms = slots->memslots + slotnr;
1029                if (!ms->dirty_bitmap)
1030                        return -EINVAL;
1031                /*
1032                 * The second half of the bitmap is only used on x86,
1033                 * and would be wasted otherwise, so we put it to good
1034                 * use here to keep track of the state of the storage
1035                 * attributes.
1036                 */
1037                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038                ram_pages += ms->npages;
1039        }
1040        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041        kvm->arch.migration_mode = 1;
1042        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043        return 0;
1044}
1045
1046/*
1047 * Must be called with kvm->slots_lock to avoid races with ourselves and
1048 * kvm_s390_vm_start_migration.
1049 */
1050static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051{
1052        /* migration mode already disabled */
1053        if (!kvm->arch.migration_mode)
1054                return 0;
1055        kvm->arch.migration_mode = 0;
1056        if (kvm->arch.use_cmma)
1057                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058        return 0;
1059}
1060
1061static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062                                     struct kvm_device_attr *attr)
1063{
1064        int res = -ENXIO;
1065
1066        mutex_lock(&kvm->slots_lock);
1067        switch (attr->attr) {
1068        case KVM_S390_VM_MIGRATION_START:
1069                res = kvm_s390_vm_start_migration(kvm);
1070                break;
1071        case KVM_S390_VM_MIGRATION_STOP:
1072                res = kvm_s390_vm_stop_migration(kvm);
1073                break;
1074        default:
1075                break;
1076        }
1077        mutex_unlock(&kvm->slots_lock);
1078
1079        return res;
1080}
1081
1082static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083                                     struct kvm_device_attr *attr)
1084{
1085        u64 mig = kvm->arch.migration_mode;
1086
1087        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088                return -ENXIO;
1089
1090        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091                return -EFAULT;
1092        return 0;
1093}
1094
1095static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1096{
1097        struct kvm_s390_vm_tod_clock gtod;
1098
1099        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1100                return -EFAULT;
1101
1102        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1103                return -EINVAL;
1104        kvm_s390_set_tod_clock(kvm, &gtod);
1105
1106        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1107                gtod.epoch_idx, gtod.tod);
1108
1109        return 0;
1110}
1111
1112static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1113{
1114        u8 gtod_high;
1115
1116        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1117                                           sizeof(gtod_high)))
1118                return -EFAULT;
1119
1120        if (gtod_high != 0)
1121                return -EINVAL;
1122        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1123
1124        return 0;
1125}
1126
1127static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1128{
1129        struct kvm_s390_vm_tod_clock gtod = { 0 };
1130
1131        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1132                           sizeof(gtod.tod)))
1133                return -EFAULT;
1134
1135        kvm_s390_set_tod_clock(kvm, &gtod);
1136        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1137        return 0;
1138}
1139
1140static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1141{
1142        int ret;
1143
1144        if (attr->flags)
1145                return -EINVAL;
1146
1147        switch (attr->attr) {
1148        case KVM_S390_VM_TOD_EXT:
1149                ret = kvm_s390_set_tod_ext(kvm, attr);
1150                break;
1151        case KVM_S390_VM_TOD_HIGH:
1152                ret = kvm_s390_set_tod_high(kvm, attr);
1153                break;
1154        case KVM_S390_VM_TOD_LOW:
1155                ret = kvm_s390_set_tod_low(kvm, attr);
1156                break;
1157        default:
1158                ret = -ENXIO;
1159                break;
1160        }
1161        return ret;
1162}
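/*
 * Userspace usage sketch (assumption, not part of the original file): these
 * TOD handlers sit behind the KVM_SET_DEVICE_ATTR vm ioctl with group
 * KVM_S390_VM_TOD, e.g.
 *
 *     __u64 tod = 0;
 *     struct kvm_device_attr attr = {
 *             .group = KVM_S390_VM_TOD,
 *             .attr  = KVM_S390_VM_TOD_LOW,
 *             .addr  = (__u64)&tod,
 *     };
 *     ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */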
1163
1164static void kvm_s390_get_tod_clock(struct kvm *kvm,
1165                                   struct kvm_s390_vm_tod_clock *gtod)
1166{
1167        struct kvm_s390_tod_clock_ext htod;
1168
1169        preempt_disable();
1170
1171        get_tod_clock_ext((char *)&htod);
1172
1173        gtod->tod = htod.tod + kvm->arch.epoch;
1174        gtod->epoch_idx = 0;
1175        if (test_kvm_facility(kvm, 139)) {
1176                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1177                if (gtod->tod < htod.tod)
1178                        gtod->epoch_idx += 1;
1179        }
1180
1181        preempt_enable();
1182}
1183
1184static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1185{
1186        struct kvm_s390_vm_tod_clock gtod;
1187
1188        memset(&gtod, 0, sizeof(gtod));
1189        kvm_s390_get_tod_clock(kvm, &gtod);
1190        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1191                return -EFAULT;
1192
1193        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1194                gtod.epoch_idx, gtod.tod);
1195        return 0;
1196}
1197
1198static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1199{
1200        u8 gtod_high = 0;
1201
1202        if (copy_to_user((void __user *)attr->addr, &gtod_high,
1203                                         sizeof(gtod_high)))
1204                return -EFAULT;
1205        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1206
1207        return 0;
1208}
1209
1210static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1211{
1212        u64 gtod;
1213
1214        gtod = kvm_s390_get_tod_clock_fast(kvm);
1215        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216                return -EFAULT;
1217        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1218
1219        return 0;
1220}
1221
1222static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1223{
1224        int ret;
1225
1226        if (attr->flags)
1227                return -EINVAL;
1228
1229        switch (attr->attr) {
1230        case KVM_S390_VM_TOD_EXT:
1231                ret = kvm_s390_get_tod_ext(kvm, attr);
1232                break;
1233        case KVM_S390_VM_TOD_HIGH:
1234                ret = kvm_s390_get_tod_high(kvm, attr);
1235                break;
1236        case KVM_S390_VM_TOD_LOW:
1237                ret = kvm_s390_get_tod_low(kvm, attr);
1238                break;
1239        default:
1240                ret = -ENXIO;
1241                break;
1242        }
1243        return ret;
1244}
1245
1246static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247{
1248        struct kvm_s390_vm_cpu_processor *proc;
1249        u16 lowest_ibc, unblocked_ibc;
1250        int ret = 0;
1251
1252        mutex_lock(&kvm->lock);
1253        if (kvm->created_vcpus) {
1254                ret = -EBUSY;
1255                goto out;
1256        }
1257        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1258        if (!proc) {
1259                ret = -ENOMEM;
1260                goto out;
1261        }
1262        if (!copy_from_user(proc, (void __user *)attr->addr,
1263                            sizeof(*proc))) {
1264                kvm->arch.model.cpuid = proc->cpuid;
1265                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1266                unblocked_ibc = sclp.ibc & 0xfff;
1267                if (lowest_ibc && proc->ibc) {
1268                        if (proc->ibc > unblocked_ibc)
1269                                kvm->arch.model.ibc = unblocked_ibc;
1270                        else if (proc->ibc < lowest_ibc)
1271                                kvm->arch.model.ibc = lowest_ibc;
1272                        else
1273                                kvm->arch.model.ibc = proc->ibc;
1274                }
1275                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1276                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1277                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1278                         kvm->arch.model.ibc,
1279                         kvm->arch.model.cpuid);
1280                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1281                         kvm->arch.model.fac_list[0],
1282                         kvm->arch.model.fac_list[1],
1283                         kvm->arch.model.fac_list[2]);
1284        } else
1285                ret = -EFAULT;
1286        kfree(proc);
1287out:
1288        mutex_unlock(&kvm->lock);
1289        return ret;
1290}
1291
1292static int kvm_s390_set_processor_feat(struct kvm *kvm,
1293                                       struct kvm_device_attr *attr)
1294{
1295        struct kvm_s390_vm_cpu_feat data;
1296
1297        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1298                return -EFAULT;
1299        if (!bitmap_subset((unsigned long *) data.feat,
1300                           kvm_s390_available_cpu_feat,
1301                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1302                return -EINVAL;
1303
1304        mutex_lock(&kvm->lock);
1305        if (kvm->created_vcpus) {
1306                mutex_unlock(&kvm->lock);
1307                return -EBUSY;
1308        }
1309        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1310                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1311        mutex_unlock(&kvm->lock);
1312        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1313                         data.feat[0],
1314                         data.feat[1],
1315                         data.feat[2]);
1316        return 0;
1317}
1318
1319static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1320                                          struct kvm_device_attr *attr)
1321{
1322        mutex_lock(&kvm->lock);
1323        if (kvm->created_vcpus) {
1324                mutex_unlock(&kvm->lock);
1325                return -EBUSY;
1326        }
1327
1328        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1329                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1330                mutex_unlock(&kvm->lock);
1331                return -EFAULT;
1332        }
1333        mutex_unlock(&kvm->lock);
1334
1335        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1336                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1337                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1338                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1339                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1340        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1341                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1342                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1343        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1344                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1345                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1346        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1347                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1348                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1349        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1350                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1351                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1352        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1353                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1354                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1355        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1356                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1357                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1358        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1359                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1360                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1361        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1362                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1363                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1364        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1365                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1366                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1367        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1368                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1369                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1370        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1371                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1372                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1373        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1374                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1375                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1376        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1377                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1378                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1379        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1380                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1381                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1382        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1383                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1384                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1385                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1386                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1387        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1389                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1390                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1391                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1392
1393        return 0;
1394}
1395
1396static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1397{
1398        int ret = -ENXIO;
1399
1400        switch (attr->attr) {
1401        case KVM_S390_VM_CPU_PROCESSOR:
1402                ret = kvm_s390_set_processor(kvm, attr);
1403                break;
1404        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1405                ret = kvm_s390_set_processor_feat(kvm, attr);
1406                break;
1407        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1409                break;
1410        }
1411        return ret;
1412}
1413
1414static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1415{
1416        struct kvm_s390_vm_cpu_processor *proc;
1417        int ret = 0;
1418
1419        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1420        if (!proc) {
1421                ret = -ENOMEM;
1422                goto out;
1423        }
1424        proc->cpuid = kvm->arch.model.cpuid;
1425        proc->ibc = kvm->arch.model.ibc;
1426        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1427               S390_ARCH_FAC_LIST_SIZE_BYTE);
1428        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1429                 kvm->arch.model.ibc,
1430                 kvm->arch.model.cpuid);
1431        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1432                 kvm->arch.model.fac_list[0],
1433                 kvm->arch.model.fac_list[1],
1434                 kvm->arch.model.fac_list[2]);
1435        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1436                ret = -EFAULT;
1437        kfree(proc);
1438out:
1439        return ret;
1440}
1441
1442static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1443{
1444        struct kvm_s390_vm_cpu_machine *mach;
1445        int ret = 0;
1446
1447        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1448        if (!mach) {
1449                ret = -ENOMEM;
1450                goto out;
1451        }
1452        get_cpu_id((struct cpuid *) &mach->cpuid);
1453        mach->ibc = sclp.ibc;
1454        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1455               S390_ARCH_FAC_LIST_SIZE_BYTE);
1456        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1457               sizeof(S390_lowcore.stfle_fac_list));
1458        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1459                 kvm->arch.model.ibc,
1460                 kvm->arch.model.cpuid);
1461        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1462                 mach->fac_mask[0],
1463                 mach->fac_mask[1],
1464                 mach->fac_mask[2]);
1465        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1466                 mach->fac_list[0],
1467                 mach->fac_list[1],
1468                 mach->fac_list[2]);
1469        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1470                ret = -EFAULT;
1471        kfree(mach);
1472out:
1473        return ret;
1474}
1475
1476static int kvm_s390_get_processor_feat(struct kvm *kvm,
1477                                       struct kvm_device_attr *attr)
1478{
1479        struct kvm_s390_vm_cpu_feat data;
1480
1481        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1482                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1483        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1484                return -EFAULT;
1485        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1486                         data.feat[0],
1487                         data.feat[1],
1488                         data.feat[2]);
1489        return 0;
1490}
1491
1492static int kvm_s390_get_machine_feat(struct kvm *kvm,
1493                                     struct kvm_device_attr *attr)
1494{
1495        struct kvm_s390_vm_cpu_feat data;
1496
1497        bitmap_copy((unsigned long *) data.feat,
1498                    kvm_s390_available_cpu_feat,
1499                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1500        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1501                return -EFAULT;
1502        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1503                         data.feat[0],
1504                         data.feat[1],
1505                         data.feat[2]);
1506        return 0;
1507}
1508
1509static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1510                                          struct kvm_device_attr *attr)
1511{
1512        if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1513            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1514                return -EFAULT;
1515
1516        VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1518                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1519                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1520                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1521        VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1522                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1523                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1524        VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1525                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1526                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1527        VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1528                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1529                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1530        VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1531                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1532                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1533        VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1534                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1535                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1536        VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1537                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1538                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1539        VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1540                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1541                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1542        VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1543                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1544                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1545        VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1546                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1547                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1548        VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1549                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1550                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1551        VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1552                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1553                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1554        VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1555                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1556                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1557        VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1558                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1559                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1560        VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1561                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1562                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1563        VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1564                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1565                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1566                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1567                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1568        VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1570                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1571                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1572                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1573
1574        return 0;
1575}
1576
1577static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1578                                        struct kvm_device_attr *attr)
1579{
1580        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1581            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1582                return -EFAULT;
1583
1584        VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1585                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1586                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1587                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1588                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1589        VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1590                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1591                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1592        VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1593                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1594                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1595        VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1596                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1597                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1598        VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1599                 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1600                 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1601        VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1602                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1603                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1604        VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1605                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1606                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1607        VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1608                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1609                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1610        VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1611                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1612                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1613        VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1614                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1615                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1616        VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1617                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1618                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1619        VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1620                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1621                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1622        VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1623                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1624                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1625        VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1626                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1627                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1628        VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1629                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1630                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1631        VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1632                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1633                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1634                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1635                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1636        VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1638                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1639                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1640                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1641
1642        return 0;
1643}
1644
1645static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1646{
1647        int ret = -ENXIO;
1648
1649        switch (attr->attr) {
1650        case KVM_S390_VM_CPU_PROCESSOR:
1651                ret = kvm_s390_get_processor(kvm, attr);
1652                break;
1653        case KVM_S390_VM_CPU_MACHINE:
1654                ret = kvm_s390_get_machine(kvm, attr);
1655                break;
1656        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1657                ret = kvm_s390_get_processor_feat(kvm, attr);
1658                break;
1659        case KVM_S390_VM_CPU_MACHINE_FEAT:
1660                ret = kvm_s390_get_machine_feat(kvm, attr);
1661                break;
1662        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1663                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1664                break;
1665        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1666                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1667                break;
1668        }
1669        return ret;
1670}
1671
1672static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1673{
1674        int ret;
1675
1676        switch (attr->group) {
1677        case KVM_S390_VM_MEM_CTRL:
1678                ret = kvm_s390_set_mem_control(kvm, attr);
1679                break;
1680        case KVM_S390_VM_TOD:
1681                ret = kvm_s390_set_tod(kvm, attr);
1682                break;
1683        case KVM_S390_VM_CPU_MODEL:
1684                ret = kvm_s390_set_cpu_model(kvm, attr);
1685                break;
1686        case KVM_S390_VM_CRYPTO:
1687                ret = kvm_s390_vm_set_crypto(kvm, attr);
1688                break;
1689        case KVM_S390_VM_MIGRATION:
1690                ret = kvm_s390_vm_set_migration(kvm, attr);
1691                break;
1692        default:
1693                ret = -ENXIO;
1694                break;
1695        }
1696
1697        return ret;
1698}
1699
1700static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1701{
1702        int ret;
1703
1704        switch (attr->group) {
1705        case KVM_S390_VM_MEM_CTRL:
1706                ret = kvm_s390_get_mem_control(kvm, attr);
1707                break;
1708        case KVM_S390_VM_TOD:
1709                ret = kvm_s390_get_tod(kvm, attr);
1710                break;
1711        case KVM_S390_VM_CPU_MODEL:
1712                ret = kvm_s390_get_cpu_model(kvm, attr);
1713                break;
1714        case KVM_S390_VM_MIGRATION:
1715                ret = kvm_s390_vm_get_migration(kvm, attr);
1716                break;
1717        default:
1718                ret = -ENXIO;
1719                break;
1720        }
1721
1722        return ret;
1723}
1724
1725static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726{
1727        int ret;
1728
1729        switch (attr->group) {
1730        case KVM_S390_VM_MEM_CTRL:
1731                switch (attr->attr) {
1732                case KVM_S390_VM_MEM_ENABLE_CMMA:
1733                case KVM_S390_VM_MEM_CLR_CMMA:
1734                        ret = sclp.has_cmma ? 0 : -ENXIO;
1735                        break;
1736                case KVM_S390_VM_MEM_LIMIT_SIZE:
1737                        ret = 0;
1738                        break;
1739                default:
1740                        ret = -ENXIO;
1741                        break;
1742                }
1743                break;
1744        case KVM_S390_VM_TOD:
1745                switch (attr->attr) {
1746                case KVM_S390_VM_TOD_LOW:
1747                case KVM_S390_VM_TOD_HIGH:
1748                        ret = 0;
1749                        break;
1750                default:
1751                        ret = -ENXIO;
1752                        break;
1753                }
1754                break;
1755        case KVM_S390_VM_CPU_MODEL:
1756                switch (attr->attr) {
1757                case KVM_S390_VM_CPU_PROCESSOR:
1758                case KVM_S390_VM_CPU_MACHINE:
1759                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1760                case KVM_S390_VM_CPU_MACHINE_FEAT:
1761                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1762                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1763                        ret = 0;
1764                        break;
1765                default:
1766                        ret = -ENXIO;
1767                        break;
1768                }
1769                break;
1770        case KVM_S390_VM_CRYPTO:
1771                switch (attr->attr) {
1772                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1773                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1774                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1775                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1776                        ret = 0;
1777                        break;
1778                case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1779                case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1780                        ret = ap_instructions_available() ? 0 : -ENXIO;
1781                        break;
1782                default:
1783                        ret = -ENXIO;
1784                        break;
1785                }
1786                break;
1787        case KVM_S390_VM_MIGRATION:
1788                ret = 0;
1789                break;
1790        default:
1791                ret = -ENXIO;
1792                break;
1793        }
1794
1795        return ret;
1796}
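/*
 * The set/get/has helpers above back the generic KVM_SET_DEVICE_ATTR,
 * KVM_GET_DEVICE_ATTR and KVM_HAS_DEVICE_ATTR ioctls on the VM file
 * descriptor.  As a rough, hypothetical sketch (vm_fd being an already open
 * VM descriptor), userspace would query and adjust the CPU model like this:
 *
 *	struct kvm_s390_vm_cpu_processor proc;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_PROCESSOR,
 *		.addr  = (__u64)(unsigned long)&proc,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr)) {
 *		ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *		... tweak proc.ibc / proc.fac_list as needed ...
 *		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *	}
 *
 * Note that all setters in the KVM_S390_VM_CPU_MODEL group refuse to run
 * once the first vcpu has been created (see the created_vcpus checks).
 */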
1797
1798static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1799{
1800        uint8_t *keys;
1801        uint64_t hva;
1802        int srcu_idx, i, r = 0;
1803
1804        if (args->flags != 0)
1805                return -EINVAL;
1806
1807        /* Is this guest using storage keys? */
1808        if (!mm_uses_skeys(current->mm))
1809                return KVM_S390_GET_SKEYS_NONE;
1810
1811        /* Enforce sane limit on memory allocation */
1812        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1813                return -EINVAL;
1814
1815        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1816        if (!keys)
1817                return -ENOMEM;
1818
1819        mmap_read_lock(current->mm);
1820        srcu_idx = srcu_read_lock(&kvm->srcu);
1821        for (i = 0; i < args->count; i++) {
1822                hva = gfn_to_hva(kvm, args->start_gfn + i);
1823                if (kvm_is_error_hva(hva)) {
1824                        r = -EFAULT;
1825                        break;
1826                }
1827
1828                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1829                if (r)
1830                        break;
1831        }
1832        srcu_read_unlock(&kvm->srcu, srcu_idx);
1833        mmap_read_unlock(current->mm);
1834
1835        if (!r) {
1836                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1837                                 sizeof(uint8_t) * args->count);
1838                if (r)
1839                        r = -EFAULT;
1840        }
1841
1842        kvfree(keys);
1843        return r;
1844}
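/*
 * KVM_S390_GET_SKEYS is the matching VM ioctl for the function above.  A
 * minimal, hypothetical caller fetching the keys of the first `count'
 * guest pages (count <= KVM_S390_SKEYS_MAX) could look like:
 *
 *	__u8 keys[count];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = count,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A positive return value of KVM_S390_GET_SKEYS_NONE means the guest has
 * never used storage keys, so there is nothing to save.
 */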
1845
1846static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1847{
1848        uint8_t *keys;
1849        uint64_t hva;
1850        int srcu_idx, i, r = 0;
1851        bool unlocked;
1852
1853        if (args->flags != 0)
1854                return -EINVAL;
1855
1856        /* Enforce sane limit on memory allocation */
1857        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1858                return -EINVAL;
1859
1860        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1861        if (!keys)
1862                return -ENOMEM;
1863
1864        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1865                           sizeof(uint8_t) * args->count);
1866        if (r) {
1867                r = -EFAULT;
1868                goto out;
1869        }
1870
1871        /* Enable storage key handling for the guest */
1872        r = s390_enable_skey();
1873        if (r)
1874                goto out;
1875
1876        i = 0;
1877        mmap_read_lock(current->mm);
1878        srcu_idx = srcu_read_lock(&kvm->srcu);
1879        while (i < args->count) {
1880                unlocked = false;
1881                hva = gfn_to_hva(kvm, args->start_gfn + i);
1882                if (kvm_is_error_hva(hva)) {
1883                        r = -EFAULT;
1884                        break;
1885                }
1886
1887                /* Lowest order bit is reserved */
1888                if (keys[i] & 0x01) {
1889                        r = -EINVAL;
1890                        break;
1891                }
1892
1893                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1894                if (r) {
1895                        r = fixup_user_fault(current->mm, hva,
1896                                             FAULT_FLAG_WRITE, &unlocked);
1897                        if (r)
1898                                break;
1899                }
1900                if (!r)
1901                        i++;
1902        }
1903        srcu_read_unlock(&kvm->srcu, srcu_idx);
1904        mmap_read_unlock(current->mm);
1905out:
1906        kvfree(keys);
1907        return r;
1908}
1909
1910/*
1911 * Base address and length must be sent at the start of each block, therefore
1912 * it's cheaper to send some clean data, as long as it's less than the size of
1913 * two longs.
1914 */
1915#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1916/* use the same limit as for the storage key interface, for consistency */
1917#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
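/*
 * On 64 bit KVM_S390_MAX_BIT_DISTANCE evaluates to 16, i.e. a run of up to
 * roughly 16 clean pages is still reported inline as value bytes, since
 * that costs no more than the base address/length metadata ("two longs")
 * that starting a new block would require (see kvm_s390_get_cmma() below).
 */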
1918
1919/*
1920 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1921 * address falls in a hole. In that case the index of one of the memslots
1922 * bordering the hole is returned.
1923 */
1924static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1925{
1926        int start = 0, end = slots->used_slots;
1927        int slot = atomic_read(&slots->lru_slot);
1928        struct kvm_memory_slot *memslots = slots->memslots;
1929
1930        if (gfn >= memslots[slot].base_gfn &&
1931            gfn < memslots[slot].base_gfn + memslots[slot].npages)
1932                return slot;
1933
1934        while (start < end) {
1935                slot = start + (end - start) / 2;
1936
1937                if (gfn >= memslots[slot].base_gfn)
1938                        end = slot;
1939                else
1940                        start = slot + 1;
1941        }
1942
1943        if (start >= slots->used_slots)
1944                return slots->used_slots - 1;
1945
1946        if (gfn >= memslots[start].base_gfn &&
1947            gfn < memslots[start].base_gfn + memslots[start].npages) {
1948                atomic_set(&slots->lru_slot, start);
1949        }
1950
1951        return start;
1952}
1953
1954static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1955                              u8 *res, unsigned long bufsize)
1956{
1957        unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1958
1959        args->count = 0;
1960        while (args->count < bufsize) {
1961                hva = gfn_to_hva(kvm, cur_gfn);
1962                /*
1963                 * We return an error if the first value was invalid, but we
1964                 * return successfully if at least one value was copied.
1965                 */
1966                if (kvm_is_error_hva(hva))
1967                        return args->count ? 0 : -EFAULT;
1968                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1969                        pgstev = 0;
1970                res[args->count++] = (pgstev >> 24) & 0x43;
1971                cur_gfn++;
1972        }
1973
1974        return 0;
1975}
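/*
 * The reported byte is derived from the PGSTE: (pgstev >> 24) & 0x43 keeps
 * the two-bit CMMA usage state (stable/unused/potentially volatile/volatile)
 * plus the NODAT bit, which is exactly the set of bits that
 * kvm_s390_set_cmma_bits() accepts back via
 * _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT.
 */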
1976
1977static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1978                                              unsigned long cur_gfn)
1979{
1980        int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1981        struct kvm_memory_slot *ms = slots->memslots + slotidx;
1982        unsigned long ofs = cur_gfn - ms->base_gfn;
1983
1984        if (ms->base_gfn + ms->npages <= cur_gfn) {
1985                slotidx--;
1986                /* If we are above the highest slot, wrap around */
1987                if (slotidx < 0)
1988                        slotidx = slots->used_slots - 1;
1989
1990                ms = slots->memslots + slotidx;
1991                ofs = 0;
1992        }
1993        ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1994        while ((slotidx > 0) && (ofs >= ms->npages)) {
1995                slotidx--;
1996                ms = slots->memslots + slotidx;
1997                ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1998        }
1999        return ms->base_gfn + ofs;
2000}
2001
2002static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2003                             u8 *res, unsigned long bufsize)
2004{
2005        unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2006        struct kvm_memslots *slots = kvm_memslots(kvm);
2007        struct kvm_memory_slot *ms;
2008
2009        if (unlikely(!slots->used_slots))
2010                return 0;
2011
2012        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2013        ms = gfn_to_memslot(kvm, cur_gfn);
2014        args->count = 0;
2015        args->start_gfn = cur_gfn;
2016        if (!ms)
2017                return 0;
2018        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2019        mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2020
2021        while (args->count < bufsize) {
2022                hva = gfn_to_hva(kvm, cur_gfn);
2023                if (kvm_is_error_hva(hva))
2024                        return 0;
2025                /* Decrement only if we actually flipped the bit to 0 */
2026                if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2027                        atomic64_dec(&kvm->arch.cmma_dirty_pages);
2028                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2029                        pgstev = 0;
2030                /* Save the value */
2031                res[args->count++] = (pgstev >> 24) & 0x43;
2032                /* If the next bit is too far away, stop. */
2033                if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2034                        return 0;
2035                /* If we reached the previous "next", find the next one */
2036                if (cur_gfn == next_gfn)
2037                        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038                /* Reached the end of memory or of the buffer, stop */
2039                if ((next_gfn >= mem_end) ||
2040                    (next_gfn - args->start_gfn >= bufsize))
2041                        return 0;
2042                cur_gfn++;
2043                /* Reached the end of the current memslot, take the next one. */
2044                if (cur_gfn - ms->base_gfn >= ms->npages) {
2045                        ms = gfn_to_memslot(kvm, cur_gfn);
2046                        if (!ms)
2047                                return 0;
2048                }
2049        }
2050        return 0;
2051}
2052
2053/*
2054 * This function searches for the next page with dirty CMMA attributes, and
2055 * saves the attributes in the buffer up to either the end of the buffer or
2056 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2057 * no trailing clean bytes are saved.
2058 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2059 * output buffer will indicate 0 as length.
2060 */
2061static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2062                                  struct kvm_s390_cmma_log *args)
2063{
2064        unsigned long bufsize;
2065        int srcu_idx, peek, ret;
2066        u8 *values;
2067
2068        if (!kvm->arch.use_cmma)
2069                return -ENXIO;
2070        /* Invalid/unsupported flags were specified */
2071        if (args->flags & ~KVM_S390_CMMA_PEEK)
2072                return -EINVAL;
2073        /* Migration mode query, and we are not doing a migration */
2074        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2075        if (!peek && !kvm->arch.migration_mode)
2076                return -EINVAL;
2077        /* CMMA is disabled or was not used, or the buffer has length zero */
2078        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2079        if (!bufsize || !kvm->mm->context.uses_cmm) {
2080                memset(args, 0, sizeof(*args));
2081                return 0;
2082        }
2083        /* We are not peeking, and there are no dirty pages */
2084        if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2085                memset(args, 0, sizeof(*args));
2086                return 0;
2087        }
2088
2089        values = vmalloc(bufsize);
2090        if (!values)
2091                return -ENOMEM;
2092
2093        mmap_read_lock(kvm->mm);
2094        srcu_idx = srcu_read_lock(&kvm->srcu);
2095        if (peek)
2096                ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2097        else
2098                ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2099        srcu_read_unlock(&kvm->srcu, srcu_idx);
2100        mmap_read_unlock(kvm->mm);
2101
2102        if (kvm->arch.migration_mode)
2103                args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2104        else
2105                args->remaining = 0;
2106
2107        if (copy_to_user((void __user *)args->values, values, args->count))
2108                ret = -EFAULT;
2109
2110        vfree(values);
2111        return ret;
2112}
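/*
 * Userspace drives this via the KVM_S390_GET_CMMA_BITS VM ioctl.  A rough,
 * hypothetical sketch of a single query during migration:
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = next_gfn,
 *		.count     = sizeof(buf),
 *		.flags     = 0,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *
 *	ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On success log.start_gfn holds the first gfn actually reported, log.count
 * the number of value bytes stored in buf, and log.remaining the number of
 * dirty pages still pending; the caller repeats with an advanced start_gfn
 * until remaining reaches zero.  With KVM_S390_CMMA_PEEK set in flags the
 * values are read without consuming dirty bits, which also works outside
 * migration mode.
 */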
2113
2114/*
2115 * This function sets the CMMA attributes for the given pages. If the input
2116 * buffer has zero length, no action is taken, otherwise the attributes are
2117 * set and the mm->context.uses_cmm flag is set.
2118 */
2119static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2120                                  const struct kvm_s390_cmma_log *args)
2121{
2122        unsigned long hva, mask, pgstev, i;
2123        uint8_t *bits;
2124        int srcu_idx, r = 0;
2125
2126        mask = args->mask;
2127
2128        if (!kvm->arch.use_cmma)
2129                return -ENXIO;
2130        /* invalid/unsupported flags */
2131        if (args->flags != 0)
2132                return -EINVAL;
2133        /* Enforce sane limit on memory allocation */
2134        if (args->count > KVM_S390_CMMA_SIZE_MAX)
2135                return -EINVAL;
2136        /* Nothing to do */
2137        if (args->count == 0)
2138                return 0;
2139
2140        bits = vmalloc(array_size(sizeof(*bits), args->count));
2141        if (!bits)
2142                return -ENOMEM;
2143
2144        r = copy_from_user(bits, (void __user *)args->values, args->count);
2145        if (r) {
2146                r = -EFAULT;
2147                goto out;
2148        }
2149
2150        mmap_read_lock(kvm->mm);
2151        srcu_idx = srcu_read_lock(&kvm->srcu);
2152        for (i = 0; i < args->count; i++) {
2153                hva = gfn_to_hva(kvm, args->start_gfn + i);
2154                if (kvm_is_error_hva(hva)) {
2155                        r = -EFAULT;
2156                        break;
2157                }
2158
2159                pgstev = bits[i];
2160                pgstev = pgstev << 24;
2161                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2162                set_pgste_bits(kvm->mm, hva, mask, pgstev);
2163        }
2164        srcu_read_unlock(&kvm->srcu, srcu_idx);
2165        mmap_read_unlock(kvm->mm);
2166
2167        if (!kvm->mm->context.uses_cmm) {
2168                mmap_write_lock(kvm->mm);
2169                kvm->mm->context.uses_cmm = 1;
2170                mmap_write_unlock(kvm->mm);
2171        }
2172out:
2173        vfree(bits);
2174        return r;
2175}
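/*
 * kvm_s390_set_cmma_bits() is typically used on the target of a migration
 * to replay the values obtained via KVM_S390_GET_CMMA_BITS; whatever mask
 * the caller supplies is clamped to the usage-state and NODAT bits before
 * the PGSTEs are updated.
 */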
2176
2177static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2178{
2179        struct kvm_vcpu *vcpu;
2180        u16 rc, rrc;
2181        int ret = 0;
2182        int i;
2183
2184        /*
2185         * We ignore failures and try to destroy as many CPUs as possible.
2186         * At the same time we must not free the assigned resources when
2187         * this fails, as the ultravisor still has access to that memory.
2188         * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2189         * behind.
2190         * We want to return the first failure rc and rrc, though.
2191         */
2192        kvm_for_each_vcpu(i, vcpu, kvm) {
2193                mutex_lock(&vcpu->mutex);
2194                if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195                        *rcp = rc;
2196                        *rrcp = rrc;
2197                        ret = -EIO;
2198                }
2199                mutex_unlock(&vcpu->mutex);
2200        }
2201        return ret;
2202}
2203
2204static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2205{
2206        int i, r = 0;
2207        u16 dummy;
2208
2209        struct kvm_vcpu *vcpu;
2210
2211        kvm_for_each_vcpu(i, vcpu, kvm) {
2212                mutex_lock(&vcpu->mutex);
2213                r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214                mutex_unlock(&vcpu->mutex);
2215                if (r)
2216                        break;
2217        }
2218        if (r)
2219                kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220        return r;
2221}
2222
2223static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224{
2225        int r = 0;
2226        u16 dummy;
2227        void __user *argp = (void __user *)cmd->data;
2228
2229        switch (cmd->cmd) {
2230        case KVM_PV_ENABLE: {
2231                r = -EINVAL;
2232                if (kvm_s390_pv_is_protected(kvm))
2233                        break;
2234
2235                /*
2236                 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237                 *  esca, we need no cleanup in the error cases below
2238                 */
2239                r = sca_switch_to_extended(kvm);
2240                if (r)
2241                        break;
2242
2243                mmap_write_lock(current->mm);
2244                r = gmap_mark_unmergeable();
2245                mmap_write_unlock(current->mm);
2246                if (r)
2247                        break;
2248
2249                r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250                if (r)
2251                        break;
2252
2253                r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254                if (r)
2255                        kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256
2257                /* we need to block service interrupts from now on */
2258                set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259                break;
2260        }
2261        case KVM_PV_DISABLE: {
2262                r = -EINVAL;
2263                if (!kvm_s390_pv_is_protected(kvm))
2264                        break;
2265
2266                r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267                /*
2268                 * If a CPU could not be destroyed, destroy VM will also fail.
2269                 * There is no point in trying to destroy it. Instead return
2270                 * the rc and rrc from the first CPU whose destruction failed.
2271                 */
2272                if (r)
2273                        break;
2274                r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275
2276                /* no need to block service interrupts any more */
2277                clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278                break;
2279        }
2280        case KVM_PV_SET_SEC_PARMS: {
2281                struct kvm_s390_pv_sec_parm parms = {};
2282                void *hdr;
2283
2284                r = -EINVAL;
2285                if (!kvm_s390_pv_is_protected(kvm))
2286                        break;
2287
2288                r = -EFAULT;
2289                if (copy_from_user(&parms, argp, sizeof(parms)))
2290                        break;
2291
2292                /* Currently restricted to 8KB */
2293                r = -EINVAL;
2294                if (parms.length > PAGE_SIZE * 2)
2295                        break;
2296
2297                r = -ENOMEM;
2298                hdr = vmalloc(parms.length);
2299                if (!hdr)
2300                        break;
2301
2302                r = -EFAULT;
2303                if (!copy_from_user(hdr, (void __user *)parms.origin,
2304                                    parms.length))
2305                        r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306                                                      &cmd->rc, &cmd->rrc);
2307
2308                vfree(hdr);
2309                break;
2310        }
2311        case KVM_PV_UNPACK: {
2312                struct kvm_s390_pv_unp unp = {};
2313
2314                r = -EINVAL;
2315                if (!kvm_s390_pv_is_protected(kvm))
2316                        break;
2317
2318                r = -EFAULT;
2319                if (copy_from_user(&unp, argp, sizeof(unp)))
2320                        break;
2321
2322                r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323                                       &cmd->rc, &cmd->rrc);
2324                break;
2325        }
2326        case KVM_PV_VERIFY: {
2327                r = -EINVAL;
2328                if (!kvm_s390_pv_is_protected(kvm))
2329                        break;
2330
2331                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332                                  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333                KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334                             cmd->rrc);
2335                break;
2336        }
2337        case KVM_PV_PREP_RESET: {
2338                r = -EINVAL;
2339                if (!kvm_s390_pv_is_protected(kvm))
2340                        break;
2341
2342                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343                                  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344                KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345                             cmd->rc, cmd->rrc);
2346                break;
2347        }
2348        case KVM_PV_UNSHARE_ALL: {
2349                r = -EINVAL;
2350                if (!kvm_s390_pv_is_protected(kvm))
2351                        break;
2352
2353                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354                                  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355                KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356                             cmd->rc, cmd->rrc);
2357                break;
2358        }
2359        default:
2360                r = -ENOTTY;
2361        }
2362        return r;
2363}
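/*
 * All of the above is reached through the KVM_S390_PV_COMMAND VM ioctl.
 * The usual (simplified) sequence for starting a protected guest is
 * KVM_PV_ENABLE (create the protected configuration and convert the vcpus),
 * KVM_PV_SET_SEC_PARMS (hand the SE header to the ultravisor), KVM_PV_UNPACK
 * for the encrypted image pages, and finally KVM_PV_VERIFY before the guest
 * is started; KVM_PV_DISABLE converts the VM back to a non-protected one.
 */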
2364
2365long kvm_arch_vm_ioctl(struct file *filp,
2366                       unsigned int ioctl, unsigned long arg)
2367{
2368        struct kvm *kvm = filp->private_data;
2369        void __user *argp = (void __user *)arg;
2370        struct kvm_device_attr attr;
2371        int r;
2372
2373        switch (ioctl) {
2374        case KVM_S390_INTERRUPT: {
2375                struct kvm_s390_interrupt s390int;
2376
2377                r = -EFAULT;
2378                if (copy_from_user(&s390int, argp, sizeof(s390int)))
2379                        break;
2380                r = kvm_s390_inject_vm(kvm, &s390int);
2381                break;
2382        }
2383        case KVM_CREATE_IRQCHIP: {
2384                struct kvm_irq_routing_entry routing;
2385
2386                r = -EINVAL;
2387                if (kvm->arch.use_irqchip) {
2388                        /* Set up dummy routing. */
2389                        memset(&routing, 0, sizeof(routing));
2390                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391                }
2392                break;
2393        }
2394        case KVM_SET_DEVICE_ATTR: {
2395                r = -EFAULT;
2396                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2397                        break;
2398                r = kvm_s390_vm_set_attr(kvm, &attr);
2399                break;
2400        }
2401        case KVM_GET_DEVICE_ATTR: {
2402                r = -EFAULT;
2403                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2404                        break;
2405                r = kvm_s390_vm_get_attr(kvm, &attr);
2406                break;
2407        }
2408        case KVM_HAS_DEVICE_ATTR: {
2409                r = -EFAULT;
2410                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2411                        break;
2412                r = kvm_s390_vm_has_attr(kvm, &attr);
2413                break;
2414        }
2415        case KVM_S390_GET_SKEYS: {
2416                struct kvm_s390_skeys args;
2417
2418                r = -EFAULT;
2419                if (copy_from_user(&args, argp,
2420                                   sizeof(struct kvm_s390_skeys)))
2421                        break;
2422                r = kvm_s390_get_skeys(kvm, &args);
2423                break;
2424        }
2425        case KVM_S390_SET_SKEYS: {
2426                struct kvm_s390_skeys args;
2427
2428                r = -EFAULT;
2429                if (copy_from_user(&args, argp,
2430                                   sizeof(struct kvm_s390_skeys)))
2431                        break;
2432                r = kvm_s390_set_skeys(kvm, &args);
2433                break;
2434        }
2435        case KVM_S390_GET_CMMA_BITS: {
2436                struct kvm_s390_cmma_log args;
2437
2438                r = -EFAULT;
2439                if (copy_from_user(&args, argp, sizeof(args)))
2440                        break;
2441                mutex_lock(&kvm->slots_lock);
2442                r = kvm_s390_get_cmma_bits(kvm, &args);
2443                mutex_unlock(&kvm->slots_lock);
2444                if (!r) {
2445                        r = copy_to_user(argp, &args, sizeof(args));
2446                        if (r)
2447                                r = -EFAULT;
2448                }
2449                break;
2450        }
2451        case KVM_S390_SET_CMMA_BITS: {
2452                struct kvm_s390_cmma_log args;
2453
2454                r = -EFAULT;
2455                if (copy_from_user(&args, argp, sizeof(args)))
2456                        break;
2457                mutex_lock(&kvm->slots_lock);
2458                r = kvm_s390_set_cmma_bits(kvm, &args);
2459                mutex_unlock(&kvm->slots_lock);
2460                break;
2461        }
2462        case KVM_S390_PV_COMMAND: {
2463                struct kvm_pv_cmd args;
2464
2465                /* protected virtualization implies user-controlled cpu state (user sigp) */
2466                kvm->arch.user_cpu_state_ctrl = 1;
2467                r = 0;
2468                if (!is_prot_virt_host()) {
2469                        r = -EINVAL;
2470                        break;
2471                }
2472                if (copy_from_user(&args, argp, sizeof(args))) {
2473                        r = -EFAULT;
2474                        break;
2475                }
2476                if (args.flags) {
2477                        r = -EINVAL;
2478                        break;
2479                }
2480                mutex_lock(&kvm->lock);
2481                r = kvm_s390_handle_pv(kvm, &args);
2482                mutex_unlock(&kvm->lock);
2483                if (copy_to_user(argp, &args, sizeof(args))) {
2484                        r = -EFAULT;
2485                        break;
2486                }
2487                break;
2488        }
2489        default:
2490                r = -ENOTTY;
2491        }
2492
2493        return r;
2494}
2495
2496static int kvm_s390_apxa_installed(void)
2497{
2498        struct ap_config_info info;
2499
2500        if (ap_instructions_available()) {
2501                if (ap_qci(&info) == 0)
2502                        return info.apxa;
2503        }
2504
2505        return 0;
2506}
2507
2508/*
2509 * The format of the crypto control block (CRYCB) is specified in the 3 low
2510 * order bits of the CRYCB designation (CRYCBD) field as follows:
2511 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2512 *           AP extended addressing (APXA) facility is installed.
2513 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2514 * Format 2: Both the APXA and MSAX3 facilities are installed.
2515 */
2516static void kvm_s390_set_crycb_format(struct kvm *kvm)
2517{
2518        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2519
2520        /* Clear the CRYCB format bits - i.e., set format 0 by default */
2521        kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2522
2523        /* Check whether MSAX3 is installed */
2524        if (!test_kvm_facility(kvm, 76))
2525                return;
2526
2527        if (kvm_s390_apxa_installed())
2528                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2529        else
2530                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531}
2532
2533void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2534                               unsigned long *aqm, unsigned long *adm)
2535{
2536        struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2537
2538        mutex_lock(&kvm->lock);
2539        kvm_s390_vcpu_block_all(kvm);
2540
2541        switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2542        case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2543                memcpy(crycb->apcb1.apm, apm, 32);
2544                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2545                         apm[0], apm[1], apm[2], apm[3]);
2546                memcpy(crycb->apcb1.aqm, aqm, 32);
2547                VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2548                         aqm[0], aqm[1], aqm[2], aqm[3]);
2549                memcpy(crycb->apcb1.adm, adm, 32);
2550                VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2551                         adm[0], adm[1], adm[2], adm[3]);
2552                break;
2553        case CRYCB_FORMAT1:
2554        case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2555                memcpy(crycb->apcb0.apm, apm, 8);
2556                memcpy(crycb->apcb0.aqm, aqm, 2);
2557                memcpy(crycb->apcb0.adm, adm, 2);
2558                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2559                         apm[0], *((unsigned short *)aqm),
2560                         *((unsigned short *)adm));
2561                break;
2562        default:        /* Cannot happen */
2563                break;
2564        }
2565
2566        /* recreate the shadow crycb for each vcpu */
2567        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2568        kvm_s390_vcpu_unblock_all(kvm);
2569        mutex_unlock(&kvm->lock);
2570}
2571EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
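/*
 * In kvm_arch_crypto_set_masks() above, apm, aqm and adm are bitmasks of
 * the AP adapters, usage domains and control domains the guest may use
 * (used by the vfio_ap device driver); whether the 256-bit apcb1 fields or
 * the smaller apcb0 fields apply depends on the CRYCB format chosen in
 * kvm_s390_set_crycb_format().
 */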
2572
2573void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2574{
2575        mutex_lock(&kvm->lock);
2576        kvm_s390_vcpu_block_all(kvm);
2577
2578        memset(&kvm->arch.crypto.crycb->apcb0, 0,
2579               sizeof(kvm->arch.crypto.crycb->apcb0));
2580        memset(&kvm->arch.crypto.crycb->apcb1, 0,
2581               sizeof(kvm->arch.crypto.crycb->apcb1));
2582
2583        VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2584        /* recreate the shadow crycb for each vcpu */
2585        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2586        kvm_s390_vcpu_unblock_all(kvm);
2587        mutex_unlock(&kvm->lock);
2588}
2589EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2590
2591static u64 kvm_s390_get_initial_cpuid(void)
2592{
2593        struct cpuid cpuid;
2594
2595        get_cpu_id(&cpuid);
2596        cpuid.version = 0xff;
2597        return *((u64 *) &cpuid);
2598}
2599
2600static void kvm_s390_crypto_init(struct kvm *kvm)
2601{
2602        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2603        kvm_s390_set_crycb_format(kvm);
2604
2605        if (!test_kvm_facility(kvm, 76))
2606                return;
2607
2608        /* Enable AES/DEA protected key functions by default */
2609        kvm->arch.crypto.aes_kw = 1;
2610        kvm->arch.crypto.dea_kw = 1;
2611        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2612                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2613        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2614                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2615}
2616
2617static void sca_dispose(struct kvm *kvm)
2618{
2619        if (kvm->arch.use_esca)
2620                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2621        else
2622                free_page((unsigned long)(kvm->arch.sca));
2623        kvm->arch.sca = NULL;
2624}
2625
2626int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2627{
2628        gfp_t alloc_flags = GFP_KERNEL;
2629        int i, rc;
2630        char debug_name[16];
2631        static unsigned long sca_offset;
2632
2633        rc = -EINVAL;
2634#ifdef CONFIG_KVM_S390_UCONTROL
2635        if (type & ~KVM_VM_S390_UCONTROL)
2636                goto out_err;
2637        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2638                goto out_err;
2639#else
2640        if (type)
2641                goto out_err;
2642#endif
2643
2644        rc = s390_enable_sie();
2645        if (rc)
2646                goto out_err;
2647
2648        rc = -ENOMEM;
2649
2650        if (!sclp.has_64bscao)
2651                alloc_flags |= GFP_DMA;
2652        rwlock_init(&kvm->arch.sca_lock);
2653        /* start with basic SCA */
2654        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2655        if (!kvm->arch.sca)
2656                goto out_err;
2657        mutex_lock(&kvm_lock);
2658        sca_offset += 16;
2659        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2660                sca_offset = 0;
2661        kvm->arch.sca = (struct bsca_block *)
2662                        ((char *) kvm->arch.sca + sca_offset);
2663        mutex_unlock(&kvm_lock);
2664
2665        sprintf(debug_name, "kvm-%u", current->pid);
2666
2667        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668        if (!kvm->arch.dbf)
2669                goto out_err;
2670
2671        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2672        kvm->arch.sie_page2 =
2673             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2674        if (!kvm->arch.sie_page2)
2675                goto out_err;
2676
2677        kvm->arch.sie_page2->kvm = kvm;
2678        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2679
2680        for (i = 0; i < kvm_s390_fac_size(); i++) {
2681                kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2682                                              (kvm_s390_fac_base[i] |
2683                                               kvm_s390_fac_ext[i]);
2684                kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2685                                              kvm_s390_fac_base[i];
2686        }
2687        kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2688
2689        /* we are always in czam mode - even on pre z14 machines */
2690        set_kvm_facility(kvm->arch.model.fac_mask, 138);
2691        set_kvm_facility(kvm->arch.model.fac_list, 138);
2692        /* we emulate STHYI in kvm */
2693        set_kvm_facility(kvm->arch.model.fac_mask, 74);
2694        set_kvm_facility(kvm->arch.model.fac_list, 74);
2695        if (MACHINE_HAS_TLB_GUEST) {
2696                set_kvm_facility(kvm->arch.model.fac_mask, 147);
2697                set_kvm_facility(kvm->arch.model.fac_list, 147);
2698        }
2699
2700        if (css_general_characteristics.aiv && test_facility(65))
2701                set_kvm_facility(kvm->arch.model.fac_mask, 65);
2702
2703        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2704        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2705
2706        kvm_s390_crypto_init(kvm);
2707
2708        mutex_init(&kvm->arch.float_int.ais_lock);
2709        spin_lock_init(&kvm->arch.float_int.lock);
2710        for (i = 0; i < FIRQ_LIST_COUNT; i++)
2711                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2712        init_waitqueue_head(&kvm->arch.ipte_wq);
2713        mutex_init(&kvm->arch.ipte_mutex);
2714
2715        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2716        VM_EVENT(kvm, 3, "vm created with type %lu", type);
2717
2718        if (type & KVM_VM_S390_UCONTROL) {
2719                kvm->arch.gmap = NULL;
2720                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2721        } else {
2722                if (sclp.hamax == U64_MAX)
2723                        kvm->arch.mem_limit = TASK_SIZE_MAX;
2724                else
2725                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2726                                                    sclp.hamax + 1);
2727                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2728                if (!kvm->arch.gmap)
2729                        goto out_err;
2730                kvm->arch.gmap->private = kvm;
2731                kvm->arch.gmap->pfault_enabled = 0;
2732        }
2733
2734        kvm->arch.use_pfmfi = sclp.has_pfmfi;
2735        kvm->arch.use_skf = sclp.has_skey;
2736        spin_lock_init(&kvm->arch.start_stop_lock);
2737        kvm_s390_vsie_init(kvm);
2738        if (use_gisa)
2739                kvm_s390_gisa_init(kvm);
2740        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741
2742        return 0;
2743out_err:
2744        free_page((unsigned long)kvm->arch.sie_page2);
2745        debug_unregister(kvm->arch.dbf);
2746        sca_dispose(kvm);
2747        KVM_EVENT(3, "creation of vm failed: %d", rc);
2748        return rc;
2749}
2750
2751void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752{
2753        u16 rc, rrc;
2754
2755        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2756        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2757        kvm_s390_clear_local_irqs(vcpu);
2758        kvm_clear_async_pf_completion_queue(vcpu);
2759        if (!kvm_is_ucontrol(vcpu->kvm))
2760                sca_del_vcpu(vcpu);
2761
2762        if (kvm_is_ucontrol(vcpu->kvm))
2763                gmap_remove(vcpu->arch.gmap);
2764
2765        if (vcpu->kvm->arch.use_cmma)
2766                kvm_s390_vcpu_unsetup_cmma(vcpu);
2767        /* We cannot hold the vcpu mutex here, we are already dying */
2768        if (kvm_s390_pv_cpu_get_handle(vcpu))
2769                kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2770        free_page((unsigned long)(vcpu->arch.sie_block));
2771}
2772
2773static void kvm_free_vcpus(struct kvm *kvm)
2774{
2775        unsigned int i;
2776        struct kvm_vcpu *vcpu;
2777
2778        kvm_for_each_vcpu(i, vcpu, kvm)
2779                kvm_vcpu_destroy(vcpu);
2780
2781        mutex_lock(&kvm->lock);
2782        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2783                kvm->vcpus[i] = NULL;
2784
2785        atomic_set(&kvm->online_vcpus, 0);
2786        mutex_unlock(&kvm->lock);
2787}
2788
2789void kvm_arch_destroy_vm(struct kvm *kvm)
2790{
2791        u16 rc, rrc;
2792
2793        kvm_free_vcpus(kvm);
2794        sca_dispose(kvm);
2795        kvm_s390_gisa_destroy(kvm);
2796        /*
2797         * We are already at the end of life and kvm->lock is not taken.
2798         * This is ok as the file descriptor is closed by now and nobody
2799         * can mess with the pv state. To avoid lockdep_assert_held from
2800         * complaining, we do not use kvm_s390_pv_is_protected.
2801         */
2802        if (kvm_s390_pv_get_handle(kvm))
2803                kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2804        debug_unregister(kvm->arch.dbf);
2805        free_page((unsigned long)kvm->arch.sie_page2);
2806        if (!kvm_is_ucontrol(kvm))
2807                gmap_remove(kvm->arch.gmap);
2808        kvm_s390_destroy_adapters(kvm);
2809        kvm_s390_clear_float_irqs(kvm);
2810        kvm_s390_vsie_destroy(kvm);
2811        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2812}
2813
2814/* Section: vcpu related */
2815static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2816{
2817        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2818        if (!vcpu->arch.gmap)
2819                return -ENOMEM;
2820        vcpu->arch.gmap->private = vcpu->kvm;
2821
2822        return 0;
2823}
2824
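    /*
     * The SCA (system control area) contains one entry per vcpu (SIE block
     * address and SIGP control word) plus the mcn bitmap of populated
     * entries; it is what lets SIE interpret SIGP orders without an exit.
     * Touching the entry of a single vcpu only needs sca_lock for reading;
     * the write lock is only taken when sca_switch_to_extended() replaces
     * the whole block.
     */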
2825static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2826{
2827        if (!kvm_s390_use_sca_entries())
2828                return;
2829        read_lock(&vcpu->kvm->arch.sca_lock);
2830        if (vcpu->kvm->arch.use_esca) {
2831                struct esca_block *sca = vcpu->kvm->arch.sca;
2832
2833                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2834                sca->cpu[vcpu->vcpu_id].sda = 0;
2835        } else {
2836                struct bsca_block *sca = vcpu->kvm->arch.sca;
2837
2838                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2839                sca->cpu[vcpu->vcpu_id].sda = 0;
2840        }
2841        read_unlock(&vcpu->kvm->arch.sca_lock);
2842}
2843
2844static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2845{
2846        if (!kvm_s390_use_sca_entries()) {
2847                struct bsca_block *sca = vcpu->kvm->arch.sca;
2848
2849                /* we still need the basic sca for the ipte control */
2850                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2851                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2852                return;
2853        }
2854        read_lock(&vcpu->kvm->arch.sca_lock);
2855        if (vcpu->kvm->arch.use_esca) {
2856                struct esca_block *sca = vcpu->kvm->arch.sca;
2857
2858                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2859                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2860                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2861                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2862                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863        } else {
2864                struct bsca_block *sca = vcpu->kvm->arch.sca;
2865
2866                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2867                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2868                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2869                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2870        }
2871        read_unlock(&vcpu->kvm->arch.sca_lock);
2872}
2873
2874/* Basic SCA to Extended SCA data copy routines */
2875static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2876{
2877        d->sda = s->sda;
2878        d->sigp_ctrl.c = s->sigp_ctrl.c;
2879        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2880}
2881
2882static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883{
2884        int i;
2885
2886        d->ipte_control = s->ipte_control;
2887        d->mcn[0] = s->mcn;
2888        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2889                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2890}
2891
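    /*
     * Replace the basic SCA with the larger extended SCA once a vcpu id
     * beyond the basic slot range is requested. All vcpus are blocked and
     * the write lock is held while the new block is installed, so no vcpu
     * can enter SIE with a stale scaoh/scaol pair.
     */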
2892static int sca_switch_to_extended(struct kvm *kvm)
2893{
2894        struct bsca_block *old_sca = kvm->arch.sca;
2895        struct esca_block *new_sca;
2896        struct kvm_vcpu *vcpu;
2897        unsigned int vcpu_idx;
2898        u32 scaol, scaoh;
2899
2900        if (kvm->arch.use_esca)
2901                return 0;
2902
2903        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904        if (!new_sca)
2905                return -ENOMEM;
2906
2907        scaoh = (u32)((u64)(new_sca) >> 32);
2908        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2909
2910        kvm_s390_vcpu_block_all(kvm);
2911        write_lock(&kvm->arch.sca_lock);
2912
2913        sca_copy_b_to_e(new_sca, old_sca);
2914
2915        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2916                vcpu->arch.sie_block->scaoh = scaoh;
2917                vcpu->arch.sie_block->scaol = scaol;
2918                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2919        }
2920        kvm->arch.sca = new_sca;
2921        kvm->arch.use_esca = 1;
2922
2923        write_unlock(&kvm->arch.sca_lock);
2924        kvm_s390_vcpu_unblock_all(kvm);
2925
2926        free_page((unsigned long)old_sca);
2927
2928        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2929                 old_sca, kvm->arch.sca);
2930        return 0;
2931}
2932
2933static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934{
2935        int rc;
2936
2937        if (!kvm_s390_use_sca_entries()) {
2938                if (id < KVM_MAX_VCPUS)
2939                        return true;
2940                return false;
2941        }
2942        if (id < KVM_S390_BSCA_CPU_SLOTS)
2943                return true;
2944        if (!sclp.has_esca || !sclp.has_64bscao)
2945                return false;
2946
2947        mutex_lock(&kvm->lock);
2948        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2949        mutex_unlock(&kvm->lock);
2950
2951        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2952}
2953
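    /*
     * Guest CPU timer accounting: while accounting is enabled, cputm_start
     * holds the host TOD value at which the current accounting period began.
     * Readers subtract the elapsed TOD time on the fly, the stop path folds
     * it into the SIE block's cputm; cputm_seqcount keeps both fields
     * consistent for lock-free readers on other CPUs.
     */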
2954/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2955static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2956{
2957        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2958        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2959        vcpu->arch.cputm_start = get_tod_clock_fast();
2960        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2961}
2962
2963/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2964static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2965{
2966        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2967        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2968        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2969        vcpu->arch.cputm_start = 0;
2970        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971}
2972
2973/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975{
2976        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2977        vcpu->arch.cputm_enabled = true;
2978        __start_cpu_timer_accounting(vcpu);
2979}
2980
2981/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2982static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2983{
2984        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2985        __stop_cpu_timer_accounting(vcpu);
2986        vcpu->arch.cputm_enabled = false;
2987}
2988
2989static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990{
2991        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2992        __enable_cpu_timer_accounting(vcpu);
2993        preempt_enable();
2994}
2995
2996static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2997{
2998        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2999        __disable_cpu_timer_accounting(vcpu);
3000        preempt_enable();
3001}
3002
3003/* set the cpu timer - may only be called from the VCPU thread itself */
3004void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3005{
3006        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008        if (vcpu->arch.cputm_enabled)
3009                vcpu->arch.cputm_start = get_tod_clock_fast();
3010        vcpu->arch.sie_block->cputm = cputm;
3011        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012        preempt_enable();
3013}
3014
3015/* update and get the cpu timer - can also be called from other VCPU threads */
3016__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3017{
3018        unsigned int seq;
3019        __u64 value;
3020
3021        if (unlikely(!vcpu->arch.cputm_enabled))
3022                return vcpu->arch.sie_block->cputm;
3023
3024        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3025        do {
3026                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3027                /*
3028                 * If the writer would ever execute a read in the critical
3029                 * section, e.g. in irq context, we have a deadlock.
3030                 */
3031                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3032                value = vcpu->arch.sie_block->cputm;
3033                /* if cputm_start is 0, accounting is being started/stopped */
3034                if (likely(vcpu->arch.cputm_start))
3035                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3036        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3037        preempt_enable();
3038        return value;
3039}
3040
3041void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3042{
3043
3044        gmap_enable(vcpu->arch.enabled_gmap);
3045        kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3046        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3047                __start_cpu_timer_accounting(vcpu);
3048        vcpu->cpu = cpu;
3049}
3050
3051void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3052{
3053        vcpu->cpu = -1;
3054        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3055                __stop_cpu_timer_accounting(vcpu);
3056        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3057        vcpu->arch.enabled_gmap = gmap_get_enabled();
3058        gmap_disable(vcpu->arch.enabled_gmap);
3059
3060}
3061
3062void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3063{
3064        mutex_lock(&vcpu->kvm->lock);
3065        preempt_disable();
3066        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3067        vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3068        preempt_enable();
3069        mutex_unlock(&vcpu->kvm->lock);
3070        if (!kvm_is_ucontrol(vcpu->kvm)) {
3071                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3072                sca_add_vcpu(vcpu);
3073        }
3074        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3075                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3076        /* make vcpu_load load the right gmap on the first trigger */
3077        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3078}
3079
3080static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3081{
3082        if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3083            test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3084                return true;
3085        return false;
3086}
3087
3088static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3089{
3090        /* At least one ECC subfunction must be present */
3091        return kvm_has_pckmo_subfunc(kvm, 32) ||
3092               kvm_has_pckmo_subfunc(kvm, 33) ||
3093               kvm_has_pckmo_subfunc(kvm, 34) ||
3094               kvm_has_pckmo_subfunc(kvm, 40) ||
3095               kvm_has_pckmo_subfunc(kvm, 41);
3096
3097}
3098
3099static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3100{
3101        /*
3102         * If the AP instructions are not being interpreted and the MSAX3
3103         * facility is not configured for the guest, there is nothing to set up.
3104         */
3105        if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3106                return;
3107
3108        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3109        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3110        vcpu->arch.sie_block->eca &= ~ECA_APIE;
3111        vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3112
3113        if (vcpu->kvm->arch.crypto.apie)
3114                vcpu->arch.sie_block->eca |= ECA_APIE;
3115
3116        /* Set up protected key support */
3117        if (vcpu->kvm->arch.crypto.aes_kw) {
3118                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3119                /* ecc is also wrapped with AES key */
3120                if (kvm_has_pckmo_ecc(vcpu->kvm))
3121                        vcpu->arch.sie_block->ecd |= ECD_ECC;
3122        }
3123
3124        if (vcpu->kvm->arch.crypto.dea_kw)
3125                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3126}
3127
3128void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3129{
3130        free_page(vcpu->arch.sie_block->cbrlo);
3131        vcpu->arch.sie_block->cbrlo = 0;
3132}
3133
3134int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3135{
3136        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3137        if (!vcpu->arch.sie_block->cbrlo)
3138                return -ENOMEM;
3139        return 0;
3140}
3141
3142static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3143{
3144        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3145
3146        vcpu->arch.sie_block->ibc = model->ibc;
3147        if (test_kvm_facility(vcpu->kvm, 7))
3148                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3149}
3150
3151static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3152{
3153        int rc = 0;
3154        u16 uvrc, uvrrc;
3155
3156        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157                                                    CPUSTAT_SM |
3158                                                    CPUSTAT_STOPPED);
3159
3160        if (test_kvm_facility(vcpu->kvm, 78))
3161                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3162        else if (test_kvm_facility(vcpu->kvm, 8))
3163                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3164
3165        kvm_s390_vcpu_setup_model(vcpu);
3166
3167        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3168        if (MACHINE_HAS_ESOP)
3169                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3170        if (test_kvm_facility(vcpu->kvm, 9))
3171                vcpu->arch.sie_block->ecb |= ECB_SRSI;
3172        if (test_kvm_facility(vcpu->kvm, 73))
3173                vcpu->arch.sie_block->ecb |= ECB_TE;
3174
3175        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3176                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3177        if (test_kvm_facility(vcpu->kvm, 130))
3178                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3179        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3180        if (sclp.has_cei)
3181                vcpu->arch.sie_block->eca |= ECA_CEI;
3182        if (sclp.has_ib)
3183                vcpu->arch.sie_block->eca |= ECA_IB;
3184        if (sclp.has_siif)
3185                vcpu->arch.sie_block->eca |= ECA_SII;
3186        if (sclp.has_sigpif)
3187                vcpu->arch.sie_block->eca |= ECA_SIGPI;
3188        if (test_kvm_facility(vcpu->kvm, 129)) {
3189                vcpu->arch.sie_block->eca |= ECA_VX;
3190                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3191        }
3192        if (test_kvm_facility(vcpu->kvm, 139))
3193                vcpu->arch.sie_block->ecd |= ECD_MEF;
3194        if (test_kvm_facility(vcpu->kvm, 156))
3195                vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3196        if (vcpu->arch.sie_block->gd) {
3197                vcpu->arch.sie_block->eca |= ECA_AIV;
3198                VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3199                           vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3200        }
3201        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3202                                        | SDNXC;
3203        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3204
3205        if (sclp.has_kss)
3206                kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3207        else
3208                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3209
3210        if (vcpu->kvm->arch.use_cmma) {
3211                rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212                if (rc)
3213                        return rc;
3214        }
3215        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3216        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3217
3218        vcpu->arch.sie_block->hpid = HPID_KVM;
3219
3220        kvm_s390_vcpu_crypto_setup(vcpu);
3221
3222        mutex_lock(&vcpu->kvm->lock);
3223        if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3224                rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3225                if (rc)
3226                        kvm_s390_vcpu_unsetup_cmma(vcpu);
3227        }
3228        mutex_unlock(&vcpu->kvm->lock);
3229
3230        return rc;
3231}
3232
3233int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3234{
3235        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3236                return -EINVAL;
3237        return 0;
3238}
3239
3240int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3241{
3242        struct sie_page *sie_page;
3243        int rc;
3244
3245        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3246        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247        if (!sie_page)
3248                return -ENOMEM;
3249
3250        vcpu->arch.sie_block = &sie_page->sie_block;
3251        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3252
3253        /* the real guest size will always be smaller than msl */
3254        vcpu->arch.sie_block->mso = 0;
3255        vcpu->arch.sie_block->msl = sclp.hamax;
3256
3257        vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3258        spin_lock_init(&vcpu->arch.local_int.lock);
3259        vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3260        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3261                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3262        seqcount_init(&vcpu->arch.cputm_seqcount);
3263
3264        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3265        kvm_clear_async_pf_completion_queue(vcpu);
3266        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3267                                    KVM_SYNC_GPRS |
3268                                    KVM_SYNC_ACRS |
3269                                    KVM_SYNC_CRS |
3270                                    KVM_SYNC_ARCH0 |
3271                                    KVM_SYNC_PFAULT |
3272                                    KVM_SYNC_DIAG318;
3273        kvm_s390_set_prefix(vcpu, 0);
3274        if (test_kvm_facility(vcpu->kvm, 64))
3275                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3276        if (test_kvm_facility(vcpu->kvm, 82))
3277                vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3278        if (test_kvm_facility(vcpu->kvm, 133))
3279                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3280        if (test_kvm_facility(vcpu->kvm, 156))
3281                vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3282        /* fprs can be synchronized via vrs, even if the guest has no vx. With
3283         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3284         */
3285        if (MACHINE_HAS_VX)
3286                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3287        else
3288                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3289
3290        if (kvm_is_ucontrol(vcpu->kvm)) {
3291                rc = __kvm_ucontrol_vcpu_init(vcpu);
3292                if (rc)
3293                        goto out_free_sie_block;
3294        }
3295
3296        VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3297                 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3298        trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299
3300        rc = kvm_s390_vcpu_setup(vcpu);
3301        if (rc)
3302                goto out_ucontrol_uninit;
3303        return 0;
3304
3305out_ucontrol_uninit:
3306        if (kvm_is_ucontrol(vcpu->kvm))
3307                gmap_remove(vcpu->arch.gmap);
3308out_free_sie_block:
3309        free_page((unsigned long)(vcpu->arch.sie_block));
3310        return rc;
3311}
3312
3313int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3314{
3315        return kvm_s390_vcpu_has_irq(vcpu, 0);
3316}
3317
3318bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3319{
3320        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3321}
3322
3323void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3324{
3325        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3326        exit_sie(vcpu);
3327}
3328
3329void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3330{
3331        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3332}
3333
3334static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3335{
3336        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3337        exit_sie(vcpu);
3338}
3339
3340bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3341{
3342        return atomic_read(&vcpu->arch.sie_block->prog20) &
3343               (PROG_BLOCK_SIE | PROG_REQUEST);
3344}
3345
3346static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3347{
3348        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3349}
3350
3351/*
3352 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3353 * If the CPU is not running (e.g. waiting as idle) the function will
3354 * return immediately.
 */
3355void exit_sie(struct kvm_vcpu *vcpu)
3356{
3357        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3358        kvm_s390_vsie_kick(vcpu);
3359        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3360                cpu_relax();
3361}
3362
3363/* Kick a guest cpu out of SIE to process a request synchronously */
3364void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3365{
3366        kvm_make_request(req, vcpu);
3367        kvm_s390_vcpu_request(vcpu);
3368}
3369
3370static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3371                              unsigned long end)
3372{
3373        struct kvm *kvm = gmap->private;
3374        struct kvm_vcpu *vcpu;
3375        unsigned long prefix;
3376        int i;
3377
3378        if (gmap_is_shadow(gmap))
3379                return;
3380        if (start >= 1UL << 31)
3381                /* We are only interested in prefix pages */
3382                return;
3383        kvm_for_each_vcpu(i, vcpu, kvm) {
3384                /* match against both prefix pages */
3385                prefix = kvm_s390_get_prefix(vcpu);
3386                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3387                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3388                                   start, end);
3389                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3390                }
3391        }
3392}
3393
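    /*
     * avg_steal_timer is kept in CPU-timer units (4096 units per
     * microsecond), so TICK_USEC << 12 is one timer tick in the same units
     * and the quotient below is roughly the steal time per tick in percent.
     */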
3394bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3395{
3396        /* do not poll with more than halt_poll_max_steal percent of steal time */
3397        if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3398            halt_poll_max_steal) {
3399                vcpu->stat.halt_no_poll_steal++;
3400                return true;
3401        }
3402        return false;
3403}
3404
3405int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3406{
3407        /* kvm common code refers to this, but never calls it */
3408        BUG();
3409        return 0;
3410}
3411
3412static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3413                                           struct kvm_one_reg *reg)
3414{
3415        int r = -EINVAL;
3416
3417        switch (reg->id) {
3418        case KVM_REG_S390_TODPR:
3419                r = put_user(vcpu->arch.sie_block->todpr,
3420                             (u32 __user *)reg->addr);
3421                break;
3422        case KVM_REG_S390_EPOCHDIFF:
3423                r = put_user(vcpu->arch.sie_block->epoch,
3424                             (u64 __user *)reg->addr);
3425                break;
3426        case KVM_REG_S390_CPU_TIMER:
3427                r = put_user(kvm_s390_get_cpu_timer(vcpu),
3428                             (u64 __user *)reg->addr);
3429                break;
3430        case KVM_REG_S390_CLOCK_COMP:
3431                r = put_user(vcpu->arch.sie_block->ckc,
3432                             (u64 __user *)reg->addr);
3433                break;
3434        case KVM_REG_S390_PFTOKEN:
3435                r = put_user(vcpu->arch.pfault_token,
3436                             (u64 __user *)reg->addr);
3437                break;
3438        case KVM_REG_S390_PFCOMPARE:
3439                r = put_user(vcpu->arch.pfault_compare,
3440                             (u64 __user *)reg->addr);
3441                break;
3442        case KVM_REG_S390_PFSELECT:
3443                r = put_user(vcpu->arch.pfault_select,
3444                             (u64 __user *)reg->addr);
3445                break;
3446        case KVM_REG_S390_PP:
3447                r = put_user(vcpu->arch.sie_block->pp,
3448                             (u64 __user *)reg->addr);
3449                break;
3450        case KVM_REG_S390_GBEA:
3451                r = put_user(vcpu->arch.sie_block->gbea,
3452                             (u64 __user *)reg->addr);
3453                break;
3454        default:
3455                break;
3456        }
3457
3458        return r;
3459}
3460
3461static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3462                                           struct kvm_one_reg *reg)
3463{
3464        int r = -EINVAL;
3465        __u64 val;
3466
3467        switch (reg->id) {
3468        case KVM_REG_S390_TODPR:
3469                r = get_user(vcpu->arch.sie_block->todpr,
3470                             (u32 __user *)reg->addr);
3471                break;
3472        case KVM_REG_S390_EPOCHDIFF:
3473                r = get_user(vcpu->arch.sie_block->epoch,
3474                             (u64 __user *)reg->addr);
3475                break;
3476        case KVM_REG_S390_CPU_TIMER:
3477                r = get_user(val, (u64 __user *)reg->addr);
3478                if (!r)
3479                        kvm_s390_set_cpu_timer(vcpu, val);
3480                break;
3481        case KVM_REG_S390_CLOCK_COMP:
3482                r = get_user(vcpu->arch.sie_block->ckc,
3483                             (u64 __user *)reg->addr);
3484                break;
3485        case KVM_REG_S390_PFTOKEN:
3486                r = get_user(vcpu->arch.pfault_token,
3487                             (u64 __user *)reg->addr);
3488                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3489                        kvm_clear_async_pf_completion_queue(vcpu);
3490                break;
3491        case KVM_REG_S390_PFCOMPARE:
3492                r = get_user(vcpu->arch.pfault_compare,
3493                             (u64 __user *)reg->addr);
3494                break;
3495        case KVM_REG_S390_PFSELECT:
3496                r = get_user(vcpu->arch.pfault_select,
3497                             (u64 __user *)reg->addr);
3498                break;
3499        case KVM_REG_S390_PP:
3500                r = get_user(vcpu->arch.sie_block->pp,
3501                             (u64 __user *)reg->addr);
3502                break;
3503        case KVM_REG_S390_GBEA:
3504                r = get_user(vcpu->arch.sie_block->gbea,
3505                             (u64 __user *)reg->addr);
3506                break;
3507        default:
3508                break;
3509        }
3510
3511        return r;
3512}
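    /*
     * Illustrative only (userspace side, not part of this file): the
     * registers above are accessed through the generic ONE_REG interface,
     * e.g. reading the CPU timer from an already created vcpu fd vcpu_fd:
     *
     *	__u64 cputm;
     *	struct kvm_one_reg reg = {
     *		.id   = KVM_REG_S390_CPU_TIMER,
     *		.addr = (__u64)&cputm,
     *	};
     *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
     */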
3513
3514static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3515{
3516        vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3517        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3518        memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3519
3520        kvm_clear_async_pf_completion_queue(vcpu);
3521        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3522                kvm_s390_vcpu_stop(vcpu);
3523        kvm_s390_clear_local_irqs(vcpu);
3524}
3525
3526static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3527{
3528        /* Initial reset is a superset of the normal reset */
3529        kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3530
3531        /*
3532         * This equals the initial cpu reset in the POP, but we don't switch to ESA.
3533         * We reset not only the internal data, but also ...
3534         */
3535        vcpu->arch.sie_block->gpsw.mask = 0;
3536        vcpu->arch.sie_block->gpsw.addr = 0;
3537        kvm_s390_set_prefix(vcpu, 0);
3538        kvm_s390_set_cpu_timer(vcpu, 0);
3539        vcpu->arch.sie_block->ckc = 0;
3540        memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3541        vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3542        vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3543
3544        /* ... the data in sync regs */
3545        memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3546        vcpu->run->s.regs.ckc = 0;
3547        vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3548        vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3549        vcpu->run->psw_addr = 0;
3550        vcpu->run->psw_mask = 0;
3551        vcpu->run->s.regs.todpr = 0;
3552        vcpu->run->s.regs.cputm = 0;
3553        vcpu->run->s.regs.ckc = 0;
3554        vcpu->run->s.regs.pp = 0;
3555        vcpu->run->s.regs.gbea = 1;
3556        vcpu->run->s.regs.fpc = 0;
3557        /*
3558         * Do not reset these registers in the protected case, as some of
3559         * them are overlayed and they are not accessible in this case
3560         * anyway.
3561         */
3562        if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3563                vcpu->arch.sie_block->gbea = 1;
3564                vcpu->arch.sie_block->pp = 0;
3565                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3566                vcpu->arch.sie_block->todpr = 0;
3567                vcpu->arch.sie_block->cpnc = 0;
3568        }
3569}
3570
3571static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572{
3573        struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574
3575        /* Clear reset is a superset of the initial reset */
3576        kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577
3578        memset(&regs->gprs, 0, sizeof(regs->gprs));
3579        memset(&regs->vrs, 0, sizeof(regs->vrs));
3580        memset(&regs->acrs, 0, sizeof(regs->acrs));
3581        memset(&regs->gscb, 0, sizeof(regs->gscb));
3582
3583        regs->etoken = 0;
3584        regs->etoken_extension = 0;
3585        regs->diag318 = 0;
3586}
3587
3588int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3589{
3590        vcpu_load(vcpu);
3591        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3592        vcpu_put(vcpu);
3593        return 0;
3594}
3595
3596int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3597{
3598        vcpu_load(vcpu);
3599        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3600        vcpu_put(vcpu);
3601        return 0;
3602}
3603
3604int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3605                                  struct kvm_sregs *sregs)
3606{
3607        vcpu_load(vcpu);
3608
3609        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3610        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3611
3612        vcpu_put(vcpu);
3613        return 0;
3614}
3615
3616int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3617                                  struct kvm_sregs *sregs)
3618{
3619        vcpu_load(vcpu);
3620
3621        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3622        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3623
3624        vcpu_put(vcpu);
3625        return 0;
3626}
3627
3628int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3629{
3630        int ret = 0;
3631
3632        vcpu_load(vcpu);
3633
3634        if (test_fp_ctl(fpu->fpc)) {
3635                ret = -EINVAL;
3636                goto out;
3637        }
3638        vcpu->run->s.regs.fpc = fpu->fpc;
3639        if (MACHINE_HAS_VX)
3640                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3641                                 (freg_t *) fpu->fprs);
3642        else
3643                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3644
3645out:
3646        vcpu_put(vcpu);
3647        return ret;
3648}
3649
3650int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3651{
3652        vcpu_load(vcpu);
3653
3654        /* make sure we have the latest values */
3655        save_fpu_regs();
3656        if (MACHINE_HAS_VX)
3657                convert_vx_to_fp((freg_t *) fpu->fprs,
3658                                 (__vector128 *) vcpu->run->s.regs.vrs);
3659        else
3660                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3661        fpu->fpc = vcpu->run->s.regs.fpc;
3662
3663        vcpu_put(vcpu);
3664        return 0;
3665}
3666
3667static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3668{
3669        int rc = 0;
3670
3671        if (!is_vcpu_stopped(vcpu))
3672                rc = -EBUSY;
3673        else {
3674                vcpu->run->psw_mask = psw.mask;
3675                vcpu->run->psw_addr = psw.addr;
3676        }
3677        return rc;
3678}
3679
3680int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3681                                  struct kvm_translation *tr)
3682{
3683        return -EINVAL; /* not implemented yet */
3684}
3685
3686#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3687                              KVM_GUESTDBG_USE_HW_BP | \
3688                              KVM_GUESTDBG_ENABLE)
3689
3690int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3691                                        struct kvm_guest_debug *dbg)
3692{
3693        int rc = 0;
3694
3695        vcpu_load(vcpu);
3696
3697        vcpu->guest_debug = 0;
3698        kvm_s390_clear_bp_data(vcpu);
3699
3700        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3701                rc = -EINVAL;
3702                goto out;
3703        }
3704        if (!sclp.has_gpere) {
3705                rc = -EINVAL;
3706                goto out;
3707        }
3708
3709        if (dbg->control & KVM_GUESTDBG_ENABLE) {
3710                vcpu->guest_debug = dbg->control;
3711                /* enforce guest PER */
3712                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3713
3714                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3715                        rc = kvm_s390_import_bp_data(vcpu, dbg);
3716        } else {
3717                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3718                vcpu->arch.guestdbg.last_bp = 0;
3719        }
3720
3721        if (rc) {
3722                vcpu->guest_debug = 0;
3723                kvm_s390_clear_bp_data(vcpu);
3724                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3725        }
3726
3727out:
3728        vcpu_put(vcpu);
3729        return rc;
3730}
3731
3732int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3733                                    struct kvm_mp_state *mp_state)
3734{
3735        int ret;
3736
3737        vcpu_load(vcpu);
3738
3739        /* CHECK_STOP and LOAD are not supported yet */
3740        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3741                                      KVM_MP_STATE_OPERATING;
3742
3743        vcpu_put(vcpu);
3744        return ret;
3745}
3746
3747int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3748                                    struct kvm_mp_state *mp_state)
3749{
3750        int rc = 0;
3751
3752        vcpu_load(vcpu);
3753
3754        /* user space knows about this interface - let it control the state */
3755        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3756
3757        switch (mp_state->mp_state) {
3758        case KVM_MP_STATE_STOPPED:
3759                rc = kvm_s390_vcpu_stop(vcpu);
3760                break;
3761        case KVM_MP_STATE_OPERATING:
3762                rc = kvm_s390_vcpu_start(vcpu);
3763                break;
3764        case KVM_MP_STATE_LOAD:
3765                if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3766                        rc = -ENXIO;
3767                        break;
3768                }
3769                rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3770                break;
3771        case KVM_MP_STATE_CHECK_STOP:
3772                fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3773        default:
3774                rc = -ENXIO;
3775        }
3776
3777        vcpu_put(vcpu);
3778        return rc;
3779}
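    /*
     * Illustrative only (userspace side): once this ioctl has been used,
     * userspace owns the cpu state, e.g. restarting a stopped vcpu through
     * an already created vcpu fd vcpu_fd:
     *
     *	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_OPERATING };
     *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
     */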
3780
3781static bool ibs_enabled(struct kvm_vcpu *vcpu)
3782{
3783        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3784}
3785
3786static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3787{
3788retry:
3789        kvm_s390_vcpu_request_handled(vcpu);
3790        if (!kvm_request_pending(vcpu))
3791                return 0;
3792        /*
3793         * We use MMU_RELOAD just to re-arm the ipte notifier for the
3794         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3795         * This ensures that the ipte instruction for this request has
3796         * already finished. We might race against a second unmapper that
3797         * wants to set the blocking bit. Let's just retry the request loop.
3798         */
3799        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3800                int rc;
3801                rc = gmap_mprotect_notify(vcpu->arch.gmap,
3802                                          kvm_s390_get_prefix(vcpu),
3803                                          PAGE_SIZE * 2, PROT_WRITE);
3804                if (rc) {
3805                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3806                        return rc;
3807                }
3808                goto retry;
3809        }
3810
3811        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3812                vcpu->arch.sie_block->ihcpu = 0xffff;
3813                goto retry;
3814        }
3815
3816        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3817                if (!ibs_enabled(vcpu)) {
3818                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3819                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3820                }
3821                goto retry;
3822        }
3823
3824        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3825                if (ibs_enabled(vcpu)) {
3826                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3827                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3828                }
3829                goto retry;
3830        }
3831
3832        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3833                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3834                goto retry;
3835        }
3836
3837        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3838                /*
3839                 * Disable CMM virtualization; we will emulate the ESSA
3840                 * instruction manually, in order to provide additional
3841                 * functionality needed for live migration.
3842                 */
3843                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3844                goto retry;
3845        }
3846
3847        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3848                /*
3849                 * Re-enable CMM virtualization if CMMA is available and
3850                 * CMM has been used.
3851                 */
3852                if ((vcpu->kvm->arch.use_cmma) &&
3853                    (vcpu->kvm->mm->context.uses_cmm))
3854                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3855                goto retry;
3856        }
3857
3858        /* nothing to do, just clear the request */
3859        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3860        /* we left the vsie handler, nothing to do, just clear the request */
3861        kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3862
3863        return 0;
3864}
3865
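    /*
     * The guest TOD is presented as host TOD + epoch (mod 2^64), so setting
     * the guest TOD stores epoch = gtod - htod; with the multiple-epoch
     * facility (139) a borrow from that subtraction is carried into epdx.
     */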
3866void kvm_s390_set_tod_clock(struct kvm *kvm,
3867                            const struct kvm_s390_vm_tod_clock *gtod)
3868{
3869        struct kvm_vcpu *vcpu;
3870        struct kvm_s390_tod_clock_ext htod;
3871        int i;
3872
3873        mutex_lock(&kvm->lock);
3874        preempt_disable();
3875
3876        get_tod_clock_ext((char *)&htod);
3877
3878        kvm->arch.epoch = gtod->tod - htod.tod;
3879        kvm->arch.epdx = 0;
3880        if (test_kvm_facility(kvm, 139)) {
3881                kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3882                if (kvm->arch.epoch > gtod->tod)
3883                        kvm->arch.epdx -= 1;
3884        }
3885
3886        kvm_s390_vcpu_block_all(kvm);
3887        kvm_for_each_vcpu(i, vcpu, kvm) {
3888                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3889                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3890        }
3891
3892        kvm_s390_vcpu_unblock_all(kvm);
3893        preempt_enable();
3894        mutex_unlock(&kvm->lock);
3895}
3896
3897/**
3898 * kvm_arch_fault_in_page - fault-in guest page if necessary
3899 * @vcpu: The corresponding virtual cpu
3900 * @gpa: Guest physical address
3901 * @writable: Whether the page should be writable or not
3902 *
3903 * Make sure that a guest page has been faulted-in on the host.
3904 *
3905 * Return: Zero on success, negative error code otherwise.
3906 */
3907long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3908{
3909        return gmap_fault(vcpu->arch.gmap, gpa,
3910                          writable ? FAULT_FLAG_WRITE : 0);
3911}
3912
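    /*
     * Pseudo-page-fault handshake: when a guest access hits a page that is
     * not yet resident, a PFAULT_INIT external interrupt carrying the
     * guest's token is injected so the guest can schedule other work; once
     * the page has been made available the matching PFAULT_DONE interrupt
     * is delivered.
     */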
3913static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3914                                      unsigned long token)
3915{
3916        struct kvm_s390_interrupt inti;
3917        struct kvm_s390_irq irq;
3918
3919        if (start_token) {
3920                irq.u.ext.ext_params2 = token;
3921                irq.type = KVM_S390_INT_PFAULT_INIT;
3922                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3923        } else {
3924                inti.type = KVM_S390_INT_PFAULT_DONE;
3925                inti.parm64 = token;
3926                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3927        }
3928}
3929
3930bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3931                                     struct kvm_async_pf *work)
3932{
3933        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3934        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3935
3936        return true;
3937}
3938
3939void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3940                                 struct kvm_async_pf *work)
3941{
3942        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3943        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3944}
3945
3946void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3947                               struct kvm_async_pf *work)
3948{
3949        /* s390 will always inject the page directly */
3950}
3951
3952bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3953{
3954        /*
3955         * s390 will always inject the page directly,
3956         * but we still want kvm_check_async_pf_completion() to clean up
3957         */
3958        return true;
3959}
3960
3961static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3962{
3963        hva_t hva;
3964        struct kvm_arch_async_pf arch;
3965
3966        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3967                return false;
3968        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3969            vcpu->arch.pfault_compare)
3970                return false;
3971        if (psw_extint_disabled(vcpu))
3972                return false;
3973        if (kvm_s390_vcpu_has_irq(vcpu, 0))
3974                return false;
3975        if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3976                return false;
3977        if (!vcpu->arch.gmap->pfault_enabled)
3978                return false;
3979
3980        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3981        hva += current->thread.gmap_addr & ~PAGE_MASK;
3982        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3983                return false;
3984
3985        return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3986}
3987
3988static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3989{
3990        int rc, cpuflags;
3991
3992        /*
3993         * On s390, notifications for arriving pages are delivered directly
3994         * to the guest, but the housekeeping for completed pfaults is
3995         * handled outside the worker.
3996         */
3997        kvm_check_async_pf_completion(vcpu);
3998
3999        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4000        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4001
4002        if (need_resched())
4003                schedule();
4004
4005        if (!kvm_is_ucontrol(vcpu->kvm)) {
4006                rc = kvm_s390_deliver_pending_interrupts(vcpu);
4007                if (rc)
4008                        return rc;
4009        }
4010
4011        rc = kvm_s390_handle_requests(vcpu);
4012        if (rc)
4013                return rc;
4014
4015        if (guestdbg_enabled(vcpu)) {
4016                kvm_s390_backup_guest_per_regs(vcpu);
4017                kvm_s390_patch_guest_per_regs(vcpu);
4018        }
4019
4020        clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4021
4022        vcpu->arch.sie_block->icptcode = 0;
4023        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4024        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4025        trace_kvm_s390_sie_enter(vcpu, cpuflags);
4026
4027        return 0;
4028}
4029
4030static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4031{
4032        struct kvm_s390_pgm_info pgm_info = {
4033                .code = PGM_ADDRESSING,
4034        };
4035        u8 opcode, ilen;
4036        int rc;
4037
4038        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4039        trace_kvm_s390_sie_fault(vcpu);
4040
4041        /*
4042         * We want to inject an addressing exception, which is defined as a
4043         * suppressing or terminating exception. However, since we came here
4044         * by a DAT access exception, the PSW still points to the faulting
4045         * instruction since DAT exceptions are nullifying. So we've got
4046         * to look up the current opcode to get the length of the instruction
4047         * to be able to forward the PSW.
4048         */
4049        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4050        ilen = insn_length(opcode);
4051        if (rc < 0) {
4052                return rc;
4053        } else if (rc) {
4054                /* Instruction-Fetching Exceptions - we can't detect the ilen.
4055                 * Forward by arbitrary ilc, injection will take care of
4056                 * nullification if necessary.
4057                 */
4058                pgm_info = vcpu->arch.pgm;
4059                ilen = 4;
4060        }
4061        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4062        kvm_s390_forward_psw(vcpu, ilen);
4063        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4064}
4065
4066static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4067{
4068        struct mcck_volatile_info *mcck_info;
4069        struct sie_page *sie_page;
4070
4071        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4072                   vcpu->arch.sie_block->icptcode);
4073        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4074
4075        if (guestdbg_enabled(vcpu))
4076                kvm_s390_restore_guest_per_regs(vcpu);
4077
4078        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4079        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4080
4081        if (exit_reason == -EINTR) {
4082                VCPU_EVENT(vcpu, 3, "%s", "machine check");
4083                sie_page = container_of(vcpu->arch.sie_block,
4084                                        struct sie_page, sie_block);
4085                mcck_info = &sie_page->mcck_info;
4086                kvm_s390_reinject_machine_check(vcpu, mcck_info);
4087                return 0;
4088        }
4089
4090        if (vcpu->arch.sie_block->icptcode > 0) {
4091                int rc = kvm_handle_sie_intercept(vcpu);
4092
4093                if (rc != -EOPNOTSUPP)
4094                        return rc;
4095                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4096                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4097                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4098                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4099                return -EREMOTE;
4100        } else if (exit_reason != -EFAULT) {
4101                vcpu->stat.exit_null++;
4102                return 0;
4103        } else if (kvm_is_ucontrol(vcpu->kvm)) {
4104                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4105                vcpu->run->s390_ucontrol.trans_exc_code =
4106                                                current->thread.gmap_addr;
4107                vcpu->run->s390_ucontrol.pgm_code = 0x10;
4108                return -EREMOTE;
4109        } else if (current->thread.gmap_pfault) {
4110                trace_kvm_s390_major_guest_pfault(vcpu);
4111                current->thread.gmap_pfault = 0;
4112                if (kvm_arch_setup_async_pf(vcpu))
4113                        return 0;
4114                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4115        }
4116        return vcpu_post_run_fault_in_sie(vcpu);
4117}
4118
4119#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120static int __vcpu_run(struct kvm_vcpu *vcpu)
4121{
4122        int rc, exit_reason;
4123        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4124
4125        /*
4126         * We try to hold kvm->srcu during most of vcpu_run (except when
4127         * running the guest), so that memslots (and other stuff) are protected
4128         */
4129        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4130
4131        do {
4132                rc = vcpu_pre_run(vcpu);
4133                if (rc)
4134                        break;
4135
4136                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137                /*
4138                 * As PF_VCPU is used in the fault handler, there must be no
4139                 * uaccess between guest_enter and guest_exit.
4140                 */
4141                local_irq_disable();
4142                guest_enter_irqoff();
4143                __disable_cpu_timer_accounting(vcpu);
4144                local_irq_enable();
4145                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146                        memcpy(sie_page->pv_grregs,
4147                               vcpu->run->s.regs.gprs,
4148                               sizeof(sie_page->pv_grregs));
4149                }
4150                exit_reason = sie64a(vcpu->arch.sie_block,
4151                                     vcpu->run->s.regs.gprs);
4152                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153                        memcpy(vcpu->run->s.regs.gprs,
4154                               sie_page->pv_grregs,
4155                               sizeof(sie_page->pv_grregs));
4156                        /*
4157                         * We're not allowed to inject interrupts on intercepts
4158                         * that leave the guest state in an "in-between" state
4159                         * where the next SIE entry will do a continuation.
4160                         * Fence interrupts in our "internal" PSW.
4161                         */
4162                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4165                        }
4166                }
4167                local_irq_disable();
4168                __enable_cpu_timer_accounting(vcpu);
4169                guest_exit_irqoff();
4170                local_irq_enable();
4171                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4172
4173                rc = vcpu_post_run(vcpu, exit_reason);
4174        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4175
4176        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177        return rc;
4178}
4179
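    /*
     * Copy the parts of the kvm_run synced register area that only exist
     * for non-protected guests into the SIE block. For protected guests
     * most of this state is owned by the ultravisor, so sync_regs() only
     * calls this helper when the vcpu is not protected.
     */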
4180static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4181{
4182        struct kvm_run *kvm_run = vcpu->run;
4183        struct runtime_instr_cb *riccb;
4184        struct gs_cb *gscb;
4185
4186        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4187        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4188        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4189        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4190        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4191                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4192                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4193                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4194        }
4195        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4196                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4197                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4198                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4199                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4200                        kvm_clear_async_pf_completion_queue(vcpu);
4201        }
4202        if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4203                vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4204                vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4205        }
4206        /*
4207         * If userspace sets the riccb (e.g. after migration) to a valid state,
4208         * we should enable RI here instead of doing the lazy enablement.
4209         */
4210        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4211            test_kvm_facility(vcpu->kvm, 64) &&
4212            riccb->v &&
4213            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4214                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4215                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4216        }
4217        /*
4218         * If userspace sets the gscb (e.g. after migration) to non-zero,
4219         * we should enable GS here instead of doing the lazy enablement.
4220         */
4221        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4222            test_kvm_facility(vcpu->kvm, 133) &&
4223            gscb->gssm &&
4224            !vcpu->arch.gs_enabled) {
4225                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4226                vcpu->arch.sie_block->ecb |= ECB_GS;
4227                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4228                vcpu->arch.gs_enabled = 1;
4229        }
4230        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4231            test_kvm_facility(vcpu->kvm, 82)) {
4232                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4233                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4234        }
4235        if (MACHINE_HAS_GS) {
4236                preempt_disable();
4237                __ctl_set_bit(2, 4);
4238                if (current->thread.gs_cb) {
4239                        vcpu->arch.host_gscb = current->thread.gs_cb;
4240                        save_gs_cb(vcpu->arch.host_gscb);
4241                }
4242                if (vcpu->arch.gs_enabled) {
4243                        current->thread.gs_cb = (struct gs_cb *)
4244                                                &vcpu->run->s.regs.gscb;
4245                        restore_gs_cb(current->thread.gs_cb);
4246                }
4247                preempt_enable();
4248        }
4249        /* SIE will load etoken directly from SDNX and therefore kvm_run */
4250}
4251
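/*
 * Transfer the register state flagged in kvm_dirty_regs from kvm_run into
 * the vcpu before entering the SIE, and switch the host access, floating
 * point and vector registers for the guest ones. For protected guests only
 * the condition code bits of the PSW mask are accepted from userspace.
 */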
4252static void sync_regs(struct kvm_vcpu *vcpu)
4253{
4254        struct kvm_run *kvm_run = vcpu->run;
4255
4256        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4257                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4258        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4259                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4260                /* some control register changes require a tlb flush */
4261                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4262        }
4263        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4265                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4266        }
4267        save_access_regs(vcpu->arch.host_acrs);
4268        restore_access_regs(vcpu->run->s.regs.acrs);
4269        /* save host (userspace) fprs/vrs */
4270        save_fpu_regs();
4271        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4272        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4273        if (MACHINE_HAS_VX)
4274                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4275        else
4276                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4277        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4278        if (test_fp_ctl(current->thread.fpu.fpc))
4279                /* User space provided an invalid FPC, let's clear it */
4280                current->thread.fpu.fpc = 0;
4281
4282        /* Sync fmt2 only data */
4283        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4284                sync_regs_fmt2(vcpu);
4285        } else {
4286                /*
4287                 * In several places we have to modify our internal view to
4288                 * not do things that are disallowed by the ultravisor. For
4289                 * example we must not inject interrupts after specific exits
4290                 * (e.g. 112 prefix page not secure). We do this by turning
4291                 * off the machine check, external and I/O interrupt bits
4292                 * of our PSW copy. To avoid getting validity intercepts, we
4293                 * only accept the condition code from userspace.
4294                 */
4295                vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4296                vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4297                                                   PSW_MASK_CC;
4298        }
4299
4300        kvm_run->kvm_dirty_regs = 0;
4301}
4302
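/*
 * Store the format-2 (non-protected guest) parts of the register state back
 * into kvm_run and restore the host guarded storage control block that was
 * saved in sync_regs_fmt2().
 */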
4303static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4304{
4305        struct kvm_run *kvm_run = vcpu->run;
4306
4307        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4308        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4309        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4310        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4311        kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4312        if (MACHINE_HAS_GS) {
4313                preempt_disable();
4314                __ctl_set_bit(2, 4);
4315                if (vcpu->arch.gs_enabled)
4316                        save_gs_cb(current->thread.gs_cb);
4317                current->thread.gs_cb = vcpu->arch.host_gscb;
4318                restore_gs_cb(vcpu->arch.host_gscb);
4319                preempt_enable();
4320                if (!vcpu->arch.host_gscb)
4321                        __ctl_clear_bit(2, 4);
4322                vcpu->arch.host_gscb = NULL;
4323        }
4324        /* SIE will save etoken directly into SDNX and therefore kvm_run */
4325}
4326
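/*
 * Write the vcpu register state back into kvm_run after the SIE exit and
 * restore the host access register and floating point/vector register state.
 */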
4327static void store_regs(struct kvm_vcpu *vcpu)
4328{
4329        struct kvm_run *kvm_run = vcpu->run;
4330
4331        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4332        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4333        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4334        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4335        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4336        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4337        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4338        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4339        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4340        save_access_regs(vcpu->run->s.regs.acrs);
4341        restore_access_regs(vcpu->arch.host_acrs);
4342        /* Save guest register state */
4343        save_fpu_regs();
4344        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4345        /* Restore will be done lazily at return */
4346        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4347        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4348        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4349                store_regs_fmt2(vcpu);
4350}
4351
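/*
 * KVM_RUN: validate the requested sync regs, start the vcpu (unless userspace
 * controls the cpu state), sync registers in, run the SIE loop and store the
 * registers back. A return value of -EREMOTE from the loop means userspace
 * has to handle an exit that was already prepared in kvm_run.
 */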
4352int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4353{
4354        struct kvm_run *kvm_run = vcpu->run;
4355        int rc;
4356
4357        if (kvm_run->immediate_exit)
4358                return -EINTR;
4359
4360        if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4361            kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4362                return -EINVAL;
4363
4364        vcpu_load(vcpu);
4365
4366        if (guestdbg_exit_pending(vcpu)) {
4367                kvm_s390_prepare_debug_exit(vcpu);
4368                rc = 0;
4369                goto out;
4370        }
4371
4372        kvm_sigset_activate(vcpu);
4373
4374        /*
4375         * No need to check the return value of vcpu_start as it can only fail
4376         * for protvirt, but protvirt implies user controlled cpu state.
4377         */
4378        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4379                kvm_s390_vcpu_start(vcpu);
4380        } else if (is_vcpu_stopped(vcpu)) {
4381                pr_err_ratelimited("can't run stopped vcpu %d\n",
4382                                   vcpu->vcpu_id);
4383                rc = -EINVAL;
4384                goto out;
4385        }
4386
4387        sync_regs(vcpu);
4388        enable_cpu_timer_accounting(vcpu);
4389
4390        might_fault();
4391        rc = __vcpu_run(vcpu);
4392
4393        if (signal_pending(current) && !rc) {
4394                kvm_run->exit_reason = KVM_EXIT_INTR;
4395                rc = -EINTR;
4396        }
4397
4398        if (guestdbg_exit_pending(vcpu) && !rc)  {
4399                kvm_s390_prepare_debug_exit(vcpu);
4400                rc = 0;
4401        }
4402
4403        if (rc == -EREMOTE) {
4404                /* userspace support is needed, kvm_run has been prepared */
4405                rc = 0;
4406        }
4407
4408        disable_cpu_timer_accounting(vcpu);
4409        store_regs(vcpu);
4410
4411        kvm_sigset_deactivate(vcpu);
4412
4413        vcpu->stat.exit_userspace++;
4414out:
4415        vcpu_put(vcpu);
4416        return rc;
4417}
4418
4419/*
4420 * store status at address
4421 * we have two special cases:
4422 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4423 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4424 */
4425int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4426{
4427        unsigned char archmode = 1;
4428        freg_t fprs[NUM_FPRS];
4429        unsigned int px;
4430        u64 clkcomp, cputm;
4431        int rc;
4432
4433        px = kvm_s390_get_prefix(vcpu);
4434        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4435                if (write_guest_abs(vcpu, 163, &archmode, 1))
4436                        return -EFAULT;
4437                gpa = 0;
4438        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4439                if (write_guest_real(vcpu, 163, &archmode, 1))
4440                        return -EFAULT;
4441                gpa = px;
4442        } else
4443                gpa -= __LC_FPREGS_SAVE_AREA;
4444
4445        /* manually convert vector registers if necessary */
4446        if (MACHINE_HAS_VX) {
4447                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4448                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449                                     fprs, 128);
4450        } else {
4451                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4452                                     vcpu->run->s.regs.fprs, 128);
4453        }
4454        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4455                              vcpu->run->s.regs.gprs, 128);
4456        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4457                              &vcpu->arch.sie_block->gpsw, 16);
4458        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4459                              &px, 4);
4460        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4461                              &vcpu->run->s.regs.fpc, 4);
4462        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4463                              &vcpu->arch.sie_block->todpr, 4);
4464        cputm = kvm_s390_get_cpu_timer(vcpu);
4465        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4466                              &cputm, 8);
4467        clkcomp = vcpu->arch.sie_block->ckc >> 8;
4468        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4469                              &clkcomp, 8);
4470        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4471                              &vcpu->run->s.regs.acrs, 64);
4472        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4473                              &vcpu->arch.sie_block->gcr, 128);
4474        return rc ? -EFAULT : 0;
4475}
4476
4477int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4478{
4479        /*
4480         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4481         * switch in the run ioctl. Let's update our copies before we save
4482         * them into the save area.
4483         */
4484        save_fpu_regs();
4485        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4486        save_access_regs(vcpu->run->s.regs.acrs);
4487
4488        return kvm_s390_store_status_unloaded(vcpu, addr);
4489}
4490
4491static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4492{
4493        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4494        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4495}
4496
4497static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4498{
4499        unsigned int i;
4500        struct kvm_vcpu *vcpu;
4501
4502        kvm_for_each_vcpu(i, vcpu, kvm) {
4503                __disable_ibs_on_vcpu(vcpu);
4504        }
4505}
4506
4507static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4508{
4509        if (!sclp.has_ibs)
4510                return;
4511        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4512        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4513}
4514
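/*
 * Move a vcpu from the STOPPED to the OPERATING state. For protected guests
 * the ultravisor is informed first. IBS is only used while a single vcpu is
 * running: it is enabled here for the first started vcpu and disabled on all
 * vcpus as soon as a second one is started.
 */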
4515int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4516{
4517        int i, online_vcpus, r = 0, started_vcpus = 0;
4518
4519        if (!is_vcpu_stopped(vcpu))
4520                return 0;
4521
4522        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4523        /* Only one cpu at a time may enter/leave the STOPPED state. */
4524        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4525        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4526
4527        /* Let's tell the UV that we want to change into the operating state */
4528        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4529                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4530                if (r) {
4531                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4532                        return r;
4533                }
4534        }
4535
4536        for (i = 0; i < online_vcpus; i++) {
4537                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4538                        started_vcpus++;
4539        }
4540
4541        if (started_vcpus == 0) {
4542                /* we're the only active VCPU -> speed it up */
4543                __enable_ibs_on_vcpu(vcpu);
4544        } else if (started_vcpus == 1) {
4545                /*
4546                 * As we are starting a second VCPU, we have to disable
4547                 * the IBS facility on all VCPUs to remove potentially
4548                 * outstanding ENABLE requests.
4549                 */
4550                __disable_ibs_on_all_vcpus(vcpu->kvm);
4551        }
4552
4553        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4554        /*
4555         * The real PSW might have changed due to a RESTART interpreted by the
4556         * ultravisor. We block all interrupts and let the next sie exit
4557         * refresh our view.
4558         */
4559        if (kvm_s390_pv_cpu_is_protected(vcpu))
4560                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4561        /*
4562         * Another VCPU might have used IBS while we were offline.
4563         * Let's play safe and flush the VCPU at startup.
4564         */
4565        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4566        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4567        return 0;
4568}
4569
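/*
 * Move a vcpu into the STOPPED state. For protected guests the ultravisor is
 * informed first. If exactly one started vcpu remains afterwards, IBS is
 * enabled on that vcpu to speed it up.
 */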
4570int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4571{
4572        int i, online_vcpus, r = 0, started_vcpus = 0;
4573        struct kvm_vcpu *started_vcpu = NULL;
4574
4575        if (is_vcpu_stopped(vcpu))
4576                return 0;
4577
4578        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4579        /* Only one cpu at a time may enter/leave the STOPPED state. */
4580        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582
4583        /* Let's tell the UV that we want to change into the stopped state */
4584        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4586                if (r) {
4587                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588                        return r;
4589                }
4590        }
4591
4592        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4593        kvm_s390_clear_stop_irq(vcpu);
4594
4595        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4596        __disable_ibs_on_vcpu(vcpu);
4597
4598        for (i = 0; i < online_vcpus; i++) {
4599                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4600                        started_vcpus++;
4601                        started_vcpu = vcpu->kvm->vcpus[i];
4602                }
4603        }
4604
4605        if (started_vcpus == 1) {
4606                /*
4607                 * As we only have one VCPU left, we want to enable the
4608                 * IBS facility for that VCPU to speed it up.
4609                 */
4610                __enable_ibs_on_vcpu(started_vcpu);
4611        }
4612
4613        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4614        return 0;
4615}
4616
4617static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4618                                     struct kvm_enable_cap *cap)
4619{
4620        int r;
4621
4622        if (cap->flags)
4623                return -EINVAL;
4624
4625        switch (cap->cap) {
4626        case KVM_CAP_S390_CSS_SUPPORT:
4627                if (!vcpu->kvm->arch.css_support) {
4628                        vcpu->kvm->arch.css_support = 1;
4629                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4630                        trace_kvm_s390_enable_css(vcpu->kvm);
4631                }
4632                r = 0;
4633                break;
4634        default:
4635                r = -EINVAL;
4636                break;
4637        }
4638        return r;
4639}
4640
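/*
 * Read from or write to the secure instruction data area (SIDA) of a
 * protected vcpu. The offset and size are checked against the SIDA size;
 * flags are not supported for SIDA operations.
 */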
4641static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4642                                   struct kvm_s390_mem_op *mop)
4643{
4644        void __user *uaddr = (void __user *)mop->buf;
4645        int r = 0;
4646
4647        if (mop->flags || !mop->size)
4648                return -EINVAL;
4649        if (mop->size + mop->sida_offset < mop->size)
4650                return -EINVAL;
4651        if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4652                return -E2BIG;
4653
4654        switch (mop->op) {
4655        case KVM_S390_MEMOP_SIDA_READ:
4656                if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4657                                 mop->sida_offset), mop->size))
4658                        r = -EFAULT;
4659
4660                break;
4661        case KVM_S390_MEMOP_SIDA_WRITE:
4662                if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4663                                   mop->sida_offset), uaddr, mop->size))
4664                        r = -EFAULT;
4665                break;
4666        }
4667        return r;
4668}
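
/*
 * Handle KVM_S390_MEMOP_LOGICAL_READ/WRITE for non-protected vcpus: either
 * only check access to the guest range (KVM_S390_MEMOP_F_CHECK_ONLY) or copy
 * the data through a temporary kernel buffer. On an access exception the
 * caller may request injection of the program interrupt
 * (KVM_S390_MEMOP_F_INJECT_EXCEPTION).
 *
 * Illustrative userspace sketch (not part of this file; vcpu_fd, guest_addr,
 * len and buffer are made-up names, error handling omitted):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */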
4669static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4670                                  struct kvm_s390_mem_op *mop)
4671{
4672        void __user *uaddr = (void __user *)mop->buf;
4673        void *tmpbuf = NULL;
4674        int r = 0;
4675        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4676                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
4677
4678        if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4679                return -EINVAL;
4680
4681        if (mop->size > MEM_OP_MAX_SIZE)
4682                return -E2BIG;
4683
4684        if (kvm_s390_pv_cpu_is_protected(vcpu))
4685                return -EINVAL;
4686
4687        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4688                tmpbuf = vmalloc(mop->size);
4689                if (!tmpbuf)
4690                        return -ENOMEM;
4691        }
4692
4693        switch (mop->op) {
4694        case KVM_S390_MEMOP_LOGICAL_READ:
4695                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4696                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4697                                            mop->size, GACC_FETCH);
4698                        break;
4699                }
4700                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4701                if (r == 0) {
4702                        if (copy_to_user(uaddr, tmpbuf, mop->size))
4703                                r = -EFAULT;
4704                }
4705                break;
4706        case KVM_S390_MEMOP_LOGICAL_WRITE:
4707                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4708                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4709                                            mop->size, GACC_STORE);
4710                        break;
4711                }
4712                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4713                        r = -EFAULT;
4714                        break;
4715                }
4716                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4717                break;
4718        }
4719
4720        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4721                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4722
4723        vfree(tmpbuf);
4724        return r;
4725}
4726
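/*
 * Dispatch a KVM_S390_MEM_OP to the logical or SIDA handler while holding
 * kvm->srcu, so that memslots cannot change underneath us.
 */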
4727static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4728                                      struct kvm_s390_mem_op *mop)
4729{
4730        int r, srcu_idx;
4731
4732        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4733
4734        switch (mop->op) {
4735        case KVM_S390_MEMOP_LOGICAL_READ:
4736        case KVM_S390_MEMOP_LOGICAL_WRITE:
4737                r = kvm_s390_guest_mem_op(vcpu, mop);
4738                break;
4739        case KVM_S390_MEMOP_SIDA_READ:
4740        case KVM_S390_MEMOP_SIDA_WRITE:
4741                /* we are locked against sida going away by the vcpu->mutex */
4742                r = kvm_s390_guest_sida_op(vcpu, mop);
4743                break;
4744        default:
4745                r = -EINVAL;
4746        }
4747
4748        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4749        return r;
4750}
4751
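/*
 * Interrupt injection ioctls are handled on the async path, i.e. without
 * taking the vcpu mutex first, so interrupts can be injected while the vcpu
 * is running.
 */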
4752long kvm_arch_vcpu_async_ioctl(struct file *filp,
4753                               unsigned int ioctl, unsigned long arg)
4754{
4755        struct kvm_vcpu *vcpu = filp->private_data;
4756        void __user *argp = (void __user *)arg;
4757
4758        switch (ioctl) {
4759        case KVM_S390_IRQ: {
4760                struct kvm_s390_irq s390irq;
4761
4762                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4763                        return -EFAULT;
4764                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4765        }
4766        case KVM_S390_INTERRUPT: {
4767                struct kvm_s390_interrupt s390int;
4768                struct kvm_s390_irq s390irq = {};
4769
4770                if (copy_from_user(&s390int, argp, sizeof(s390int)))
4771                        return -EFAULT;
4772                if (s390int_to_s390irq(&s390int, &s390irq))
4773                        return -EINVAL;
4774                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4775        }
4776        }
4777        return -ENOIOCTLCMD;
4778}
4779
4780long kvm_arch_vcpu_ioctl(struct file *filp,
4781                         unsigned int ioctl, unsigned long arg)
4782{
4783        struct kvm_vcpu *vcpu = filp->private_data;
4784        void __user *argp = (void __user *)arg;
4785        int idx;
4786        long r;
4787        u16 rc, rrc;
4788
4789        vcpu_load(vcpu);
4790
4791        switch (ioctl) {
4792        case KVM_S390_STORE_STATUS:
4793                idx = srcu_read_lock(&vcpu->kvm->srcu);
4794                r = kvm_s390_store_status_unloaded(vcpu, arg);
4795                srcu_read_unlock(&vcpu->kvm->srcu, idx);
4796                break;
4797        case KVM_S390_SET_INITIAL_PSW: {
4798                psw_t psw;
4799
4800                r = -EFAULT;
4801                if (copy_from_user(&psw, argp, sizeof(psw)))
4802                        break;
4803                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4804                break;
4805        }
4806        case KVM_S390_CLEAR_RESET:
4807                r = 0;
4808                kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4809                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4810                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4811                                          UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4812                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4813                                   rc, rrc);
4814                }
4815                break;
4816        case KVM_S390_INITIAL_RESET:
4817                r = 0;
4818                kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4819                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4820                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4821                                          UVC_CMD_CPU_RESET_INITIAL,
4822                                          &rc, &rrc);
4823                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4824                                   rc, rrc);
4825                }
4826                break;
4827        case KVM_S390_NORMAL_RESET:
4828                r = 0;
4829                kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4830                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4831                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4832                                          UVC_CMD_CPU_RESET, &rc, &rrc);
4833                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4834                                   rc, rrc);
4835                }
4836                break;
4837        case KVM_SET_ONE_REG:
4838        case KVM_GET_ONE_REG: {
4839                struct kvm_one_reg reg;
4840                r = -EINVAL;
4841                if (kvm_s390_pv_cpu_is_protected(vcpu))
4842                        break;
4843                r = -EFAULT;
4844                if (copy_from_user(&reg, argp, sizeof(reg)))
4845                        break;
4846                if (ioctl == KVM_SET_ONE_REG)
4847                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4848                else
4849                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4850                break;
4851        }
4852#ifdef CONFIG_KVM_S390_UCONTROL
4853        case KVM_S390_UCAS_MAP: {
4854                struct kvm_s390_ucas_mapping ucasmap;
4855
4856                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857                        r = -EFAULT;
4858                        break;
4859                }
4860
4861                if (!kvm_is_ucontrol(vcpu->kvm)) {
4862                        r = -EINVAL;
4863                        break;
4864                }
4865
4866                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4867                                     ucasmap.vcpu_addr, ucasmap.length);
4868                break;
4869        }
4870        case KVM_S390_UCAS_UNMAP: {
4871                struct kvm_s390_ucas_mapping ucasmap;
4872
4873                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874                        r = -EFAULT;
4875                        break;
4876                }
4877
4878                if (!kvm_is_ucontrol(vcpu->kvm)) {
4879                        r = -EINVAL;
4880                        break;
4881                }
4882
4883                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4884                        ucasmap.length);
4885                break;
4886        }
4887#endif
4888        case KVM_S390_VCPU_FAULT: {
4889                r = gmap_fault(vcpu->arch.gmap, arg, 0);
4890                break;
4891        }
4892        case KVM_ENABLE_CAP:
4893        {
4894                struct kvm_enable_cap cap;
4895                r = -EFAULT;
4896                if (copy_from_user(&cap, argp, sizeof(cap)))
4897                        break;
4898                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4899                break;
4900        }
4901        case KVM_S390_MEM_OP: {
4902                struct kvm_s390_mem_op mem_op;
4903
4904                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4905                        r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4906                else
4907                        r = -EFAULT;
4908                break;
4909        }
4910        case KVM_S390_SET_IRQ_STATE: {
4911                struct kvm_s390_irq_state irq_state;
4912
4913                r = -EFAULT;
4914                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4915                        break;
4916                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4917                    irq_state.len == 0 ||
4918                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4919                        r = -EINVAL;
4920                        break;
4921                }
4922                /* do not use irq_state.flags, it will break old QEMUs */
4923                r = kvm_s390_set_irq_state(vcpu,
4924                                           (void __user *) irq_state.buf,
4925                                           irq_state.len);
4926                break;
4927        }
4928        case KVM_S390_GET_IRQ_STATE: {
4929                struct kvm_s390_irq_state irq_state;
4930
4931                r = -EFAULT;
4932                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4933                        break;
4934                if (irq_state.len == 0) {
4935                        r = -EINVAL;
4936                        break;
4937                }
4938                /* do not use irq_state.flags, it will break old QEMUs */
4939                r = kvm_s390_get_irq_state(vcpu,
4940                                           (__u8 __user *)  irq_state.buf,
4941                                           irq_state.len);
4942                break;
4943        }
4944        default:
4945                r = -ENOTTY;
4946        }
4947
4948        vcpu_put(vcpu);
4949        return r;
4950}
4951
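/*
 * For user controlled virtual machines the SIE control block is mapped into
 * userspace at KVM_S390_SIE_PAGE_OFFSET of the vcpu fd; any other access
 * faults with SIGBUS.
 */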
4952vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4953{
4954#ifdef CONFIG_KVM_S390_UCONTROL
4955        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4956                 && (kvm_is_ucontrol(vcpu->kvm))) {
4957                vmf->page = virt_to_page(vcpu->arch.sie_block);
4958                get_page(vmf->page);
4959                return 0;
4960        }
4961#endif
4962        return VM_FAULT_SIGBUS;
4963}
4964
4965/* Section: memory related */
4966int kvm_arch_prepare_memory_region(struct kvm *kvm,
4967                                   struct kvm_memory_slot *memslot,
4968                                   const struct kvm_userspace_memory_region *mem,
4969                                   enum kvm_mr_change change)
4970{
4971        /* A few sanity checks. Memory slots have to start and end at a
4972           segment boundary (1MB). The memory in userland may be fragmented
4973           into various different vmas. It is okay to mmap() and munmap()
4974           stuff in this slot after doing this call at any time. */
4975
4976        if (mem->userspace_addr & 0xffffful)
4977                return -EINVAL;
4978
4979        if (mem->memory_size & 0xffffful)
4980                return -EINVAL;
4981
4982        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4983                return -EINVAL;
4984
4985        /* When we are protected, we should not change the memory slots */
4986        if (kvm_s390_pv_get_handle(kvm))
4987                return -EINVAL;
4988        return 0;
4989}
4990
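/*
 * Mirror memslot changes in the guest address space: unmap the old gmap
 * segment on delete/move and map the new one on create/move.
 */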
4991void kvm_arch_commit_memory_region(struct kvm *kvm,
4992                                const struct kvm_userspace_memory_region *mem,
4993                                struct kvm_memory_slot *old,
4994                                const struct kvm_memory_slot *new,
4995                                enum kvm_mr_change change)
4996{
4997        int rc = 0;
4998
4999        switch (change) {
5000        case KVM_MR_DELETE:
5001                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5002                                        old->npages * PAGE_SIZE);
5003                break;
5004        case KVM_MR_MOVE:
5005                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5006                                        old->npages * PAGE_SIZE);
5007                if (rc)
5008                        break;
5009                fallthrough;
5010        case KVM_MR_CREATE:
5011                rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5012                                      mem->guest_phys_addr, mem->memory_size);
5013                break;
5014        case KVM_MR_FLAGS_ONLY:
5015                break;
5016        default:
5017                WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5018        }
5019        if (rc)
5020                pr_warn("failed to commit memory region\n");
5021        return;
5022}
5023
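/*
 * Build the per-doubleword mask of facility bits that may be forwarded to
 * guests, based on the two bits of sclp.hmfai that belong to facility
 * doubleword i.
 */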
5024static inline unsigned long nonhyp_mask(int i)
5025{
5026        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5027
5028        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5029}
5030
5031void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5032{
5033        vcpu->valid_wakeup = false;
5034}
5035
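/*
 * Module init: bail out if the SIE is not available, refuse the nested +
 * hpage combination, seed the base guest facility list from the host STFLE
 * list filtered by nonhyp_mask() and register with the generic KVM code.
 */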
5036static int __init kvm_s390_init(void)
5037{
5038        int i;
5039
5040        if (!sclp.has_sief2) {
5041                pr_info("SIE is not available\n");
5042                return -ENODEV;
5043        }
5044
5045        if (nested && hpage) {
5046                pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5047                return -EINVAL;
5048        }
5049
5050        for (i = 0; i < 16; i++)
5051                kvm_s390_fac_base[i] |=
5052                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5053
5054        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5055}
5056
5057static void __exit kvm_s390_exit(void)
5058{
5059        kvm_exit();
5060}
5061
5062module_init(kvm_s390_init);
5063module_exit(kvm_s390_exit);
5064
5065/*
5066 * Enable autoloading of the kvm module.
5067 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5068 * since x86 takes a different approach.
5069 */
5070#include <linux/miscdevice.h>
5071MODULE_ALIAS_MISCDEV(KVM_MINOR);
5072MODULE_ALIAS("devname:kvm");
5073