linux/arch/s390/kvm/kvm-s390.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2018
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11 *               Jason J. Herne <jjherne@us.ibm.com>
  12 */
  13
  14#include <linux/compiler.h>
  15#include <linux/err.h>
  16#include <linux/fs.h>
  17#include <linux/hrtimer.h>
  18#include <linux/init.h>
  19#include <linux/kvm.h>
  20#include <linux/kvm_host.h>
  21#include <linux/mman.h>
  22#include <linux/module.h>
  23#include <linux/moduleparam.h>
  24#include <linux/random.h>
  25#include <linux/slab.h>
  26#include <linux/timer.h>
  27#include <linux/vmalloc.h>
  28#include <linux/bitmap.h>
  29#include <linux/sched/signal.h>
  30#include <linux/string.h>
  31
  32#include <asm/asm-offsets.h>
  33#include <asm/lowcore.h>
  34#include <asm/stp.h>
  35#include <asm/pgtable.h>
  36#include <asm/gmap.h>
  37#include <asm/nmi.h>
  38#include <asm/switch_to.h>
  39#include <asm/isc.h>
  40#include <asm/sclp.h>
  41#include <asm/cpacf.h>
  42#include <asm/timex.h>
  43#include "kvm-s390.h"
  44#include "gaccess.h"
  45
  46#define KMSG_COMPONENT "kvm-s390"
  47#undef pr_fmt
  48#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  49
  50#define CREATE_TRACE_POINTS
  51#include "trace.h"
  52#include "trace-s390.h"
  53
  54#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55#define LOCAL_IRQS 32
  56#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62struct kvm_stats_debugfs_item debugfs_entries[] = {
  63        { "userspace_handled", VCPU_STAT(exit_userspace) },
  64        { "exit_null", VCPU_STAT(exit_null) },
  65        { "exit_validity", VCPU_STAT(exit_validity) },
  66        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67        { "exit_external_request", VCPU_STAT(exit_external_request) },
  68        { "exit_io_request", VCPU_STAT(exit_io_request) },
  69        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70        { "exit_instruction", VCPU_STAT(exit_instruction) },
  71        { "exit_pei", VCPU_STAT(exit_pei) },
  72        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  79        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  80        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  81        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  82        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  83        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  84        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  85        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  86        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  87        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  88        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  89        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  90        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  91        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  92        { "deliver_program", VCPU_STAT(deliver_program) },
  93        { "deliver_io", VCPU_STAT(deliver_io) },
  94        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  95        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  96        { "inject_ckc", VCPU_STAT(inject_ckc) },
  97        { "inject_cputm", VCPU_STAT(inject_cputm) },
  98        { "inject_external_call", VCPU_STAT(inject_external_call) },
  99        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 100        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 101        { "inject_io", VM_STAT(inject_io) },
 102        { "inject_mchk", VCPU_STAT(inject_mchk) },
 103        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 104        { "inject_program", VCPU_STAT(inject_program) },
 105        { "inject_restart", VCPU_STAT(inject_restart) },
 106        { "inject_service_signal", VM_STAT(inject_service_signal) },
 107        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 108        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 109        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 110        { "inject_virtio", VM_STAT(inject_virtio) },
 111        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 112        { "instruction_gs", VCPU_STAT(instruction_gs) },
 113        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 114        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 115        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 116        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 117        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 118        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 119        { "instruction_sck", VCPU_STAT(instruction_sck) },
 120        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 121        { "instruction_spx", VCPU_STAT(instruction_spx) },
 122        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 123        { "instruction_stap", VCPU_STAT(instruction_stap) },
 124        { "instruction_iske", VCPU_STAT(instruction_iske) },
 125        { "instruction_ri", VCPU_STAT(instruction_ri) },
 126        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 127        { "instruction_sske", VCPU_STAT(instruction_sske) },
 128        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 129        { "instruction_essa", VCPU_STAT(instruction_essa) },
 130        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 131        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 132        { "instruction_tb", VCPU_STAT(instruction_tb) },
 133        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 134        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 135        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 136        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 137        { "instruction_sie", VCPU_STAT(instruction_sie) },
 138        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 139        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 140        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 141        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 142        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 143        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 144        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 145        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 146        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 147        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 148        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 149        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 150        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 151        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 152        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 153        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 154        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 155        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 156        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 157        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 158        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 159        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 160        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 161        { NULL }
 162};
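
/*
 * Illustrative note: each entry above names a counter inside struct kvm_vcpu
 * or struct kvm (VCPU_STAT/VM_STAT only record the counter's offset and
 * type). KVM's common code turns these into debugfs files, typically visible
 * as /sys/kernel/debug/kvm/<name>, e.g. "instruction_diag_44".
 */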
 163
 164struct kvm_s390_tod_clock_ext {
 165        __u8 epoch_idx;
 166        __u64 tod;
 167        __u8 reserved[7];
 168} __packed;
 169
 170/* allow nested virtualization in KVM (if enabled by user space) */
 171static int nested;
 172module_param(nested, int, S_IRUGO);
 173MODULE_PARM_DESC(nested, "Nested virtualization support");
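
/*
 * Illustrative usage (assuming KVM is built as the kvm.ko module): nested
 * virtualization (vSIE) is off by default and can be turned on at load time,
 * e.g. "modprobe kvm nested=1". Since the parameter is declared with S_IRUGO,
 * the current value can be read from /sys/module/kvm/parameters/nested.
 */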
 174
 175
 176/*
 177 * For now we handle at most 16 double words as this is what the s390 base
 178 * kernel handles and stores in the prefix page. If we ever need to go beyond
  179 * this, the code needs to change, but the external uapi can stay.
 180 */
 181#define SIZE_INTERNAL 16
 182
 183/*
 184 * Base feature mask that defines default mask for facilities. Consists of the
 185 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 186 */
 187static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 188/*
 189 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 190 * and defines the facilities that can be enabled via a cpu model.
 191 */
 192static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 193
 194static unsigned long kvm_s390_fac_size(void)
 195{
 196        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 197        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 198        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 199                sizeof(S390_lowcore.stfle_fac_list));
 200
 201        return SIZE_INTERNAL;
 202}
 203
 204/* available cpu features supported by kvm */
 205static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 206/* available subfunctions indicated via query / "test bit" */
 207static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 208
 209static struct gmap_notifier gmap_notifier;
 210static struct gmap_notifier vsie_gmap_notifier;
 211debug_info_t *kvm_s390_dbf;
 212
 213/* Section: not file related */
 214int kvm_arch_hardware_enable(void)
 215{
 216        /* every s390 is virtualization enabled ;-) */
 217        return 0;
 218}
 219
 220static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 221                              unsigned long end);
 222
 223static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 224{
 225        u8 delta_idx = 0;
 226
 227        /*
 228         * The TOD jumps by delta, we have to compensate this by adding
 229         * -delta to the epoch.
 230         */
 231        delta = -delta;
 232
 233        /* sign-extension - we're adding to signed values below */
 234        if ((s64)delta < 0)
 235                delta_idx = -1;
 236
 237        scb->epoch += delta;
 238        if (scb->ecd & ECD_MEF) {
 239                scb->epdx += delta_idx;
 240                if (scb->epoch < delta)
 241                        scb->epdx += 1;
 242        }
 243}
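
/*
 * Worked example (illustrative): if the TOD clock jumped backwards by 4,
 * delta is -4 and the value actually added to the epoch is +4. With
 * scb->epoch == 0xfffffffffffffffeULL the 64-bit addition wraps to 0x2, the
 * "epoch < delta" test detects the carry and scb->epdx is incremented, so the
 * 128-bit (epdx:epoch) pair stays consistent. For a negative addend,
 * delta_idx sign-extends the value into the high word in the same way.
 */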
 244
 245/*
 246 * This callback is executed during stop_machine(). All CPUs are therefore
 247 * temporarily stopped. In order not to change guest behavior, we have to
 248 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 249 * so a CPU won't be stopped while calculating with the epoch.
 250 */
 251static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 252                          void *v)
 253{
 254        struct kvm *kvm;
 255        struct kvm_vcpu *vcpu;
 256        int i;
 257        unsigned long long *delta = v;
 258
 259        list_for_each_entry(kvm, &vm_list, vm_list) {
 260                kvm_for_each_vcpu(i, vcpu, kvm) {
 261                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 262                        if (i == 0) {
 263                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 264                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 265                        }
 266                        if (vcpu->arch.cputm_enabled)
 267                                vcpu->arch.cputm_start += *delta;
 268                        if (vcpu->arch.vsie_block)
 269                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
 270                                                   *delta);
 271                }
 272        }
 273        return NOTIFY_OK;
 274}
 275
 276static struct notifier_block kvm_clock_notifier = {
 277        .notifier_call = kvm_clock_sync,
 278};
 279
 280int kvm_arch_hardware_setup(void)
 281{
 282        gmap_notifier.notifier_call = kvm_gmap_notifier;
 283        gmap_register_pte_notifier(&gmap_notifier);
 284        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 285        gmap_register_pte_notifier(&vsie_gmap_notifier);
 286        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 287                                       &kvm_clock_notifier);
 288        return 0;
 289}
 290
 291void kvm_arch_hardware_unsetup(void)
 292{
 293        gmap_unregister_pte_notifier(&gmap_notifier);
 294        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 295        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 296                                         &kvm_clock_notifier);
 297}
 298
 299static void allow_cpu_feat(unsigned long nr)
 300{
 301        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 302}
 303
 304static inline int plo_test_bit(unsigned char nr)
 305{
 306        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 307        int cc;
 308
 309        asm volatile(
 310                /* Parameter registers are ignored for "test bit" */
 311                "       plo     0,0,0,0(0)\n"
 312                "       ipm     %0\n"
 313                "       srl     %0,28\n"
 314                : "=d" (cc)
 315                : "d" (r0)
 316                : "cc");
 317        return cc == 0;
 318}
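
/*
 * Illustrative note: setting bit 0x100 in the PLO function code turns the
 * instruction into a "test bit" query - the parameter registers are ignored
 * and condition code 0 means function code nr is installed. The feature init
 * code below uses this to build the 256-bit PLO subfunction mask, e.g.
 *
 *	if (plo_test_bit(i))
 *		kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 */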
 319
 320static void kvm_s390_cpu_feat_init(void)
 321{
 322        int i;
 323
 324        for (i = 0; i < 256; ++i) {
 325                if (plo_test_bit(i))
 326                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 327        }
 328
 329        if (test_facility(28)) /* TOD-clock steering */
 330                ptff(kvm_s390_available_subfunc.ptff,
 331                     sizeof(kvm_s390_available_subfunc.ptff),
 332                     PTFF_QAF);
 333
 334        if (test_facility(17)) { /* MSA */
 335                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 336                              kvm_s390_available_subfunc.kmac);
 337                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 338                              kvm_s390_available_subfunc.kmc);
 339                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 340                              kvm_s390_available_subfunc.km);
 341                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 342                              kvm_s390_available_subfunc.kimd);
 343                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 344                              kvm_s390_available_subfunc.klmd);
 345        }
 346        if (test_facility(76)) /* MSA3 */
 347                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 348                              kvm_s390_available_subfunc.pckmo);
 349        if (test_facility(77)) { /* MSA4 */
 350                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 351                              kvm_s390_available_subfunc.kmctr);
 352                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 353                              kvm_s390_available_subfunc.kmf);
 354                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 355                              kvm_s390_available_subfunc.kmo);
 356                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 357                              kvm_s390_available_subfunc.pcc);
 358        }
 359        if (test_facility(57)) /* MSA5 */
 360                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 361                              kvm_s390_available_subfunc.ppno);
 362
 363        if (test_facility(146)) /* MSA8 */
 364                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 365                              kvm_s390_available_subfunc.kma);
 366
 367        if (MACHINE_HAS_ESOP)
 368                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 369        /*
 370         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 371         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 372         */
 373        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 374            !test_facility(3) || !nested)
 375                return;
 376        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 377        if (sclp.has_64bscao)
 378                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 379        if (sclp.has_siif)
 380                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 381        if (sclp.has_gpere)
 382                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 383        if (sclp.has_gsls)
 384                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 385        if (sclp.has_ib)
 386                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 387        if (sclp.has_cei)
 388                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 389        if (sclp.has_ibs)
 390                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 391        if (sclp.has_kss)
 392                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 393        /*
 394         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 395         * all skey handling functions read/set the skey from the PGSTE
 396         * instead of the real storage key.
 397         *
 398         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
  399         * pages be detected as preserved although they are resident.
 400         *
 401         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 402         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 403         *
 404         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 405         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 406         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 407         *
 408         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 409         * cannot easily shadow the SCA because of the ipte lock.
 410         */
 411}
 412
 413int kvm_arch_init(void *opaque)
 414{
 415        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 416        if (!kvm_s390_dbf)
 417                return -ENOMEM;
 418
 419        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 420                debug_unregister(kvm_s390_dbf);
 421                return -ENOMEM;
 422        }
 423
 424        kvm_s390_cpu_feat_init();
 425
 426        /* Register floating interrupt controller interface. */
 427        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 428}
 429
 430void kvm_arch_exit(void)
 431{
 432        debug_unregister(kvm_s390_dbf);
 433}
 434
 435/* Section: device related */
 436long kvm_arch_dev_ioctl(struct file *filp,
 437                        unsigned int ioctl, unsigned long arg)
 438{
 439        if (ioctl == KVM_S390_ENABLE_SIE)
 440                return s390_enable_sie();
 441        return -EINVAL;
 442}
 443
 444int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 445{
 446        int r;
 447
 448        switch (ext) {
 449        case KVM_CAP_S390_PSW:
 450        case KVM_CAP_S390_GMAP:
 451        case KVM_CAP_SYNC_MMU:
 452#ifdef CONFIG_KVM_S390_UCONTROL
 453        case KVM_CAP_S390_UCONTROL:
 454#endif
 455        case KVM_CAP_ASYNC_PF:
 456        case KVM_CAP_SYNC_REGS:
 457        case KVM_CAP_ONE_REG:
 458        case KVM_CAP_ENABLE_CAP:
 459        case KVM_CAP_S390_CSS_SUPPORT:
 460        case KVM_CAP_IOEVENTFD:
 461        case KVM_CAP_DEVICE_CTRL:
 462        case KVM_CAP_ENABLE_CAP_VM:
 463        case KVM_CAP_S390_IRQCHIP:
 464        case KVM_CAP_VM_ATTRIBUTES:
 465        case KVM_CAP_MP_STATE:
 466        case KVM_CAP_IMMEDIATE_EXIT:
 467        case KVM_CAP_S390_INJECT_IRQ:
 468        case KVM_CAP_S390_USER_SIGP:
 469        case KVM_CAP_S390_USER_STSI:
 470        case KVM_CAP_S390_SKEYS:
 471        case KVM_CAP_S390_IRQ_STATE:
 472        case KVM_CAP_S390_USER_INSTR0:
 473        case KVM_CAP_S390_CMMA_MIGRATION:
 474        case KVM_CAP_S390_AIS:
 475        case KVM_CAP_S390_AIS_MIGRATION:
 476                r = 1;
 477                break;
 478        case KVM_CAP_S390_MEM_OP:
 479                r = MEM_OP_MAX_SIZE;
 480                break;
 481        case KVM_CAP_NR_VCPUS:
 482        case KVM_CAP_MAX_VCPUS:
 483                r = KVM_S390_BSCA_CPU_SLOTS;
 484                if (!kvm_s390_use_sca_entries())
 485                        r = KVM_MAX_VCPUS;
 486                else if (sclp.has_esca && sclp.has_64bscao)
 487                        r = KVM_S390_ESCA_CPU_SLOTS;
 488                break;
 489        case KVM_CAP_NR_MEMSLOTS:
 490                r = KVM_USER_MEM_SLOTS;
 491                break;
 492        case KVM_CAP_S390_COW:
 493                r = MACHINE_HAS_ESOP;
 494                break;
 495        case KVM_CAP_S390_VECTOR_REGISTERS:
 496                r = MACHINE_HAS_VX;
 497                break;
 498        case KVM_CAP_S390_RI:
 499                r = test_facility(64);
 500                break;
 501        case KVM_CAP_S390_GS:
 502                r = test_facility(133);
 503                break;
 504        case KVM_CAP_S390_BPB:
 505                r = test_facility(82);
 506                break;
 507        default:
 508                r = 0;
 509        }
 510        return r;
 511}
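
/*
 * Rough userspace sketch (illustrative; vm_fd is assumed to come from
 * KVM_CREATE_VM):
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	int memop_max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return value of 0 means the capability is not available; positive values
 * carry capability-specific data such as the VCPU limit or MEM_OP_MAX_SIZE.
 */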
 512
 513static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 514                                        struct kvm_memory_slot *memslot)
 515{
 516        gfn_t cur_gfn, last_gfn;
 517        unsigned long address;
 518        struct gmap *gmap = kvm->arch.gmap;
 519
 520        /* Loop over all guest pages */
 521        last_gfn = memslot->base_gfn + memslot->npages;
 522        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 523                address = gfn_to_hva_memslot(memslot, cur_gfn);
 524
 525                if (test_and_clear_guest_dirty(gmap->mm, address))
 526                        mark_page_dirty(kvm, cur_gfn);
 527                if (fatal_signal_pending(current))
 528                        return;
 529                cond_resched();
 530        }
 531}
 532
 533/* Section: vm related */
 534static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 535
 536/*
 537 * Get (and clear) the dirty memory log for a memory slot.
 538 */
 539int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 540                               struct kvm_dirty_log *log)
 541{
 542        int r;
 543        unsigned long n;
 544        struct kvm_memslots *slots;
 545        struct kvm_memory_slot *memslot;
 546        int is_dirty = 0;
 547
 548        if (kvm_is_ucontrol(kvm))
 549                return -EINVAL;
 550
 551        mutex_lock(&kvm->slots_lock);
 552
 553        r = -EINVAL;
 554        if (log->slot >= KVM_USER_MEM_SLOTS)
 555                goto out;
 556
 557        slots = kvm_memslots(kvm);
 558        memslot = id_to_memslot(slots, log->slot);
 559        r = -ENOENT;
 560        if (!memslot->dirty_bitmap)
 561                goto out;
 562
 563        kvm_s390_sync_dirty_log(kvm, memslot);
 564        r = kvm_get_dirty_log(kvm, log, &is_dirty);
 565        if (r)
 566                goto out;
 567
 568        /* Clear the dirty log */
 569        if (is_dirty) {
 570                n = kvm_dirty_bitmap_bytes(memslot);
 571                memset(memslot->dirty_bitmap, 0, n);
 572        }
 573        r = 0;
 574out:
 575        mutex_unlock(&kvm->slots_lock);
 576        return r;
 577}
 578
 579static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 580{
 581        unsigned int i;
 582        struct kvm_vcpu *vcpu;
 583
 584        kvm_for_each_vcpu(i, vcpu, kvm) {
 585                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 586        }
 587}
 588
 589static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 590{
 591        int r;
 592
 593        if (cap->flags)
 594                return -EINVAL;
 595
 596        switch (cap->cap) {
 597        case KVM_CAP_S390_IRQCHIP:
 598                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 599                kvm->arch.use_irqchip = 1;
 600                r = 0;
 601                break;
 602        case KVM_CAP_S390_USER_SIGP:
 603                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 604                kvm->arch.user_sigp = 1;
 605                r = 0;
 606                break;
 607        case KVM_CAP_S390_VECTOR_REGISTERS:
 608                mutex_lock(&kvm->lock);
 609                if (kvm->created_vcpus) {
 610                        r = -EBUSY;
 611                } else if (MACHINE_HAS_VX) {
 612                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 613                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 614                        if (test_facility(134)) {
 615                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 616                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 617                        }
 618                        if (test_facility(135)) {
 619                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 620                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 621                        }
 622                        r = 0;
 623                } else
 624                        r = -EINVAL;
 625                mutex_unlock(&kvm->lock);
 626                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 627                         r ? "(not available)" : "(success)");
 628                break;
 629        case KVM_CAP_S390_RI:
 630                r = -EINVAL;
 631                mutex_lock(&kvm->lock);
 632                if (kvm->created_vcpus) {
 633                        r = -EBUSY;
 634                } else if (test_facility(64)) {
 635                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 636                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 637                        r = 0;
 638                }
 639                mutex_unlock(&kvm->lock);
 640                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 641                         r ? "(not available)" : "(success)");
 642                break;
 643        case KVM_CAP_S390_AIS:
 644                mutex_lock(&kvm->lock);
 645                if (kvm->created_vcpus) {
 646                        r = -EBUSY;
 647                } else {
 648                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
 649                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 650                        r = 0;
 651                }
 652                mutex_unlock(&kvm->lock);
 653                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 654                         r ? "(not available)" : "(success)");
 655                break;
 656        case KVM_CAP_S390_GS:
 657                r = -EINVAL;
 658                mutex_lock(&kvm->lock);
 659                if (kvm->created_vcpus) {
 660                        r = -EBUSY;
 661                } else if (test_facility(133)) {
 662                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
 663                        set_kvm_facility(kvm->arch.model.fac_list, 133);
 664                        r = 0;
 665                }
 666                mutex_unlock(&kvm->lock);
 667                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 668                         r ? "(not available)" : "(success)");
 669                break;
 670        case KVM_CAP_S390_USER_STSI:
 671                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 672                kvm->arch.user_stsi = 1;
 673                r = 0;
 674                break;
 675        case KVM_CAP_S390_USER_INSTR0:
 676                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 677                kvm->arch.user_instr0 = 1;
 678                icpt_operexc_on_all_vcpus(kvm);
 679                r = 0;
 680                break;
 681        default:
 682                r = -EINVAL;
 683                break;
 684        }
 685        return r;
 686}
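
/*
 * Rough userspace sketch (illustrative; vm_fd is an assumed VM descriptor):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that extend the CPU model (vector registers, RI, AIS, GS) must
 * be enabled before the first VCPU exists, which is why the cases above fail
 * with -EBUSY once kvm->created_vcpus is non-zero.
 */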
 687
 688static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 689{
 690        int ret;
 691
 692        switch (attr->attr) {
 693        case KVM_S390_VM_MEM_LIMIT_SIZE:
 694                ret = 0;
 695                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 696                         kvm->arch.mem_limit);
 697                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 698                        ret = -EFAULT;
 699                break;
 700        default:
 701                ret = -ENXIO;
 702                break;
 703        }
 704        return ret;
 705}
 706
 707static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 708{
 709        int ret;
 710        unsigned int idx;
 711        switch (attr->attr) {
 712        case KVM_S390_VM_MEM_ENABLE_CMMA:
 713                ret = -ENXIO;
 714                if (!sclp.has_cmma)
 715                        break;
 716
 717                ret = -EBUSY;
 718                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 719                mutex_lock(&kvm->lock);
 720                if (!kvm->created_vcpus) {
 721                        kvm->arch.use_cmma = 1;
 722                        /* Not compatible with cmma. */
 723                        kvm->arch.use_pfmfi = 0;
 724                        ret = 0;
 725                }
 726                mutex_unlock(&kvm->lock);
 727                break;
 728        case KVM_S390_VM_MEM_CLR_CMMA:
 729                ret = -ENXIO;
 730                if (!sclp.has_cmma)
 731                        break;
 732                ret = -EINVAL;
 733                if (!kvm->arch.use_cmma)
 734                        break;
 735
 736                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 737                mutex_lock(&kvm->lock);
 738                idx = srcu_read_lock(&kvm->srcu);
 739                s390_reset_cmma(kvm->arch.gmap->mm);
 740                srcu_read_unlock(&kvm->srcu, idx);
 741                mutex_unlock(&kvm->lock);
 742                ret = 0;
 743                break;
 744        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 745                unsigned long new_limit;
 746
 747                if (kvm_is_ucontrol(kvm))
 748                        return -EINVAL;
 749
 750                if (get_user(new_limit, (u64 __user *)attr->addr))
 751                        return -EFAULT;
 752
 753                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 754                    new_limit > kvm->arch.mem_limit)
 755                        return -E2BIG;
 756
 757                if (!new_limit)
 758                        return -EINVAL;
 759
 760                /* gmap_create takes last usable address */
 761                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 762                        new_limit -= 1;
 763
 764                ret = -EBUSY;
 765                mutex_lock(&kvm->lock);
 766                if (!kvm->created_vcpus) {
 767                        /* gmap_create will round the limit up */
 768                        struct gmap *new = gmap_create(current->mm, new_limit);
 769
 770                        if (!new) {
 771                                ret = -ENOMEM;
 772                        } else {
 773                                gmap_remove(kvm->arch.gmap);
 774                                new->private = kvm;
 775                                kvm->arch.gmap = new;
 776                                ret = 0;
 777                        }
 778                }
 779                mutex_unlock(&kvm->lock);
 780                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 781                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 782                         (void *) kvm->arch.gmap->asce);
 783                break;
 784        }
 785        default:
 786                ret = -ENXIO;
 787                break;
 788        }
 789        return ret;
 790}
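
/*
 * Rough userspace sketch (illustrative; vm_fd is an assumed VM descriptor and
 * the 2 GiB limit is just an example value):
 *
 *	__u64 limit = 1ULL << 31;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The limit can only be changed while no VCPUs exist; gmap_create() is handed
 * the last usable address, i.e. limit - 1.
 */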
 791
 792static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 793
 794static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 795{
 796        struct kvm_vcpu *vcpu;
 797        int i;
 798
 799        if (!test_kvm_facility(kvm, 76))
 800                return -EINVAL;
 801
 802        mutex_lock(&kvm->lock);
 803        switch (attr->attr) {
 804        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 805                get_random_bytes(
 806                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 807                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 808                kvm->arch.crypto.aes_kw = 1;
 809                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 810                break;
 811        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 812                get_random_bytes(
 813                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 814                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 815                kvm->arch.crypto.dea_kw = 1;
 816                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 817                break;
 818        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 819                kvm->arch.crypto.aes_kw = 0;
 820                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 821                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 822                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 823                break;
 824        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 825                kvm->arch.crypto.dea_kw = 0;
 826                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 827                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 828                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 829                break;
 830        default:
 831                mutex_unlock(&kvm->lock);
 832                return -ENXIO;
 833        }
 834
 835        kvm_for_each_vcpu(i, vcpu, kvm) {
 836                kvm_s390_vcpu_crypto_setup(vcpu);
 837                exit_sie(vcpu);
 838        }
 839        mutex_unlock(&kvm->lock);
 840        return 0;
 841}
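
/*
 * Illustrative note: enabling AES or DEA key wrapping fills the corresponding
 * wrapping-key mask in the CRYCB with fresh random material, disabling clears
 * it. Each VCPU's crypto setup is then refreshed via
 * kvm_s390_vcpu_crypto_setup() and the VCPU is kicked out of SIE
 * (exit_sie()) so the change takes effect before the guest runs again.
 */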
 842
 843static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 844{
 845        int cx;
 846        struct kvm_vcpu *vcpu;
 847
 848        kvm_for_each_vcpu(cx, vcpu, kvm)
 849                kvm_s390_sync_request(req, vcpu);
 850}
 851
 852/*
 853 * Must be called with kvm->srcu held to avoid races on memslots, and with
 854 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 855 */
 856static int kvm_s390_vm_start_migration(struct kvm *kvm)
 857{
 858        struct kvm_s390_migration_state *mgs;
 859        struct kvm_memory_slot *ms;
 860        /* should be the only one */
 861        struct kvm_memslots *slots;
 862        unsigned long ram_pages;
 863        int slotnr;
 864
 865        /* migration mode already enabled */
 866        if (kvm->arch.migration_state)
 867                return 0;
 868
 869        slots = kvm_memslots(kvm);
 870        if (!slots || !slots->used_slots)
 871                return -EINVAL;
 872
 873        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 874        if (!mgs)
 875                return -ENOMEM;
 876        kvm->arch.migration_state = mgs;
 877
 878        if (kvm->arch.use_cmma) {
 879                /*
 880                 * Get the first slot. They are reverse sorted by base_gfn, so
 881                 * the first slot is also the one at the end of the address
 882                 * space. We have verified above that at least one slot is
 883                 * present.
 884                 */
 885                ms = slots->memslots;
 886                /* round up so we only use full longs */
 887                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 888                /* allocate enough bytes to store all the bits */
 889                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 890                if (!mgs->pgste_bitmap) {
 891                        kfree(mgs);
 892                        kvm->arch.migration_state = NULL;
 893                        return -ENOMEM;
 894                }
 895
 896                mgs->bitmap_size = ram_pages;
 897                atomic64_set(&mgs->dirty_pages, ram_pages);
 898                /* mark all the pages in active slots as dirty */
 899                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 900                        ms = slots->memslots + slotnr;
 901                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 902                }
 903
 904                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 905        }
 906        return 0;
 907}
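
/*
 * Sizing example (illustrative): for a guest whose highest memslot ends at
 * 4 GiB, ms->base_gfn + ms->npages is 1048576 4K pages. That is already a
 * multiple of BITS_PER_LONG, so ram_pages stays 1048576 and the PGSTE dirty
 * bitmap needs ram_pages / 8 = 131072 bytes (128 KiB) - one bit for every
 * guest page up to the end of the highest slot.
 */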
 908
 909/*
 910 * Must be called with kvm->slots_lock to avoid races with ourselves and
 911 * kvm_s390_vm_start_migration.
 912 */
 913static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 914{
 915        struct kvm_s390_migration_state *mgs;
 916
 917        /* migration mode already disabled */
 918        if (!kvm->arch.migration_state)
 919                return 0;
 920        mgs = kvm->arch.migration_state;
 921        kvm->arch.migration_state = NULL;
 922
 923        if (kvm->arch.use_cmma) {
 924                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 925                /* We have to wait for the essa emulation to finish */
 926                synchronize_srcu(&kvm->srcu);
 927                vfree(mgs->pgste_bitmap);
 928        }
 929        kfree(mgs);
 930        return 0;
 931}
 932
 933static int kvm_s390_vm_set_migration(struct kvm *kvm,
 934                                     struct kvm_device_attr *attr)
 935{
 936        int res = -ENXIO;
 937
 938        mutex_lock(&kvm->slots_lock);
 939        switch (attr->attr) {
 940        case KVM_S390_VM_MIGRATION_START:
 941                res = kvm_s390_vm_start_migration(kvm);
 942                break;
 943        case KVM_S390_VM_MIGRATION_STOP:
 944                res = kvm_s390_vm_stop_migration(kvm);
 945                break;
 946        default:
 947                break;
 948        }
 949        mutex_unlock(&kvm->slots_lock);
 950
 951        return res;
 952}
 953
 954static int kvm_s390_vm_get_migration(struct kvm *kvm,
 955                                     struct kvm_device_attr *attr)
 956{
 957        u64 mig = (kvm->arch.migration_state != NULL);
 958
 959        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 960                return -ENXIO;
 961
 962        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 963                return -EFAULT;
 964        return 0;
 965}
 966
 967static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 968{
 969        struct kvm_s390_vm_tod_clock gtod;
 970
 971        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 972                return -EFAULT;
 973
 974        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
 975                return -EINVAL;
 976        kvm_s390_set_tod_clock(kvm, &gtod);
 977
 978        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 979                gtod.epoch_idx, gtod.tod);
 980
 981        return 0;
 982}
 983
 984static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 985{
 986        u8 gtod_high;
 987
 988        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 989                                           sizeof(gtod_high)))
 990                return -EFAULT;
 991
 992        if (gtod_high != 0)
 993                return -EINVAL;
 994        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 995
 996        return 0;
 997}
 998
 999static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1000{
1001        struct kvm_s390_vm_tod_clock gtod = { 0 };
1002
1003        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1004                           sizeof(gtod.tod)))
1005                return -EFAULT;
1006
1007        kvm_s390_set_tod_clock(kvm, &gtod);
1008        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1009        return 0;
1010}
1011
1012static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1013{
1014        int ret;
1015
1016        if (attr->flags)
1017                return -EINVAL;
1018
1019        switch (attr->attr) {
1020        case KVM_S390_VM_TOD_EXT:
1021                ret = kvm_s390_set_tod_ext(kvm, attr);
1022                break;
1023        case KVM_S390_VM_TOD_HIGH:
1024                ret = kvm_s390_set_tod_high(kvm, attr);
1025                break;
1026        case KVM_S390_VM_TOD_LOW:
1027                ret = kvm_s390_set_tod_low(kvm, attr);
1028                break;
1029        default:
1030                ret = -ENXIO;
1031                break;
1032        }
1033        return ret;
1034}
1035
1036static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
1037                                        struct kvm_s390_vm_tod_clock *gtod)
1038{
1039        struct kvm_s390_tod_clock_ext htod;
1040
1041        preempt_disable();
1042
1043        get_tod_clock_ext((char *)&htod);
1044
1045        gtod->tod = htod.tod + kvm->arch.epoch;
1046        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1047
1048        if (gtod->tod < htod.tod)
1049                gtod->epoch_idx += 1;
1050
1051        preempt_enable();
1052}
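
/*
 * Illustrative note: the guest TOD is the host TOD plus the guest epoch. If
 * the 64-bit addition wraps (gtod->tod ends up below htod.tod), the carry is
 * propagated into the epoch index, mirroring the 128-bit arithmetic done in
 * kvm_clock_sync_scb() above.
 */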
1053
1054static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1055{
1056        struct kvm_s390_vm_tod_clock gtod;
1057
1058        memset(&gtod, 0, sizeof(gtod));
1059
1060        if (test_kvm_facility(kvm, 139))
1061                kvm_s390_get_tod_clock_ext(kvm, &gtod);
1062        else
1063                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1064
1065        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1066                return -EFAULT;
1067
1068        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1069                gtod.epoch_idx, gtod.tod);
1070        return 0;
1071}
1072
1073static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1074{
1075        u8 gtod_high = 0;
1076
1077        if (copy_to_user((void __user *)attr->addr, &gtod_high,
1078                                         sizeof(gtod_high)))
1079                return -EFAULT;
1080        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1081
1082        return 0;
1083}
1084
1085static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1086{
1087        u64 gtod;
1088
1089        gtod = kvm_s390_get_tod_clock_fast(kvm);
1090        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1091                return -EFAULT;
1092        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1093
1094        return 0;
1095}
1096
1097static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1098{
1099        int ret;
1100
1101        if (attr->flags)
1102                return -EINVAL;
1103
1104        switch (attr->attr) {
1105        case KVM_S390_VM_TOD_EXT:
1106                ret = kvm_s390_get_tod_ext(kvm, attr);
1107                break;
1108        case KVM_S390_VM_TOD_HIGH:
1109                ret = kvm_s390_get_tod_high(kvm, attr);
1110                break;
1111        case KVM_S390_VM_TOD_LOW:
1112                ret = kvm_s390_get_tod_low(kvm, attr);
1113                break;
1114        default:
1115                ret = -ENXIO;
1116                break;
1117        }
1118        return ret;
1119}
1120
1121static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1122{
1123        struct kvm_s390_vm_cpu_processor *proc;
1124        u16 lowest_ibc, unblocked_ibc;
1125        int ret = 0;
1126
1127        mutex_lock(&kvm->lock);
1128        if (kvm->created_vcpus) {
1129                ret = -EBUSY;
1130                goto out;
1131        }
1132        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1133        if (!proc) {
1134                ret = -ENOMEM;
1135                goto out;
1136        }
1137        if (!copy_from_user(proc, (void __user *)attr->addr,
1138                            sizeof(*proc))) {
1139                kvm->arch.model.cpuid = proc->cpuid;
1140                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1141                unblocked_ibc = sclp.ibc & 0xfff;
1142                if (lowest_ibc && proc->ibc) {
1143                        if (proc->ibc > unblocked_ibc)
1144                                kvm->arch.model.ibc = unblocked_ibc;
1145                        else if (proc->ibc < lowest_ibc)
1146                                kvm->arch.model.ibc = lowest_ibc;
1147                        else
1148                                kvm->arch.model.ibc = proc->ibc;
1149                }
1150                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1151                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1152                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153                         kvm->arch.model.ibc,
1154                         kvm->arch.model.cpuid);
1155                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156                         kvm->arch.model.fac_list[0],
1157                         kvm->arch.model.fac_list[1],
1158                         kvm->arch.model.fac_list[2]);
1159        } else
1160                ret = -EFAULT;
1161        kfree(proc);
1162out:
1163        mutex_unlock(&kvm->lock);
1164        return ret;
1165}
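
/*
 * Clamping example (illustrative, assuming sclp.ibc == 0x0f020f04): the
 * machine then reports lowest_ibc == 0xf02 and unblocked_ibc == 0xf04. A
 * requested proc->ibc of 0xf01 is raised to 0xf02, 0xf05 is capped at 0xf04,
 * and anything in between is taken unchanged.
 */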
1166
1167static int kvm_s390_set_processor_feat(struct kvm *kvm,
1168                                       struct kvm_device_attr *attr)
1169{
1170        struct kvm_s390_vm_cpu_feat data;
1171
1172        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1173                return -EFAULT;
1174        if (!bitmap_subset((unsigned long *) data.feat,
1175                           kvm_s390_available_cpu_feat,
1176                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1177                return -EINVAL;
1178
1179        mutex_lock(&kvm->lock);
1180        if (kvm->created_vcpus) {
1181                mutex_unlock(&kvm->lock);
1182                return -EBUSY;
1183        }
1184        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1185                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1186        mutex_unlock(&kvm->lock);
1187        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1188                         data.feat[0],
1189                         data.feat[1],
1190                         data.feat[2]);
1191        return 0;
1192}
1193
1194static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1195                                          struct kvm_device_attr *attr)
1196{
1197        /*
1198         * Once supported by kernel + hw, we have to store the subfunctions
1199         * in kvm->arch and remember that user space configured them.
1200         */
1201        return -ENXIO;
1202}
1203
1204static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1205{
1206        int ret = -ENXIO;
1207
1208        switch (attr->attr) {
1209        case KVM_S390_VM_CPU_PROCESSOR:
1210                ret = kvm_s390_set_processor(kvm, attr);
1211                break;
1212        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1213                ret = kvm_s390_set_processor_feat(kvm, attr);
1214                break;
1215        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1216                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1217                break;
1218        }
1219        return ret;
1220}
1221
1222static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1223{
1224        struct kvm_s390_vm_cpu_processor *proc;
1225        int ret = 0;
1226
1227        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1228        if (!proc) {
1229                ret = -ENOMEM;
1230                goto out;
1231        }
1232        proc->cpuid = kvm->arch.model.cpuid;
1233        proc->ibc = kvm->arch.model.ibc;
1234        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1235               S390_ARCH_FAC_LIST_SIZE_BYTE);
1236        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1237                 kvm->arch.model.ibc,
1238                 kvm->arch.model.cpuid);
1239        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1240                 kvm->arch.model.fac_list[0],
1241                 kvm->arch.model.fac_list[1],
1242                 kvm->arch.model.fac_list[2]);
1243        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1244                ret = -EFAULT;
1245        kfree(proc);
1246out:
1247        return ret;
1248}
1249
1250static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1251{
1252        struct kvm_s390_vm_cpu_machine *mach;
1253        int ret = 0;
1254
1255        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1256        if (!mach) {
1257                ret = -ENOMEM;
1258                goto out;
1259        }
1260        get_cpu_id((struct cpuid *) &mach->cpuid);
1261        mach->ibc = sclp.ibc;
1262        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1263               S390_ARCH_FAC_LIST_SIZE_BYTE);
1264        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1265               sizeof(S390_lowcore.stfle_fac_list));
1266        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1267                 kvm->arch.model.ibc,
1268                 kvm->arch.model.cpuid);
1269        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1270                 mach->fac_mask[0],
1271                 mach->fac_mask[1],
1272                 mach->fac_mask[2]);
1273        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1274                 mach->fac_list[0],
1275                 mach->fac_list[1],
1276                 mach->fac_list[2]);
1277        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1278                ret = -EFAULT;
1279        kfree(mach);
1280out:
1281        return ret;
1282}
1283
1284static int kvm_s390_get_processor_feat(struct kvm *kvm,
1285                                       struct kvm_device_attr *attr)
1286{
1287        struct kvm_s390_vm_cpu_feat data;
1288
1289        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1290                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1291        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1292                return -EFAULT;
1293        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1294                         data.feat[0],
1295                         data.feat[1],
1296                         data.feat[2]);
1297        return 0;
1298}
1299
1300static int kvm_s390_get_machine_feat(struct kvm *kvm,
1301                                     struct kvm_device_attr *attr)
1302{
1303        struct kvm_s390_vm_cpu_feat data;
1304
1305        bitmap_copy((unsigned long *) data.feat,
1306                    kvm_s390_available_cpu_feat,
1307                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1308        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1309                return -EFAULT;
1310        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1311                         data.feat[0],
1312                         data.feat[1],
1313                         data.feat[2]);
1314        return 0;
1315}
1316
1317static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1318                                          struct kvm_device_attr *attr)
1319{
1320        /*
1321         * Once we can actually configure subfunctions (kernel + hw support),
1322         * we have to check if they were already set by user space, if so copy
1323         * them from kvm->arch.
1324         */
1325        return -ENXIO;
1326}
1327
1328static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1329                                        struct kvm_device_attr *attr)
1330{
1331        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1332            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1333                return -EFAULT;
1334        return 0;
1335}
1336static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1337{
1338        int ret = -ENXIO;
1339
1340        switch (attr->attr) {
1341        case KVM_S390_VM_CPU_PROCESSOR:
1342                ret = kvm_s390_get_processor(kvm, attr);
1343                break;
1344        case KVM_S390_VM_CPU_MACHINE:
1345                ret = kvm_s390_get_machine(kvm, attr);
1346                break;
1347        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1348                ret = kvm_s390_get_processor_feat(kvm, attr);
1349                break;
1350        case KVM_S390_VM_CPU_MACHINE_FEAT:
1351                ret = kvm_s390_get_machine_feat(kvm, attr);
1352                break;
1353        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1354                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1355                break;
1356        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1357                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1358                break;
1359        }
1360        return ret;
1361}
1362
1363static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1364{
1365        int ret;
1366
1367        switch (attr->group) {
1368        case KVM_S390_VM_MEM_CTRL:
1369                ret = kvm_s390_set_mem_control(kvm, attr);
1370                break;
1371        case KVM_S390_VM_TOD:
1372                ret = kvm_s390_set_tod(kvm, attr);
1373                break;
1374        case KVM_S390_VM_CPU_MODEL:
1375                ret = kvm_s390_set_cpu_model(kvm, attr);
1376                break;
1377        case KVM_S390_VM_CRYPTO:
1378                ret = kvm_s390_vm_set_crypto(kvm, attr);
1379                break;
1380        case KVM_S390_VM_MIGRATION:
1381                ret = kvm_s390_vm_set_migration(kvm, attr);
1382                break;
1383        default:
1384                ret = -ENXIO;
1385                break;
1386        }
1387
1388        return ret;
1389}
1390
1391static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1392{
1393        int ret;
1394
1395        switch (attr->group) {
1396        case KVM_S390_VM_MEM_CTRL:
1397                ret = kvm_s390_get_mem_control(kvm, attr);
1398                break;
1399        case KVM_S390_VM_TOD:
1400                ret = kvm_s390_get_tod(kvm, attr);
1401                break;
1402        case KVM_S390_VM_CPU_MODEL:
1403                ret = kvm_s390_get_cpu_model(kvm, attr);
1404                break;
1405        case KVM_S390_VM_MIGRATION:
1406                ret = kvm_s390_vm_get_migration(kvm, attr);
1407                break;
1408        default:
1409                ret = -ENXIO;
1410                break;
1411        }
1412
1413        return ret;
1414}
1415
1416static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1417{
1418        int ret;
1419
1420        switch (attr->group) {
1421        case KVM_S390_VM_MEM_CTRL:
1422                switch (attr->attr) {
1423                case KVM_S390_VM_MEM_ENABLE_CMMA:
1424                case KVM_S390_VM_MEM_CLR_CMMA:
1425                        ret = sclp.has_cmma ? 0 : -ENXIO;
1426                        break;
1427                case KVM_S390_VM_MEM_LIMIT_SIZE:
1428                        ret = 0;
1429                        break;
1430                default:
1431                        ret = -ENXIO;
1432                        break;
1433                }
1434                break;
1435        case KVM_S390_VM_TOD:
1436                switch (attr->attr) {
1437                case KVM_S390_VM_TOD_LOW:
1438                case KVM_S390_VM_TOD_HIGH:
1439                        ret = 0;
1440                        break;
1441                default:
1442                        ret = -ENXIO;
1443                        break;
1444                }
1445                break;
1446        case KVM_S390_VM_CPU_MODEL:
1447                switch (attr->attr) {
1448                case KVM_S390_VM_CPU_PROCESSOR:
1449                case KVM_S390_VM_CPU_MACHINE:
1450                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1451                case KVM_S390_VM_CPU_MACHINE_FEAT:
1452                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1453                        ret = 0;
1454                        break;
1455                /* configuring subfunctions is not supported yet */
1456                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1457                default:
1458                        ret = -ENXIO;
1459                        break;
1460                }
1461                break;
1462        case KVM_S390_VM_CRYPTO:
1463                switch (attr->attr) {
1464                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1465                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1466                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1467                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1468                        ret = 0;
1469                        break;
1470                default:
1471                        ret = -ENXIO;
1472                        break;
1473                }
1474                break;
1475        case KVM_S390_VM_MIGRATION:
1476                ret = 0;
1477                break;
1478        default:
1479                ret = -ENXIO;
1480                break;
1481        }
1482
1483        return ret;
1484}
1485
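/*
 * Editor's note (not part of the original source): kvm_s390_vm_set_attr(),
 * kvm_s390_vm_get_attr() and kvm_s390_vm_has_attr() above back the
 * KVM_SET_DEVICE_ATTR, KVM_GET_DEVICE_ATTR and KVM_HAS_DEVICE_ATTR vm
 * ioctls dispatched from kvm_arch_vm_ioctl() further down.  A VMM would
 * typically probe an attribute before using it; a minimal userspace sketch,
 * assuming a valid VM file descriptor vm_fd:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
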
1486static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1487{
1488        uint8_t *keys;
1489        uint64_t hva;
1490        int srcu_idx, i, r = 0;
1491
1492        if (args->flags != 0)
1493                return -EINVAL;
1494
1495        /* Is this guest using storage keys? */
1496        if (!mm_use_skey(current->mm))
1497                return KVM_S390_GET_SKEYS_NONE;
1498
1499        /* Enforce sane limit on memory allocation */
1500        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1501                return -EINVAL;
1502
1503        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1504        if (!keys)
1505                return -ENOMEM;
1506
1507        down_read(&current->mm->mmap_sem);
1508        srcu_idx = srcu_read_lock(&kvm->srcu);
1509        for (i = 0; i < args->count; i++) {
1510                hva = gfn_to_hva(kvm, args->start_gfn + i);
1511                if (kvm_is_error_hva(hva)) {
1512                        r = -EFAULT;
1513                        break;
1514                }
1515
1516                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1517                if (r)
1518                        break;
1519        }
1520        srcu_read_unlock(&kvm->srcu, srcu_idx);
1521        up_read(&current->mm->mmap_sem);
1522
1523        if (!r) {
1524                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1525                                 sizeof(uint8_t) * args->count);
1526                if (r)
1527                        r = -EFAULT;
1528        }
1529
1530        kvfree(keys);
1531        return r;
1532}
1533
1534static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1535{
1536        uint8_t *keys;
1537        uint64_t hva;
1538        int srcu_idx, i, r = 0;
1539
1540        if (args->flags != 0)
1541                return -EINVAL;
1542
1543        /* Enforce sane limit on memory allocation */
1544        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1545                return -EINVAL;
1546
1547        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1548        if (!keys)
1549                return -ENOMEM;
1550
1551        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1552                           sizeof(uint8_t) * args->count);
1553        if (r) {
1554                r = -EFAULT;
1555                goto out;
1556        }
1557
1558        /* Enable storage key handling for the guest */
1559        r = s390_enable_skey();
1560        if (r)
1561                goto out;
1562
1563        down_read(&current->mm->mmap_sem);
1564        srcu_idx = srcu_read_lock(&kvm->srcu);
1565        for (i = 0; i < args->count; i++) {
1566                hva = gfn_to_hva(kvm, args->start_gfn + i);
1567                if (kvm_is_error_hva(hva)) {
1568                        r = -EFAULT;
1569                        break;
1570                }
1571
1572                /* Lowest order bit is reserved */
1573                if (keys[i] & 0x01) {
1574                        r = -EINVAL;
1575                        break;
1576                }
1577
1578                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1579                if (r)
1580                        break;
1581        }
1582        srcu_read_unlock(&kvm->srcu, srcu_idx);
1583        up_read(&current->mm->mmap_sem);
1584out:
1585        kvfree(keys);
1586        return r;
1587}
1588
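/*
 * Editor's note (not part of the original source): KVM_S390_GET_SKEYS and
 * KVM_S390_SET_SKEYS transfer one storage key byte per guest page; the
 * lowest order bit of each key is reserved (see the check in
 * kvm_s390_set_skeys() above).  A hedged userspace sketch for reading keys,
 * where buf and npages are caller-provided:
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = npages,	// at most KVM_S390_SKEYS_MAX
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	// rc == KVM_S390_GET_SKEYS_NONE: the guest never enabled skeys
 */
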
1589/*
1590 * Base address and length must be sent at the start of each block, therefore
1591 * it's cheaper to send some clean data, as long as it's less than the size of
1592 * two longs.
1593 */
1594#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
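/*
 * Editor's note (not part of the original source): on s390x sizeof(void *)
 * is 8, so KVM_S390_MAX_BIT_DISTANCE evaluates to 16.  Each page costs one
 * byte in the output buffer, while starting a new block costs two 8-byte
 * values (base address and length), so a run of up to 16 clean pages is no
 * more expensive to send inline than to skip.
 */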
1595/* for consistency */
1596#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1597
1598/*
1599 * This function searches for the next page with dirty CMMA attributes, and
1600 * saves the attributes in the buffer up to either the end of the buffer or
1601 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1602 * no trailing clean bytes are saved.
1603 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1604 * output buffer will indicate 0 as length.
1605 */
1606static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1607                                  struct kvm_s390_cmma_log *args)
1608{
1609        struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1610        unsigned long bufsize, hva, pgstev, i, next, cur;
1611        int srcu_idx, peek, r = 0, rr;
1612        u8 *res;
1613
1614        cur = args->start_gfn;
1615        i = next = pgstev = 0;
1616
1617        if (unlikely(!kvm->arch.use_cmma))
1618                return -ENXIO;
1619        /* Invalid/unsupported flags were specified */
1620        if (args->flags & ~KVM_S390_CMMA_PEEK)
1621                return -EINVAL;
1622        /* Migration mode query, and we are not doing a migration */
1623        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1624        if (!peek && !s)
1625                return -EINVAL;
1626        /* CMMA is disabled or was not used, or the buffer has length zero */
1627        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1628        if (!bufsize || !kvm->mm->context.uses_cmm) {
1629                memset(args, 0, sizeof(*args));
1630                return 0;
1631        }
1632
1633        if (!peek) {
1634                /* We are not peeking, and there are no dirty pages */
1635                if (!atomic64_read(&s->dirty_pages)) {
1636                        memset(args, 0, sizeof(*args));
1637                        return 0;
1638                }
1639                cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1640                                    args->start_gfn);
1641                if (cur >= s->bitmap_size)      /* nothing found, loop back */
1642                        cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1643                if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1644                        memset(args, 0, sizeof(*args));
1645                        return 0;
1646                }
1647                next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1648        }
1649
1650        res = vmalloc(bufsize);
1651        if (!res)
1652                return -ENOMEM;
1653
1654        args->start_gfn = cur;
1655
1656        down_read(&kvm->mm->mmap_sem);
1657        srcu_idx = srcu_read_lock(&kvm->srcu);
1658        while (i < bufsize) {
1659                hva = gfn_to_hva(kvm, cur);
1660                if (kvm_is_error_hva(hva)) {
1661                        r = -EFAULT;
1662                        break;
1663                }
1664                /* decrement only if we actually flipped the bit to 0 */
1665                if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1666                        atomic64_dec(&s->dirty_pages);
1667                r = get_pgste(kvm->mm, hva, &pgstev);
1668                if (r < 0)
1669                        pgstev = 0;
1670                /* save the value */
1671                res[i++] = (pgstev >> 24) & 0x43;
1672                /*
1673                 * if the next bit is too far away, stop.
1674                 * if we reached the previous "next", find the next one
1675                 */
1676                if (!peek) {
1677                        if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1678                                break;
1679                        if (cur == next)
1680                                next = find_next_bit(s->pgste_bitmap,
1681                                                     s->bitmap_size, cur + 1);
1682                        /* reached the end of the bitmap or of the buffer, stop */
1683                        if ((next >= s->bitmap_size) ||
1684                            (next >= args->start_gfn + bufsize))
1685                                break;
1686                }
1687                cur++;
1688        }
1689        srcu_read_unlock(&kvm->srcu, srcu_idx);
1690        up_read(&kvm->mm->mmap_sem);
1691        args->count = i;
1692        args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1693
1694        rr = copy_to_user((void __user *)args->values, res, args->count);
1695        if (rr)
1696                r = -EFAULT;
1697
1698        vfree(res);
1699        return r;
1700}
1701
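/*
 * Editor's note (not part of the original source): userspace migration code
 * typically calls KVM_S390_GET_CMMA_BITS in a loop and lets the kernel pick
 * the next dirty range.  A minimal sketch, assuming vm_fd and a values
 * buffer buf of KVM_S390_CMMA_SIZE_MAX bytes:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = KVM_S390_CMMA_SIZE_MAX,
 *		.flags     = 0,		// non-peek mode needs migration mode
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log))
 *			break;
 *		// log.start_gfn and log.count now describe the saved range
 *		log.start_gfn += log.count;
 *		log.count = KVM_S390_CMMA_SIZE_MAX;
 *	} while (log.remaining);
 */
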
1702/*
1703 * This function sets the CMMA attributes for the given pages. If the input
1704 * buffer has zero length, no action is taken, otherwise the attributes are
1705 * set and the mm->context.uses_cmm flag is set.
1706 */
1707static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1708                                  const struct kvm_s390_cmma_log *args)
1709{
1710        unsigned long hva, mask, pgstev, i;
1711        uint8_t *bits;
1712        int srcu_idx, r = 0;
1713
1714        mask = args->mask;
1715
1716        if (!kvm->arch.use_cmma)
1717                return -ENXIO;
1718        /* invalid/unsupported flags */
1719        if (args->flags != 0)
1720                return -EINVAL;
1721        /* Enforce sane limit on memory allocation */
1722        if (args->count > KVM_S390_CMMA_SIZE_MAX)
1723                return -EINVAL;
1724        /* Nothing to do */
1725        if (args->count == 0)
1726                return 0;
1727
1728        bits = vmalloc(sizeof(*bits) * args->count);
1729        if (!bits)
1730                return -ENOMEM;
1731
1732        r = copy_from_user(bits, (void __user *)args->values, args->count);
1733        if (r) {
1734                r = -EFAULT;
1735                goto out;
1736        }
1737
1738        down_read(&kvm->mm->mmap_sem);
1739        srcu_idx = srcu_read_lock(&kvm->srcu);
1740        for (i = 0; i < args->count; i++) {
1741                hva = gfn_to_hva(kvm, args->start_gfn + i);
1742                if (kvm_is_error_hva(hva)) {
1743                        r = -EFAULT;
1744                        break;
1745                }
1746
1747                pgstev = bits[i];
1748                pgstev = pgstev << 24;
1749                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1750                set_pgste_bits(kvm->mm, hva, mask, pgstev);
1751        }
1752        srcu_read_unlock(&kvm->srcu, srcu_idx);
1753        up_read(&kvm->mm->mmap_sem);
1754
1755        if (!kvm->mm->context.uses_cmm) {
1756                down_write(&kvm->mm->mmap_sem);
1757                kvm->mm->context.uses_cmm = 1;
1758                up_write(&kvm->mm->mmap_sem);
1759        }
1760out:
1761        vfree(bits);
1762        return r;
1763}
1764
1765long kvm_arch_vm_ioctl(struct file *filp,
1766                       unsigned int ioctl, unsigned long arg)
1767{
1768        struct kvm *kvm = filp->private_data;
1769        void __user *argp = (void __user *)arg;
1770        struct kvm_device_attr attr;
1771        int r;
1772
1773        switch (ioctl) {
1774        case KVM_S390_INTERRUPT: {
1775                struct kvm_s390_interrupt s390int;
1776
1777                r = -EFAULT;
1778                if (copy_from_user(&s390int, argp, sizeof(s390int)))
1779                        break;
1780                r = kvm_s390_inject_vm(kvm, &s390int);
1781                break;
1782        }
1783        case KVM_ENABLE_CAP: {
1784                struct kvm_enable_cap cap;
1785                r = -EFAULT;
1786                if (copy_from_user(&cap, argp, sizeof(cap)))
1787                        break;
1788                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1789                break;
1790        }
1791        case KVM_CREATE_IRQCHIP: {
1792                struct kvm_irq_routing_entry routing;
1793
1794                r = -EINVAL;
1795                if (kvm->arch.use_irqchip) {
1796                        /* Set up dummy routing. */
1797                        memset(&routing, 0, sizeof(routing));
1798                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1799                }
1800                break;
1801        }
1802        case KVM_SET_DEVICE_ATTR: {
1803                r = -EFAULT;
1804                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1805                        break;
1806                r = kvm_s390_vm_set_attr(kvm, &attr);
1807                break;
1808        }
1809        case KVM_GET_DEVICE_ATTR: {
1810                r = -EFAULT;
1811                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1812                        break;
1813                r = kvm_s390_vm_get_attr(kvm, &attr);
1814                break;
1815        }
1816        case KVM_HAS_DEVICE_ATTR: {
1817                r = -EFAULT;
1818                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1819                        break;
1820                r = kvm_s390_vm_has_attr(kvm, &attr);
1821                break;
1822        }
1823        case KVM_S390_GET_SKEYS: {
1824                struct kvm_s390_skeys args;
1825
1826                r = -EFAULT;
1827                if (copy_from_user(&args, argp,
1828                                   sizeof(struct kvm_s390_skeys)))
1829                        break;
1830                r = kvm_s390_get_skeys(kvm, &args);
1831                break;
1832        }
1833        case KVM_S390_SET_SKEYS: {
1834                struct kvm_s390_skeys args;
1835
1836                r = -EFAULT;
1837                if (copy_from_user(&args, argp,
1838                                   sizeof(struct kvm_s390_skeys)))
1839                        break;
1840                r = kvm_s390_set_skeys(kvm, &args);
1841                break;
1842        }
1843        case KVM_S390_GET_CMMA_BITS: {
1844                struct kvm_s390_cmma_log args;
1845
1846                r = -EFAULT;
1847                if (copy_from_user(&args, argp, sizeof(args)))
1848                        break;
1849                mutex_lock(&kvm->slots_lock);
1850                r = kvm_s390_get_cmma_bits(kvm, &args);
1851                mutex_unlock(&kvm->slots_lock);
1852                if (!r) {
1853                        r = copy_to_user(argp, &args, sizeof(args));
1854                        if (r)
1855                                r = -EFAULT;
1856                }
1857                break;
1858        }
1859        case KVM_S390_SET_CMMA_BITS: {
1860                struct kvm_s390_cmma_log args;
1861
1862                r = -EFAULT;
1863                if (copy_from_user(&args, argp, sizeof(args)))
1864                        break;
1865                mutex_lock(&kvm->slots_lock);
1866                r = kvm_s390_set_cmma_bits(kvm, &args);
1867                mutex_unlock(&kvm->slots_lock);
1868                break;
1869        }
1870        default:
1871                r = -ENOTTY;
1872        }
1873
1874        return r;
1875}
1876
1877static int kvm_s390_query_ap_config(u8 *config)
1878{
1879        u32 fcn_code = 0x04000000UL;
1880        u32 cc = 0;
1881
1882        memset(config, 0, 128);
1883        asm volatile(
1884                "lgr 0,%1\n"
1885                "lgr 2,%2\n"
1886                ".long 0xb2af0000\n"            /* PQAP(QCI) */
1887                "0: ipm %0\n"
1888                "srl %0,28\n"
1889                "1:\n"
1890                EX_TABLE(0b, 1b)
1891                : "+r" (cc)
1892                : "r" (fcn_code), "r" (config)
1893                : "cc", "0", "2", "memory"
1894        );
1895
1896        return cc;
1897}
1898
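/*
 * Editor's note (not part of the original source): the .long 0xb2af0000
 * above encodes the PQAP instruction.  The QCI function code (0x04000000)
 * is passed in general register 0 and the address of the 128-byte response
 * buffer in general register 2; kvm_s390_apxa_installed() below only issues
 * the query when facility 12 is available and then tests the APXA bit in
 * the first response byte.  The EX_TABLE entry skips the condition-code
 * extraction if the instruction raises an exception, leaving cc at 0 and
 * the buffer zeroed.
 */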
1899static int kvm_s390_apxa_installed(void)
1900{
1901        u8 config[128];
1902        int cc;
1903
1904        if (test_facility(12)) {
1905                cc = kvm_s390_query_ap_config(config);
1906
1907                if (cc)
1908                        pr_err("PQAP(QCI) failed with cc=%d", cc);
1909                else
1910                        return config[0] & 0x40;
1911        }
1912
1913        return 0;
1914}
1915
1916static void kvm_s390_set_crycb_format(struct kvm *kvm)
1917{
1918        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1919
1920        if (kvm_s390_apxa_installed())
1921                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1922        else
1923                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1924}
1925
1926static u64 kvm_s390_get_initial_cpuid(void)
1927{
1928        struct cpuid cpuid;
1929
1930        get_cpu_id(&cpuid);
1931        cpuid.version = 0xff;
1932        return *((u64 *) &cpuid);
1933}
1934
1935static void kvm_s390_crypto_init(struct kvm *kvm)
1936{
1937        if (!test_kvm_facility(kvm, 76))
1938                return;
1939
1940        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1941        kvm_s390_set_crycb_format(kvm);
1942
1943        /* Enable AES/DEA protected key functions by default */
1944        kvm->arch.crypto.aes_kw = 1;
1945        kvm->arch.crypto.dea_kw = 1;
1946        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1948        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1949                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1950}
1951
1952static void sca_dispose(struct kvm *kvm)
1953{
1954        if (kvm->arch.use_esca)
1955                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1956        else
1957                free_page((unsigned long)(kvm->arch.sca));
1958        kvm->arch.sca = NULL;
1959}
1960
1961int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1962{
1963        gfp_t alloc_flags = GFP_KERNEL;
1964        int i, rc;
1965        char debug_name[16];
1966        static unsigned long sca_offset;
1967
1968        rc = -EINVAL;
1969#ifdef CONFIG_KVM_S390_UCONTROL
1970        if (type & ~KVM_VM_S390_UCONTROL)
1971                goto out_err;
1972        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1973                goto out_err;
1974#else
1975        if (type)
1976                goto out_err;
1977#endif
1978
1979        rc = s390_enable_sie();
1980        if (rc)
1981                goto out_err;
1982
1983        rc = -ENOMEM;
1984
1985        kvm->arch.use_esca = 0; /* start with basic SCA */
1986        if (!sclp.has_64bscao)
1987                alloc_flags |= GFP_DMA;
1988        rwlock_init(&kvm->arch.sca_lock);
1989        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1990        if (!kvm->arch.sca)
1991                goto out_err;
1992        spin_lock(&kvm_lock);
1993        sca_offset += 16;
1994        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1995                sca_offset = 0;
1996        kvm->arch.sca = (struct bsca_block *)
1997                        ((char *) kvm->arch.sca + sca_offset);
1998        spin_unlock(&kvm_lock);
1999
2000        sprintf(debug_name, "kvm-%u", current->pid);
2001
2002        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2003        if (!kvm->arch.dbf)
2004                goto out_err;
2005
2006        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2007        kvm->arch.sie_page2 =
2008             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2009        if (!kvm->arch.sie_page2)
2010                goto out_err;
2011
2012        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2013
2014        for (i = 0; i < kvm_s390_fac_size(); i++) {
2015                kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2016                                              (kvm_s390_fac_base[i] |
2017                                               kvm_s390_fac_ext[i]);
2018                kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2019                                              kvm_s390_fac_base[i];
2020        }
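        /*
         * Editor's note (not part of the original source): fac_mask holds
         * the facilities KVM is willing to expose to this VM (host
         * facilities filtered by kvm_s390_fac_base/ext), while fac_list is
         * the subset the guest actually sees; userspace may later shrink it
         * via the KVM_S390_VM_CPU_PROCESSOR attribute, but never beyond
         * fac_mask.
         */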
2021
2022        /* we are always in czam mode - even on pre z14 machines */
2023        set_kvm_facility(kvm->arch.model.fac_mask, 138);
2024        set_kvm_facility(kvm->arch.model.fac_list, 138);
2025        /* we emulate STHYI in kvm */
2026        set_kvm_facility(kvm->arch.model.fac_mask, 74);
2027        set_kvm_facility(kvm->arch.model.fac_list, 74);
2028        if (MACHINE_HAS_TLB_GUEST) {
2029                set_kvm_facility(kvm->arch.model.fac_mask, 147);
2030                set_kvm_facility(kvm->arch.model.fac_list, 147);
2031        }
2032
2033        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2034        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2035
2036        kvm_s390_crypto_init(kvm);
2037
2038        mutex_init(&kvm->arch.float_int.ais_lock);
2039        kvm->arch.float_int.simm = 0;
2040        kvm->arch.float_int.nimm = 0;
2041        spin_lock_init(&kvm->arch.float_int.lock);
2042        for (i = 0; i < FIRQ_LIST_COUNT; i++)
2043                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2044        init_waitqueue_head(&kvm->arch.ipte_wq);
2045        mutex_init(&kvm->arch.ipte_mutex);
2046
2047        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2048        VM_EVENT(kvm, 3, "vm created with type %lu", type);
2049
2050        if (type & KVM_VM_S390_UCONTROL) {
2051                kvm->arch.gmap = NULL;
2052                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2053        } else {
2054                if (sclp.hamax == U64_MAX)
2055                        kvm->arch.mem_limit = TASK_SIZE_MAX;
2056                else
2057                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2058                                                    sclp.hamax + 1);
2059                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2060                if (!kvm->arch.gmap)
2061                        goto out_err;
2062                kvm->arch.gmap->private = kvm;
2063                kvm->arch.gmap->pfault_enabled = 0;
2064        }
2065
2066        kvm->arch.css_support = 0;
2067        kvm->arch.use_irqchip = 0;
2068        kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069        kvm->arch.epoch = 0;
2070
2071        spin_lock_init(&kvm->arch.start_stop_lock);
2072        kvm_s390_vsie_init(kvm);
2073        kvm_s390_gisa_init(kvm);
2074        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2075
2076        return 0;
2077out_err:
2078        free_page((unsigned long)kvm->arch.sie_page2);
2079        debug_unregister(kvm->arch.dbf);
2080        sca_dispose(kvm);
2081        KVM_EVENT(3, "creation of vm failed: %d", rc);
2082        return rc;
2083}
2084
2085bool kvm_arch_has_vcpu_debugfs(void)
2086{
2087        return false;
2088}
2089
2090int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2091{
2092        return 0;
2093}
2094
2095void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2096{
2097        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2098        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2099        kvm_s390_clear_local_irqs(vcpu);
2100        kvm_clear_async_pf_completion_queue(vcpu);
2101        if (!kvm_is_ucontrol(vcpu->kvm))
2102                sca_del_vcpu(vcpu);
2103
2104        if (kvm_is_ucontrol(vcpu->kvm))
2105                gmap_remove(vcpu->arch.gmap);
2106
2107        if (vcpu->kvm->arch.use_cmma)
2108                kvm_s390_vcpu_unsetup_cmma(vcpu);
2109        free_page((unsigned long)(vcpu->arch.sie_block));
2110
2111        kvm_vcpu_uninit(vcpu);
2112        kmem_cache_free(kvm_vcpu_cache, vcpu);
2113}
2114
2115static void kvm_free_vcpus(struct kvm *kvm)
2116{
2117        unsigned int i;
2118        struct kvm_vcpu *vcpu;
2119
2120        kvm_for_each_vcpu(i, vcpu, kvm)
2121                kvm_arch_vcpu_destroy(vcpu);
2122
2123        mutex_lock(&kvm->lock);
2124        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2125                kvm->vcpus[i] = NULL;
2126
2127        atomic_set(&kvm->online_vcpus, 0);
2128        mutex_unlock(&kvm->lock);
2129}
2130
2131void kvm_arch_destroy_vm(struct kvm *kvm)
2132{
2133        kvm_free_vcpus(kvm);
2134        sca_dispose(kvm);
2135        debug_unregister(kvm->arch.dbf);
2136        kvm_s390_gisa_destroy(kvm);
2137        free_page((unsigned long)kvm->arch.sie_page2);
2138        if (!kvm_is_ucontrol(kvm))
2139                gmap_remove(kvm->arch.gmap);
2140        kvm_s390_destroy_adapters(kvm);
2141        kvm_s390_clear_float_irqs(kvm);
2142        kvm_s390_vsie_destroy(kvm);
2143        if (kvm->arch.migration_state) {
2144                vfree(kvm->arch.migration_state->pgste_bitmap);
2145                kfree(kvm->arch.migration_state);
2146        }
2147        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2148}
2149
2150/* Section: vcpu related */
2151static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2152{
2153        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2154        if (!vcpu->arch.gmap)
2155                return -ENOMEM;
2156        vcpu->arch.gmap->private = vcpu->kvm;
2157
2158        return 0;
2159}
2160
2161static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2162{
2163        if (!kvm_s390_use_sca_entries())
2164                return;
2165        read_lock(&vcpu->kvm->arch.sca_lock);
2166        if (vcpu->kvm->arch.use_esca) {
2167                struct esca_block *sca = vcpu->kvm->arch.sca;
2168
2169                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2170                sca->cpu[vcpu->vcpu_id].sda = 0;
2171        } else {
2172                struct bsca_block *sca = vcpu->kvm->arch.sca;
2173
2174                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2175                sca->cpu[vcpu->vcpu_id].sda = 0;
2176        }
2177        read_unlock(&vcpu->kvm->arch.sca_lock);
2178}
2179
2180static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2181{
2182        if (!kvm_s390_use_sca_entries()) {
2183                struct bsca_block *sca = vcpu->kvm->arch.sca;
2184
2185                /* we still need the basic sca for the ipte control */
2186                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2187                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2188                return;
2189        }
2190        read_lock(&vcpu->kvm->arch.sca_lock);
2191        if (vcpu->kvm->arch.use_esca) {
2192                struct esca_block *sca = vcpu->kvm->arch.sca;
2193
2194                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2195                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2196                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2197                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2198                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2199        } else {
2200                struct bsca_block *sca = vcpu->kvm->arch.sca;
2201
2202                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2203                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2204                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2205                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2206        }
2207        read_unlock(&vcpu->kvm->arch.sca_lock);
2208}
2209
2210/* Basic SCA to Extended SCA data copy routines */
2211static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2212{
2213        d->sda = s->sda;
2214        d->sigp_ctrl.c = s->sigp_ctrl.c;
2215        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2216}
2217
2218static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2219{
2220        int i;
2221
2222        d->ipte_control = s->ipte_control;
2223        d->mcn[0] = s->mcn;
2224        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2225                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2226}
2227
2228static int sca_switch_to_extended(struct kvm *kvm)
2229{
2230        struct bsca_block *old_sca = kvm->arch.sca;
2231        struct esca_block *new_sca;
2232        struct kvm_vcpu *vcpu;
2233        unsigned int vcpu_idx;
2234        u32 scaol, scaoh;
2235
2236        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2237        if (!new_sca)
2238                return -ENOMEM;
2239
2240        scaoh = (u32)((u64)(new_sca) >> 32);
2241        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2242
2243        kvm_s390_vcpu_block_all(kvm);
2244        write_lock(&kvm->arch.sca_lock);
2245
2246        sca_copy_b_to_e(new_sca, old_sca);
2247
2248        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2249                vcpu->arch.sie_block->scaoh = scaoh;
2250                vcpu->arch.sie_block->scaol = scaol;
2251                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252        }
2253        kvm->arch.sca = new_sca;
2254        kvm->arch.use_esca = 1;
2255
2256        write_unlock(&kvm->arch.sca_lock);
2257        kvm_s390_vcpu_unblock_all(kvm);
2258
2259        free_page((unsigned long)old_sca);
2260
2261        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2262                 old_sca, kvm->arch.sca);
2263        return 0;
2264}
2265
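/*
 * Editor's note (not part of the original source): a VM starts with the
 * basic SCA, which only provides KVM_S390_BSCA_CPU_SLOTS entries.
 * sca_can_add_vcpu() below transparently switches to the extended SCA the
 * first time a vcpu id beyond that range is created, provided the machine
 * offers both the ESCA and the 64-bit-SCA-origin facilities.
 */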
2266static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2267{
2268        int rc;
2269
2270        if (!kvm_s390_use_sca_entries()) {
2271                if (id < KVM_MAX_VCPUS)
2272                        return true;
2273                return false;
2274        }
2275        if (id < KVM_S390_BSCA_CPU_SLOTS)
2276                return true;
2277        if (!sclp.has_esca || !sclp.has_64bscao)
2278                return false;
2279
2280        mutex_lock(&kvm->lock);
2281        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2282        mutex_unlock(&kvm->lock);
2283
2284        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2285}
2286
2287int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2288{
2289        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2290        kvm_clear_async_pf_completion_queue(vcpu);
2291        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2292                                    KVM_SYNC_GPRS |
2293                                    KVM_SYNC_ACRS |
2294                                    KVM_SYNC_CRS |
2295                                    KVM_SYNC_ARCH0 |
2296                                    KVM_SYNC_PFAULT;
2297        kvm_s390_set_prefix(vcpu, 0);
2298        if (test_kvm_facility(vcpu->kvm, 64))
2299                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2300        if (test_kvm_facility(vcpu->kvm, 82))
2301                vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2302        if (test_kvm_facility(vcpu->kvm, 133))
2303                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2304        /* fprs can be synchronized via vrs, even if the guest has no vx. With
2305         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2306         */
2307        if (MACHINE_HAS_VX)
2308                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2309        else
2310                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2311
2312        if (kvm_is_ucontrol(vcpu->kvm))
2313                return __kvm_ucontrol_vcpu_init(vcpu);
2314
2315        return 0;
2316}
2317
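/*
 * Editor's note (not part of the original source): the helpers below step
 * the guest CPU timer in software for the time a VCPU thread spends outside
 * SIE while timer accounting is enabled (e.g. during the KVM_RUN ioctl).
 * cputm_start records the host TOD clock when accounting was (re)started;
 * stopping the accounting subtracts the elapsed TOD delta from
 * sie_block->cputm (the CPU timer counts down), and
 * kvm_s390_get_cpu_timer() applies the same correction on the fly.  The
 * seqcount allows other threads to read a consistent value without locking.
 */
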
2318/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2319static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320{
2321        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2322        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2323        vcpu->arch.cputm_start = get_tod_clock_fast();
2324        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325}
2326
2327/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2328static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329{
2330        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2331        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2333        vcpu->arch.cputm_start = 0;
2334        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2335}
2336
2337/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2338static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2339{
2340        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2341        vcpu->arch.cputm_enabled = true;
2342        __start_cpu_timer_accounting(vcpu);
2343}
2344
2345/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2346static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2347{
2348        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2349        __stop_cpu_timer_accounting(vcpu);
2350        vcpu->arch.cputm_enabled = false;
2351}
2352
2353static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2354{
2355        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2356        __enable_cpu_timer_accounting(vcpu);
2357        preempt_enable();
2358}
2359
2360static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2361{
2362        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2363        __disable_cpu_timer_accounting(vcpu);
2364        preempt_enable();
2365}
2366
2367/* set the cpu timer - may only be called from the VCPU thread itself */
2368void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2369{
2370        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2371        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2372        if (vcpu->arch.cputm_enabled)
2373                vcpu->arch.cputm_start = get_tod_clock_fast();
2374        vcpu->arch.sie_block->cputm = cputm;
2375        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2376        preempt_enable();
2377}
2378
2379/* update and get the cpu timer - can also be called from other VCPU threads */
2380__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2381{
2382        unsigned int seq;
2383        __u64 value;
2384
2385        if (unlikely(!vcpu->arch.cputm_enabled))
2386                return vcpu->arch.sie_block->cputm;
2387
2388        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2389        do {
2390                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2391                /*
2392                 * If the writer would ever execute a read in the critical
2393                 * section, e.g. in irq context, we have a deadlock.
2394                 */
2395                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2396                value = vcpu->arch.sie_block->cputm;
2397                /* if cputm_start is 0, accounting is being started/stopped */
2398                if (likely(vcpu->arch.cputm_start))
2399                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2400        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2401        preempt_enable();
2402        return value;
2403}
2404
2405void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2406{
2407
2408        gmap_enable(vcpu->arch.enabled_gmap);
2409        kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2410        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2411                __start_cpu_timer_accounting(vcpu);
2412        vcpu->cpu = cpu;
2413}
2414
2415void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2416{
2417        vcpu->cpu = -1;
2418        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2419                __stop_cpu_timer_accounting(vcpu);
2420        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2421        vcpu->arch.enabled_gmap = gmap_get_enabled();
2422        gmap_disable(vcpu->arch.enabled_gmap);
2423
2424}
2425
2426static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2427{
2428        /* this equals initial cpu reset in pop, but we don't switch to ESA */
2429        vcpu->arch.sie_block->gpsw.mask = 0UL;
2430        vcpu->arch.sie_block->gpsw.addr = 0UL;
2431        kvm_s390_set_prefix(vcpu, 0);
2432        kvm_s390_set_cpu_timer(vcpu, 0);
2433        vcpu->arch.sie_block->ckc       = 0UL;
2434        vcpu->arch.sie_block->todpr     = 0;
2435        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2436        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2437        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2438        /* make sure the new fpc will be lazily loaded */
2439        save_fpu_regs();
2440        current->thread.fpu.fpc = 0;
2441        vcpu->arch.sie_block->gbea = 1;
2442        vcpu->arch.sie_block->pp = 0;
2443        vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2444        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2445        kvm_clear_async_pf_completion_queue(vcpu);
2446        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2447                kvm_s390_vcpu_stop(vcpu);
2448        kvm_s390_clear_local_irqs(vcpu);
2449}
2450
2451void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2452{
2453        mutex_lock(&vcpu->kvm->lock);
2454        preempt_disable();
2455        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2456        vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2457        preempt_enable();
2458        mutex_unlock(&vcpu->kvm->lock);
2459        if (!kvm_is_ucontrol(vcpu->kvm)) {
2460                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2461                sca_add_vcpu(vcpu);
2462        }
2463        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2464                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2465        /* make vcpu_load load the right gmap on the first trigger */
2466        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2467}
2468
2469static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2470{
2471        if (!test_kvm_facility(vcpu->kvm, 76))
2472                return;
2473
2474        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2475
2476        if (vcpu->kvm->arch.crypto.aes_kw)
2477                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2478        if (vcpu->kvm->arch.crypto.dea_kw)
2479                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2480
2481        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2482}
2483
2484void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2485{
2486        free_page(vcpu->arch.sie_block->cbrlo);
2487        vcpu->arch.sie_block->cbrlo = 0;
2488}
2489
2490int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2491{
2492        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2493        if (!vcpu->arch.sie_block->cbrlo)
2494                return -ENOMEM;
2495        return 0;
2496}
2497
2498static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2499{
2500        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2501
2502        vcpu->arch.sie_block->ibc = model->ibc;
2503        if (test_kvm_facility(vcpu->kvm, 7))
2504                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2505}
2506
2507int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2508{
2509        int rc = 0;
2510
2511        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2512                                                    CPUSTAT_SM |
2513                                                    CPUSTAT_STOPPED);
2514
2515        if (test_kvm_facility(vcpu->kvm, 78))
2516                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2517        else if (test_kvm_facility(vcpu->kvm, 8))
2518                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2519
2520        kvm_s390_vcpu_setup_model(vcpu);
2521
2522        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2523        if (MACHINE_HAS_ESOP)
2524                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2525        if (test_kvm_facility(vcpu->kvm, 9))
2526                vcpu->arch.sie_block->ecb |= ECB_SRSI;
2527        if (test_kvm_facility(vcpu->kvm, 73))
2528                vcpu->arch.sie_block->ecb |= ECB_TE;
2529
2530        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2531                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2532        if (test_kvm_facility(vcpu->kvm, 130))
2533                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2534        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2535        if (sclp.has_cei)
2536                vcpu->arch.sie_block->eca |= ECA_CEI;
2537        if (sclp.has_ib)
2538                vcpu->arch.sie_block->eca |= ECA_IB;
2539        if (sclp.has_siif)
2540                vcpu->arch.sie_block->eca |= ECA_SII;
2541        if (sclp.has_sigpif)
2542                vcpu->arch.sie_block->eca |= ECA_SIGPI;
2543        if (test_kvm_facility(vcpu->kvm, 129)) {
2544                vcpu->arch.sie_block->eca |= ECA_VX;
2545                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2546        }
2547        if (test_kvm_facility(vcpu->kvm, 139))
2548                vcpu->arch.sie_block->ecd |= ECD_MEF;
2549
2550        if (vcpu->arch.sie_block->gd) {
2551                vcpu->arch.sie_block->eca |= ECA_AIV;
2552                VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2553                           vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2554        }
2555        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2556                                        | SDNXC;
2557        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2558
2559        if (sclp.has_kss)
2560                kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2561        else
2562                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2563
2564        if (vcpu->kvm->arch.use_cmma) {
2565                rc = kvm_s390_vcpu_setup_cmma(vcpu);
2566                if (rc)
2567                        return rc;
2568        }
2569        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2570        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2571
2572        kvm_s390_vcpu_crypto_setup(vcpu);
2573
2574        return rc;
2575}
2576
2577struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2578                                      unsigned int id)
2579{
2580        struct kvm_vcpu *vcpu;
2581        struct sie_page *sie_page;
2582        int rc = -EINVAL;
2583
2584        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2585                goto out;
2586
2587        rc = -ENOMEM;
2588
2589        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2590        if (!vcpu)
2591                goto out;
2592
2593        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2594        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2595        if (!sie_page)
2596                goto out_free_cpu;
2597
2598        vcpu->arch.sie_block = &sie_page->sie_block;
2599        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2600
2601        /* the real guest size will always be smaller than msl */
2602        vcpu->arch.sie_block->mso = 0;
2603        vcpu->arch.sie_block->msl = sclp.hamax;
2604
2605        vcpu->arch.sie_block->icpua = id;
2606        spin_lock_init(&vcpu->arch.local_int.lock);
2607        vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2608        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2609                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2610        seqcount_init(&vcpu->arch.cputm_seqcount);
2611
2612        rc = kvm_vcpu_init(vcpu, kvm, id);
2613        if (rc)
2614                goto out_free_sie_block;
2615        VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2616                 vcpu->arch.sie_block);
2617        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2618
2619        return vcpu;
2620out_free_sie_block:
2621        free_page((unsigned long)(vcpu->arch.sie_block));
2622out_free_cpu:
2623        kmem_cache_free(kvm_vcpu_cache, vcpu);
2624out:
2625        return ERR_PTR(rc);
2626}
2627
2628int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2629{
2630        return kvm_s390_vcpu_has_irq(vcpu, 0);
2631}
2632
2633bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2634{
2635        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2636}
2637
2638void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2639{
2640        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2641        exit_sie(vcpu);
2642}
2643
2644void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2645{
2646        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2647}
2648
2649static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2650{
2651        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2652        exit_sie(vcpu);
2653}
2654
2655static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2656{
2657        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2658}
2659
2660/*
2661 * Kick a guest cpu out of SIE and wait until SIE is not running.
2662 * If the CPU is not running (e.g. waiting as idle) the function will
2663 * return immediately.
 */
2664void exit_sie(struct kvm_vcpu *vcpu)
2665{
2666        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2667        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2668                cpu_relax();
2669}
2670
2671/* Kick a guest cpu out of SIE to process a request synchronously */
2672void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2673{
2674        kvm_make_request(req, vcpu);
2675        kvm_s390_vcpu_request(vcpu);
2676}
2677
2678static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2679                              unsigned long end)
2680{
2681        struct kvm *kvm = gmap->private;
2682        struct kvm_vcpu *vcpu;
2683        unsigned long prefix;
2684        int i;
2685
2686        if (gmap_is_shadow(gmap))
2687                return;
2688        if (start >= 1UL << 31)
2689                /* We are only interested in prefix pages */
2690                return;
2691        kvm_for_each_vcpu(i, vcpu, kvm) {
2692                /* match against both prefix pages */
2693                prefix = kvm_s390_get_prefix(vcpu);
2694                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2695                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2696                                   start, end);
2697                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2698                }
2699        }
2700}
2701
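/*
 * Editor's note (not part of the original source): the gmap notifier above
 * only cares about invalidations below 2 GB because the guest prefix area
 * must lie in that range; when a prefix page is hit, KVM_REQ_MMU_RELOAD
 * makes kvm_s390_handle_requests() re-protect the two prefix pages before
 * the vcpu re-enters SIE.
 */
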
2702int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2703{
2704        /* kvm common code refers to this, but never calls it */
2705        BUG();
2706        return 0;
2707}
2708
2709static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2710                                           struct kvm_one_reg *reg)
2711{
2712        int r = -EINVAL;
2713
2714        switch (reg->id) {
2715        case KVM_REG_S390_TODPR:
2716                r = put_user(vcpu->arch.sie_block->todpr,
2717                             (u32 __user *)reg->addr);
2718                break;
2719        case KVM_REG_S390_EPOCHDIFF:
2720                r = put_user(vcpu->arch.sie_block->epoch,
2721                             (u64 __user *)reg->addr);
2722                break;
2723        case KVM_REG_S390_CPU_TIMER:
2724                r = put_user(kvm_s390_get_cpu_timer(vcpu),
2725                             (u64 __user *)reg->addr);
2726                break;
2727        case KVM_REG_S390_CLOCK_COMP:
2728                r = put_user(vcpu->arch.sie_block->ckc,
2729                             (u64 __user *)reg->addr);
2730                break;
2731        case KVM_REG_S390_PFTOKEN:
2732                r = put_user(vcpu->arch.pfault_token,
2733                             (u64 __user *)reg->addr);
2734                break;
2735        case KVM_REG_S390_PFCOMPARE:
2736                r = put_user(vcpu->arch.pfault_compare,
2737                             (u64 __user *)reg->addr);
2738                break;
2739        case KVM_REG_S390_PFSELECT:
2740                r = put_user(vcpu->arch.pfault_select,
2741                             (u64 __user *)reg->addr);
2742                break;
2743        case KVM_REG_S390_PP:
2744                r = put_user(vcpu->arch.sie_block->pp,
2745                             (u64 __user *)reg->addr);
2746                break;
2747        case KVM_REG_S390_GBEA:
2748                r = put_user(vcpu->arch.sie_block->gbea,
2749                             (u64 __user *)reg->addr);
2750                break;
2751        default:
2752                break;
2753        }
2754
2755        return r;
2756}
2757
2758static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2759                                           struct kvm_one_reg *reg)
2760{
2761        int r = -EINVAL;
2762        __u64 val;
2763
2764        switch (reg->id) {
2765        case KVM_REG_S390_TODPR:
2766                r = get_user(vcpu->arch.sie_block->todpr,
2767                             (u32 __user *)reg->addr);
2768                break;
2769        case KVM_REG_S390_EPOCHDIFF:
2770                r = get_user(vcpu->arch.sie_block->epoch,
2771                             (u64 __user *)reg->addr);
2772                break;
2773        case KVM_REG_S390_CPU_TIMER:
2774                r = get_user(val, (u64 __user *)reg->addr);
2775                if (!r)
2776                        kvm_s390_set_cpu_timer(vcpu, val);
2777                break;
2778        case KVM_REG_S390_CLOCK_COMP:
2779                r = get_user(vcpu->arch.sie_block->ckc,
2780                             (u64 __user *)reg->addr);
2781                break;
2782        case KVM_REG_S390_PFTOKEN:
2783                r = get_user(vcpu->arch.pfault_token,
2784                             (u64 __user *)reg->addr);
2785                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2786                        kvm_clear_async_pf_completion_queue(vcpu);
2787                break;
2788        case KVM_REG_S390_PFCOMPARE:
2789                r = get_user(vcpu->arch.pfault_compare,
2790                             (u64 __user *)reg->addr);
2791                break;
2792        case KVM_REG_S390_PFSELECT:
2793                r = get_user(vcpu->arch.pfault_select,
2794                             (u64 __user *)reg->addr);
2795                break;
2796        case KVM_REG_S390_PP:
2797                r = get_user(vcpu->arch.sie_block->pp,
2798                             (u64 __user *)reg->addr);
2799                break;
2800        case KVM_REG_S390_GBEA:
2801                r = get_user(vcpu->arch.sie_block->gbea,
2802                             (u64 __user *)reg->addr);
2803                break;
2804        default:
2805                break;
2806        }
2807
2808        return r;
2809}
2810
2811static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2812{
2813        kvm_s390_vcpu_initial_reset(vcpu);
2814        return 0;
2815}
2816
2817int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2818{
2819        vcpu_load(vcpu);
2820        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2821        vcpu_put(vcpu);
2822        return 0;
2823}
2824
2825int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2826{
2827        vcpu_load(vcpu);
2828        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2829        vcpu_put(vcpu);
2830        return 0;
2831}
2832
2833int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2834                                  struct kvm_sregs *sregs)
2835{
2836        vcpu_load(vcpu);
2837
2838        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2839        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2840
2841        vcpu_put(vcpu);
2842        return 0;
2843}
2844
2845int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2846                                  struct kvm_sregs *sregs)
2847{
2848        vcpu_load(vcpu);
2849
2850        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2851        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2852
2853        vcpu_put(vcpu);
2854        return 0;
2855}
2856
2857int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2858{
2859        int ret = 0;
2860
2861        vcpu_load(vcpu);
2862
2863        if (test_fp_ctl(fpu->fpc)) {
2864                ret = -EINVAL;
2865                goto out;
2866        }
2867        vcpu->run->s.regs.fpc = fpu->fpc;
2868        if (MACHINE_HAS_VX)
2869                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2870                                 (freg_t *) fpu->fprs);
2871        else
2872                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2873
2874out:
2875        vcpu_put(vcpu);
2876        return ret;
2877}
2878
2879int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2880{
2881        vcpu_load(vcpu);
2882
2883        /* make sure we have the latest values */
2884        save_fpu_regs();
2885        if (MACHINE_HAS_VX)
2886                convert_vx_to_fp((freg_t *) fpu->fprs,
2887                                 (__vector128 *) vcpu->run->s.regs.vrs);
2888        else
2889                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2890        fpu->fpc = vcpu->run->s.regs.fpc;
2891
2892        vcpu_put(vcpu);
2893        return 0;
2894}
2895
2896static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2897{
2898        int rc = 0;
2899
2900        if (!is_vcpu_stopped(vcpu))
2901                rc = -EBUSY;
2902        else {
2903                vcpu->run->psw_mask = psw.mask;
2904                vcpu->run->psw_addr = psw.addr;
2905        }
2906        return rc;
2907}
2908
2909int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910                                  struct kvm_translation *tr)
2911{
2912        return -EINVAL; /* not implemented yet */
2913}
2914
2915#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2916                              KVM_GUESTDBG_USE_HW_BP | \
2917                              KVM_GUESTDBG_ENABLE)
2918
2919int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2920                                        struct kvm_guest_debug *dbg)
2921{
2922        int rc = 0;
2923
2924        vcpu_load(vcpu);
2925
2926        vcpu->guest_debug = 0;
2927        kvm_s390_clear_bp_data(vcpu);
2928
2929        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2930                rc = -EINVAL;
2931                goto out;
2932        }
2933        if (!sclp.has_gpere) {
2934                rc = -EINVAL;
2935                goto out;
2936        }
2937
2938        if (dbg->control & KVM_GUESTDBG_ENABLE) {
2939                vcpu->guest_debug = dbg->control;
2940                /* enforce guest PER */
2941                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2942
2943                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2944                        rc = kvm_s390_import_bp_data(vcpu, dbg);
2945        } else {
2946                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2947                vcpu->arch.guestdbg.last_bp = 0;
2948        }
2949
2950        if (rc) {
2951                vcpu->guest_debug = 0;
2952                kvm_s390_clear_bp_data(vcpu);
2953                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2954        }
2955
2956out:
2957        vcpu_put(vcpu);
2958        return rc;
2959}
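
/*
 * Userspace turns guest debugging on and off with KVM_SET_GUEST_DEBUG. A
 * minimal sketch that enables single-stepping (illustrative only; "vcpu_fd"
 * is assumed to be an open vcpu file descriptor). As the checks above show,
 * the request fails with -EINVAL if unsupported control flags are set or if
 * the host lacks the guest-PER facility (sclp.has_gpere):
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      static int enable_singlestep(int vcpu_fd)
 *      {
 *              struct kvm_guest_debug dbg = {
 *                      .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *              };
 *
 *              return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *      }
 */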
2960
2961int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2962                                    struct kvm_mp_state *mp_state)
2963{
2964        int ret;
2965
2966        vcpu_load(vcpu);
2967
2968        /* CHECK_STOP and LOAD are not supported yet */
2969        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2970                                      KVM_MP_STATE_OPERATING;
2971
2972        vcpu_put(vcpu);
2973        return ret;
2974}
2975
2976int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2977                                    struct kvm_mp_state *mp_state)
2978{
2979        int rc = 0;
2980
2981        vcpu_load(vcpu);
2982
2983        /* user space knows about this interface - let it control the state */
2984        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2985
2986        switch (mp_state->mp_state) {
2987        case KVM_MP_STATE_STOPPED:
2988                kvm_s390_vcpu_stop(vcpu);
2989                break;
2990        case KVM_MP_STATE_OPERATING:
2991                kvm_s390_vcpu_start(vcpu);
2992                break;
2993        case KVM_MP_STATE_LOAD:
2994        case KVM_MP_STATE_CHECK_STOP:
2995                /* fall through - CHECK_STOP and LOAD are not supported yet */
2996        default:
2997                rc = -ENXIO;
2998        }
2999
3000        vcpu_put(vcpu);
3001        return rc;
3002}
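
/*
 * A minimal userspace sketch of stopping and restarting a vcpu through
 * KVM_SET_MP_STATE (illustrative only; "vcpu_fd" is assumed to be an open
 * vcpu file descriptor). As noted above, using this interface also tells
 * KVM that userspace wants to control the cpu state itself:
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      static int set_vcpu_state(int vcpu_fd, __u32 state)
 *      {
 *              struct kvm_mp_state mp_state = { .mp_state = state };
 *
 *              return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
 *      }
 *
 *      e.g. set_vcpu_state(vcpu_fd, KVM_MP_STATE_STOPPED);
 *           set_vcpu_state(vcpu_fd, KVM_MP_STATE_OPERATING);
 */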
3003
3004static bool ibs_enabled(struct kvm_vcpu *vcpu)
3005{
3006        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3007}
3008
3009static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3010{
3011retry:
3012        kvm_s390_vcpu_request_handled(vcpu);
3013        if (!kvm_request_pending(vcpu))
3014                return 0;
3015        /*
3016         * We use MMU_RELOAD just to re-arm the ipte notifier for the
3017         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3018         * This ensures that the ipte instruction for this request has
3019         * already finished. We might race against a second unmapper that
3020         * wants to set the blocking bit. Let's just retry the request loop.
3021         */
3022        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3023                int rc;
3024                rc = gmap_mprotect_notify(vcpu->arch.gmap,
3025                                          kvm_s390_get_prefix(vcpu),
3026                                          PAGE_SIZE * 2, PROT_WRITE);
3027                if (rc) {
3028                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3029                        return rc;
3030                }
3031                goto retry;
3032        }
3033
3034        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3035                vcpu->arch.sie_block->ihcpu = 0xffff;
3036                goto retry;
3037        }
3038
3039        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3040                if (!ibs_enabled(vcpu)) {
3041                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3042                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3043                }
3044                goto retry;
3045        }
3046
3047        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3048                if (ibs_enabled(vcpu)) {
3049                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3050                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3051                }
3052                goto retry;
3053        }
3054
3055        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3056                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3057                goto retry;
3058        }
3059
3060        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3061                /*
3062                 * Disable CMM virtualization; we will emulate the ESSA
3063                 * instruction manually, in order to provide the additional
3064                 * functionality needed for live migration.
3065                 */
3066                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3067                goto retry;
3068        }
3069
3070        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3071                /*
3072                 * Re-enable CMM virtualization if CMMA is available and
3073                 * CMM has been used.
3074                 */
3075                if ((vcpu->kvm->arch.use_cmma) &&
3076                    (vcpu->kvm->mm->context.uses_cmm))
3077                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3078                goto retry;
3079        }
3080
3081        /* nothing to do, just clear the request */
3082        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3083
3084        return 0;
3085}
3086
3087void kvm_s390_set_tod_clock(struct kvm *kvm,
3088                            const struct kvm_s390_vm_tod_clock *gtod)
3089{
3090        struct kvm_vcpu *vcpu;
3091        struct kvm_s390_tod_clock_ext htod;
3092        int i;
3093
3094        mutex_lock(&kvm->lock);
3095        preempt_disable();
3096
3097        get_tod_clock_ext((char *)&htod);
3098
3099        kvm->arch.epoch = gtod->tod - htod.tod;
3100        kvm->arch.epdx = 0;
3101        if (test_kvm_facility(kvm, 139)) {
3102                kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3103                if (kvm->arch.epoch > gtod->tod)
3104                        kvm->arch.epdx -= 1;
3105        }
3106
3107        kvm_s390_vcpu_block_all(kvm);
3108        kvm_for_each_vcpu(i, vcpu, kvm) {
3109                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3110                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3111        }
3112
3113        kvm_s390_vcpu_unblock_all(kvm);
3114        preempt_enable();
3115        mutex_unlock(&kvm->lock);
3116}
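
/*
 * The epoch (and, with the multiple-epoch facility, the epoch index) is the
 * offset that is added to the host TOD clock to form the guest's view of
 * time. The code above therefore performs a two-word subtraction with
 * manual borrow propagation: the 64-bit subtraction gtod->tod - htod.tod
 * wraps around exactly when gtod->tod < htod.tod, which is detected by
 * "kvm->arch.epoch > gtod->tod" and compensated by borrowing one from the
 * epoch-index difference. A worked example (8-bit index, 64-bit tod):
 *
 *      htod  = { .epoch_idx = 0x01, .tod = 0xf000000000000000 }
 *      gtod  = { .epoch_idx = 0x01, .tod = 0x1000000000000000 }
 *      epoch = 0x2000000000000000  (wrapped, i.e. > gtod->tod)
 *      epdx  = (0x01 - 0x01) - 1 = 0xff
 *
 * Adding (epdx, epoch) back onto (htod.epoch_idx, htod.tod) with carry
 * indeed reproduces (gtod->epoch_idx, gtod->tod).
 */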
3117
3118/**
3119 * kvm_arch_fault_in_page - fault-in guest page if necessary
3120 * @vcpu: The corresponding virtual cpu
3121 * @gpa: Guest physical address
3122 * @writable: Whether the page should be writable or not
3123 *
3124 * Make sure that a guest page has been faulted-in on the host.
3125 *
3126 * Return: Zero on success, negative error code otherwise.
3127 */
3128long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3129{
3130        return gmap_fault(vcpu->arch.gmap, gpa,
3131                          writable ? FAULT_FLAG_WRITE : 0);
3132}
3133
3134static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3135                                      unsigned long token)
3136{
3137        struct kvm_s390_interrupt inti;
3138        struct kvm_s390_irq irq;
3139
3140        if (start_token) {
3141                irq.u.ext.ext_params2 = token;
3142                irq.type = KVM_S390_INT_PFAULT_INIT;
3143                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3144        } else {
3145                inti.type = KVM_S390_INT_PFAULT_DONE;
3146                inti.parm64 = token;
3147                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3148        }
3149}
3150
3151void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3152                                     struct kvm_async_pf *work)
3153{
3154        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3155        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3156}
3157
3158void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3159                                 struct kvm_async_pf *work)
3160{
3161        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3162        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3163}
3164
3165void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3166                               struct kvm_async_pf *work)
3167{
3168        /* s390 will always inject the page directly */
3169}
3170
3171bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3172{
3173        /*
3174         * s390 will always inject the page directly,
3175         * but we still want kvm_check_async_pf_completion() to clean up
3176         */
3177        return true;
3178}
3179
3180static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3181{
3182        hva_t hva;
3183        struct kvm_arch_async_pf arch;
3184        int rc;
3185
3186        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3187                return 0;
3188        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3189            vcpu->arch.pfault_compare)
3190                return 0;
3191        if (psw_extint_disabled(vcpu))
3192                return 0;
3193        if (kvm_s390_vcpu_has_irq(vcpu, 0))
3194                return 0;
3195        if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3196                return 0;
3197        if (!vcpu->arch.gmap->pfault_enabled)
3198                return 0;
3199
3200        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3201        hva += current->thread.gmap_addr & ~PAGE_MASK;
3202        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3203                return 0;
3204
3205        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3206        return rc;
3207}
3208
3209static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3210{
3211        int rc, cpuflags;
3212
3213        /*
3214         * On s390, notifications for arriving pages will be delivered
3215         * directly to the guest, but the housekeeping for completed
3216         * pfaults is handled outside the worker.
3217         */
3218        kvm_check_async_pf_completion(vcpu);
3219
3220        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3221        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3222
3223        if (need_resched())
3224                schedule();
3225
3226        if (test_cpu_flag(CIF_MCCK_PENDING))
3227                s390_handle_mcck();
3228
3229        if (!kvm_is_ucontrol(vcpu->kvm)) {
3230                rc = kvm_s390_deliver_pending_interrupts(vcpu);
3231                if (rc)
3232                        return rc;
3233        }
3234
3235        rc = kvm_s390_handle_requests(vcpu);
3236        if (rc)
3237                return rc;
3238
3239        if (guestdbg_enabled(vcpu)) {
3240                kvm_s390_backup_guest_per_regs(vcpu);
3241                kvm_s390_patch_guest_per_regs(vcpu);
3242        }
3243
3244        vcpu->arch.sie_block->icptcode = 0;
3245        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3246        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3247        trace_kvm_s390_sie_enter(vcpu, cpuflags);
3248
3249        return 0;
3250}
3251
3252static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3253{
3254        struct kvm_s390_pgm_info pgm_info = {
3255                .code = PGM_ADDRESSING,
3256        };
3257        u8 opcode, ilen;
3258        int rc;
3259
3260        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3261        trace_kvm_s390_sie_fault(vcpu);
3262
3263        /*
3264         * We want to inject an addressing exception, which is defined as a
3265         * suppressing or terminating exception. However, since we came here
3266         * by a DAT access exception, the PSW still points to the faulting
3267         * instruction since DAT exceptions are nullifying. So we've got
3268         * to look up the current opcode to get the length of the instruction
3269         * to be able to forward the PSW.
3270         */
3271        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3272        ilen = insn_length(opcode);
3273        if (rc < 0) {
3274                return rc;
3275        } else if (rc) {
3276                /* Instruction-Fetching Exceptions - we can't detect the ilen.
3277                 * Forward by an arbitrary ilc; injection will take care of
3278                 * nullification if necessary.
3279                 */
3280                pgm_info = vcpu->arch.pgm;
3281                ilen = 4;
3282        }
3283        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3284        kvm_s390_forward_psw(vcpu, ilen);
3285        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3286}
3287
3288static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3289{
3290        struct mcck_volatile_info *mcck_info;
3291        struct sie_page *sie_page;
3292
3293        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3294                   vcpu->arch.sie_block->icptcode);
3295        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3296
3297        if (guestdbg_enabled(vcpu))
3298                kvm_s390_restore_guest_per_regs(vcpu);
3299
3300        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3301        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3302
3303        if (exit_reason == -EINTR) {
3304                VCPU_EVENT(vcpu, 3, "%s", "machine check");
3305                sie_page = container_of(vcpu->arch.sie_block,
3306                                        struct sie_page, sie_block);
3307                mcck_info = &sie_page->mcck_info;
3308                kvm_s390_reinject_machine_check(vcpu, mcck_info);
3309                return 0;
3310        }
3311
3312        if (vcpu->arch.sie_block->icptcode > 0) {
3313                int rc = kvm_handle_sie_intercept(vcpu);
3314
3315                if (rc != -EOPNOTSUPP)
3316                        return rc;
3317                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3318                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3319                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3320                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3321                return -EREMOTE;
3322        } else if (exit_reason != -EFAULT) {
3323                vcpu->stat.exit_null++;
3324                return 0;
3325        } else if (kvm_is_ucontrol(vcpu->kvm)) {
3326                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3327                vcpu->run->s390_ucontrol.trans_exc_code =
3328                                                current->thread.gmap_addr;
3329                vcpu->run->s390_ucontrol.pgm_code = 0x10;
3330                return -EREMOTE;
3331        } else if (current->thread.gmap_pfault) {
3332                trace_kvm_s390_major_guest_pfault(vcpu);
3333                current->thread.gmap_pfault = 0;
3334                if (kvm_arch_setup_async_pf(vcpu))
3335                        return 0;
3336                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3337        }
3338        return vcpu_post_run_fault_in_sie(vcpu);
3339}
3340
3341static int __vcpu_run(struct kvm_vcpu *vcpu)
3342{
3343        int rc, exit_reason;
3344
3345        /*
3346         * We try to hold kvm->srcu during most of vcpu_run (except when
3347         * running the guest), so that memslots (and other stuff) are protected.
3348         */
3349        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3350
3351        do {
3352                rc = vcpu_pre_run(vcpu);
3353                if (rc)
3354                        break;
3355
3356                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3357                /*
3358                 * As PF_VCPU will be used in the fault handler, there must
3359                 * be no uaccess between guest_enter and guest_exit.
3360                 */
3361                local_irq_disable();
3362                guest_enter_irqoff();
3363                __disable_cpu_timer_accounting(vcpu);
3364                local_irq_enable();
3365                exit_reason = sie64a(vcpu->arch.sie_block,
3366                                     vcpu->run->s.regs.gprs);
3367                local_irq_disable();
3368                __enable_cpu_timer_accounting(vcpu);
3369                guest_exit_irqoff();
3370                local_irq_enable();
3371                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3372
3373                rc = vcpu_post_run(vcpu, exit_reason);
3374        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3375
3376        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3377        return rc;
3378}
3379
3380static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3381{
3382        struct runtime_instr_cb *riccb;
3383        struct gs_cb *gscb;
3384
3385        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3386        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3387        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3388        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3389        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3390                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3391        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3392                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3393                /* some control register changes require a tlb flush */
3394                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3395        }
3396        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3397                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3398                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3399                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3400                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3401                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3402        }
3403        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3404                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3405                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3406                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3407                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3408                        kvm_clear_async_pf_completion_queue(vcpu);
3409        }
3410        /*
3411         * If userspace sets the riccb (e.g. after migration) to a valid state,
3412         * we should enable RI here instead of doing the lazy enablement.
3413         */
3414        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3415            test_kvm_facility(vcpu->kvm, 64) &&
3416            riccb->v &&
3417            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3418                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3419                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3420        }
3421        /*
3422         * If userspace sets the gscb (e.g. after migration) to non-zero,
3423         * we should enable GS here instead of doing the lazy enablement.
3424         */
3425        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3426            test_kvm_facility(vcpu->kvm, 133) &&
3427            gscb->gssm &&
3428            !vcpu->arch.gs_enabled) {
3429                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3430                vcpu->arch.sie_block->ecb |= ECB_GS;
3431                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3432                vcpu->arch.gs_enabled = 1;
3433        }
3434        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3435            test_kvm_facility(vcpu->kvm, 82)) {
3436                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3437                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3438        }
3439        save_access_regs(vcpu->arch.host_acrs);
3440        restore_access_regs(vcpu->run->s.regs.acrs);
3441        /* save host (userspace) fprs/vrs */
3442        save_fpu_regs();
3443        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3444        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3445        if (MACHINE_HAS_VX)
3446                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3447        else
3448                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3449        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3450        if (test_fp_ctl(current->thread.fpu.fpc))
3451                /* User space provided an invalid FPC, let's clear it */
3452                current->thread.fpu.fpc = 0;
3453        if (MACHINE_HAS_GS) {
3454                preempt_disable();
3455                __ctl_set_bit(2, 4);
3456                if (current->thread.gs_cb) {
3457                        vcpu->arch.host_gscb = current->thread.gs_cb;
3458                        save_gs_cb(vcpu->arch.host_gscb);
3459                }
3460                if (vcpu->arch.gs_enabled) {
3461                        current->thread.gs_cb = (struct gs_cb *)
3462                                                &vcpu->run->s.regs.gscb;
3463                        restore_gs_cb(current->thread.gs_cb);
3464                }
3465                preempt_enable();
3466        }
3467
3468        kvm_run->kvm_dirty_regs = 0;
3469}
3470
3471static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472{
3473        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3474        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3475        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3476        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3477        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3478        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3479        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3480        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3481        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3482        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3483        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3484        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3485        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3486        save_access_regs(vcpu->run->s.regs.acrs);
3487        restore_access_regs(vcpu->arch.host_acrs);
3488        /* Save guest register state */
3489        save_fpu_regs();
3490        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491        /* Restore will be done lazily at return */
3492        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3493        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3494        if (MACHINE_HAS_GS) {
3495                __ctl_set_bit(2, 4);
3496                if (vcpu->arch.gs_enabled)
3497                        save_gs_cb(current->thread.gs_cb);
3498                preempt_disable();
3499                current->thread.gs_cb = vcpu->arch.host_gscb;
3500                restore_gs_cb(vcpu->arch.host_gscb);
3501                preempt_enable();
3502                if (!vcpu->arch.host_gscb)
3503                        __ctl_clear_bit(2, 4);
3504                vcpu->arch.host_gscb = NULL;
3505        }
3506
3507}
3508
3509int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3510{
3511        int rc;
3512
3513        if (kvm_run->immediate_exit)
3514                return -EINTR;
3515
3516        vcpu_load(vcpu);
3517
3518        if (guestdbg_exit_pending(vcpu)) {
3519                kvm_s390_prepare_debug_exit(vcpu);
3520                rc = 0;
3521                goto out;
3522        }
3523
3524        kvm_sigset_activate(vcpu);
3525
3526        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3527                kvm_s390_vcpu_start(vcpu);
3528        } else if (is_vcpu_stopped(vcpu)) {
3529                pr_err_ratelimited("can't run stopped vcpu %d\n",
3530                                   vcpu->vcpu_id);
3531                rc = -EINVAL;
3532                goto out;
3533        }
3534
3535        sync_regs(vcpu, kvm_run);
3536        enable_cpu_timer_accounting(vcpu);
3537
3538        might_fault();
3539        rc = __vcpu_run(vcpu);
3540
3541        if (signal_pending(current) && !rc) {
3542                kvm_run->exit_reason = KVM_EXIT_INTR;
3543                rc = -EINTR;
3544        }
3545
3546        if (guestdbg_exit_pending(vcpu) && !rc)  {
3547                kvm_s390_prepare_debug_exit(vcpu);
3548                rc = 0;
3549        }
3550
3551        if (rc == -EREMOTE) {
3552                /* userspace support is needed, kvm_run has been prepared */
3553                rc = 0;
3554        }
3555
3556        disable_cpu_timer_accounting(vcpu);
3557        store_regs(vcpu, kvm_run);
3558
3559        kvm_sigset_deactivate(vcpu);
3560
3561        vcpu->stat.exit_userspace++;
3562out:
3563        vcpu_put(vcpu);
3564        return rc;
3565}
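
/*
 * A minimal userspace run loop built on the ioctl above (illustrative only;
 * "vcpu_fd" is an open vcpu file descriptor, "run" is that vcpu's mmap()ed
 * struct kvm_run sized by KVM_GET_VCPU_MMAP_SIZE, and handle_sieic() is a
 * hypothetical stand-in for whatever intercept handling userspace does).
 * A -1 return with errno == EINTR corresponds to the KVM_EXIT_INTR case
 * prepared above, both for pending signals and for immediate_exit:
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *      #include <errno.h>
 *
 *      static int run_vcpu(int vcpu_fd, struct kvm_run *run)
 *      {
 *              for (;;) {
 *                      if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *                              return errno == EINTR ? 0 : -1;
 *                      if (run->exit_reason != KVM_EXIT_S390_SIEIC)
 *                              return -1;
 *                      handle_sieic(run);
 *              }
 *      }
 */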
3566
3567/*
3568 * store status at address
3569 * we have two special cases:
3570 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3571 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3572 */
3573int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3574{
3575        unsigned char archmode = 1;
3576        freg_t fprs[NUM_FPRS];
3577        unsigned int px;
3578        u64 clkcomp, cputm;
3579        int rc;
3580
3581        px = kvm_s390_get_prefix(vcpu);
3582        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3583                if (write_guest_abs(vcpu, 163, &archmode, 1))
3584                        return -EFAULT;
3585                gpa = 0;
3586        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3587                if (write_guest_real(vcpu, 163, &archmode, 1))
3588                        return -EFAULT;
3589                gpa = px;
3590        } else
3591                gpa -= __LC_FPREGS_SAVE_AREA;
3592
3593        /* manually convert vector registers if necessary */
3594        if (MACHINE_HAS_VX) {
3595                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3596                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3597                                     fprs, 128);
3598        } else {
3599                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600                                     vcpu->run->s.regs.fprs, 128);
3601        }
3602        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3603                              vcpu->run->s.regs.gprs, 128);
3604        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3605                              &vcpu->arch.sie_block->gpsw, 16);
3606        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3607                              &px, 4);
3608        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3609                              &vcpu->run->s.regs.fpc, 4);
3610        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3611                              &vcpu->arch.sie_block->todpr, 4);
3612        cputm = kvm_s390_get_cpu_timer(vcpu);
3613        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3614                              &cputm, 8);
3615        clkcomp = vcpu->arch.sie_block->ckc >> 8;
3616        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3617                              &clkcomp, 8);
3618        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3619                              &vcpu->run->s.regs.acrs, 64);
3620        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3621                              &vcpu->arch.sie_block->gcr, 128);
3622        return rc ? -EFAULT : 0;
3623}
3624
3625int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3626{
3627        /*
3628         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3629         * switch in the run ioctl. Let's update our copies before we save
3630         * them into the save area.
3631         */
3632        save_fpu_regs();
3633        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3634        save_access_regs(vcpu->run->s.regs.acrs);
3635
3636        return kvm_s390_store_status_unloaded(vcpu, addr);
3637}
3638
3639static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3640{
3641        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3642        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3643}
3644
3645static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3646{
3647        unsigned int i;
3648        struct kvm_vcpu *vcpu;
3649
3650        kvm_for_each_vcpu(i, vcpu, kvm) {
3651                __disable_ibs_on_vcpu(vcpu);
3652        }
3653}
3654
3655static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3656{
3657        if (!sclp.has_ibs)
3658                return;
3659        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3660        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3661}
3662
3663void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3664{
3665        int i, online_vcpus, started_vcpus = 0;
3666
3667        if (!is_vcpu_stopped(vcpu))
3668                return;
3669
3670        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3671        /* Only one cpu at a time may enter/leave the STOPPED state. */
3672        spin_lock(&vcpu->kvm->arch.start_stop_lock);
3673        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3674
3675        for (i = 0; i < online_vcpus; i++) {
3676                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3677                        started_vcpus++;
3678        }
3679
3680        if (started_vcpus == 0) {
3681                /* we're the only active VCPU -> speed it up */
3682                __enable_ibs_on_vcpu(vcpu);
3683        } else if (started_vcpus == 1) {
3684                /*
3685                 * As we are starting a second VCPU, we have to disable
3686                 * the IBS facility on all VCPUs to remove potentially
3687                 * outstanding ENABLE requests.
3688                 */
3689                __disable_ibs_on_all_vcpus(vcpu->kvm);
3690        }
3691
3692        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3693        /*
3694         * Another VCPU might have used IBS while we were offline.
3695         * Let's play it safe and flush the VCPU at startup.
3696         */
3697        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3698        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699        return;
3700}
3701
3702void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3703{
3704        int i, online_vcpus, started_vcpus = 0;
3705        struct kvm_vcpu *started_vcpu = NULL;
3706
3707        if (is_vcpu_stopped(vcpu))
3708                return;
3709
3710        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3711        /* Only one cpu at a time may enter/leave the STOPPED state. */
3712        spin_lock(&vcpu->kvm->arch.start_stop_lock);
3713        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3714
3715        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3716        kvm_s390_clear_stop_irq(vcpu);
3717
3718        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3719        __disable_ibs_on_vcpu(vcpu);
3720
3721        for (i = 0; i < online_vcpus; i++) {
3722                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3723                        started_vcpus++;
3724                        started_vcpu = vcpu->kvm->vcpus[i];
3725                }
3726        }
3727
3728        if (started_vcpus == 1) {
3729                /*
3730                 * As we only have one VCPU left, we want to enable the
3731                 * IBS facility for that VCPU to speed it up.
3732                 */
3733                __enable_ibs_on_vcpu(started_vcpu);
3734        }
3735
3736        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3737        return;
3738}
3739
3740static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3741                                     struct kvm_enable_cap *cap)
3742{
3743        int r;
3744
3745        if (cap->flags)
3746                return -EINVAL;
3747
3748        switch (cap->cap) {
3749        case KVM_CAP_S390_CSS_SUPPORT:
3750                if (!vcpu->kvm->arch.css_support) {
3751                        vcpu->kvm->arch.css_support = 1;
3752                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3753                        trace_kvm_s390_enable_css(vcpu->kvm);
3754                }
3755                r = 0;
3756                break;
3757        default:
3758                r = -EINVAL;
3759                break;
3760        }
3761        return r;
3762}
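
/*
 * A minimal sketch of enabling the single per-vcpu capability handled
 * above, KVM_CAP_S390_CSS_SUPPORT (illustrative only; "vcpu_fd" is an open
 * vcpu file descriptor; cap->flags must be zero or the ioctl fails with
 * -EINVAL):
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      static int enable_css_support(int vcpu_fd)
 *      {
 *              struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *              return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 *      }
 */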
3763
3764static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3765                                  struct kvm_s390_mem_op *mop)
3766{
3767        void __user *uaddr = (void __user *)mop->buf;
3768        void *tmpbuf = NULL;
3769        int r, srcu_idx;
3770        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3771                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
3772
3773        if (mop->flags & ~supported_flags)
3774                return -EINVAL;
3775
3776        if (mop->size > MEM_OP_MAX_SIZE)
3777                return -E2BIG;
3778
3779        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3780                tmpbuf = vmalloc(mop->size);
3781                if (!tmpbuf)
3782                        return -ENOMEM;
3783        }
3784
3785        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786
3787        switch (mop->op) {
3788        case KVM_S390_MEMOP_LOGICAL_READ:
3789                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3790                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3791                                            mop->size, GACC_FETCH);
3792                        break;
3793                }
3794                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3795                if (r == 0) {
3796                        if (copy_to_user(uaddr, tmpbuf, mop->size))
3797                                r = -EFAULT;
3798                }
3799                break;
3800        case KVM_S390_MEMOP_LOGICAL_WRITE:
3801                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3802                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3803                                            mop->size, GACC_STORE);
3804                        break;
3805                }
3806                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3807                        r = -EFAULT;
3808                        break;
3809                }
3810                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3811                break;
3812        default:
3813                r = -EINVAL;
3814        }
3815
3816        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3817
3818        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3819                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3820
3821        vfree(tmpbuf);
3822        return r;
3823}
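
/*
 * A minimal userspace sketch of reading guest memory through KVM_S390_MEM_OP
 * (illustrative only; "vcpu_fd" is an open vcpu file descriptor). The
 * transfer size is capped at MEM_OP_MAX_SIZE and the guest address is
 * translated as a logical access through the given access register:
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      static int read_guest_mem(int vcpu_fd, __u64 gaddr, void *buf,
 *                                __u32 size)
 *      {
 *              struct kvm_s390_mem_op op = {
 *                      .gaddr = gaddr,
 *                      .size = size,
 *                      .op = KVM_S390_MEMOP_LOGICAL_READ,
 *                      .buf = (__u64)(unsigned long)buf,
 *                      .ar = 0,
 *              };
 *
 *              return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *      }
 */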
3824
3825long kvm_arch_vcpu_async_ioctl(struct file *filp,
3826                               unsigned int ioctl, unsigned long arg)
3827{
3828        struct kvm_vcpu *vcpu = filp->private_data;
3829        void __user *argp = (void __user *)arg;
3830
3831        switch (ioctl) {
3832        case KVM_S390_IRQ: {
3833                struct kvm_s390_irq s390irq;
3834
3835                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3836                        return -EFAULT;
3837                return kvm_s390_inject_vcpu(vcpu, &s390irq);
3838        }
3839        case KVM_S390_INTERRUPT: {
3840                struct kvm_s390_interrupt s390int;
3841                struct kvm_s390_irq s390irq;
3842
3843                if (copy_from_user(&s390int, argp, sizeof(s390int)))
3844                        return -EFAULT;
3845                if (s390int_to_s390irq(&s390int, &s390irq))
3846                        return -EINVAL;
3847                return kvm_s390_inject_vcpu(vcpu, &s390irq);
3848        }
3849        }
3850        return -ENOIOCTLCMD;
3851}
3852
3853long kvm_arch_vcpu_ioctl(struct file *filp,
3854                         unsigned int ioctl, unsigned long arg)
3855{
3856        struct kvm_vcpu *vcpu = filp->private_data;
3857        void __user *argp = (void __user *)arg;
3858        int idx;
3859        long r;
3860
3861        vcpu_load(vcpu);
3862
3863        switch (ioctl) {
3864        case KVM_S390_STORE_STATUS:
3865                idx = srcu_read_lock(&vcpu->kvm->srcu);
3866                r = kvm_s390_vcpu_store_status(vcpu, arg);
3867                srcu_read_unlock(&vcpu->kvm->srcu, idx);
3868                break;
3869        case KVM_S390_SET_INITIAL_PSW: {
3870                psw_t psw;
3871
3872                r = -EFAULT;
3873                if (copy_from_user(&psw, argp, sizeof(psw)))
3874                        break;
3875                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3876                break;
3877        }
3878        case KVM_S390_INITIAL_RESET:
3879                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3880                break;
3881        case KVM_SET_ONE_REG:
3882        case KVM_GET_ONE_REG: {
3883                struct kvm_one_reg reg;
3884                r = -EFAULT;
3885                if (copy_from_user(&reg, argp, sizeof(reg)))
3886                        break;
3887                if (ioctl == KVM_SET_ONE_REG)
3888                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3889                else
3890                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3891                break;
3892        }
3893#ifdef CONFIG_KVM_S390_UCONTROL
3894        case KVM_S390_UCAS_MAP: {
3895                struct kvm_s390_ucas_mapping ucasmap;
3896
3897                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3898                        r = -EFAULT;
3899                        break;
3900                }
3901
3902                if (!kvm_is_ucontrol(vcpu->kvm)) {
3903                        r = -EINVAL;
3904                        break;
3905                }
3906
3907                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3908                                     ucasmap.vcpu_addr, ucasmap.length);
3909                break;
3910        }
3911        case KVM_S390_UCAS_UNMAP: {
3912                struct kvm_s390_ucas_mapping ucasmap;
3913
3914                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3915                        r = -EFAULT;
3916                        break;
3917                }
3918
3919                if (!kvm_is_ucontrol(vcpu->kvm)) {
3920                        r = -EINVAL;
3921                        break;
3922                }
3923
3924                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3925                        ucasmap.length);
3926                break;
3927        }
3928#endif
3929        case KVM_S390_VCPU_FAULT: {
3930                r = gmap_fault(vcpu->arch.gmap, arg, 0);
3931                break;
3932        }
3933        case KVM_ENABLE_CAP:
3934        {
3935                struct kvm_enable_cap cap;
3936                r = -EFAULT;
3937                if (copy_from_user(&cap, argp, sizeof(cap)))
3938                        break;
3939                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3940                break;
3941        }
3942        case KVM_S390_MEM_OP: {
3943                struct kvm_s390_mem_op mem_op;
3944
3945                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3946                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3947                else
3948                        r = -EFAULT;
3949                break;
3950        }
3951        case KVM_S390_SET_IRQ_STATE: {
3952                struct kvm_s390_irq_state irq_state;
3953
3954                r = -EFAULT;
3955                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3956                        break;
3957                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3958                    irq_state.len == 0 ||
3959                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3960                        r = -EINVAL;
3961                        break;
3962                }
3963                /* do not use irq_state.flags, it will break old QEMUs */
3964                r = kvm_s390_set_irq_state(vcpu,
3965                                           (void __user *) irq_state.buf,
3966                                           irq_state.len);
3967                break;
3968        }
3969        case KVM_S390_GET_IRQ_STATE: {
3970                struct kvm_s390_irq_state irq_state;
3971
3972                r = -EFAULT;
3973                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3974                        break;
3975                if (irq_state.len == 0) {
3976                        r = -EINVAL;
3977                        break;
3978                }
3979                /* do not use irq_state.flags, it will break old QEMUs */
3980                r = kvm_s390_get_irq_state(vcpu,
3981                                           (__u8 __user *)  irq_state.buf,
3982                                           irq_state.len);
3983                break;
3984        }
3985        default:
3986                r = -ENOTTY;
3987        }
3988
3989        vcpu_put(vcpu);
3990        return r;
3991}
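
/*
 * A minimal userspace sketch of saving a vcpu's pending local interrupts
 * with KVM_S390_GET_IRQ_STATE (illustrative only; "vcpu_fd" is an open vcpu
 * file descriptor and MAX_IRQS is an arbitrary example capacity). The
 * buffer is an array of struct kvm_s390_irq; the flags field stays unused.
 * For the SET direction the checks above additionally require the length
 * to be a multiple of sizeof(struct kvm_s390_irq) and no larger than
 * VCPU_IRQS_MAX_BUF:
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      #define MAX_IRQS 256
 *
 *      static int save_irq_state(int vcpu_fd, struct kvm_s390_irq *irqs)
 *      {
 *              struct kvm_s390_irq_state irq_state = {
 *                      .buf = (__u64)(unsigned long)irqs,
 *                      .len = MAX_IRQS * sizeof(struct kvm_s390_irq),
 *              };
 *
 *              return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
 *      }
 */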
3992
3993int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3994{
3995#ifdef CONFIG_KVM_S390_UCONTROL
3996        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3997                 && (kvm_is_ucontrol(vcpu->kvm))) {
3998                vmf->page = virt_to_page(vcpu->arch.sie_block);
3999                get_page(vmf->page);
4000                return 0;
4001        }
4002#endif
4003        return VM_FAULT_SIGBUS;
4004}
4005
4006int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4007                            unsigned long npages)
4008{
4009        return 0;
4010}
4011
4012/* Section: memory related */
4013int kvm_arch_prepare_memory_region(struct kvm *kvm,
4014                                   struct kvm_memory_slot *memslot,
4015                                   const struct kvm_userspace_memory_region *mem,
4016                                   enum kvm_mr_change change)
4017{
4018        /* A few sanity checks. Memory slots have to start and end at a
4019           segment boundary (1 MB). The memory in userland may be fragmented
4020           into various different vmas. It is okay to mmap() and munmap()
4021           stuff in this slot at any time after this call. */
4022
4023        if (mem->userspace_addr & 0xffffful)
4024                return -EINVAL;
4025
4026        if (mem->memory_size & 0xffffful)
4027                return -EINVAL;
4028
4029        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4030                return -EINVAL;
4031
4032        return 0;
4033}
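
/*
 * A minimal userspace sketch of registering guest memory such that it
 * passes the sanity checks above (illustrative only; "vm_fd" is a VM file
 * descriptor and "mem" points to an anonymous mapping whose address and
 * size are both multiples of 1 MB and which fits below the configured
 * memory limit):
 *
 *      #include <linux/kvm.h>
 *      #include <sys/ioctl.h>
 *
 *      static int add_memslot(int vm_fd, __u32 slot, void *mem,
 *                             __u64 guest_phys_addr, __u64 size)
 *      {
 *              struct kvm_userspace_memory_region region = {
 *                      .slot = slot,
 *                      .guest_phys_addr = guest_phys_addr,
 *                      .memory_size = size,
 *                      .userspace_addr = (__u64)(unsigned long)mem,
 *              };
 *
 *              return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 *      }
 */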
4034
4035void kvm_arch_commit_memory_region(struct kvm *kvm,
4036                                const struct kvm_userspace_memory_region *mem,
4037                                const struct kvm_memory_slot *old,
4038                                const struct kvm_memory_slot *new,
4039                                enum kvm_mr_change change)
4040{
4041        int rc;
4042
4043        /* If the basics of the memslot do not change, we do not want
4044         * to update the gmap. Every update causes several unnecessary
4045         * segment translation exceptions. This is usually handled just
4046         * fine by the normal fault handler + gmap, but it will also
4047         * cause faults on the prefix page of running guest CPUs.
4048         */
4049        if (old->userspace_addr == mem->userspace_addr &&
4050            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4051            old->npages * PAGE_SIZE == mem->memory_size)
4052                return;
4053
4054        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4055                mem->guest_phys_addr, mem->memory_size);
4056        if (rc)
4057                pr_warn("failed to commit memory region\n");
4058        return;
4059}
4060
4061static inline unsigned long nonhyp_mask(int i)
4062{
4063        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4064
4065        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4066}
4067
4068void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4069{
4070        vcpu->valid_wakeup = false;
4071}
4072
4073static int __init kvm_s390_init(void)
4074{
4075        int i;
4076
4077        if (!sclp.has_sief2) {
4078                pr_info("SIE not available\n");
4079                return -ENODEV;
4080        }
4081
4082        for (i = 0; i < 16; i++)
4083                kvm_s390_fac_base[i] |=
4084                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4085
4086        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4087}
4088
4089static void __exit kvm_s390_exit(void)
4090{
4091        kvm_exit();
4092}
4093
4094module_init(kvm_s390_init);
4095module_exit(kvm_s390_exit);
4096
4097/*
4098 * Enable autoloading of the kvm module.
4099 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4100 * since x86 takes a different approach.
4101 */
4102#include <linux/miscdevice.h>
4103MODULE_ALIAS_MISCDEV(KVM_MINOR);
4104MODULE_ALIAS("devname:kvm");
4105