linux/arch/s390/kvm/kvm-s390.c
<<
>>
Prefs
   1/*
   2 * hosting zSeries kernel virtual machines
   3 *
   4 * Copyright IBM Corp. 2008, 2009
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License (version 2 only)
   8 * as published by the Free Software Foundation.
   9 *
  10 *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11 *               Christian Borntraeger <borntraeger@de.ibm.com>
  12 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14 *               Jason J. Herne <jjherne@us.ibm.com>
  15 */
  16
  17#include <linux/compiler.h>
  18#include <linux/err.h>
  19#include <linux/fs.h>
  20#include <linux/hrtimer.h>
  21#include <linux/init.h>
  22#include <linux/kvm.h>
  23#include <linux/kvm_host.h>
  24#include <linux/mman.h>
  25#include <linux/module.h>
  26#include <linux/moduleparam.h>
  27#include <linux/random.h>
  28#include <linux/slab.h>
  29#include <linux/timer.h>
  30#include <linux/vmalloc.h>
  31#include <linux/bitmap.h>
  32#include <linux/sched/signal.h>
  33#include <linux/string.h>
  34
  35#include <asm/asm-offsets.h>
  36#include <asm/lowcore.h>
  37#include <asm/stp.h>
  38#include <asm/pgtable.h>
  39#include <asm/gmap.h>
  40#include <asm/nmi.h>
  41#include <asm/switch_to.h>
  42#include <asm/isc.h>
  43#include <asm/sclp.h>
  44#include <asm/cpacf.h>
  45#include <asm/timex.h>
  46#include "kvm-s390.h"
  47#include "gaccess.h"
  48
  49#define KMSG_COMPONENT "kvm-s390"
  50#undef pr_fmt
  51#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  52
  53#define CREATE_TRACE_POINTS
  54#include "trace.h"
  55#include "trace-s390.h"
  56
  57#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  58#define LOCAL_IRQS 32
  59#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  60                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  61
  62#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  63
  64struct kvm_stats_debugfs_item debugfs_entries[] = {
  65        { "userspace_handled", VCPU_STAT(exit_userspace) },
  66        { "exit_null", VCPU_STAT(exit_null) },
  67        { "exit_validity", VCPU_STAT(exit_validity) },
  68        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  69        { "exit_external_request", VCPU_STAT(exit_external_request) },
  70        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  71        { "exit_instruction", VCPU_STAT(exit_instruction) },
  72        { "exit_pei", VCPU_STAT(exit_pei) },
  73        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  74        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  75        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  76        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  77        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  78        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  79        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  85        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  86        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  87        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  88        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  89        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  90        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  91        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  92        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  93        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  94        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  95        { "instruction_spx", VCPU_STAT(instruction_spx) },
  96        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  97        { "instruction_stap", VCPU_STAT(instruction_stap) },
  98        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  99        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 100        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
 101        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
 102        { "instruction_essa", VCPU_STAT(instruction_essa) },
 103        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 104        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 105        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 106        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 107        { "instruction_sie", VCPU_STAT(instruction_sie) },
 108        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 109        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 110        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 111        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 112        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 113        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 114        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 115        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 116        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 117        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 118        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 119        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 120        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 121        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 122        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 123        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 124        { "diagnose_10", VCPU_STAT(diagnose_10) },
 125        { "diagnose_44", VCPU_STAT(diagnose_44) },
 126        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 127        { "diagnose_258", VCPU_STAT(diagnose_258) },
 128        { "diagnose_308", VCPU_STAT(diagnose_308) },
 129        { "diagnose_500", VCPU_STAT(diagnose_500) },
 130        { NULL }
 131};
 132
 133struct kvm_s390_tod_clock_ext {
 134        __u8 epoch_idx;
 135        __u64 tod;
 136        __u8 reserved[7];
 137} __packed;
 138
 139/* allow nested virtualization in KVM (if enabled by user space) */
 140static int nested;
 141module_param(nested, int, S_IRUGO);
 142MODULE_PARM_DESC(nested, "Nested virtualization support");
 143
 144/* upper facilities limit for kvm */
 145unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 146
 147unsigned long kvm_s390_fac_list_mask_size(void)
 148{
 149        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 150        return ARRAY_SIZE(kvm_s390_fac_list_mask);
 151}
 152
 153/* available cpu features supported by kvm */
 154static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 155/* available subfunctions indicated via query / "test bit" */
 156static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 157
 158static struct gmap_notifier gmap_notifier;
 159static struct gmap_notifier vsie_gmap_notifier;
 160debug_info_t *kvm_s390_dbf;
 161
 162/* Section: not file related */
 163int kvm_arch_hardware_enable(void)
 164{
 165        /* every s390 is virtualization enabled ;-) */
 166        return 0;
 167}
 168
 169static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 170                              unsigned long end);
 171
 172/*
 173 * This callback is executed during stop_machine(). All CPUs are therefore
 174 * temporarily stopped. In order not to change guest behavior, we have to
 175 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 176 * so a CPU won't be stopped while calculating with the epoch.
 177 */
 178static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 179                          void *v)
 180{
 181        struct kvm *kvm;
 182        struct kvm_vcpu *vcpu;
 183        int i;
 184        unsigned long long *delta = v;
 185
 186        list_for_each_entry(kvm, &vm_list, vm_list) {
 187                kvm->arch.epoch -= *delta;
 188                kvm_for_each_vcpu(i, vcpu, kvm) {
 189                        vcpu->arch.sie_block->epoch -= *delta;
 190                        if (vcpu->arch.cputm_enabled)
 191                                vcpu->arch.cputm_start += *delta;
 192                        if (vcpu->arch.vsie_block)
 193                                vcpu->arch.vsie_block->epoch -= *delta;
 194                }
 195        }
 196        return NOTIFY_OK;
 197}
 198
 199static struct notifier_block kvm_clock_notifier = {
 200        .notifier_call = kvm_clock_sync,
 201};
 202
 203int kvm_arch_hardware_setup(void)
 204{
 205        gmap_notifier.notifier_call = kvm_gmap_notifier;
 206        gmap_register_pte_notifier(&gmap_notifier);
 207        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 208        gmap_register_pte_notifier(&vsie_gmap_notifier);
 209        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 210                                       &kvm_clock_notifier);
 211        return 0;
 212}
 213
 214void kvm_arch_hardware_unsetup(void)
 215{
 216        gmap_unregister_pte_notifier(&gmap_notifier);
 217        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 218        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 219                                         &kvm_clock_notifier);
 220}
 221
 222static void allow_cpu_feat(unsigned long nr)
 223{
 224        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 225}
 226
 227static inline int plo_test_bit(unsigned char nr)
 228{
 229        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 230        int cc;
 231
 232        asm volatile(
 233                /* Parameter registers are ignored for "test bit" */
 234                "       plo     0,0,0,0(0)\n"
 235                "       ipm     %0\n"
 236                "       srl     %0,28\n"
 237                : "=d" (cc)
 238                : "d" (r0)
 239                : "cc");
 240        return cc == 0;
 241}
 242
 243static void kvm_s390_cpu_feat_init(void)
 244{
 245        int i;
 246
 247        for (i = 0; i < 256; ++i) {
 248                if (plo_test_bit(i))
 249                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 250        }
 251
 252        if (test_facility(28)) /* TOD-clock steering */
 253                ptff(kvm_s390_available_subfunc.ptff,
 254                     sizeof(kvm_s390_available_subfunc.ptff),
 255                     PTFF_QAF);
 256
 257        if (test_facility(17)) { /* MSA */
 258                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 259                              kvm_s390_available_subfunc.kmac);
 260                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 261                              kvm_s390_available_subfunc.kmc);
 262                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 263                              kvm_s390_available_subfunc.km);
 264                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 265                              kvm_s390_available_subfunc.kimd);
 266                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 267                              kvm_s390_available_subfunc.klmd);
 268        }
 269        if (test_facility(76)) /* MSA3 */
 270                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 271                              kvm_s390_available_subfunc.pckmo);
 272        if (test_facility(77)) { /* MSA4 */
 273                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 274                              kvm_s390_available_subfunc.kmctr);
 275                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 276                              kvm_s390_available_subfunc.kmf);
 277                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 278                              kvm_s390_available_subfunc.kmo);
 279                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 280                              kvm_s390_available_subfunc.pcc);
 281        }
 282        if (test_facility(57)) /* MSA5 */
 283                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 284                              kvm_s390_available_subfunc.ppno);
 285
 286        if (test_facility(146)) /* MSA8 */
 287                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 288                              kvm_s390_available_subfunc.kma);
 289
 290        if (MACHINE_HAS_ESOP)
 291                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 292        /*
 293         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 294         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 295         */
 296        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 297            !test_facility(3) || !nested)
 298                return;
 299        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 300        if (sclp.has_64bscao)
 301                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 302        if (sclp.has_siif)
 303                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 304        if (sclp.has_gpere)
 305                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 306        if (sclp.has_gsls)
 307                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 308        if (sclp.has_ib)
 309                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 310        if (sclp.has_cei)
 311                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 312        if (sclp.has_ibs)
 313                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 314        if (sclp.has_kss)
 315                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 316        /*
 317         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 318         * all skey handling functions read/set the skey from the PGSTE
 319         * instead of the real storage key.
 320         *
 321         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 322         * pages being detected as preserved although they are resident.
 323         *
 324         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 325         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 326         *
 327         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 328         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 329         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 330         *
 331         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 332         * cannot easily shadow the SCA because of the ipte lock.
 333         */
 334}
 335
 336int kvm_arch_init(void *opaque)
 337{
 338        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 339        if (!kvm_s390_dbf)
 340                return -ENOMEM;
 341
 342        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 343                debug_unregister(kvm_s390_dbf);
 344                return -ENOMEM;
 345        }
 346
 347        kvm_s390_cpu_feat_init();
 348
 349        /* Register floating interrupt controller interface. */
 350        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 351}
 352
 353void kvm_arch_exit(void)
 354{
 355        debug_unregister(kvm_s390_dbf);
 356}
 357
 358/* Section: device related */
 359long kvm_arch_dev_ioctl(struct file *filp,
 360                        unsigned int ioctl, unsigned long arg)
 361{
 362        if (ioctl == KVM_S390_ENABLE_SIE)
 363                return s390_enable_sie();
 364        return -EINVAL;
 365}
 366
 367int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 368{
 369        int r;
 370
 371        switch (ext) {
 372        case KVM_CAP_S390_PSW:
 373        case KVM_CAP_S390_GMAP:
 374        case KVM_CAP_SYNC_MMU:
 375#ifdef CONFIG_KVM_S390_UCONTROL
 376        case KVM_CAP_S390_UCONTROL:
 377#endif
 378        case KVM_CAP_ASYNC_PF:
 379        case KVM_CAP_SYNC_REGS:
 380        case KVM_CAP_ONE_REG:
 381        case KVM_CAP_ENABLE_CAP:
 382        case KVM_CAP_S390_CSS_SUPPORT:
 383        case KVM_CAP_IOEVENTFD:
 384        case KVM_CAP_DEVICE_CTRL:
 385        case KVM_CAP_ENABLE_CAP_VM:
 386        case KVM_CAP_S390_IRQCHIP:
 387        case KVM_CAP_VM_ATTRIBUTES:
 388        case KVM_CAP_MP_STATE:
 389        case KVM_CAP_IMMEDIATE_EXIT:
 390        case KVM_CAP_S390_INJECT_IRQ:
 391        case KVM_CAP_S390_USER_SIGP:
 392        case KVM_CAP_S390_USER_STSI:
 393        case KVM_CAP_S390_SKEYS:
 394        case KVM_CAP_S390_IRQ_STATE:
 395        case KVM_CAP_S390_USER_INSTR0:
 396        case KVM_CAP_S390_CMMA_MIGRATION:
 397        case KVM_CAP_S390_AIS:
 398                r = 1;
 399                break;
 400        case KVM_CAP_S390_MEM_OP:
 401                r = MEM_OP_MAX_SIZE;
 402                break;
 403        case KVM_CAP_NR_VCPUS:
 404        case KVM_CAP_MAX_VCPUS:
 405                r = KVM_S390_BSCA_CPU_SLOTS;
 406                if (!kvm_s390_use_sca_entries())
 407                        r = KVM_MAX_VCPUS;
 408                else if (sclp.has_esca && sclp.has_64bscao)
 409                        r = KVM_S390_ESCA_CPU_SLOTS;
 410                break;
 411        case KVM_CAP_NR_MEMSLOTS:
 412                r = KVM_USER_MEM_SLOTS;
 413                break;
 414        case KVM_CAP_S390_COW:
 415                r = MACHINE_HAS_ESOP;
 416                break;
 417        case KVM_CAP_S390_VECTOR_REGISTERS:
 418                r = MACHINE_HAS_VX;
 419                break;
 420        case KVM_CAP_S390_RI:
 421                r = test_facility(64);
 422                break;
 423        case KVM_CAP_S390_GS:
 424                r = test_facility(133);
 425                break;
 426        default:
 427                r = 0;
 428        }
 429        return r;
 430}
 431
 432static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 433                                        struct kvm_memory_slot *memslot)
 434{
 435        gfn_t cur_gfn, last_gfn;
 436        unsigned long address;
 437        struct gmap *gmap = kvm->arch.gmap;
 438
 439        /* Loop over all guest pages */
 440        last_gfn = memslot->base_gfn + memslot->npages;
 441        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 442                address = gfn_to_hva_memslot(memslot, cur_gfn);
 443
 444                if (test_and_clear_guest_dirty(gmap->mm, address))
 445                        mark_page_dirty(kvm, cur_gfn);
 446                if (fatal_signal_pending(current))
 447                        return;
 448                cond_resched();
 449        }
 450}
 451
 452/* Section: vm related */
 453static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 454
 455/*
 456 * Get (and clear) the dirty memory log for a memory slot.
 457 */
 458int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 459                               struct kvm_dirty_log *log)
 460{
 461        int r;
 462        unsigned long n;
 463        struct kvm_memslots *slots;
 464        struct kvm_memory_slot *memslot;
 465        int is_dirty = 0;
 466
 467        if (kvm_is_ucontrol(kvm))
 468                return -EINVAL;
 469
 470        mutex_lock(&kvm->slots_lock);
 471
 472        r = -EINVAL;
 473        if (log->slot >= KVM_USER_MEM_SLOTS)
 474                goto out;
 475
 476        slots = kvm_memslots(kvm);
 477        memslot = id_to_memslot(slots, log->slot);
 478        r = -ENOENT;
 479        if (!memslot->dirty_bitmap)
 480                goto out;
 481
 482        kvm_s390_sync_dirty_log(kvm, memslot);
 483        r = kvm_get_dirty_log(kvm, log, &is_dirty);
 484        if (r)
 485                goto out;
 486
 487        /* Clear the dirty log */
 488        if (is_dirty) {
 489                n = kvm_dirty_bitmap_bytes(memslot);
 490                memset(memslot->dirty_bitmap, 0, n);
 491        }
 492        r = 0;
 493out:
 494        mutex_unlock(&kvm->slots_lock);
 495        return r;
 496}
 497
 498static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 499{
 500        unsigned int i;
 501        struct kvm_vcpu *vcpu;
 502
 503        kvm_for_each_vcpu(i, vcpu, kvm) {
 504                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 505        }
 506}
 507
 508static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 509{
 510        int r;
 511
 512        if (cap->flags)
 513                return -EINVAL;
 514
 515        switch (cap->cap) {
 516        case KVM_CAP_S390_IRQCHIP:
 517                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 518                kvm->arch.use_irqchip = 1;
 519                r = 0;
 520                break;
 521        case KVM_CAP_S390_USER_SIGP:
 522                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 523                kvm->arch.user_sigp = 1;
 524                r = 0;
 525                break;
 526        case KVM_CAP_S390_VECTOR_REGISTERS:
 527                mutex_lock(&kvm->lock);
 528                if (kvm->created_vcpus) {
 529                        r = -EBUSY;
 530                } else if (MACHINE_HAS_VX) {
 531                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 532                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 533                        if (test_facility(134)) {
 534                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 535                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 536                        }
 537                        if (test_facility(135)) {
 538                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 539                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 540                        }
 541                        r = 0;
 542                } else
 543                        r = -EINVAL;
 544                mutex_unlock(&kvm->lock);
 545                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 546                         r ? "(not available)" : "(success)");
 547                break;
 548        case KVM_CAP_S390_RI:
 549                r = -EINVAL;
 550                mutex_lock(&kvm->lock);
 551                if (kvm->created_vcpus) {
 552                        r = -EBUSY;
 553                } else if (test_facility(64)) {
 554                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 555                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 556                        r = 0;
 557                }
 558                mutex_unlock(&kvm->lock);
 559                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 560                         r ? "(not available)" : "(success)");
 561                break;
 562        case KVM_CAP_S390_AIS:
 563                mutex_lock(&kvm->lock);
 564                if (kvm->created_vcpus) {
 565                        r = -EBUSY;
 566                } else {
 567                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
 568                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 569                        r = 0;
 570                }
 571                mutex_unlock(&kvm->lock);
 572                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 573                         r ? "(not available)" : "(success)");
 574                break;
 575        case KVM_CAP_S390_GS:
 576                r = -EINVAL;
 577                mutex_lock(&kvm->lock);
 578                if (atomic_read(&kvm->online_vcpus)) {
 579                        r = -EBUSY;
 580                } else if (test_facility(133)) {
 581                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
 582                        set_kvm_facility(kvm->arch.model.fac_list, 133);
 583                        r = 0;
 584                }
 585                mutex_unlock(&kvm->lock);
 586                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 587                         r ? "(not available)" : "(success)");
 588                break;
 589        case KVM_CAP_S390_USER_STSI:
 590                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 591                kvm->arch.user_stsi = 1;
 592                r = 0;
 593                break;
 594        case KVM_CAP_S390_USER_INSTR0:
 595                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 596                kvm->arch.user_instr0 = 1;
 597                icpt_operexc_on_all_vcpus(kvm);
 598                r = 0;
 599                break;
 600        default:
 601                r = -EINVAL;
 602                break;
 603        }
 604        return r;
 605}
 606
 607static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 608{
 609        int ret;
 610
 611        switch (attr->attr) {
 612        case KVM_S390_VM_MEM_LIMIT_SIZE:
 613                ret = 0;
 614                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 615                         kvm->arch.mem_limit);
 616                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 617                        ret = -EFAULT;
 618                break;
 619        default:
 620                ret = -ENXIO;
 621                break;
 622        }
 623        return ret;
 624}
 625
 626static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 627{
 628        int ret;
 629        unsigned int idx;
 630        switch (attr->attr) {
 631        case KVM_S390_VM_MEM_ENABLE_CMMA:
 632                ret = -ENXIO;
 633                if (!sclp.has_cmma)
 634                        break;
 635
 636                ret = -EBUSY;
 637                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 638                mutex_lock(&kvm->lock);
 639                if (!kvm->created_vcpus) {
 640                        kvm->arch.use_cmma = 1;
 641                        ret = 0;
 642                }
 643                mutex_unlock(&kvm->lock);
 644                break;
 645        case KVM_S390_VM_MEM_CLR_CMMA:
 646                ret = -ENXIO;
 647                if (!sclp.has_cmma)
 648                        break;
 649                ret = -EINVAL;
 650                if (!kvm->arch.use_cmma)
 651                        break;
 652
 653                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 654                mutex_lock(&kvm->lock);
 655                idx = srcu_read_lock(&kvm->srcu);
 656                s390_reset_cmma(kvm->arch.gmap->mm);
 657                srcu_read_unlock(&kvm->srcu, idx);
 658                mutex_unlock(&kvm->lock);
 659                ret = 0;
 660                break;
 661        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 662                unsigned long new_limit;
 663
 664                if (kvm_is_ucontrol(kvm))
 665                        return -EINVAL;
 666
 667                if (get_user(new_limit, (u64 __user *)attr->addr))
 668                        return -EFAULT;
 669
 670                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 671                    new_limit > kvm->arch.mem_limit)
 672                        return -E2BIG;
 673
 674                if (!new_limit)
 675                        return -EINVAL;
 676
 677                /* gmap_create takes last usable address */
 678                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 679                        new_limit -= 1;
 680
 681                ret = -EBUSY;
 682                mutex_lock(&kvm->lock);
 683                if (!kvm->created_vcpus) {
 684                        /* gmap_create will round the limit up */
 685                        struct gmap *new = gmap_create(current->mm, new_limit);
 686
 687                        if (!new) {
 688                                ret = -ENOMEM;
 689                        } else {
 690                                gmap_remove(kvm->arch.gmap);
 691                                new->private = kvm;
 692                                kvm->arch.gmap = new;
 693                                ret = 0;
 694                        }
 695                }
 696                mutex_unlock(&kvm->lock);
 697                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 698                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 699                         (void *) kvm->arch.gmap->asce);
 700                break;
 701        }
 702        default:
 703                ret = -ENXIO;
 704                break;
 705        }
 706        return ret;
 707}
 708
 709static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 710
 711static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 712{
 713        struct kvm_vcpu *vcpu;
 714        int i;
 715
 716        if (!test_kvm_facility(kvm, 76))
 717                return -EINVAL;
 718
 719        mutex_lock(&kvm->lock);
 720        switch (attr->attr) {
 721        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 722                get_random_bytes(
 723                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 724                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 725                kvm->arch.crypto.aes_kw = 1;
 726                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 727                break;
 728        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 729                get_random_bytes(
 730                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 731                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 732                kvm->arch.crypto.dea_kw = 1;
 733                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 734                break;
 735        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 736                kvm->arch.crypto.aes_kw = 0;
 737                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 738                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 739                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 740                break;
 741        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 742                kvm->arch.crypto.dea_kw = 0;
 743                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 744                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 745                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 746                break;
 747        default:
 748                mutex_unlock(&kvm->lock);
 749                return -ENXIO;
 750        }
 751
 752        kvm_for_each_vcpu(i, vcpu, kvm) {
 753                kvm_s390_vcpu_crypto_setup(vcpu);
 754                exit_sie(vcpu);
 755        }
 756        mutex_unlock(&kvm->lock);
 757        return 0;
 758}
 759
 760static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 761{
 762        int cx;
 763        struct kvm_vcpu *vcpu;
 764
 765        kvm_for_each_vcpu(cx, vcpu, kvm)
 766                kvm_s390_sync_request(req, vcpu);
 767}
 768
 769/*
 770 * Must be called with kvm->srcu held to avoid races on memslots, and with
 771 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 772 */
 773static int kvm_s390_vm_start_migration(struct kvm *kvm)
 774{
 775        struct kvm_s390_migration_state *mgs;
 776        struct kvm_memory_slot *ms;
 777        /* should be the only one */
 778        struct kvm_memslots *slots;
 779        unsigned long ram_pages;
 780        int slotnr;
 781
 782        /* migration mode already enabled */
 783        if (kvm->arch.migration_state)
 784                return 0;
 785
 786        slots = kvm_memslots(kvm);
 787        if (!slots || !slots->used_slots)
 788                return -EINVAL;
 789
 790        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
 791        if (!mgs)
 792                return -ENOMEM;
 793        kvm->arch.migration_state = mgs;
 794
 795        if (kvm->arch.use_cmma) {
 796                /*
 797                 * Get the last slot. They should be sorted by base_gfn, so the
 798                 * last slot is also the one at the end of the address space.
 799                 * We have verified above that at least one slot is present.
 800                 */
 801                ms = slots->memslots + slots->used_slots - 1;
 802                /* round up so we only use full longs */
 803                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 804                /* allocate enough bytes to store all the bits */
 805                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
 806                if (!mgs->pgste_bitmap) {
 807                        kfree(mgs);
 808                        kvm->arch.migration_state = NULL;
 809                        return -ENOMEM;
 810                }
 811
 812                mgs->bitmap_size = ram_pages;
 813                atomic64_set(&mgs->dirty_pages, ram_pages);
 814                /* mark all the pages in active slots as dirty */
 815                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
 816                        ms = slots->memslots + slotnr;
 817                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
 818                }
 819
 820                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 821        }
 822        return 0;
 823}
 824
 825/*
 826 * Must be called with kvm->lock to avoid races with ourselves and
 827 * kvm_s390_vm_start_migration.
 828 */
 829static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 830{
 831        struct kvm_s390_migration_state *mgs;
 832
 833        /* migration mode already disabled */
 834        if (!kvm->arch.migration_state)
 835                return 0;
 836        mgs = kvm->arch.migration_state;
 837        kvm->arch.migration_state = NULL;
 838
 839        if (kvm->arch.use_cmma) {
 840                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
 841                vfree(mgs->pgste_bitmap);
 842        }
 843        kfree(mgs);
 844        return 0;
 845}
 846
 847static int kvm_s390_vm_set_migration(struct kvm *kvm,
 848                                     struct kvm_device_attr *attr)
 849{
 850        int idx, res = -ENXIO;
 851
 852        mutex_lock(&kvm->lock);
 853        switch (attr->attr) {
 854        case KVM_S390_VM_MIGRATION_START:
 855                idx = srcu_read_lock(&kvm->srcu);
 856                res = kvm_s390_vm_start_migration(kvm);
 857                srcu_read_unlock(&kvm->srcu, idx);
 858                break;
 859        case KVM_S390_VM_MIGRATION_STOP:
 860                res = kvm_s390_vm_stop_migration(kvm);
 861                break;
 862        default:
 863                break;
 864        }
 865        mutex_unlock(&kvm->lock);
 866
 867        return res;
 868}
 869
 870static int kvm_s390_vm_get_migration(struct kvm *kvm,
 871                                     struct kvm_device_attr *attr)
 872{
 873        u64 mig = (kvm->arch.migration_state != NULL);
 874
 875        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 876                return -ENXIO;
 877
 878        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
 879                return -EFAULT;
 880        return 0;
 881}
 882
 883static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 884{
 885        struct kvm_s390_vm_tod_clock gtod;
 886
 887        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 888                return -EFAULT;
 889
 890        if (test_kvm_facility(kvm, 139))
 891                kvm_s390_set_tod_clock_ext(kvm, &gtod);
 892        else if (gtod.epoch_idx == 0)
 893                kvm_s390_set_tod_clock(kvm, gtod.tod);
 894        else
 895                return -EINVAL;
 896
 897        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 898                gtod.epoch_idx, gtod.tod);
 899
 900        return 0;
 901}
 902
 903static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 904{
 905        u8 gtod_high;
 906
 907        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 908                                           sizeof(gtod_high)))
 909                return -EFAULT;
 910
 911        if (gtod_high != 0)
 912                return -EINVAL;
 913        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 914
 915        return 0;
 916}
 917
 918static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 919{
 920        u64 gtod;
 921
 922        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 923                return -EFAULT;
 924
 925        kvm_s390_set_tod_clock(kvm, gtod);
 926        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 927        return 0;
 928}
 929
 930static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 931{
 932        int ret;
 933
 934        if (attr->flags)
 935                return -EINVAL;
 936
 937        switch (attr->attr) {
 938        case KVM_S390_VM_TOD_EXT:
 939                ret = kvm_s390_set_tod_ext(kvm, attr);
 940                break;
 941        case KVM_S390_VM_TOD_HIGH:
 942                ret = kvm_s390_set_tod_high(kvm, attr);
 943                break;
 944        case KVM_S390_VM_TOD_LOW:
 945                ret = kvm_s390_set_tod_low(kvm, attr);
 946                break;
 947        default:
 948                ret = -ENXIO;
 949                break;
 950        }
 951        return ret;
 952}
 953
 954static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
 955                                        struct kvm_s390_vm_tod_clock *gtod)
 956{
 957        struct kvm_s390_tod_clock_ext htod;
 958
 959        preempt_disable();
 960
 961        get_tod_clock_ext((char *)&htod);
 962
 963        gtod->tod = htod.tod + kvm->arch.epoch;
 964        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
 965
 966        if (gtod->tod < htod.tod)
 967                gtod->epoch_idx += 1;
 968
 969        preempt_enable();
 970}
 971
 972static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 973{
 974        struct kvm_s390_vm_tod_clock gtod;
 975
 976        memset(&gtod, 0, sizeof(gtod));
 977
 978        if (test_kvm_facility(kvm, 139))
 979                kvm_s390_get_tod_clock_ext(kvm, &gtod);
 980        else
 981                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
 982
 983        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 984                return -EFAULT;
 985
 986        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
 987                gtod.epoch_idx, gtod.tod);
 988        return 0;
 989}
 990
 991static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 992{
 993        u8 gtod_high = 0;
 994
 995        if (copy_to_user((void __user *)attr->addr, &gtod_high,
 996                                         sizeof(gtod_high)))
 997                return -EFAULT;
 998        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 999
1000        return 0;
1001}
1002
1003static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1004{
1005        u64 gtod;
1006
1007        gtod = kvm_s390_get_tod_clock_fast(kvm);
1008        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009                return -EFAULT;
1010        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1011
1012        return 0;
1013}
1014
1015static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1016{
1017        int ret;
1018
1019        if (attr->flags)
1020                return -EINVAL;
1021
1022        switch (attr->attr) {
1023        case KVM_S390_VM_TOD_EXT:
1024                ret = kvm_s390_get_tod_ext(kvm, attr);
1025                break;
1026        case KVM_S390_VM_TOD_HIGH:
1027                ret = kvm_s390_get_tod_high(kvm, attr);
1028                break;
1029        case KVM_S390_VM_TOD_LOW:
1030                ret = kvm_s390_get_tod_low(kvm, attr);
1031                break;
1032        default:
1033                ret = -ENXIO;
1034                break;
1035        }
1036        return ret;
1037}
1038
1039static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040{
1041        struct kvm_s390_vm_cpu_processor *proc;
1042        u16 lowest_ibc, unblocked_ibc;
1043        int ret = 0;
1044
1045        mutex_lock(&kvm->lock);
1046        if (kvm->created_vcpus) {
1047                ret = -EBUSY;
1048                goto out;
1049        }
1050        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1051        if (!proc) {
1052                ret = -ENOMEM;
1053                goto out;
1054        }
1055        if (!copy_from_user(proc, (void __user *)attr->addr,
1056                            sizeof(*proc))) {
1057                kvm->arch.model.cpuid = proc->cpuid;
1058                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1059                unblocked_ibc = sclp.ibc & 0xfff;
1060                if (lowest_ibc && proc->ibc) {
1061                        if (proc->ibc > unblocked_ibc)
1062                                kvm->arch.model.ibc = unblocked_ibc;
1063                        else if (proc->ibc < lowest_ibc)
1064                                kvm->arch.model.ibc = lowest_ibc;
1065                        else
1066                                kvm->arch.model.ibc = proc->ibc;
1067                }
1068                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1069                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1070                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1071                         kvm->arch.model.ibc,
1072                         kvm->arch.model.cpuid);
1073                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1074                         kvm->arch.model.fac_list[0],
1075                         kvm->arch.model.fac_list[1],
1076                         kvm->arch.model.fac_list[2]);
1077        } else
1078                ret = -EFAULT;
1079        kfree(proc);
1080out:
1081        mutex_unlock(&kvm->lock);
1082        return ret;
1083}
1084
1085static int kvm_s390_set_processor_feat(struct kvm *kvm,
1086                                       struct kvm_device_attr *attr)
1087{
1088        struct kvm_s390_vm_cpu_feat data;
1089        int ret = -EBUSY;
1090
1091        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092                return -EFAULT;
1093        if (!bitmap_subset((unsigned long *) data.feat,
1094                           kvm_s390_available_cpu_feat,
1095                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1096                return -EINVAL;
1097
1098        mutex_lock(&kvm->lock);
1099        if (!atomic_read(&kvm->online_vcpus)) {
1100                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101                            KVM_S390_VM_CPU_FEAT_NR_BITS);
1102                ret = 0;
1103        }
1104        mutex_unlock(&kvm->lock);
1105        return ret;
1106}
1107
1108static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1109                                          struct kvm_device_attr *attr)
1110{
1111        /*
1112         * Once supported by kernel + hw, we have to store the subfunctions
1113         * in kvm->arch and remember that user space configured them.
1114         */
1115        return -ENXIO;
1116}
1117
1118static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1119{
1120        int ret = -ENXIO;
1121
1122        switch (attr->attr) {
1123        case KVM_S390_VM_CPU_PROCESSOR:
1124                ret = kvm_s390_set_processor(kvm, attr);
1125                break;
1126        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1127                ret = kvm_s390_set_processor_feat(kvm, attr);
1128                break;
1129        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1130                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1131                break;
1132        }
1133        return ret;
1134}
1135
1136static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137{
1138        struct kvm_s390_vm_cpu_processor *proc;
1139        int ret = 0;
1140
1141        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1142        if (!proc) {
1143                ret = -ENOMEM;
1144                goto out;
1145        }
1146        proc->cpuid = kvm->arch.model.cpuid;
1147        proc->ibc = kvm->arch.model.ibc;
1148        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1149               S390_ARCH_FAC_LIST_SIZE_BYTE);
1150        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1151                 kvm->arch.model.ibc,
1152                 kvm->arch.model.cpuid);
1153        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1154                 kvm->arch.model.fac_list[0],
1155                 kvm->arch.model.fac_list[1],
1156                 kvm->arch.model.fac_list[2]);
1157        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1158                ret = -EFAULT;
1159        kfree(proc);
1160out:
1161        return ret;
1162}
1163
1164static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165{
1166        struct kvm_s390_vm_cpu_machine *mach;
1167        int ret = 0;
1168
1169        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1170        if (!mach) {
1171                ret = -ENOMEM;
1172                goto out;
1173        }
1174        get_cpu_id((struct cpuid *) &mach->cpuid);
1175        mach->ibc = sclp.ibc;
1176        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1177               S390_ARCH_FAC_LIST_SIZE_BYTE);
1178        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1179               sizeof(S390_lowcore.stfle_fac_list));
1180        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1181                 kvm->arch.model.ibc,
1182                 kvm->arch.model.cpuid);
1183        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1184                 mach->fac_mask[0],
1185                 mach->fac_mask[1],
1186                 mach->fac_mask[2]);
1187        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1188                 mach->fac_list[0],
1189                 mach->fac_list[1],
1190                 mach->fac_list[2]);
1191        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1192                ret = -EFAULT;
1193        kfree(mach);
1194out:
1195        return ret;
1196}
1197
1198static int kvm_s390_get_processor_feat(struct kvm *kvm,
1199                                       struct kvm_device_attr *attr)
1200{
1201        struct kvm_s390_vm_cpu_feat data;
1202
1203        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1204                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1205        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1206                return -EFAULT;
1207        return 0;
1208}
1209
1210static int kvm_s390_get_machine_feat(struct kvm *kvm,
1211                                     struct kvm_device_attr *attr)
1212{
1213        struct kvm_s390_vm_cpu_feat data;
1214
1215        bitmap_copy((unsigned long *) data.feat,
1216                    kvm_s390_available_cpu_feat,
1217                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1218        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1219                return -EFAULT;
1220        return 0;
1221}
1222
1223static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1224                                          struct kvm_device_attr *attr)
1225{
1226        /*
1227         * Once we can actually configure subfunctions (kernel + hw support),
1228         * we have to check if they were already set by user space, if so copy
1229         * them from kvm->arch.
1230         */
1231        return -ENXIO;
1232}
1233
1234static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1235                                        struct kvm_device_attr *attr)
1236{
1237        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1238            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1239                return -EFAULT;
1240        return 0;
1241}
1242static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1243{
1244        int ret = -ENXIO;
1245
1246        switch (attr->attr) {
1247        case KVM_S390_VM_CPU_PROCESSOR:
1248                ret = kvm_s390_get_processor(kvm, attr);
1249                break;
1250        case KVM_S390_VM_CPU_MACHINE:
1251                ret = kvm_s390_get_machine(kvm, attr);
1252                break;
1253        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1254                ret = kvm_s390_get_processor_feat(kvm, attr);
1255                break;
1256        case KVM_S390_VM_CPU_MACHINE_FEAT:
1257                ret = kvm_s390_get_machine_feat(kvm, attr);
1258                break;
1259        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1260                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261                break;
1262        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1263                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1264                break;
1265        }
1266        return ret;
1267}
1268
1269static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1270{
1271        int ret;
1272
1273        switch (attr->group) {
1274        case KVM_S390_VM_MEM_CTRL:
1275                ret = kvm_s390_set_mem_control(kvm, attr);
1276                break;
1277        case KVM_S390_VM_TOD:
1278                ret = kvm_s390_set_tod(kvm, attr);
1279                break;
1280        case KVM_S390_VM_CPU_MODEL:
1281                ret = kvm_s390_set_cpu_model(kvm, attr);
1282                break;
1283        case KVM_S390_VM_CRYPTO:
1284                ret = kvm_s390_vm_set_crypto(kvm, attr);
1285                break;
1286        case KVM_S390_VM_MIGRATION:
1287                ret = kvm_s390_vm_set_migration(kvm, attr);
1288                break;
1289        default:
1290                ret = -ENXIO;
1291                break;
1292        }
1293
1294        return ret;
1295}
1296
1297static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1298{
1299        int ret;
1300
1301        switch (attr->group) {
1302        case KVM_S390_VM_MEM_CTRL:
1303                ret = kvm_s390_get_mem_control(kvm, attr);
1304                break;
1305        case KVM_S390_VM_TOD:
1306                ret = kvm_s390_get_tod(kvm, attr);
1307                break;
1308        case KVM_S390_VM_CPU_MODEL:
1309                ret = kvm_s390_get_cpu_model(kvm, attr);
1310                break;
1311        case KVM_S390_VM_MIGRATION:
1312                ret = kvm_s390_vm_get_migration(kvm, attr);
1313                break;
1314        default:
1315                ret = -ENXIO;
1316                break;
1317        }
1318
1319        return ret;
1320}
1321
1322static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1323{
1324        int ret;
1325
1326        switch (attr->group) {
1327        case KVM_S390_VM_MEM_CTRL:
1328                switch (attr->attr) {
1329                case KVM_S390_VM_MEM_ENABLE_CMMA:
1330                case KVM_S390_VM_MEM_CLR_CMMA:
1331                        ret = sclp.has_cmma ? 0 : -ENXIO;
1332                        break;
1333                case KVM_S390_VM_MEM_LIMIT_SIZE:
1334                        ret = 0;
1335                        break;
1336                default:
1337                        ret = -ENXIO;
1338                        break;
1339                }
1340                break;
1341        case KVM_S390_VM_TOD:
1342                switch (attr->attr) {
1343                case KVM_S390_VM_TOD_LOW:
1344                case KVM_S390_VM_TOD_HIGH:
1345                        ret = 0;
1346                        break;
1347                default:
1348                        ret = -ENXIO;
1349                        break;
1350                }
1351                break;
1352        case KVM_S390_VM_CPU_MODEL:
1353                switch (attr->attr) {
1354                case KVM_S390_VM_CPU_PROCESSOR:
1355                case KVM_S390_VM_CPU_MACHINE:
1356                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1357                case KVM_S390_VM_CPU_MACHINE_FEAT:
1358                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1359                        ret = 0;
1360                        break;
1361                /* configuring subfunctions is not supported yet */
1362                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1363                default:
1364                        ret = -ENXIO;
1365                        break;
1366                }
1367                break;
1368        case KVM_S390_VM_CRYPTO:
1369                switch (attr->attr) {
1370                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1371                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1372                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1373                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1374                        ret = 0;
1375                        break;
1376                default:
1377                        ret = -ENXIO;
1378                        break;
1379                }
1380                break;
1381        case KVM_S390_VM_MIGRATION:
1382                ret = 0;
1383                break;
1384        default:
1385                ret = -ENXIO;
1386                break;
1387        }
1388
1389        return ret;
1390}
1391
1392static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1393{
1394        uint8_t *keys;
1395        uint64_t hva;
1396        int srcu_idx, i, r = 0;
1397
1398        if (args->flags != 0)
1399                return -EINVAL;
1400
1401        /* Is this guest using storage keys? */
1402        if (!mm_use_skey(current->mm))
1403                return KVM_S390_GET_SKEYS_NONE;
1404
1405        /* Enforce sane limit on memory allocation */
1406        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1407                return -EINVAL;
1408
1409        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1410        if (!keys)
1411                return -ENOMEM;
1412
1413        down_read(&current->mm->mmap_sem);
1414        srcu_idx = srcu_read_lock(&kvm->srcu);
1415        for (i = 0; i < args->count; i++) {
1416                hva = gfn_to_hva(kvm, args->start_gfn + i);
1417                if (kvm_is_error_hva(hva)) {
1418                        r = -EFAULT;
1419                        break;
1420                }
1421
1422                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1423                if (r)
1424                        break;
1425        }
1426        srcu_read_unlock(&kvm->srcu, srcu_idx);
1427        up_read(&current->mm->mmap_sem);
1428
1429        if (!r) {
1430                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1431                                 sizeof(uint8_t) * args->count);
1432                if (r)
1433                        r = -EFAULT;
1434        }
1435
1436        kvfree(keys);
1437        return r;
1438}
1439
1440static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441{
1442        uint8_t *keys;
1443        uint64_t hva;
1444        int srcu_idx, i, r = 0;
1445
1446        if (args->flags != 0)
1447                return -EINVAL;
1448
1449        /* Enforce sane limit on memory allocation */
1450        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451                return -EINVAL;
1452
1453        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454        if (!keys)
1455                return -ENOMEM;
1456
1457        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458                           sizeof(uint8_t) * args->count);
1459        if (r) {
1460                r = -EFAULT;
1461                goto out;
1462        }
1463
1464        /* Enable storage key handling for the guest */
1465        r = s390_enable_skey();
1466        if (r)
1467                goto out;
1468
1469        down_read(&current->mm->mmap_sem);
1470        srcu_idx = srcu_read_lock(&kvm->srcu);
1471        for (i = 0; i < args->count; i++) {
1472                hva = gfn_to_hva(kvm, args->start_gfn + i);
1473                if (kvm_is_error_hva(hva)) {
1474                        r = -EFAULT;
1475                        break;
1476                }
1477
1478                /* Lowest order bit is reserved */
1479                if (keys[i] & 0x01) {
1480                        r = -EINVAL;
1481                        break;
1482                }
1483
1484                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485                if (r)
1486                        break;
1487        }
1488        srcu_read_unlock(&kvm->srcu, srcu_idx);
1489        up_read(&current->mm->mmap_sem);
1490out:
1491        kvfree(keys);
1492        return r;
1493}
1494
1495/*
1496 * Base address and length must be sent at the start of each block, therefore
1497 * it's cheaper to send some clean data, as long as it's less than the size of
1498 * two longs.
1499 */
1500#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1501/* for consistency */
1502#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503
1504/*
1505 * This function searches for the next page with dirty CMMA attributes, and
1506 * saves the attributes in the buffer up to either the end of the buffer or
1507 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508 * no trailing clean bytes are saved.
1509 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1510 * output buffer will indicate 0 as length.
1511 */
1512static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513                                  struct kvm_s390_cmma_log *args)
1514{
1515        struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516        unsigned long bufsize, hva, pgstev, i, next, cur;
1517        int srcu_idx, peek, r = 0, rr;
1518        u8 *res;
1519
1520        cur = args->start_gfn;
1521        i = next = pgstev = 0;
1522
1523        if (unlikely(!kvm->arch.use_cmma))
1524                return -ENXIO;
1525        /* Invalid/unsupported flags were specified */
1526        if (args->flags & ~KVM_S390_CMMA_PEEK)
1527                return -EINVAL;
1528        /* Migration mode query, and we are not doing a migration */
1529        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530        if (!peek && !s)
1531                return -EINVAL;
1532        /* CMMA is disabled or was not used, or the buffer has length zero */
1533        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534        if (!bufsize || !kvm->mm->context.use_cmma) {
1535                memset(args, 0, sizeof(*args));
1536                return 0;
1537        }
1538
1539        if (!peek) {
1540                /* We are not peeking, and there are no dirty pages */
1541                if (!atomic64_read(&s->dirty_pages)) {
1542                        memset(args, 0, sizeof(*args));
1543                        return 0;
1544                }
1545                cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546                                    args->start_gfn);
1547                if (cur >= s->bitmap_size)      /* nothing found, loop back */
1548                        cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549                if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1550                        memset(args, 0, sizeof(*args));
1551                        return 0;
1552                }
1553                next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554        }
1555
1556        res = vmalloc(bufsize);
1557        if (!res)
1558                return -ENOMEM;
1559
1560        args->start_gfn = cur;
1561
1562        down_read(&kvm->mm->mmap_sem);
1563        srcu_idx = srcu_read_lock(&kvm->srcu);
1564        while (i < bufsize) {
1565                hva = gfn_to_hva(kvm, cur);
1566                if (kvm_is_error_hva(hva)) {
1567                        r = -EFAULT;
1568                        break;
1569                }
1570                /* decrement only if we actually flipped the bit to 0 */
1571                if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572                        atomic64_dec(&s->dirty_pages);
1573                r = get_pgste(kvm->mm, hva, &pgstev);
1574                if (r < 0)
1575                        pgstev = 0;
1576                /* save the value */
1577                res[i++] = (pgstev >> 24) & 0x43;
1578                /*
1579                 * if the next bit is too far away, stop.
1580                 * if we reached the previous "next", find the next one
1581                 */
1582                if (!peek) {
1583                        if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584                                break;
1585                        if (cur == next)
1586                                next = find_next_bit(s->pgste_bitmap,
1587                                                     s->bitmap_size, cur + 1);
1588                /* reached the end of the bitmap or of the buffer, stop */
1589                        if ((next >= s->bitmap_size) ||
1590                            (next >= args->start_gfn + bufsize))
1591                                break;
1592                }
1593                cur++;
1594        }
1595        srcu_read_unlock(&kvm->srcu, srcu_idx);
1596        up_read(&kvm->mm->mmap_sem);
1597        args->count = i;
1598        args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599
1600        rr = copy_to_user((void __user *)args->values, res, args->count);
1601        if (rr)
1602                r = -EFAULT;
1603
1604        vfree(res);
1605        return r;
1606}
1607
1608/*
1609 * This function sets the CMMA attributes for the given pages. If the input
1610 * buffer has zero length, no action is taken, otherwise the attributes are
1611 * set and the mm->context.use_cmma flag is set.
1612 */
1613static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614                                  const struct kvm_s390_cmma_log *args)
1615{
1616        unsigned long hva, mask, pgstev, i;
1617        uint8_t *bits;
1618        int srcu_idx, r = 0;
1619
1620        mask = args->mask;
1621
1622        if (!kvm->arch.use_cmma)
1623                return -ENXIO;
1624        /* invalid/unsupported flags */
1625        if (args->flags != 0)
1626                return -EINVAL;
1627        /* Enforce sane limit on memory allocation */
1628        if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629                return -EINVAL;
1630        /* Nothing to do */
1631        if (args->count == 0)
1632                return 0;
1633
1634        bits = vmalloc(sizeof(*bits) * args->count);
1635        if (!bits)
1636                return -ENOMEM;
1637
1638        r = copy_from_user(bits, (void __user *)args->values, args->count);
1639        if (r) {
1640                r = -EFAULT;
1641                goto out;
1642        }
1643
1644        down_read(&kvm->mm->mmap_sem);
1645        srcu_idx = srcu_read_lock(&kvm->srcu);
1646        for (i = 0; i < args->count; i++) {
1647                hva = gfn_to_hva(kvm, args->start_gfn + i);
1648                if (kvm_is_error_hva(hva)) {
1649                        r = -EFAULT;
1650                        break;
1651                }
1652
1653                pgstev = bits[i];
1654                pgstev = pgstev << 24;
1655                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1656                set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657        }
1658        srcu_read_unlock(&kvm->srcu, srcu_idx);
1659        up_read(&kvm->mm->mmap_sem);
1660
1661        if (!kvm->mm->context.use_cmma) {
1662                down_write(&kvm->mm->mmap_sem);
1663                kvm->mm->context.use_cmma = 1;
1664                up_write(&kvm->mm->mmap_sem);
1665        }
1666out:
1667        vfree(bits);
1668        return r;
1669}
1670
1671long kvm_arch_vm_ioctl(struct file *filp,
1672                       unsigned int ioctl, unsigned long arg)
1673{
1674        struct kvm *kvm = filp->private_data;
1675        void __user *argp = (void __user *)arg;
1676        struct kvm_device_attr attr;
1677        int r;
1678
1679        switch (ioctl) {
1680        case KVM_S390_INTERRUPT: {
1681                struct kvm_s390_interrupt s390int;
1682
1683                r = -EFAULT;
1684                if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685                        break;
1686                r = kvm_s390_inject_vm(kvm, &s390int);
1687                break;
1688        }
1689        case KVM_ENABLE_CAP: {
1690                struct kvm_enable_cap cap;
1691                r = -EFAULT;
1692                if (copy_from_user(&cap, argp, sizeof(cap)))
1693                        break;
1694                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695                break;
1696        }
1697        case KVM_CREATE_IRQCHIP: {
1698                struct kvm_irq_routing_entry routing;
1699
1700                r = -EINVAL;
1701                if (kvm->arch.use_irqchip) {
1702                        /* Set up dummy routing. */
1703                        memset(&routing, 0, sizeof(routing));
1704                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705                }
1706                break;
1707        }
1708        case KVM_SET_DEVICE_ATTR: {
1709                r = -EFAULT;
1710                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711                        break;
1712                r = kvm_s390_vm_set_attr(kvm, &attr);
1713                break;
1714        }
1715        case KVM_GET_DEVICE_ATTR: {
1716                r = -EFAULT;
1717                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718                        break;
1719                r = kvm_s390_vm_get_attr(kvm, &attr);
1720                break;
1721        }
1722        case KVM_HAS_DEVICE_ATTR: {
1723                r = -EFAULT;
1724                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725                        break;
1726                r = kvm_s390_vm_has_attr(kvm, &attr);
1727                break;
1728        }
1729        case KVM_S390_GET_SKEYS: {
1730                struct kvm_s390_skeys args;
1731
1732                r = -EFAULT;
1733                if (copy_from_user(&args, argp,
1734                                   sizeof(struct kvm_s390_skeys)))
1735                        break;
1736                r = kvm_s390_get_skeys(kvm, &args);
1737                break;
1738        }
1739        case KVM_S390_SET_SKEYS: {
1740                struct kvm_s390_skeys args;
1741
1742                r = -EFAULT;
1743                if (copy_from_user(&args, argp,
1744                                   sizeof(struct kvm_s390_skeys)))
1745                        break;
1746                r = kvm_s390_set_skeys(kvm, &args);
1747                break;
1748        }
1749        case KVM_S390_GET_CMMA_BITS: {
1750                struct kvm_s390_cmma_log args;
1751
1752                r = -EFAULT;
1753                if (copy_from_user(&args, argp, sizeof(args)))
1754                        break;
1755                r = kvm_s390_get_cmma_bits(kvm, &args);
1756                if (!r) {
1757                        r = copy_to_user(argp, &args, sizeof(args));
1758                        if (r)
1759                                r = -EFAULT;
1760                }
1761                break;
1762        }
1763        case KVM_S390_SET_CMMA_BITS: {
1764                struct kvm_s390_cmma_log args;
1765
1766                r = -EFAULT;
1767                if (copy_from_user(&args, argp, sizeof(args)))
1768                        break;
1769                r = kvm_s390_set_cmma_bits(kvm, &args);
1770                break;
1771        }
1772        default:
1773                r = -ENOTTY;
1774        }
1775
1776        return r;
1777}
1778
1779static int kvm_s390_query_ap_config(u8 *config)
1780{
1781        u32 fcn_code = 0x04000000UL;
1782        u32 cc = 0;
1783
1784        memset(config, 0, 128);
1785        asm volatile(
1786                "lgr 0,%1\n"
1787                "lgr 2,%2\n"
1788                ".long 0xb2af0000\n"            /* PQAP(QCI) */
1789                "0: ipm %0\n"
1790                "srl %0,28\n"
1791                "1:\n"
1792                EX_TABLE(0b, 1b)
1793                : "+r" (cc)
1794                : "r" (fcn_code), "r" (config)
1795                : "cc", "0", "2", "memory"
1796        );
1797
1798        return cc;
1799}
1800
1801static int kvm_s390_apxa_installed(void)
1802{
1803        u8 config[128];
1804        int cc;
1805
1806        if (test_facility(12)) {
1807                cc = kvm_s390_query_ap_config(config);
1808
1809                if (cc)
1810                        pr_err("PQAP(QCI) failed with cc=%d", cc);
1811                else
1812                        return config[0] & 0x40;
1813        }
1814
1815        return 0;
1816}
1817
1818static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819{
1820        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821
1822        if (kvm_s390_apxa_installed())
1823                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824        else
1825                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826}
1827
1828static u64 kvm_s390_get_initial_cpuid(void)
1829{
1830        struct cpuid cpuid;
1831
1832        get_cpu_id(&cpuid);
1833        cpuid.version = 0xff;
1834        return *((u64 *) &cpuid);
1835}
1836
1837static void kvm_s390_crypto_init(struct kvm *kvm)
1838{
1839        if (!test_kvm_facility(kvm, 76))
1840                return;
1841
1842        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843        kvm_s390_set_crycb_format(kvm);
1844
1845        /* Enable AES/DEA protected key functions by default */
1846        kvm->arch.crypto.aes_kw = 1;
1847        kvm->arch.crypto.dea_kw = 1;
1848        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852}
1853
1854static void sca_dispose(struct kvm *kvm)
1855{
1856        if (kvm->arch.use_esca)
1857                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858        else
1859                free_page((unsigned long)(kvm->arch.sca));
1860        kvm->arch.sca = NULL;
1861}
1862
1863int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864{
1865        gfp_t alloc_flags = GFP_KERNEL;
1866        int i, rc;
1867        char debug_name[16];
1868        static unsigned long sca_offset;
1869
1870        rc = -EINVAL;
1871#ifdef CONFIG_KVM_S390_UCONTROL
1872        if (type & ~KVM_VM_S390_UCONTROL)
1873                goto out_err;
1874        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875                goto out_err;
1876#else
1877        if (type)
1878                goto out_err;
1879#endif
1880
1881        rc = s390_enable_sie();
1882        if (rc)
1883                goto out_err;
1884
1885        rc = -ENOMEM;
1886
1887        ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1888
1889        kvm->arch.use_esca = 0; /* start with basic SCA */
1890        if (!sclp.has_64bscao)
1891                alloc_flags |= GFP_DMA;
1892        rwlock_init(&kvm->arch.sca_lock);
1893        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894        if (!kvm->arch.sca)
1895                goto out_err;
1896        spin_lock(&kvm_lock);
1897        sca_offset += 16;
1898        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899                sca_offset = 0;
1900        kvm->arch.sca = (struct bsca_block *)
1901                        ((char *) kvm->arch.sca + sca_offset);
1902        spin_unlock(&kvm_lock);
1903
1904        sprintf(debug_name, "kvm-%u", current->pid);
1905
1906        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907        if (!kvm->arch.dbf)
1908                goto out_err;
1909
1910        kvm->arch.sie_page2 =
1911             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912        if (!kvm->arch.sie_page2)
1913                goto out_err;
1914
1915        /* Populate the facility mask initially. */
1916        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917               sizeof(S390_lowcore.stfle_fac_list));
1918        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919                if (i < kvm_s390_fac_list_mask_size())
1920                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921                else
1922                        kvm->arch.model.fac_mask[i] = 0UL;
1923        }
1924
1925        /* Populate the facility list initially. */
1926        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928               S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930        /* we are always in czam mode - even on pre z14 machines */
1931        set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932        set_kvm_facility(kvm->arch.model.fac_list, 138);
1933        /* we emulate STHYI in kvm */
1934        set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935        set_kvm_facility(kvm->arch.model.fac_list, 74);
1936        if (MACHINE_HAS_TLB_GUEST) {
1937                set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938                set_kvm_facility(kvm->arch.model.fac_list, 147);
1939        }
1940
1941        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944        kvm_s390_crypto_init(kvm);
1945
1946        mutex_init(&kvm->arch.float_int.ais_lock);
1947        kvm->arch.float_int.simm = 0;
1948        kvm->arch.float_int.nimm = 0;
1949        spin_lock_init(&kvm->arch.float_int.lock);
1950        for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952        init_waitqueue_head(&kvm->arch.ipte_wq);
1953        mutex_init(&kvm->arch.ipte_mutex);
1954
1955        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956        VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958        if (type & KVM_VM_S390_UCONTROL) {
1959                kvm->arch.gmap = NULL;
1960                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961        } else {
1962                if (sclp.hamax == U64_MAX)
1963                        kvm->arch.mem_limit = TASK_SIZE_MAX;
1964                else
1965                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966                                                    sclp.hamax + 1);
1967                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968                if (!kvm->arch.gmap)
1969                        goto out_err;
1970                kvm->arch.gmap->private = kvm;
1971                kvm->arch.gmap->pfault_enabled = 0;
1972        }
1973
1974        kvm->arch.css_support = 0;
1975        kvm->arch.use_irqchip = 0;
1976        kvm->arch.epoch = 0;
1977
1978        spin_lock_init(&kvm->arch.start_stop_lock);
1979        kvm_s390_vsie_init(kvm);
1980        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982        return 0;
1983out_err:
1984        free_page((unsigned long)kvm->arch.sie_page2);
1985        debug_unregister(kvm->arch.dbf);
1986        sca_dispose(kvm);
1987        KVM_EVENT(3, "creation of vm failed: %d", rc);
1988        return rc;
1989}
1990
/* s390 provides no per-vcpu debugfs entries: always reports false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1995
/* Nothing to create for @vcpu; unconditionally succeeds with 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2000
2001void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002{
2003        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005        kvm_s390_clear_local_irqs(vcpu);
2006        kvm_clear_async_pf_completion_queue(vcpu);
2007        if (!kvm_is_ucontrol(vcpu->kvm))
2008                sca_del_vcpu(vcpu);
2009
2010        if (kvm_is_ucontrol(vcpu->kvm))
2011                gmap_remove(vcpu->arch.gmap);
2012
2013        if (vcpu->kvm->arch.use_cmma)
2014                kvm_s390_vcpu_unsetup_cmma(vcpu);
2015        free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017        kvm_vcpu_uninit(vcpu);
2018        kmem_cache_free(kvm_vcpu_cache, vcpu);
2019}
2020
2021static void kvm_free_vcpus(struct kvm *kvm)
2022{
2023        unsigned int i;
2024        struct kvm_vcpu *vcpu;
2025
2026        kvm_for_each_vcpu(i, vcpu, kvm)
2027                kvm_arch_vcpu_destroy(vcpu);
2028
2029        mutex_lock(&kvm->lock);
2030        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031                kvm->vcpus[i] = NULL;
2032
2033        atomic_set(&kvm->online_vcpus, 0);
2034        mutex_unlock(&kvm->lock);
2035}
2036
2037void kvm_arch_destroy_vm(struct kvm *kvm)
2038{
2039        kvm_free_vcpus(kvm);
2040        sca_dispose(kvm);
2041        debug_unregister(kvm->arch.dbf);
2042        free_page((unsigned long)kvm->arch.sie_page2);
2043        if (!kvm_is_ucontrol(kvm))
2044                gmap_remove(kvm->arch.gmap);
2045        kvm_s390_destroy_adapters(kvm);
2046        kvm_s390_clear_float_irqs(kvm);
2047        kvm_s390_vsie_destroy(kvm);
2048        if (kvm->arch.migration_state) {
2049                vfree(kvm->arch.migration_state->pgste_bitmap);
2050                kfree(kvm->arch.migration_state);
2051        }
2052        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053}
2054
2055/* Section: vcpu related */
2056static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057{
2058        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059        if (!vcpu->arch.gmap)
2060                return -ENOMEM;
2061        vcpu->arch.gmap->private = vcpu->kvm;
2062
2063        return 0;
2064}
2065
2066static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067{
2068        if (!kvm_s390_use_sca_entries())
2069                return;
2070        read_lock(&vcpu->kvm->arch.sca_lock);
2071        if (vcpu->kvm->arch.use_esca) {
2072                struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075                sca->cpu[vcpu->vcpu_id].sda = 0;
2076        } else {
2077                struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080                sca->cpu[vcpu->vcpu_id].sda = 0;
2081        }
2082        read_unlock(&vcpu->kvm->arch.sca_lock);
2083}
2084
2085static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086{
2087        if (!kvm_s390_use_sca_entries()) {
2088                struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090                /* we still need the basic sca for the ipte control */
2091                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093        }
2094        read_lock(&vcpu->kvm->arch.sca_lock);
2095        if (vcpu->kvm->arch.use_esca) {
2096                struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103        } else {
2104                struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110        }
2111        read_unlock(&vcpu->kvm->arch.sca_lock);
2112}
2113
2114/* Basic SCA to Extended SCA data copy routines */
2115static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116{
2117        d->sda = s->sda;
2118        d->sigp_ctrl.c = s->sigp_ctrl.c;
2119        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120}
2121
2122static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123{
2124        int i;
2125
2126        d->ipte_control = s->ipte_control;
2127        d->mcn[0] = s->mcn;
2128        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130}
2131
2132static int sca_switch_to_extended(struct kvm *kvm)
2133{
2134        struct bsca_block *old_sca = kvm->arch.sca;
2135        struct esca_block *new_sca;
2136        struct kvm_vcpu *vcpu;
2137        unsigned int vcpu_idx;
2138        u32 scaol, scaoh;
2139
2140        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141        if (!new_sca)
2142                return -ENOMEM;
2143
2144        scaoh = (u32)((u64)(new_sca) >> 32);
2145        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147        kvm_s390_vcpu_block_all(kvm);
2148        write_lock(&kvm->arch.sca_lock);
2149
2150        sca_copy_b_to_e(new_sca, old_sca);
2151
2152        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153                vcpu->arch.sie_block->scaoh = scaoh;
2154                vcpu->arch.sie_block->scaol = scaol;
2155                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156        }
2157        kvm->arch.sca = new_sca;
2158        kvm->arch.use_esca = 1;
2159
2160        write_unlock(&kvm->arch.sca_lock);
2161        kvm_s390_vcpu_unblock_all(kvm);
2162
2163        free_page((unsigned long)old_sca);
2164
2165        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166                 old_sca, kvm->arch.sca);
2167        return 0;
2168}
2169
2170static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171{
2172        int rc;
2173
2174        if (!kvm_s390_use_sca_entries()) {
2175                if (id < KVM_MAX_VCPUS)
2176                        return true;
2177                return false;
2178        }
2179        if (id < KVM_S390_BSCA_CPU_SLOTS)
2180                return true;
2181        if (!sclp.has_esca || !sclp.has_64bscao)
2182                return false;
2183
2184        mutex_lock(&kvm->lock);
2185        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186        mutex_unlock(&kvm->lock);
2187
2188        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189}
2190
2191int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192{
2193        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194        kvm_clear_async_pf_completion_queue(vcpu);
2195        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196                                    KVM_SYNC_GPRS |
2197                                    KVM_SYNC_ACRS |
2198                                    KVM_SYNC_CRS |
2199                                    KVM_SYNC_ARCH0 |
2200                                    KVM_SYNC_PFAULT;
2201        kvm_s390_set_prefix(vcpu, 0);
2202        if (test_kvm_facility(vcpu->kvm, 64))
2203                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204        if (test_kvm_facility(vcpu->kvm, 133))
2205                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206        /* fprs can be synchronized via vrs, even if the guest has no vx. With
2207         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2208         */
2209        if (MACHINE_HAS_VX)
2210                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2211        else
2212                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2213
2214        if (kvm_is_ucontrol(vcpu->kvm))
2215                return __kvm_ucontrol_vcpu_init(vcpu);
2216
2217        return 0;
2218}
2219
2220/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2222{
2223        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225        vcpu->arch.cputm_start = get_tod_clock_fast();
2226        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2227}
2228
2229/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2231{
2232        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235        vcpu->arch.cputm_start = 0;
2236        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2237}
2238
2239/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241{
2242        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243        vcpu->arch.cputm_enabled = true;
2244        __start_cpu_timer_accounting(vcpu);
2245}
2246
2247/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249{
2250        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251        __stop_cpu_timer_accounting(vcpu);
2252        vcpu->arch.cputm_enabled = false;
2253}
2254
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2261
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2268
2269/* set the cpu timer - may only be called from the VCPU thread itself */
2270void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2271{
2272        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274        if (vcpu->arch.cputm_enabled)
2275                vcpu->arch.cputm_start = get_tod_clock_fast();
2276        vcpu->arch.sie_block->cputm = cputm;
2277        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2278        preempt_enable();
2279}
2280
2281/* update and get the cpu timer - can also be called from other VCPU threads */
2282__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2283{
2284        unsigned int seq;
2285        __u64 value;
2286
2287        if (unlikely(!vcpu->arch.cputm_enabled))
2288                return vcpu->arch.sie_block->cputm;
2289
2290        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2291        do {
2292                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2293                /*
2294                 * If the writer would ever execute a read in the critical
2295                 * section, e.g. in irq context, we have a deadlock.
2296                 */
2297                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298                value = vcpu->arch.sie_block->cputm;
2299                /* if cputm_start is 0, accounting is being started/stopped */
2300                if (likely(vcpu->arch.cputm_start))
2301                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2303        preempt_enable();
2304        return value;
2305}
2306
2307void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2308{
2309
2310        gmap_enable(vcpu->arch.enabled_gmap);
2311        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313                __start_cpu_timer_accounting(vcpu);
2314        vcpu->cpu = cpu;
2315}
2316
2317void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2318{
2319        vcpu->cpu = -1;
2320        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321                __stop_cpu_timer_accounting(vcpu);
2322        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323        vcpu->arch.enabled_gmap = gmap_get_enabled();
2324        gmap_disable(vcpu->arch.enabled_gmap);
2325
2326}
2327
2328static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2329{
2330        /* this equals initial cpu reset in pop, but we don't switch to ESA */
2331        vcpu->arch.sie_block->gpsw.mask = 0UL;
2332        vcpu->arch.sie_block->gpsw.addr = 0UL;
2333        kvm_s390_set_prefix(vcpu, 0);
2334        kvm_s390_set_cpu_timer(vcpu, 0);
2335        vcpu->arch.sie_block->ckc       = 0UL;
2336        vcpu->arch.sie_block->todpr     = 0;
2337        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2339        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340        /* make sure the new fpc will be lazily loaded */
2341        save_fpu_regs();
2342        current->thread.fpu.fpc = 0;
2343        vcpu->arch.sie_block->gbea = 1;
2344        vcpu->arch.sie_block->pp = 0;
2345        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346        kvm_clear_async_pf_completion_queue(vcpu);
2347        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348                kvm_s390_vcpu_stop(vcpu);
2349        kvm_s390_clear_local_irqs(vcpu);
2350}
2351
2352void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2353{
2354        mutex_lock(&vcpu->kvm->lock);
2355        preempt_disable();
2356        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2357        preempt_enable();
2358        mutex_unlock(&vcpu->kvm->lock);
2359        if (!kvm_is_ucontrol(vcpu->kvm)) {
2360                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2361                sca_add_vcpu(vcpu);
2362        }
2363        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365        /* make vcpu_load load the right gmap on the first trigger */
2366        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2367}
2368
2369static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2370{
2371        if (!test_kvm_facility(vcpu->kvm, 76))
2372                return;
2373
2374        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2375
2376        if (vcpu->kvm->arch.crypto.aes_kw)
2377                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378        if (vcpu->kvm->arch.crypto.dea_kw)
2379                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2380
2381        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2382}
2383
2384void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2385{
2386        free_page(vcpu->arch.sie_block->cbrlo);
2387        vcpu->arch.sie_block->cbrlo = 0;
2388}
2389
2390int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2391{
2392        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393        if (!vcpu->arch.sie_block->cbrlo)
2394                return -ENOMEM;
2395
2396        vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2397        return 0;
2398}
2399
2400static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2401{
2402        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2403
2404        vcpu->arch.sie_block->ibc = model->ibc;
2405        if (test_kvm_facility(vcpu->kvm, 7))
2406                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2407}
2408
2409int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2410{
2411        int rc = 0;
2412
2413        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2414                                                    CPUSTAT_SM |
2415                                                    CPUSTAT_STOPPED);
2416
2417        if (test_kvm_facility(vcpu->kvm, 78))
2418                atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419        else if (test_kvm_facility(vcpu->kvm, 8))
2420                atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2421
2422        kvm_s390_vcpu_setup_model(vcpu);
2423
2424        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425        if (MACHINE_HAS_ESOP)
2426                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427        if (test_kvm_facility(vcpu->kvm, 9))
2428                vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429        if (test_kvm_facility(vcpu->kvm, 73))
2430                vcpu->arch.sie_block->ecb |= ECB_TE;
2431
2432        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434        if (test_kvm_facility(vcpu->kvm, 130))
2435                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2437        if (sclp.has_cei)
2438                vcpu->arch.sie_block->eca |= ECA_CEI;
2439        if (sclp.has_ib)
2440                vcpu->arch.sie_block->eca |= ECA_IB;
2441        if (sclp.has_siif)
2442                vcpu->arch.sie_block->eca |= ECA_SII;
2443        if (sclp.has_sigpif)
2444                vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445        if (test_kvm_facility(vcpu->kvm, 129)) {
2446                vcpu->arch.sie_block->eca |= ECA_VX;
2447                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2448        }
2449        if (test_kvm_facility(vcpu->kvm, 139))
2450                vcpu->arch.sie_block->ecd |= ECD_MEF;
2451
2452        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2453                                        | SDNXC;
2454        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2455
2456        if (sclp.has_kss)
2457                atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2458        else
2459                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2460
2461        if (vcpu->kvm->arch.use_cmma) {
2462                rc = kvm_s390_vcpu_setup_cmma(vcpu);
2463                if (rc)
2464                        return rc;
2465        }
2466        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2468
2469        kvm_s390_vcpu_crypto_setup(vcpu);
2470
2471        return rc;
2472}
2473
2474struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2475                                      unsigned int id)
2476{
2477        struct kvm_vcpu *vcpu;
2478        struct sie_page *sie_page;
2479        int rc = -EINVAL;
2480
2481        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2482                goto out;
2483
2484        rc = -ENOMEM;
2485
2486        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2487        if (!vcpu)
2488                goto out;
2489
2490        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2492        if (!sie_page)
2493                goto out_free_cpu;
2494
2495        vcpu->arch.sie_block = &sie_page->sie_block;
2496        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2497
2498        /* the real guest size will always be smaller than msl */
2499        vcpu->arch.sie_block->mso = 0;
2500        vcpu->arch.sie_block->msl = sclp.hamax;
2501
2502        vcpu->arch.sie_block->icpua = id;
2503        spin_lock_init(&vcpu->arch.local_int.lock);
2504        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505        vcpu->arch.local_int.wq = &vcpu->wq;
2506        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507        seqcount_init(&vcpu->arch.cputm_seqcount);
2508
2509        rc = kvm_vcpu_init(vcpu, kvm, id);
2510        if (rc)
2511                goto out_free_sie_block;
2512        VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513                 vcpu->arch.sie_block);
2514        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2515
2516        return vcpu;
2517out_free_sie_block:
2518        free_page((unsigned long)(vcpu->arch.sie_block));
2519out_free_cpu:
2520        kmem_cache_free(kvm_vcpu_cache, vcpu);
2521out:
2522        return ERR_PTR(rc);
2523}
2524
/* A vcpu counts as runnable when kvm_s390_vcpu_has_irq(vcpu, 0) is non-zero. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

        return has_irq;
}
2529
2530bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2531{
2532        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2533}
2534
2535void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2536{
2537        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2538        exit_sie(vcpu);
2539}
2540
2541void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2542{
2543        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2544}
2545
2546static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2547{
2548        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2549        exit_sie(vcpu);
2550}
2551
2552static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2553{
2554        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2555}
2556
2557/*
2558 * Kick a guest cpu out of SIE and wait until SIE is not running.
2559 * If the CPU is not running (e.g. waiting as idle) the function will
2560 * return immediately. */
2561void exit_sie(struct kvm_vcpu *vcpu)
2562{
2563        atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2564        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2565                cpu_relax();
2566}
2567
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * record @req on the vcpu first, then force the exit from SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);

        kvm_s390_vcpu_request(vcpu);
}
2574
2575static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2576                              unsigned long end)
2577{
2578        struct kvm *kvm = gmap->private;
2579        struct kvm_vcpu *vcpu;
2580        unsigned long prefix;
2581        int i;
2582
2583        if (gmap_is_shadow(gmap))
2584                return;
2585        if (start >= 1UL << 31)
2586                /* We are only interested in prefix pages */
2587                return;
2588        kvm_for_each_vcpu(i, vcpu, kvm) {
2589                /* match against both prefix pages */
2590                prefix = kvm_s390_get_prefix(vcpu);
2591                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2592                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2593                                   start, end);
2594                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2595                }
2596        }
2597}
2598
/*
 * Only exists because kvm common code refers to it; it is never expected
 * to be called, hence the BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        /* not reached in practice, keeps the non-void signature satisfied */
        return 0;
}
2605
2606static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2607                                           struct kvm_one_reg *reg)
2608{
2609        int r = -EINVAL;
2610
2611        switch (reg->id) {
2612        case KVM_REG_S390_TODPR:
2613                r = put_user(vcpu->arch.sie_block->todpr,
2614                             (u32 __user *)reg->addr);
2615                break;
2616        case KVM_REG_S390_EPOCHDIFF:
2617                r = put_user(vcpu->arch.sie_block->epoch,
2618                             (u64 __user *)reg->addr);
2619                break;
2620        case KVM_REG_S390_CPU_TIMER:
2621                r = put_user(kvm_s390_get_cpu_timer(vcpu),
2622                             (u64 __user *)reg->addr);
2623                break;
2624        case KVM_REG_S390_CLOCK_COMP:
2625                r = put_user(vcpu->arch.sie_block->ckc,
2626                             (u64 __user *)reg->addr);
2627                break;
2628        case KVM_REG_S390_PFTOKEN:
2629                r = put_user(vcpu->arch.pfault_token,
2630                             (u64 __user *)reg->addr);
2631                break;
2632        case KVM_REG_S390_PFCOMPARE:
2633                r = put_user(vcpu->arch.pfault_compare,
2634                             (u64 __user *)reg->addr);
2635                break;
2636        case KVM_REG_S390_PFSELECT:
2637                r = put_user(vcpu->arch.pfault_select,
2638                             (u64 __user *)reg->addr);
2639                break;
2640        case KVM_REG_S390_PP:
2641                r = put_user(vcpu->arch.sie_block->pp,
2642                             (u64 __user *)reg->addr);
2643                break;
2644        case KVM_REG_S390_GBEA:
2645                r = put_user(vcpu->arch.sie_block->gbea,
2646                             (u64 __user *)reg->addr);
2647                break;
2648        default:
2649                break;
2650        }
2651
2652        return r;
2653}
2654
2655static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2656                                           struct kvm_one_reg *reg)
2657{
2658        int r = -EINVAL;
2659        __u64 val;
2660
2661        switch (reg->id) {
2662        case KVM_REG_S390_TODPR:
2663                r = get_user(vcpu->arch.sie_block->todpr,
2664                             (u32 __user *)reg->addr);
2665                break;
2666        case KVM_REG_S390_EPOCHDIFF:
2667                r = get_user(vcpu->arch.sie_block->epoch,
2668                             (u64 __user *)reg->addr);
2669                break;
2670        case KVM_REG_S390_CPU_TIMER:
2671                r = get_user(val, (u64 __user *)reg->addr);
2672                if (!r)
2673                        kvm_s390_set_cpu_timer(vcpu, val);
2674                break;
2675        case KVM_REG_S390_CLOCK_COMP:
2676                r = get_user(vcpu->arch.sie_block->ckc,
2677                             (u64 __user *)reg->addr);
2678                break;
2679        case KVM_REG_S390_PFTOKEN:
2680                r = get_user(vcpu->arch.pfault_token,
2681                             (u64 __user *)reg->addr);
2682                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2683                        kvm_clear_async_pf_completion_queue(vcpu);
2684                break;
2685        case KVM_REG_S390_PFCOMPARE:
2686                r = get_user(vcpu->arch.pfault_compare,
2687                             (u64 __user *)reg->addr);
2688                break;
2689        case KVM_REG_S390_PFSELECT:
2690                r = get_user(vcpu->arch.pfault_select,
2691                             (u64 __user *)reg->addr);
2692                break;
2693        case KVM_REG_S390_PP:
2694                r = get_user(vcpu->arch.sie_block->pp,
2695                             (u64 __user *)reg->addr);
2696                break;
2697        case KVM_REG_S390_GBEA:
2698                r = get_user(vcpu->arch.sie_block->gbea,
2699                             (u64 __user *)reg->addr);
2700                break;
2701        default:
2702                break;
2703        }
2704
2705        return r;
2706}
2707
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        return 0;
}
2713
2714int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2715{
2716        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2717        return 0;
2718}
2719
2720int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2721{
2722        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2723        return 0;
2724}
2725
2726int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2727                                  struct kvm_sregs *sregs)
2728{
2729        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2730        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2731        return 0;
2732}
2733
2734int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2735                                  struct kvm_sregs *sregs)
2736{
2737        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2738        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2739        return 0;
2740}
2741
2742int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743{
2744        if (test_fp_ctl(fpu->fpc))
2745                return -EINVAL;
2746        vcpu->run->s.regs.fpc = fpu->fpc;
2747        if (MACHINE_HAS_VX)
2748                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2749                                 (freg_t *) fpu->fprs);
2750        else
2751                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2752        return 0;
2753}
2754
2755int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2756{
2757        /* make sure we have the latest values */
2758        save_fpu_regs();
2759        if (MACHINE_HAS_VX)
2760                convert_vx_to_fp((freg_t *) fpu->fprs,
2761                                 (__vector128 *) vcpu->run->s.regs.vrs);
2762        else
2763                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2764        fpu->fpc = vcpu->run->s.regs.fpc;
2765        return 0;
2766}
2767
2768static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2769{
2770        int rc = 0;
2771
2772        if (!is_vcpu_stopped(vcpu))
2773                rc = -EBUSY;
2774        else {
2775                vcpu->run->psw_mask = psw.mask;
2776                vcpu->run->psw_addr = psw.addr;
2777        }
2778        return rc;
2779}
2780
2781int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2782                                  struct kvm_translation *tr)
2783{
2784        return -EINVAL; /* not implemented yet */
2785}
2786
2787#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2788                              KVM_GUESTDBG_USE_HW_BP | \
2789                              KVM_GUESTDBG_ENABLE)
2790
2791int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2792                                        struct kvm_guest_debug *dbg)
2793{
2794        int rc = 0;
2795
2796        vcpu->guest_debug = 0;
2797        kvm_s390_clear_bp_data(vcpu);
2798
2799        if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2800                return -EINVAL;
2801        if (!sclp.has_gpere)
2802                return -EINVAL;
2803
2804        if (dbg->control & KVM_GUESTDBG_ENABLE) {
2805                vcpu->guest_debug = dbg->control;
2806                /* enforce guest PER */
2807                atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2808
2809                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2810                        rc = kvm_s390_import_bp_data(vcpu, dbg);
2811        } else {
2812                atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2813                vcpu->arch.guestdbg.last_bp = 0;
2814        }
2815
2816        if (rc) {
2817                vcpu->guest_debug = 0;
2818                kvm_s390_clear_bp_data(vcpu);
2819                atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820        }
2821
2822        return rc;
2823}
2824
2825int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2826                                    struct kvm_mp_state *mp_state)
2827{
2828        /* CHECK_STOP and LOAD are not supported yet */
2829        return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2830                                       KVM_MP_STATE_OPERATING;
2831}
2832
2833int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2834                                    struct kvm_mp_state *mp_state)
2835{
2836        int rc = 0;
2837
2838        /* user space knows about this interface - let it control the state */
2839        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2840
2841        switch (mp_state->mp_state) {
2842        case KVM_MP_STATE_STOPPED:
2843                kvm_s390_vcpu_stop(vcpu);
2844                break;
2845        case KVM_MP_STATE_OPERATING:
2846                kvm_s390_vcpu_start(vcpu);
2847                break;
2848        case KVM_MP_STATE_LOAD:
2849        case KVM_MP_STATE_CHECK_STOP:
2850                /* fall through - CHECK_STOP and LOAD are not supported yet */
2851        default:
2852                rc = -ENXIO;
2853        }
2854
2855        return rc;
2856}
2857
2858static bool ibs_enabled(struct kvm_vcpu *vcpu)
2859{
2860        return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2861}
2862
2863static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2864{
2865retry:
2866        kvm_s390_vcpu_request_handled(vcpu);
2867        if (!kvm_request_pending(vcpu))
2868                return 0;
2869        /*
2870         * We use MMU_RELOAD just to re-arm the ipte notifier for the
2871         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2872         * This ensures that the ipte instruction for this request has
2873         * already finished. We might race against a second unmapper that
2874         * wants to set the blocking bit. Lets just retry the request loop.
2875         */
2876        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2877                int rc;
2878                rc = gmap_mprotect_notify(vcpu->arch.gmap,
2879                                          kvm_s390_get_prefix(vcpu),
2880                                          PAGE_SIZE * 2, PROT_WRITE);
2881                if (rc) {
2882                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2883                        return rc;
2884                }
2885                goto retry;
2886        }
2887
2888        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2889                vcpu->arch.sie_block->ihcpu = 0xffff;
2890                goto retry;
2891        }
2892
2893        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2894                if (!ibs_enabled(vcpu)) {
2895                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2896                        atomic_or(CPUSTAT_IBS,
2897                                        &vcpu->arch.sie_block->cpuflags);
2898                }
2899                goto retry;
2900        }
2901
2902        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2903                if (ibs_enabled(vcpu)) {
2904                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2905                        atomic_andnot(CPUSTAT_IBS,
2906                                          &vcpu->arch.sie_block->cpuflags);
2907                }
2908                goto retry;
2909        }
2910
2911        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2912                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2913                goto retry;
2914        }
2915
2916        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2917                /*
2918                 * Disable CMMA virtualization; we will emulate the ESSA
2919                 * instruction manually, in order to provide additional
2920                 * functionalities needed for live migration.
2921                 */
2922                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2923                goto retry;
2924        }
2925
2926        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2927                /*
2928                 * Re-enable CMMA virtualization if CMMA is available and
2929                 * was used.
2930                 */
2931                if ((vcpu->kvm->arch.use_cmma) &&
2932                    (vcpu->kvm->mm->context.use_cmma))
2933                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2934                goto retry;
2935        }
2936
2937        /* nothing to do, just clear the request */
2938        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2939
2940        return 0;
2941}
2942
2943void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2944                                 const struct kvm_s390_vm_tod_clock *gtod)
2945{
2946        struct kvm_vcpu *vcpu;
2947        struct kvm_s390_tod_clock_ext htod;
2948        int i;
2949
2950        mutex_lock(&kvm->lock);
2951        preempt_disable();
2952
2953        get_tod_clock_ext((char *)&htod);
2954
2955        kvm->arch.epoch = gtod->tod - htod.tod;
2956        kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2957
2958        if (kvm->arch.epoch > gtod->tod)
2959                kvm->arch.epdx -= 1;
2960
2961        kvm_s390_vcpu_block_all(kvm);
2962        kvm_for_each_vcpu(i, vcpu, kvm) {
2963                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2964                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2965        }
2966
2967        kvm_s390_vcpu_unblock_all(kvm);
2968        preempt_enable();
2969        mutex_unlock(&kvm->lock);
2970}
2971
2972void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2973{
2974        struct kvm_vcpu *vcpu;
2975        int i;
2976
2977        mutex_lock(&kvm->lock);
2978        preempt_disable();
2979        kvm->arch.epoch = tod - get_tod_clock();
2980        kvm_s390_vcpu_block_all(kvm);
2981        kvm_for_each_vcpu(i, vcpu, kvm)
2982                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2983        kvm_s390_vcpu_unblock_all(kvm);
2984        preempt_enable();
2985        mutex_unlock(&kvm->lock);
2986}
2987
2988/**
2989 * kvm_arch_fault_in_page - fault-in guest page if necessary
2990 * @vcpu: The corresponding virtual cpu
2991 * @gpa: Guest physical address
2992 * @writable: Whether the page should be writable or not
2993 *
2994 * Make sure that a guest page has been faulted-in on the host.
2995 *
2996 * Return: Zero on success, negative error code otherwise.
2997 */
2998long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2999{
3000        return gmap_fault(vcpu->arch.gmap, gpa,
3001                          writable ? FAULT_FLAG_WRITE : 0);
3002}
3003
3004static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3005                                      unsigned long token)
3006{
3007        struct kvm_s390_interrupt inti;
3008        struct kvm_s390_irq irq;
3009
3010        if (start_token) {
3011                irq.u.ext.ext_params2 = token;
3012                irq.type = KVM_S390_INT_PFAULT_INIT;
3013                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3014        } else {
3015                inti.type = KVM_S390_INT_PFAULT_DONE;
3016                inti.parm64 = token;
3017                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3018        }
3019}
3020
3021void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3022                                     struct kvm_async_pf *work)
3023{
3024        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3025        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3026}
3027
3028void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3029                                 struct kvm_async_pf *work)
3030{
3031        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3032        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3033}
3034
/*
 * Intentionally empty: s390 will always inject the page directly, so there
 * is nothing left to do at this point.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
3040
3041bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3042{
3043        /*
3044         * s390 will always inject the page directly,
3045         * but we still want check_async_completion to cleanup
3046         */
3047        return true;
3048}
3049
3050static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3051{
3052        hva_t hva;
3053        struct kvm_arch_async_pf arch;
3054        int rc;
3055
3056        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3057                return 0;
3058        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3059            vcpu->arch.pfault_compare)
3060                return 0;
3061        if (psw_extint_disabled(vcpu))
3062                return 0;
3063        if (kvm_s390_vcpu_has_irq(vcpu, 0))
3064                return 0;
3065        if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3066                return 0;
3067        if (!vcpu->arch.gmap->pfault_enabled)
3068                return 0;
3069
3070        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3071        hva += current->thread.gmap_addr & ~PAGE_MASK;
3072        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3073                return 0;
3074
3075        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3076        return rc;
3077}
3078
3079static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3080{
3081        int rc, cpuflags;
3082
3083        /*
3084         * On s390 notifications for arriving pages will be delivered directly
3085         * to the guest but the house keeping for completed pfaults is
3086         * handled outside the worker.
3087         */
3088        kvm_check_async_pf_completion(vcpu);
3089
3090        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3091        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3092
3093        if (need_resched())
3094                schedule();
3095
3096        if (test_cpu_flag(CIF_MCCK_PENDING))
3097                s390_handle_mcck();
3098
3099        if (!kvm_is_ucontrol(vcpu->kvm)) {
3100                rc = kvm_s390_deliver_pending_interrupts(vcpu);
3101                if (rc)
3102                        return rc;
3103        }
3104
3105        rc = kvm_s390_handle_requests(vcpu);
3106        if (rc)
3107                return rc;
3108
3109        if (guestdbg_enabled(vcpu)) {
3110                kvm_s390_backup_guest_per_regs(vcpu);
3111                kvm_s390_patch_guest_per_regs(vcpu);
3112        }
3113
3114        vcpu->arch.sie_block->icptcode = 0;
3115        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3116        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3117        trace_kvm_s390_sie_enter(vcpu, cpuflags);
3118
3119        return 0;
3120}
3121
3122static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3123{
3124        struct kvm_s390_pgm_info pgm_info = {
3125                .code = PGM_ADDRESSING,
3126        };
3127        u8 opcode, ilen;
3128        int rc;
3129
3130        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3131        trace_kvm_s390_sie_fault(vcpu);
3132
3133        /*
3134         * We want to inject an addressing exception, which is defined as a
3135         * suppressing or terminating exception. However, since we came here
3136         * by a DAT access exception, the PSW still points to the faulting
3137         * instruction since DAT exceptions are nullifying. So we've got
3138         * to look up the current opcode to get the length of the instruction
3139         * to be able to forward the PSW.
3140         */
3141        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3142        ilen = insn_length(opcode);
3143        if (rc < 0) {
3144                return rc;
3145        } else if (rc) {
3146                /* Instruction-Fetching Exceptions - we can't detect the ilen.
3147                 * Forward by arbitrary ilc, injection will take care of
3148                 * nullification if necessary.
3149                 */
3150                pgm_info = vcpu->arch.pgm;
3151                ilen = 4;
3152        }
3153        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3154        kvm_s390_forward_psw(vcpu, ilen);
3155        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3156}
3157
3158static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3159{
3160        struct mcck_volatile_info *mcck_info;
3161        struct sie_page *sie_page;
3162
3163        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3164                   vcpu->arch.sie_block->icptcode);
3165        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3166
3167        if (guestdbg_enabled(vcpu))
3168                kvm_s390_restore_guest_per_regs(vcpu);
3169
3170        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3171        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3172
3173        if (exit_reason == -EINTR) {
3174                VCPU_EVENT(vcpu, 3, "%s", "machine check");
3175                sie_page = container_of(vcpu->arch.sie_block,
3176                                        struct sie_page, sie_block);
3177                mcck_info = &sie_page->mcck_info;
3178                kvm_s390_reinject_machine_check(vcpu, mcck_info);
3179                return 0;
3180        }
3181
3182        if (vcpu->arch.sie_block->icptcode > 0) {
3183                int rc = kvm_handle_sie_intercept(vcpu);
3184
3185                if (rc != -EOPNOTSUPP)
3186                        return rc;
3187                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3188                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3189                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3190                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3191                return -EREMOTE;
3192        } else if (exit_reason != -EFAULT) {
3193                vcpu->stat.exit_null++;
3194                return 0;
3195        } else if (kvm_is_ucontrol(vcpu->kvm)) {
3196                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3197                vcpu->run->s390_ucontrol.trans_exc_code =
3198                                                current->thread.gmap_addr;
3199                vcpu->run->s390_ucontrol.pgm_code = 0x10;
3200                return -EREMOTE;
3201        } else if (current->thread.gmap_pfault) {
3202                trace_kvm_s390_major_guest_pfault(vcpu);
3203                current->thread.gmap_pfault = 0;
3204                if (kvm_arch_setup_async_pf(vcpu))
3205                        return 0;
3206                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3207        }
3208        return vcpu_post_run_fault_in_sie(vcpu);
3209}
3210
3211static int __vcpu_run(struct kvm_vcpu *vcpu)
3212{
3213        int rc, exit_reason;
3214
3215        /*
3216         * We try to hold kvm->srcu during most of vcpu_run (except when run-
3217         * ning the guest), so that memslots (and other stuff) are protected
3218         */
3219        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3220
3221        do {
3222                rc = vcpu_pre_run(vcpu);
3223                if (rc)
3224                        break;
3225
3226                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3227                /*
3228                 * As PF_VCPU will be used in fault handler, between
3229                 * guest_enter and guest_exit should be no uaccess.
3230                 */
3231                local_irq_disable();
3232                guest_enter_irqoff();
3233                __disable_cpu_timer_accounting(vcpu);
3234                local_irq_enable();
3235                exit_reason = sie64a(vcpu->arch.sie_block,
3236                                     vcpu->run->s.regs.gprs);
3237                local_irq_disable();
3238                __enable_cpu_timer_accounting(vcpu);
3239                guest_exit_irqoff();
3240                local_irq_enable();
3241                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3242
3243                rc = vcpu_post_run(vcpu, exit_reason);
3244        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3245
3246        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3247        return rc;
3248}
3249
3250static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3251{
3252        struct runtime_instr_cb *riccb;
3253        struct gs_cb *gscb;
3254
3255        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3256        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3257        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3258        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3259        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3260                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3261        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3262                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3263                /* some control register changes require a tlb flush */
3264                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3265        }
3266        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3267                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3268                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3269                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3270                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3271                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3272        }
3273        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3274                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3275                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3276                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3277                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3278                        kvm_clear_async_pf_completion_queue(vcpu);
3279        }
3280        /*
3281         * If userspace sets the riccb (e.g. after migration) to a valid state,
3282         * we should enable RI here instead of doing the lazy enablement.
3283         */
3284        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3285            test_kvm_facility(vcpu->kvm, 64) &&
3286            riccb->valid &&
3287            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3288                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3289                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3290        }
3291        /*
3292         * If userspace sets the gscb (e.g. after migration) to non-zero,
3293         * we should enable GS here instead of doing the lazy enablement.
3294         */
3295        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3296            test_kvm_facility(vcpu->kvm, 133) &&
3297            gscb->gssm &&
3298            !vcpu->arch.gs_enabled) {
3299                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3300                vcpu->arch.sie_block->ecb |= ECB_GS;
3301                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3302                vcpu->arch.gs_enabled = 1;
3303        }
3304        save_access_regs(vcpu->arch.host_acrs);
3305        restore_access_regs(vcpu->run->s.regs.acrs);
3306        /* save host (userspace) fprs/vrs */
3307        save_fpu_regs();
3308        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3309        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3310        if (MACHINE_HAS_VX)
3311                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3312        else
3313                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3314        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3315        if (test_fp_ctl(current->thread.fpu.fpc))
3316                /* User space provided an invalid FPC, let's clear it */
3317                current->thread.fpu.fpc = 0;
3318        if (MACHINE_HAS_GS) {
3319                preempt_disable();
3320                __ctl_set_bit(2, 4);
3321                if (current->thread.gs_cb) {
3322                        vcpu->arch.host_gscb = current->thread.gs_cb;
3323                        save_gs_cb(vcpu->arch.host_gscb);
3324                }
3325                if (vcpu->arch.gs_enabled) {
3326                        current->thread.gs_cb = (struct gs_cb *)
3327                                                &vcpu->run->s.regs.gscb;
3328                        restore_gs_cb(current->thread.gs_cb);
3329                }
3330                preempt_enable();
3331        }
3332
3333        kvm_run->kvm_dirty_regs = 0;
3334}
3335
3336static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337{
3338        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3339        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3340        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3341        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3342        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3343        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3344        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3345        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3346        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3347        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3348        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3349        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3350        save_access_regs(vcpu->run->s.regs.acrs);
3351        restore_access_regs(vcpu->arch.host_acrs);
3352        /* Save guest register state */
3353        save_fpu_regs();
3354        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3355        /* Restore will be done lazily at return */
3356        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3357        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3358        if (MACHINE_HAS_GS) {
3359                __ctl_set_bit(2, 4);
3360                if (vcpu->arch.gs_enabled)
3361                        save_gs_cb(current->thread.gs_cb);
3362                preempt_disable();
3363                current->thread.gs_cb = vcpu->arch.host_gscb;
3364                restore_gs_cb(vcpu->arch.host_gscb);
3365                preempt_enable();
3366                if (!vcpu->arch.host_gscb)
3367                        __ctl_clear_bit(2, 4);
3368                vcpu->arch.host_gscb = NULL;
3369        }
3370
3371}
3372
3373int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3374{
3375        int rc;
3376        sigset_t sigsaved;
3377
3378        if (kvm_run->immediate_exit)
3379                return -EINTR;
3380
3381        if (guestdbg_exit_pending(vcpu)) {
3382                kvm_s390_prepare_debug_exit(vcpu);
3383                return 0;
3384        }
3385
3386        if (vcpu->sigset_active)
3387                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3388
3389        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3390                kvm_s390_vcpu_start(vcpu);
3391        } else if (is_vcpu_stopped(vcpu)) {
3392                pr_err_ratelimited("can't run stopped vcpu %d\n",
3393                                   vcpu->vcpu_id);
3394                return -EINVAL;
3395        }
3396
3397        sync_regs(vcpu, kvm_run);
3398        enable_cpu_timer_accounting(vcpu);
3399
3400        might_fault();
3401        rc = __vcpu_run(vcpu);
3402
3403        if (signal_pending(current) && !rc) {
3404                kvm_run->exit_reason = KVM_EXIT_INTR;
3405                rc = -EINTR;
3406        }
3407
3408        if (guestdbg_exit_pending(vcpu) && !rc)  {
3409                kvm_s390_prepare_debug_exit(vcpu);
3410                rc = 0;
3411        }
3412
3413        if (rc == -EREMOTE) {
3414                /* userspace support is needed, kvm_run has been prepared */
3415                rc = 0;
3416        }
3417
3418        disable_cpu_timer_accounting(vcpu);
3419        store_regs(vcpu, kvm_run);
3420
3421        if (vcpu->sigset_active)
3422                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3423
3424        vcpu->stat.exit_userspace++;
3425        return rc;
3426}
3427
3428/*
3429 * store status at address
3430 * we use have two special cases:
3431 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3432 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3433 */
3434int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3435{
3436        unsigned char archmode = 1;
3437        freg_t fprs[NUM_FPRS];
3438        unsigned int px;
3439        u64 clkcomp, cputm;
3440        int rc;
3441
3442        px = kvm_s390_get_prefix(vcpu);
3443        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3444                if (write_guest_abs(vcpu, 163, &archmode, 1))
3445                        return -EFAULT;
3446                gpa = 0;
3447        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3448                if (write_guest_real(vcpu, 163, &archmode, 1))
3449                        return -EFAULT;
3450                gpa = px;
3451        } else
3452                gpa -= __LC_FPREGS_SAVE_AREA;
3453
3454        /* manually convert vector registers if necessary */
3455        if (MACHINE_HAS_VX) {
3456                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3457                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3458                                     fprs, 128);
3459        } else {
3460                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3461                                     vcpu->run->s.regs.fprs, 128);
3462        }
3463        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3464                              vcpu->run->s.regs.gprs, 128);
3465        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3466                              &vcpu->arch.sie_block->gpsw, 16);
3467        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3468                              &px, 4);
3469        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3470                              &vcpu->run->s.regs.fpc, 4);
3471        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3472                              &vcpu->arch.sie_block->todpr, 4);
3473        cputm = kvm_s390_get_cpu_timer(vcpu);
3474        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3475                              &cputm, 8);
3476        clkcomp = vcpu->arch.sie_block->ckc >> 8;
3477        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3478                              &clkcomp, 8);
3479        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3480                              &vcpu->run->s.regs.acrs, 64);
3481        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3482                              &vcpu->arch.sie_block->gcr, 128);
3483        return rc ? -EFAULT : 0;
3484}
3485
3486int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3487{
3488        /*
3489         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3490         * switch in the run ioctl. Let's update our copies before we save
3491         * it into the save area
3492         */
3493        save_fpu_regs();
3494        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3495        save_access_regs(vcpu->run->s.regs.acrs);
3496
3497        return kvm_s390_store_status_unloaded(vcpu, addr);
3498}
3499
3500static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3501{
3502        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3503        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3504}
3505
3506static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3507{
3508        unsigned int i;
3509        struct kvm_vcpu *vcpu;
3510
3511        kvm_for_each_vcpu(i, vcpu, kvm) {
3512                __disable_ibs_on_vcpu(vcpu);
3513        }
3514}
3515
3516static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3517{
3518        if (!sclp.has_ibs)
3519                return;
3520        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3521        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3522}
3523
3524void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3525{
3526        int i, online_vcpus, started_vcpus = 0;
3527
3528        if (!is_vcpu_stopped(vcpu))
3529                return;
3530
3531        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3532        /* Only one cpu at a time may enter/leave the STOPPED state. */
3533        spin_lock(&vcpu->kvm->arch.start_stop_lock);
3534        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3535
3536        for (i = 0; i < online_vcpus; i++) {
3537                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3538                        started_vcpus++;
3539        }
3540
3541        if (started_vcpus == 0) {
3542                /* we're the only active VCPU -> speed it up */
3543                __enable_ibs_on_vcpu(vcpu);
3544        } else if (started_vcpus == 1) {
3545                /*
3546                 * As we are starting a second VCPU, we have to disable
3547                 * the IBS facility on all VCPUs to remove potentially
3548                 * oustanding ENABLE requests.
3549                 */
3550                __disable_ibs_on_all_vcpus(vcpu->kvm);
3551        }
3552
3553        atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3554        /*
3555         * Another VCPU might have used IBS while we were offline.
3556         * Let's play safe and flush the VCPU at startup.
3557         */
3558        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3559        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3560        return;
3561}
3562
3563void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3564{
3565        int i, online_vcpus, started_vcpus = 0;
3566        struct kvm_vcpu *started_vcpu = NULL;
3567
3568        if (is_vcpu_stopped(vcpu))
3569                return;
3570
3571        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3572        /* Only one cpu at a time may enter/leave the STOPPED state. */
3573        spin_lock(&vcpu->kvm->arch.start_stop_lock);
3574        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3575
3576        /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3577        kvm_s390_clear_stop_irq(vcpu);
3578
3579        atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3580        __disable_ibs_on_vcpu(vcpu);
3581
3582        for (i = 0; i < online_vcpus; i++) {
3583                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3584                        started_vcpus++;
3585                        started_vcpu = vcpu->kvm->vcpus[i];
3586                }
3587        }
3588
3589        if (started_vcpus == 1) {
3590                /*
3591                 * As we only have one VCPU left, we want to enable the
3592                 * IBS facility for that VCPU to speed it up.
3593                 */
3594                __enable_ibs_on_vcpu(started_vcpu);
3595        }
3596
3597        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3598        return;
3599}
3600
3601static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3602                                     struct kvm_enable_cap *cap)
3603{
3604        int r;
3605
3606        if (cap->flags)
3607                return -EINVAL;
3608
3609        switch (cap->cap) {
3610        case KVM_CAP_S390_CSS_SUPPORT:
3611                if (!vcpu->kvm->arch.css_support) {
3612                        vcpu->kvm->arch.css_support = 1;
3613                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3614                        trace_kvm_s390_enable_css(vcpu->kvm);
3615                }
3616                r = 0;
3617                break;
3618        default:
3619                r = -EINVAL;
3620                break;
3621        }
3622        return r;
3623}
3624
3625static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3626                                  struct kvm_s390_mem_op *mop)
3627{
3628        void __user *uaddr = (void __user *)mop->buf;
3629        void *tmpbuf = NULL;
3630        int r, srcu_idx;
3631        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3632                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
3633
3634        if (mop->flags & ~supported_flags)
3635                return -EINVAL;
3636
3637        if (mop->size > MEM_OP_MAX_SIZE)
3638                return -E2BIG;
3639
3640        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3641                tmpbuf = vmalloc(mop->size);
3642                if (!tmpbuf)
3643                        return -ENOMEM;
3644        }
3645
3646        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3647
3648        switch (mop->op) {
3649        case KVM_S390_MEMOP_LOGICAL_READ:
3650                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3651                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3652                                            mop->size, GACC_FETCH);
3653                        break;
3654                }
3655                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3656                if (r == 0) {
3657                        if (copy_to_user(uaddr, tmpbuf, mop->size))
3658                                r = -EFAULT;
3659                }
3660                break;
3661        case KVM_S390_MEMOP_LOGICAL_WRITE:
3662                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3663                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3664                                            mop->size, GACC_STORE);
3665                        break;
3666                }
3667                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3668                        r = -EFAULT;
3669                        break;
3670                }
3671                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3672                break;
3673        default:
3674                r = -EINVAL;
3675        }
3676
3677        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3678
3679        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3680                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3681
3682        vfree(tmpbuf);
3683        return r;
3684}
3685
3686long kvm_arch_vcpu_ioctl(struct file *filp,
3687                         unsigned int ioctl, unsigned long arg)
3688{
3689        struct kvm_vcpu *vcpu = filp->private_data;
3690        void __user *argp = (void __user *)arg;
3691        int idx;
3692        long r;
3693
3694        switch (ioctl) {
3695        case KVM_S390_IRQ: {
3696                struct kvm_s390_irq s390irq;
3697
3698                r = -EFAULT;
3699                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3700                        break;
3701                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3702                break;
3703        }
3704        case KVM_S390_INTERRUPT: {
3705                struct kvm_s390_interrupt s390int;
3706                struct kvm_s390_irq s390irq;
3707
3708                r = -EFAULT;
3709                if (copy_from_user(&s390int, argp, sizeof(s390int)))
3710                        break;
3711                if (s390int_to_s390irq(&s390int, &s390irq))
3712                        return -EINVAL;
3713                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3714                break;
3715        }
3716        case KVM_S390_STORE_STATUS:
3717                idx = srcu_read_lock(&vcpu->kvm->srcu);
3718                r = kvm_s390_vcpu_store_status(vcpu, arg);
3719                srcu_read_unlock(&vcpu->kvm->srcu, idx);
3720                break;
3721        case KVM_S390_SET_INITIAL_PSW: {
3722                psw_t psw;
3723
3724                r = -EFAULT;
3725                if (copy_from_user(&psw, argp, sizeof(psw)))
3726                        break;
3727                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3728                break;
3729        }
3730        case KVM_S390_INITIAL_RESET:
3731                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3732                break;
3733        case KVM_SET_ONE_REG:
3734        case KVM_GET_ONE_REG: {
3735                struct kvm_one_reg reg;
3736                r = -EFAULT;
3737                if (copy_from_user(&reg, argp, sizeof(reg)))
3738                        break;
3739                if (ioctl == KVM_SET_ONE_REG)
3740                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3741                else
3742                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3743                break;
3744        }
3745#ifdef CONFIG_KVM_S390_UCONTROL
3746        case KVM_S390_UCAS_MAP: {
3747                struct kvm_s390_ucas_mapping ucasmap;
3748
3749                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3750                        r = -EFAULT;
3751                        break;
3752                }
3753
3754                if (!kvm_is_ucontrol(vcpu->kvm)) {
3755                        r = -EINVAL;
3756                        break;
3757                }
3758
3759                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3760                                     ucasmap.vcpu_addr, ucasmap.length);
3761                break;
3762        }
3763        case KVM_S390_UCAS_UNMAP: {
3764                struct kvm_s390_ucas_mapping ucasmap;
3765
3766                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3767                        r = -EFAULT;
3768                        break;
3769                }
3770
3771                if (!kvm_is_ucontrol(vcpu->kvm)) {
3772                        r = -EINVAL;
3773                        break;
3774                }
3775
3776                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3777                        ucasmap.length);
3778                break;
3779        }
3780#endif
3781        case KVM_S390_VCPU_FAULT: {
3782                r = gmap_fault(vcpu->arch.gmap, arg, 0);
3783                break;
3784        }
3785        case KVM_ENABLE_CAP:
3786        {
3787                struct kvm_enable_cap cap;
3788                r = -EFAULT;
3789                if (copy_from_user(&cap, argp, sizeof(cap)))
3790                        break;
3791                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3792                break;
3793        }
3794        case KVM_S390_MEM_OP: {
3795                struct kvm_s390_mem_op mem_op;
3796
3797                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3798                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3799                else
3800                        r = -EFAULT;
3801                break;
3802        }
3803        case KVM_S390_SET_IRQ_STATE: {
3804                struct kvm_s390_irq_state irq_state;
3805
3806                r = -EFAULT;
3807                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3808                        break;
3809                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3810                    irq_state.len == 0 ||
3811                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3812                        r = -EINVAL;
3813                        break;
3814                }
3815                r = kvm_s390_set_irq_state(vcpu,
3816                                           (void __user *) irq_state.buf,
3817                                           irq_state.len);
3818                break;
3819        }
3820        case KVM_S390_GET_IRQ_STATE: {
3821                struct kvm_s390_irq_state irq_state;
3822
3823                r = -EFAULT;
3824                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3825                        break;
3826                if (irq_state.len == 0) {
3827                        r = -EINVAL;
3828                        break;
3829                }
3830                r = kvm_s390_get_irq_state(vcpu,
3831                                           (__u8 __user *)  irq_state.buf,
3832                                           irq_state.len);
3833                break;
3834        }
3835        default:
3836                r = -ENOTTY;
3837        }
3838        return r;
3839}
3840
3841int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3842{
3843#ifdef CONFIG_KVM_S390_UCONTROL
3844        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3845                 && (kvm_is_ucontrol(vcpu->kvm))) {
3846                vmf->page = virt_to_page(vcpu->arch.sie_block);
3847                get_page(vmf->page);
3848                return 0;
3849        }
3850#endif
3851        return VM_FAULT_SIGBUS;
3852}
3853
/*
 * Memslot creation hook. Nothing is done here for a new memslot; the
 * function always reports success.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3859
3860/* Section: memory related */
3861int kvm_arch_prepare_memory_region(struct kvm *kvm,
3862                                   struct kvm_memory_slot *memslot,
3863                                   const struct kvm_userspace_memory_region *mem,
3864                                   enum kvm_mr_change change)
3865{
3866        /* A few sanity checks. We can have memory slots which have to be
3867           located/ended at a segment boundary (1MB). The memory in userland is
3868           ok to be fragmented into various different vmas. It is okay to mmap()
3869           and munmap() stuff in this slot after doing this call at any time */
3870
3871        if (mem->userspace_addr & 0xffffful)
3872                return -EINVAL;
3873
3874        if (mem->memory_size & 0xffffful)
3875                return -EINVAL;
3876
3877        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3878                return -EINVAL;
3879
3880        return 0;
3881}
3882
3883void kvm_arch_commit_memory_region(struct kvm *kvm,
3884                                const struct kvm_userspace_memory_region *mem,
3885                                const struct kvm_memory_slot *old,
3886                                const struct kvm_memory_slot *new,
3887                                enum kvm_mr_change change)
3888{
3889        int rc;
3890
3891        /* If the basics of the memslot do not change, we do not want
3892         * to update the gmap. Every update causes several unnecessary
3893         * segment translation exceptions. This is usually handled just
3894         * fine by the normal fault handler + gmap, but it will also
3895         * cause faults on the prefix page of running guest CPUs.
3896         */
3897        if (old->userspace_addr == mem->userspace_addr &&
3898            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3899            old->npages * PAGE_SIZE == mem->memory_size)
3900                return;
3901
3902        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3903                mem->guest_phys_addr, mem->memory_size);
3904        if (rc)
3905                pr_warn("failed to commit memory region\n");
3906        return;
3907}
3908
3909static inline unsigned long nonhyp_mask(int i)
3910{
3911        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3912
3913        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3914}
3915
3916void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3917{
3918        vcpu->valid_wakeup = false;
3919}
3920
3921static int __init kvm_s390_init(void)
3922{
3923        int i;
3924
3925        if (!sclp.has_sief2) {
3926                pr_info("SIE not available\n");
3927                return -ENODEV;
3928        }
3929
3930        for (i = 0; i < 16; i++)
3931                kvm_s390_fac_list_mask[i] |=
3932                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3933
3934        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3935}
3936
3937static void __exit kvm_s390_exit(void)
3938{
3939        kvm_exit();
3940}
3941
/* Register kvm_s390_init()/kvm_s390_exit() as the module entry/exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
3953