linux/arch/s390/kvm/kvm-s390.c
<<
>>
Prefs
   1/*
   2 * hosting zSeries kernel virtual machines
   3 *
   4 * Copyright IBM Corp. 2008, 2009
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License (version 2 only)
   8 * as published by the Free Software Foundation.
   9 *
  10 *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11 *               Christian Borntraeger <borntraeger@de.ibm.com>
  12 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14 *               Jason J. Herne <jjherne@us.ibm.com>
  15 */
  16
  17#include <linux/compiler.h>
  18#include <linux/err.h>
  19#include <linux/fs.h>
  20#include <linux/hrtimer.h>
  21#include <linux/init.h>
  22#include <linux/kvm.h>
  23#include <linux/kvm_host.h>
  24#include <linux/mman.h>
  25#include <linux/module.h>
  26#include <linux/moduleparam.h>
  27#include <linux/random.h>
  28#include <linux/slab.h>
  29#include <linux/timer.h>
  30#include <linux/vmalloc.h>
  31#include <linux/bitmap.h>
  32#include <linux/sched/signal.h>
  33
  34#include <asm/asm-offsets.h>
  35#include <asm/lowcore.h>
  36#include <asm/stp.h>
  37#include <asm/pgtable.h>
  38#include <asm/gmap.h>
  39#include <asm/nmi.h>
  40#include <asm/switch_to.h>
  41#include <asm/isc.h>
  42#include <asm/sclp.h>
  43#include <asm/cpacf.h>
  44#include <asm/timex.h>
  45#include "kvm-s390.h"
  46#include "gaccess.h"
  47
  48#define KMSG_COMPONENT "kvm-s390"
  49#undef pr_fmt
  50#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  51
  52#define CREATE_TRACE_POINTS
  53#include "trace.h"
  54#include "trace-s390.h"
  55
  56#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57#define LOCAL_IRQS 32
  58#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  62
  63struct kvm_stats_debugfs_item debugfs_entries[] = {
  64        { "userspace_handled", VCPU_STAT(exit_userspace) },
  65        { "exit_null", VCPU_STAT(exit_null) },
  66        { "exit_validity", VCPU_STAT(exit_validity) },
  67        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  68        { "exit_external_request", VCPU_STAT(exit_external_request) },
  69        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70        { "exit_instruction", VCPU_STAT(exit_instruction) },
  71        { "exit_pei", VCPU_STAT(exit_pei) },
  72        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  79        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  80        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  81        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  82        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  83        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  84        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  85        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  86        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  87        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  88        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  89        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  90        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  91        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  92        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  93        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  94        { "instruction_spx", VCPU_STAT(instruction_spx) },
  95        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  96        { "instruction_stap", VCPU_STAT(instruction_stap) },
  97        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  98        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  99        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
 100        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
 101        { "instruction_essa", VCPU_STAT(instruction_essa) },
 102        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 103        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 104        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 105        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 106        { "instruction_sie", VCPU_STAT(instruction_sie) },
 107        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 108        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 109        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 110        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 111        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 112        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 113        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 114        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 115        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 116        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 117        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 118        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 119        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 120        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 121        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 122        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 123        { "diagnose_10", VCPU_STAT(diagnose_10) },
 124        { "diagnose_44", VCPU_STAT(diagnose_44) },
 125        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 126        { "diagnose_258", VCPU_STAT(diagnose_258) },
 127        { "diagnose_308", VCPU_STAT(diagnose_308) },
 128        { "diagnose_500", VCPU_STAT(diagnose_500) },
 129        { NULL }
 130};
 131
 132/* allow nested virtualization in KVM (if enabled by user space) */
 133static int nested;
 134module_param(nested, int, S_IRUGO);
 135MODULE_PARM_DESC(nested, "Nested virtualization support");
 136
 137/* upper facilities limit for kvm */
 138unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 139
 140unsigned long kvm_s390_fac_list_mask_size(void)
 141{
 142        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 143        return ARRAY_SIZE(kvm_s390_fac_list_mask);
 144}
 145
 146/* available cpu features supported by kvm */
 147static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 148/* available subfunctions indicated via query / "test bit" */
 149static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 150
 151static struct gmap_notifier gmap_notifier;
 152static struct gmap_notifier vsie_gmap_notifier;
 153debug_info_t *kvm_s390_dbf;
 154
 155/* Section: not file related */
 156int kvm_arch_hardware_enable(void)
 157{
 158        /* every s390 is virtualization enabled ;-) */
 159        return 0;
 160}
 161
 162static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 163                              unsigned long end);
 164
 165/*
 166 * This callback is executed during stop_machine(). All CPUs are therefore
 167 * temporarily stopped. In order not to change guest behavior, we have to
 168 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 169 * so a CPU won't be stopped while calculating with the epoch.
 170 */
 171static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 172                          void *v)
 173{
 174        struct kvm *kvm;
 175        struct kvm_vcpu *vcpu;
 176        int i;
 177        unsigned long long *delta = v;
 178
 179        list_for_each_entry(kvm, &vm_list, vm_list) {
 180                kvm->arch.epoch -= *delta;
 181                kvm_for_each_vcpu(i, vcpu, kvm) {
 182                        vcpu->arch.sie_block->epoch -= *delta;
 183                        if (vcpu->arch.cputm_enabled)
 184                                vcpu->arch.cputm_start += *delta;
 185                        if (vcpu->arch.vsie_block)
 186                                vcpu->arch.vsie_block->epoch -= *delta;
 187                }
 188        }
 189        return NOTIFY_OK;
 190}
 191
 192static struct notifier_block kvm_clock_notifier = {
 193        .notifier_call = kvm_clock_sync,
 194};
 195
 196int kvm_arch_hardware_setup(void)
 197{
 198        gmap_notifier.notifier_call = kvm_gmap_notifier;
 199        gmap_register_pte_notifier(&gmap_notifier);
 200        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 201        gmap_register_pte_notifier(&vsie_gmap_notifier);
 202        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 203                                       &kvm_clock_notifier);
 204        return 0;
 205}
 206
 207void kvm_arch_hardware_unsetup(void)
 208{
 209        gmap_unregister_pte_notifier(&gmap_notifier);
 210        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 211        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 212                                         &kvm_clock_notifier);
 213}
 214
 215static void allow_cpu_feat(unsigned long nr)
 216{
 217        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 218}
 219
 220static inline int plo_test_bit(unsigned char nr)
 221{
 222        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 223        int cc;
 224
 225        asm volatile(
 226                /* Parameter registers are ignored for "test bit" */
 227                "       plo     0,0,0,0(0)\n"
 228                "       ipm     %0\n"
 229                "       srl     %0,28\n"
 230                : "=d" (cc)
 231                : "d" (r0)
 232                : "cc");
 233        return cc == 0;
 234}
 235
 236static void kvm_s390_cpu_feat_init(void)
 237{
 238        int i;
 239
 240        for (i = 0; i < 256; ++i) {
 241                if (plo_test_bit(i))
 242                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 243        }
 244
 245        if (test_facility(28)) /* TOD-clock steering */
 246                ptff(kvm_s390_available_subfunc.ptff,
 247                     sizeof(kvm_s390_available_subfunc.ptff),
 248                     PTFF_QAF);
 249
 250        if (test_facility(17)) { /* MSA */
 251                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 252                              kvm_s390_available_subfunc.kmac);
 253                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 254                              kvm_s390_available_subfunc.kmc);
 255                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 256                              kvm_s390_available_subfunc.km);
 257                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 258                              kvm_s390_available_subfunc.kimd);
 259                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 260                              kvm_s390_available_subfunc.klmd);
 261        }
 262        if (test_facility(76)) /* MSA3 */
 263                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 264                              kvm_s390_available_subfunc.pckmo);
 265        if (test_facility(77)) { /* MSA4 */
 266                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 267                              kvm_s390_available_subfunc.kmctr);
 268                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 269                              kvm_s390_available_subfunc.kmf);
 270                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 271                              kvm_s390_available_subfunc.kmo);
 272                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 273                              kvm_s390_available_subfunc.pcc);
 274        }
 275        if (test_facility(57)) /* MSA5 */
 276                __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 277                              kvm_s390_available_subfunc.ppno);
 278
 279        if (MACHINE_HAS_ESOP)
 280                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 281        /*
 282         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 283         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 284         */
 285        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 286            !test_facility(3) || !nested)
 287                return;
 288        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 289        if (sclp.has_64bscao)
 290                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 291        if (sclp.has_siif)
 292                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 293        if (sclp.has_gpere)
 294                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 295        if (sclp.has_gsls)
 296                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 297        if (sclp.has_ib)
 298                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 299        if (sclp.has_cei)
 300                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 301        if (sclp.has_ibs)
 302                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 303        /*
 304         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 305         * all skey handling functions read/set the skey from the PGSTE
 306         * instead of the real storage key.
 307         *
 308         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 309         * pages being detected as preserved although they are resident.
 310         *
 311         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 312         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 313         *
 314         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 315         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 316         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 317         *
 318         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 319         * cannot easily shadow the SCA because of the ipte lock.
 320         */
 321}
 322
 323int kvm_arch_init(void *opaque)
 324{
 325        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 326        if (!kvm_s390_dbf)
 327                return -ENOMEM;
 328
 329        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 330                debug_unregister(kvm_s390_dbf);
 331                return -ENOMEM;
 332        }
 333
 334        kvm_s390_cpu_feat_init();
 335
 336        /* Register floating interrupt controller interface. */
 337        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 338}
 339
 340void kvm_arch_exit(void)
 341{
 342        debug_unregister(kvm_s390_dbf);
 343}
 344
 345/* Section: device related */
 346long kvm_arch_dev_ioctl(struct file *filp,
 347                        unsigned int ioctl, unsigned long arg)
 348{
 349        if (ioctl == KVM_S390_ENABLE_SIE)
 350                return s390_enable_sie();
 351        return -EINVAL;
 352}
 353
 354int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 355{
 356        int r;
 357
 358        switch (ext) {
 359        case KVM_CAP_S390_PSW:
 360        case KVM_CAP_S390_GMAP:
 361        case KVM_CAP_SYNC_MMU:
 362#ifdef CONFIG_KVM_S390_UCONTROL
 363        case KVM_CAP_S390_UCONTROL:
 364#endif
 365        case KVM_CAP_ASYNC_PF:
 366        case KVM_CAP_SYNC_REGS:
 367        case KVM_CAP_ONE_REG:
 368        case KVM_CAP_ENABLE_CAP:
 369        case KVM_CAP_S390_CSS_SUPPORT:
 370        case KVM_CAP_IOEVENTFD:
 371        case KVM_CAP_DEVICE_CTRL:
 372        case KVM_CAP_ENABLE_CAP_VM:
 373        case KVM_CAP_S390_IRQCHIP:
 374        case KVM_CAP_VM_ATTRIBUTES:
 375        case KVM_CAP_MP_STATE:
 376        case KVM_CAP_IMMEDIATE_EXIT:
 377        case KVM_CAP_S390_INJECT_IRQ:
 378        case KVM_CAP_S390_USER_SIGP:
 379        case KVM_CAP_S390_USER_STSI:
 380        case KVM_CAP_S390_SKEYS:
 381        case KVM_CAP_S390_IRQ_STATE:
 382        case KVM_CAP_S390_USER_INSTR0:
 383                r = 1;
 384                break;
 385        case KVM_CAP_S390_MEM_OP:
 386                r = MEM_OP_MAX_SIZE;
 387                break;
 388        case KVM_CAP_NR_VCPUS:
 389        case KVM_CAP_MAX_VCPUS:
 390                r = KVM_S390_BSCA_CPU_SLOTS;
 391                if (!kvm_s390_use_sca_entries())
 392                        r = KVM_MAX_VCPUS;
 393                else if (sclp.has_esca && sclp.has_64bscao)
 394                        r = KVM_S390_ESCA_CPU_SLOTS;
 395                break;
 396        case KVM_CAP_NR_MEMSLOTS:
 397                r = KVM_USER_MEM_SLOTS;
 398                break;
 399        case KVM_CAP_S390_COW:
 400                r = MACHINE_HAS_ESOP;
 401                break;
 402        case KVM_CAP_S390_VECTOR_REGISTERS:
 403                r = MACHINE_HAS_VX;
 404                break;
 405        case KVM_CAP_S390_RI:
 406                r = test_facility(64);
 407                break;
 408        default:
 409                r = 0;
 410        }
 411        return r;
 412}
 413
 414static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 415                                        struct kvm_memory_slot *memslot)
 416{
 417        gfn_t cur_gfn, last_gfn;
 418        unsigned long address;
 419        struct gmap *gmap = kvm->arch.gmap;
 420
 421        /* Loop over all guest pages */
 422        last_gfn = memslot->base_gfn + memslot->npages;
 423        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 424                address = gfn_to_hva_memslot(memslot, cur_gfn);
 425
 426                if (test_and_clear_guest_dirty(gmap->mm, address))
 427                        mark_page_dirty(kvm, cur_gfn);
 428                if (fatal_signal_pending(current))
 429                        return;
 430                cond_resched();
 431        }
 432}
 433
 434/* Section: vm related */
 435static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 436
 437/*
 438 * Get (and clear) the dirty memory log for a memory slot.
 439 */
 440int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 441                               struct kvm_dirty_log *log)
 442{
 443        int r;
 444        unsigned long n;
 445        struct kvm_memslots *slots;
 446        struct kvm_memory_slot *memslot;
 447        int is_dirty = 0;
 448
 449        if (kvm_is_ucontrol(kvm))
 450                return -EINVAL;
 451
 452        mutex_lock(&kvm->slots_lock);
 453
 454        r = -EINVAL;
 455        if (log->slot >= KVM_USER_MEM_SLOTS)
 456                goto out;
 457
 458        slots = kvm_memslots(kvm);
 459        memslot = id_to_memslot(slots, log->slot);
 460        r = -ENOENT;
 461        if (!memslot->dirty_bitmap)
 462                goto out;
 463
 464        kvm_s390_sync_dirty_log(kvm, memslot);
 465        r = kvm_get_dirty_log(kvm, log, &is_dirty);
 466        if (r)
 467                goto out;
 468
 469        /* Clear the dirty log */
 470        if (is_dirty) {
 471                n = kvm_dirty_bitmap_bytes(memslot);
 472                memset(memslot->dirty_bitmap, 0, n);
 473        }
 474        r = 0;
 475out:
 476        mutex_unlock(&kvm->slots_lock);
 477        return r;
 478}
 479
 480static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 481{
 482        unsigned int i;
 483        struct kvm_vcpu *vcpu;
 484
 485        kvm_for_each_vcpu(i, vcpu, kvm) {
 486                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 487        }
 488}
 489
 490static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 491{
 492        int r;
 493
 494        if (cap->flags)
 495                return -EINVAL;
 496
 497        switch (cap->cap) {
 498        case KVM_CAP_S390_IRQCHIP:
 499                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 500                kvm->arch.use_irqchip = 1;
 501                r = 0;
 502                break;
 503        case KVM_CAP_S390_USER_SIGP:
 504                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 505                kvm->arch.user_sigp = 1;
 506                r = 0;
 507                break;
 508        case KVM_CAP_S390_VECTOR_REGISTERS:
 509                mutex_lock(&kvm->lock);
 510                if (kvm->created_vcpus) {
 511                        r = -EBUSY;
 512                } else if (MACHINE_HAS_VX) {
 513                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 514                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 515                        if (test_facility(134)) {
 516                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 517                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 518                        }
 519                        if (test_facility(135)) {
 520                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 521                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 522                        }
 523                        r = 0;
 524                } else
 525                        r = -EINVAL;
 526                mutex_unlock(&kvm->lock);
 527                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 528                         r ? "(not available)" : "(success)");
 529                break;
 530        case KVM_CAP_S390_RI:
 531                r = -EINVAL;
 532                mutex_lock(&kvm->lock);
 533                if (kvm->created_vcpus) {
 534                        r = -EBUSY;
 535                } else if (test_facility(64)) {
 536                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 537                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 538                        r = 0;
 539                }
 540                mutex_unlock(&kvm->lock);
 541                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 542                         r ? "(not available)" : "(success)");
 543                break;
 544        case KVM_CAP_S390_USER_STSI:
 545                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 546                kvm->arch.user_stsi = 1;
 547                r = 0;
 548                break;
 549        case KVM_CAP_S390_USER_INSTR0:
 550                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 551                kvm->arch.user_instr0 = 1;
 552                icpt_operexc_on_all_vcpus(kvm);
 553                r = 0;
 554                break;
 555        default:
 556                r = -EINVAL;
 557                break;
 558        }
 559        return r;
 560}
 561
 562static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 563{
 564        int ret;
 565
 566        switch (attr->attr) {
 567        case KVM_S390_VM_MEM_LIMIT_SIZE:
 568                ret = 0;
 569                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 570                         kvm->arch.mem_limit);
 571                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 572                        ret = -EFAULT;
 573                break;
 574        default:
 575                ret = -ENXIO;
 576                break;
 577        }
 578        return ret;
 579}
 580
 581static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 582{
 583        int ret;
 584        unsigned int idx;
 585        switch (attr->attr) {
 586        case KVM_S390_VM_MEM_ENABLE_CMMA:
 587                ret = -ENXIO;
 588                if (!sclp.has_cmma)
 589                        break;
 590
 591                ret = -EBUSY;
 592                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 593                mutex_lock(&kvm->lock);
 594                if (!kvm->created_vcpus) {
 595                        kvm->arch.use_cmma = 1;
 596                        ret = 0;
 597                }
 598                mutex_unlock(&kvm->lock);
 599                break;
 600        case KVM_S390_VM_MEM_CLR_CMMA:
 601                ret = -ENXIO;
 602                if (!sclp.has_cmma)
 603                        break;
 604                ret = -EINVAL;
 605                if (!kvm->arch.use_cmma)
 606                        break;
 607
 608                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 609                mutex_lock(&kvm->lock);
 610                idx = srcu_read_lock(&kvm->srcu);
 611                s390_reset_cmma(kvm->arch.gmap->mm);
 612                srcu_read_unlock(&kvm->srcu, idx);
 613                mutex_unlock(&kvm->lock);
 614                ret = 0;
 615                break;
 616        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 617                unsigned long new_limit;
 618
 619                if (kvm_is_ucontrol(kvm))
 620                        return -EINVAL;
 621
 622                if (get_user(new_limit, (u64 __user *)attr->addr))
 623                        return -EFAULT;
 624
 625                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 626                    new_limit > kvm->arch.mem_limit)
 627                        return -E2BIG;
 628
 629                if (!new_limit)
 630                        return -EINVAL;
 631
 632                /* gmap_create takes last usable address */
 633                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 634                        new_limit -= 1;
 635
 636                ret = -EBUSY;
 637                mutex_lock(&kvm->lock);
 638                if (!kvm->created_vcpus) {
 639                        /* gmap_create will round the limit up */
 640                        struct gmap *new = gmap_create(current->mm, new_limit);
 641
 642                        if (!new) {
 643                                ret = -ENOMEM;
 644                        } else {
 645                                gmap_remove(kvm->arch.gmap);
 646                                new->private = kvm;
 647                                kvm->arch.gmap = new;
 648                                ret = 0;
 649                        }
 650                }
 651                mutex_unlock(&kvm->lock);
 652                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 653                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 654                         (void *) kvm->arch.gmap->asce);
 655                break;
 656        }
 657        default:
 658                ret = -ENXIO;
 659                break;
 660        }
 661        return ret;
 662}
 663
 664static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 665
 666static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 667{
 668        struct kvm_vcpu *vcpu;
 669        int i;
 670
 671        if (!test_kvm_facility(kvm, 76))
 672                return -EINVAL;
 673
 674        mutex_lock(&kvm->lock);
 675        switch (attr->attr) {
 676        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 677                get_random_bytes(
 678                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 679                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 680                kvm->arch.crypto.aes_kw = 1;
 681                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 682                break;
 683        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 684                get_random_bytes(
 685                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 686                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 687                kvm->arch.crypto.dea_kw = 1;
 688                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 689                break;
 690        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 691                kvm->arch.crypto.aes_kw = 0;
 692                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 693                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 694                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 695                break;
 696        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 697                kvm->arch.crypto.dea_kw = 0;
 698                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 699                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 700                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 701                break;
 702        default:
 703                mutex_unlock(&kvm->lock);
 704                return -ENXIO;
 705        }
 706
 707        kvm_for_each_vcpu(i, vcpu, kvm) {
 708                kvm_s390_vcpu_crypto_setup(vcpu);
 709                exit_sie(vcpu);
 710        }
 711        mutex_unlock(&kvm->lock);
 712        return 0;
 713}
 714
 715static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 716{
 717        u8 gtod_high;
 718
 719        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 720                                           sizeof(gtod_high)))
 721                return -EFAULT;
 722
 723        if (gtod_high != 0)
 724                return -EINVAL;
 725        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 726
 727        return 0;
 728}
 729
 730static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 731{
 732        u64 gtod;
 733
 734        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 735                return -EFAULT;
 736
 737        kvm_s390_set_tod_clock(kvm, gtod);
 738        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 739        return 0;
 740}
 741
 742static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 743{
 744        int ret;
 745
 746        if (attr->flags)
 747                return -EINVAL;
 748
 749        switch (attr->attr) {
 750        case KVM_S390_VM_TOD_HIGH:
 751                ret = kvm_s390_set_tod_high(kvm, attr);
 752                break;
 753        case KVM_S390_VM_TOD_LOW:
 754                ret = kvm_s390_set_tod_low(kvm, attr);
 755                break;
 756        default:
 757                ret = -ENXIO;
 758                break;
 759        }
 760        return ret;
 761}
 762
 763static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 764{
 765        u8 gtod_high = 0;
 766
 767        if (copy_to_user((void __user *)attr->addr, &gtod_high,
 768                                         sizeof(gtod_high)))
 769                return -EFAULT;
 770        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 771
 772        return 0;
 773}
 774
 775static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 776{
 777        u64 gtod;
 778
 779        gtod = kvm_s390_get_tod_clock_fast(kvm);
 780        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 781                return -EFAULT;
 782        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 783
 784        return 0;
 785}
 786
 787static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 788{
 789        int ret;
 790
 791        if (attr->flags)
 792                return -EINVAL;
 793
 794        switch (attr->attr) {
 795        case KVM_S390_VM_TOD_HIGH:
 796                ret = kvm_s390_get_tod_high(kvm, attr);
 797                break;
 798        case KVM_S390_VM_TOD_LOW:
 799                ret = kvm_s390_get_tod_low(kvm, attr);
 800                break;
 801        default:
 802                ret = -ENXIO;
 803                break;
 804        }
 805        return ret;
 806}
 807
 808static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 809{
 810        struct kvm_s390_vm_cpu_processor *proc;
 811        u16 lowest_ibc, unblocked_ibc;
 812        int ret = 0;
 813
 814        mutex_lock(&kvm->lock);
 815        if (kvm->created_vcpus) {
 816                ret = -EBUSY;
 817                goto out;
 818        }
 819        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 820        if (!proc) {
 821                ret = -ENOMEM;
 822                goto out;
 823        }
 824        if (!copy_from_user(proc, (void __user *)attr->addr,
 825                            sizeof(*proc))) {
 826                kvm->arch.model.cpuid = proc->cpuid;
 827                lowest_ibc = sclp.ibc >> 16 & 0xfff;
 828                unblocked_ibc = sclp.ibc & 0xfff;
 829                if (lowest_ibc && proc->ibc) {
 830                        if (proc->ibc > unblocked_ibc)
 831                                kvm->arch.model.ibc = unblocked_ibc;
 832                        else if (proc->ibc < lowest_ibc)
 833                                kvm->arch.model.ibc = lowest_ibc;
 834                        else
 835                                kvm->arch.model.ibc = proc->ibc;
 836                }
 837                memcpy(kvm->arch.model.fac_list, proc->fac_list,
 838                       S390_ARCH_FAC_LIST_SIZE_BYTE);
 839                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
 840                         kvm->arch.model.ibc,
 841                         kvm->arch.model.cpuid);
 842                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
 843                         kvm->arch.model.fac_list[0],
 844                         kvm->arch.model.fac_list[1],
 845                         kvm->arch.model.fac_list[2]);
 846        } else
 847                ret = -EFAULT;
 848        kfree(proc);
 849out:
 850        mutex_unlock(&kvm->lock);
 851        return ret;
 852}
 853
 854static int kvm_s390_set_processor_feat(struct kvm *kvm,
 855                                       struct kvm_device_attr *attr)
 856{
 857        struct kvm_s390_vm_cpu_feat data;
 858        int ret = -EBUSY;
 859
 860        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 861                return -EFAULT;
 862        if (!bitmap_subset((unsigned long *) data.feat,
 863                           kvm_s390_available_cpu_feat,
 864                           KVM_S390_VM_CPU_FEAT_NR_BITS))
 865                return -EINVAL;
 866
 867        mutex_lock(&kvm->lock);
 868        if (!atomic_read(&kvm->online_vcpus)) {
 869                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 870                            KVM_S390_VM_CPU_FEAT_NR_BITS);
 871                ret = 0;
 872        }
 873        mutex_unlock(&kvm->lock);
 874        return ret;
 875}
 876
 877static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 878                                          struct kvm_device_attr *attr)
 879{
 880        /*
 881         * Once supported by kernel + hw, we have to store the subfunctions
 882         * in kvm->arch and remember that user space configured them.
 883         */
 884        return -ENXIO;
 885}
 886
 887static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 888{
 889        int ret = -ENXIO;
 890
 891        switch (attr->attr) {
 892        case KVM_S390_VM_CPU_PROCESSOR:
 893                ret = kvm_s390_set_processor(kvm, attr);
 894                break;
 895        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 896                ret = kvm_s390_set_processor_feat(kvm, attr);
 897                break;
 898        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 899                ret = kvm_s390_set_processor_subfunc(kvm, attr);
 900                break;
 901        }
 902        return ret;
 903}
 904
 905static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 906{
 907        struct kvm_s390_vm_cpu_processor *proc;
 908        int ret = 0;
 909
 910        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 911        if (!proc) {
 912                ret = -ENOMEM;
 913                goto out;
 914        }
 915        proc->cpuid = kvm->arch.model.cpuid;
 916        proc->ibc = kvm->arch.model.ibc;
 917        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 918               S390_ARCH_FAC_LIST_SIZE_BYTE);
 919        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
 920                 kvm->arch.model.ibc,
 921                 kvm->arch.model.cpuid);
 922        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
 923                 kvm->arch.model.fac_list[0],
 924                 kvm->arch.model.fac_list[1],
 925                 kvm->arch.model.fac_list[2]);
 926        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 927                ret = -EFAULT;
 928        kfree(proc);
 929out:
 930        return ret;
 931}
 932
 933static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 934{
 935        struct kvm_s390_vm_cpu_machine *mach;
 936        int ret = 0;
 937
 938        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 939        if (!mach) {
 940                ret = -ENOMEM;
 941                goto out;
 942        }
 943        get_cpu_id((struct cpuid *) &mach->cpuid);
 944        mach->ibc = sclp.ibc;
 945        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 946               S390_ARCH_FAC_LIST_SIZE_BYTE);
 947        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 948               sizeof(S390_lowcore.stfle_fac_list));
 949        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
 950                 kvm->arch.model.ibc,
 951                 kvm->arch.model.cpuid);
 952        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
 953                 mach->fac_mask[0],
 954                 mach->fac_mask[1],
 955                 mach->fac_mask[2]);
 956        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
 957                 mach->fac_list[0],
 958                 mach->fac_list[1],
 959                 mach->fac_list[2]);
 960        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 961                ret = -EFAULT;
 962        kfree(mach);
 963out:
 964        return ret;
 965}
 966
 967static int kvm_s390_get_processor_feat(struct kvm *kvm,
 968                                       struct kvm_device_attr *attr)
 969{
 970        struct kvm_s390_vm_cpu_feat data;
 971
 972        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 973                    KVM_S390_VM_CPU_FEAT_NR_BITS);
 974        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 975                return -EFAULT;
 976        return 0;
 977}
 978
 979static int kvm_s390_get_machine_feat(struct kvm *kvm,
 980                                     struct kvm_device_attr *attr)
 981{
 982        struct kvm_s390_vm_cpu_feat data;
 983
 984        bitmap_copy((unsigned long *) data.feat,
 985                    kvm_s390_available_cpu_feat,
 986                    KVM_S390_VM_CPU_FEAT_NR_BITS);
 987        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 988                return -EFAULT;
 989        return 0;
 990}
 991
 992static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 993                                          struct kvm_device_attr *attr)
 994{
 995        /*
 996         * Once we can actually configure subfunctions (kernel + hw support),
 997         * we have to check if they were already set by user space, if so copy
 998         * them from kvm->arch.
 999         */
1000        return -ENXIO;
1001}
1002
1003static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1004                                        struct kvm_device_attr *attr)
1005{
1006        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1007            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1008                return -EFAULT;
1009        return 0;
1010}
1011static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1012{
1013        int ret = -ENXIO;
1014
1015        switch (attr->attr) {
1016        case KVM_S390_VM_CPU_PROCESSOR:
1017                ret = kvm_s390_get_processor(kvm, attr);
1018                break;
1019        case KVM_S390_VM_CPU_MACHINE:
1020                ret = kvm_s390_get_machine(kvm, attr);
1021                break;
1022        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1023                ret = kvm_s390_get_processor_feat(kvm, attr);
1024                break;
1025        case KVM_S390_VM_CPU_MACHINE_FEAT:
1026                ret = kvm_s390_get_machine_feat(kvm, attr);
1027                break;
1028        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1029                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1030                break;
1031        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1032                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1033                break;
1034        }
1035        return ret;
1036}
1037
1038static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1039{
1040        int ret;
1041
1042        switch (attr->group) {
1043        case KVM_S390_VM_MEM_CTRL:
1044                ret = kvm_s390_set_mem_control(kvm, attr);
1045                break;
1046        case KVM_S390_VM_TOD:
1047                ret = kvm_s390_set_tod(kvm, attr);
1048                break;
1049        case KVM_S390_VM_CPU_MODEL:
1050                ret = kvm_s390_set_cpu_model(kvm, attr);
1051                break;
1052        case KVM_S390_VM_CRYPTO:
1053                ret = kvm_s390_vm_set_crypto(kvm, attr);
1054                break;
1055        default:
1056                ret = -ENXIO;
1057                break;
1058        }
1059
1060        return ret;
1061}
1062
1063static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1064{
1065        int ret;
1066
1067        switch (attr->group) {
1068        case KVM_S390_VM_MEM_CTRL:
1069                ret = kvm_s390_get_mem_control(kvm, attr);
1070                break;
1071        case KVM_S390_VM_TOD:
1072                ret = kvm_s390_get_tod(kvm, attr);
1073                break;
1074        case KVM_S390_VM_CPU_MODEL:
1075                ret = kvm_s390_get_cpu_model(kvm, attr);
1076                break;
1077        default:
1078                ret = -ENXIO;
1079                break;
1080        }
1081
1082        return ret;
1083}
1084
1085static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1086{
1087        int ret;
1088
1089        switch (attr->group) {
1090        case KVM_S390_VM_MEM_CTRL:
1091                switch (attr->attr) {
1092                case KVM_S390_VM_MEM_ENABLE_CMMA:
1093                case KVM_S390_VM_MEM_CLR_CMMA:
1094                        ret = sclp.has_cmma ? 0 : -ENXIO;
1095                        break;
1096                case KVM_S390_VM_MEM_LIMIT_SIZE:
1097                        ret = 0;
1098                        break;
1099                default:
1100                        ret = -ENXIO;
1101                        break;
1102                }
1103                break;
1104        case KVM_S390_VM_TOD:
1105                switch (attr->attr) {
1106                case KVM_S390_VM_TOD_LOW:
1107                case KVM_S390_VM_TOD_HIGH:
1108                        ret = 0;
1109                        break;
1110                default:
1111                        ret = -ENXIO;
1112                        break;
1113                }
1114                break;
1115        case KVM_S390_VM_CPU_MODEL:
1116                switch (attr->attr) {
1117                case KVM_S390_VM_CPU_PROCESSOR:
1118                case KVM_S390_VM_CPU_MACHINE:
1119                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1120                case KVM_S390_VM_CPU_MACHINE_FEAT:
1121                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1122                        ret = 0;
1123                        break;
1124                /* configuring subfunctions is not supported yet */
1125                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1126                default:
1127                        ret = -ENXIO;
1128                        break;
1129                }
1130                break;
1131        case KVM_S390_VM_CRYPTO:
1132                switch (attr->attr) {
1133                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1134                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1135                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1136                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1137                        ret = 0;
1138                        break;
1139                default:
1140                        ret = -ENXIO;
1141                        break;
1142                }
1143                break;
1144        default:
1145                ret = -ENXIO;
1146                break;
1147        }
1148
1149        return ret;
1150}
1151
1152static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1153{
1154        uint8_t *keys;
1155        uint64_t hva;
1156        int i, r = 0;
1157
1158        if (args->flags != 0)
1159                return -EINVAL;
1160
1161        /* Is this guest using storage keys? */
1162        if (!mm_use_skey(current->mm))
1163                return KVM_S390_GET_SKEYS_NONE;
1164
1165        /* Enforce sane limit on memory allocation */
1166        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1167                return -EINVAL;
1168
1169        keys = kmalloc_array(args->count, sizeof(uint8_t),
1170                             GFP_KERNEL | __GFP_NOWARN);
1171        if (!keys)
1172                keys = vmalloc(sizeof(uint8_t) * args->count);
1173        if (!keys)
1174                return -ENOMEM;
1175
1176        down_read(&current->mm->mmap_sem);
1177        for (i = 0; i < args->count; i++) {
1178                hva = gfn_to_hva(kvm, args->start_gfn + i);
1179                if (kvm_is_error_hva(hva)) {
1180                        r = -EFAULT;
1181                        break;
1182                }
1183
1184                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1185                if (r)
1186                        break;
1187        }
1188        up_read(&current->mm->mmap_sem);
1189
1190        if (!r) {
1191                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1192                                 sizeof(uint8_t) * args->count);
1193                if (r)
1194                        r = -EFAULT;
1195        }
1196
1197        kvfree(keys);
1198        return r;
1199}
1200
1201static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1202{
1203        uint8_t *keys;
1204        uint64_t hva;
1205        int i, r = 0;
1206
1207        if (args->flags != 0)
1208                return -EINVAL;
1209
1210        /* Enforce sane limit on memory allocation */
1211        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1212                return -EINVAL;
1213
1214        keys = kmalloc_array(args->count, sizeof(uint8_t),
1215                             GFP_KERNEL | __GFP_NOWARN);
1216        if (!keys)
1217                keys = vmalloc(sizeof(uint8_t) * args->count);
1218        if (!keys)
1219                return -ENOMEM;
1220
1221        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1222                           sizeof(uint8_t) * args->count);
1223        if (r) {
1224                r = -EFAULT;
1225                goto out;
1226        }
1227
1228        /* Enable storage key handling for the guest */
1229        r = s390_enable_skey();
1230        if (r)
1231                goto out;
1232
1233        down_read(&current->mm->mmap_sem);
1234        for (i = 0; i < args->count; i++) {
1235                hva = gfn_to_hva(kvm, args->start_gfn + i);
1236                if (kvm_is_error_hva(hva)) {
1237                        r = -EFAULT;
1238                        break;
1239                }
1240
1241                /* Lowest order bit is reserved */
1242                if (keys[i] & 0x01) {
1243                        r = -EINVAL;
1244                        break;
1245                }
1246
1247                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1248                if (r)
1249                        break;
1250        }
1251        up_read(&current->mm->mmap_sem);
1252out:
1253        kvfree(keys);
1254        return r;
1255}
1256
1257long kvm_arch_vm_ioctl(struct file *filp,
1258                       unsigned int ioctl, unsigned long arg)
1259{
1260        struct kvm *kvm = filp->private_data;
1261        void __user *argp = (void __user *)arg;
1262        struct kvm_device_attr attr;
1263        int r;
1264
1265        switch (ioctl) {
1266        case KVM_S390_INTERRUPT: {
1267                struct kvm_s390_interrupt s390int;
1268
1269                r = -EFAULT;
1270                if (copy_from_user(&s390int, argp, sizeof(s390int)))
1271                        break;
1272                r = kvm_s390_inject_vm(kvm, &s390int);
1273                break;
1274        }
1275        case KVM_ENABLE_CAP: {
1276                struct kvm_enable_cap cap;
1277                r = -EFAULT;
1278                if (copy_from_user(&cap, argp, sizeof(cap)))
1279                        break;
1280                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1281                break;
1282        }
1283        case KVM_CREATE_IRQCHIP: {
1284                struct kvm_irq_routing_entry routing;
1285
1286                r = -EINVAL;
1287                if (kvm->arch.use_irqchip) {
1288                        /* Set up dummy routing. */
1289                        memset(&routing, 0, sizeof(routing));
1290                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1291                }
1292                break;
1293        }
1294        case KVM_SET_DEVICE_ATTR: {
1295                r = -EFAULT;
1296                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1297                        break;
1298                r = kvm_s390_vm_set_attr(kvm, &attr);
1299                break;
1300        }
1301        case KVM_GET_DEVICE_ATTR: {
1302                r = -EFAULT;
1303                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1304                        break;
1305                r = kvm_s390_vm_get_attr(kvm, &attr);
1306                break;
1307        }
1308        case KVM_HAS_DEVICE_ATTR: {
1309                r = -EFAULT;
1310                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1311                        break;
1312                r = kvm_s390_vm_has_attr(kvm, &attr);
1313                break;
1314        }
1315        case KVM_S390_GET_SKEYS: {
1316                struct kvm_s390_skeys args;
1317
1318                r = -EFAULT;
1319                if (copy_from_user(&args, argp,
1320                                   sizeof(struct kvm_s390_skeys)))
1321                        break;
1322                r = kvm_s390_get_skeys(kvm, &args);
1323                break;
1324        }
1325        case KVM_S390_SET_SKEYS: {
1326                struct kvm_s390_skeys args;
1327
1328                r = -EFAULT;
1329                if (copy_from_user(&args, argp,
1330                                   sizeof(struct kvm_s390_skeys)))
1331                        break;
1332                r = kvm_s390_set_skeys(kvm, &args);
1333                break;
1334        }
1335        default:
1336                r = -ENOTTY;
1337        }
1338
1339        return r;
1340}
1341
1342static int kvm_s390_query_ap_config(u8 *config)
1343{
1344        u32 fcn_code = 0x04000000UL;
1345        u32 cc = 0;
1346
1347        memset(config, 0, 128);
1348        asm volatile(
1349                "lgr 0,%1\n"
1350                "lgr 2,%2\n"
1351                ".long 0xb2af0000\n"            /* PQAP(QCI) */
1352                "0: ipm %0\n"
1353                "srl %0,28\n"
1354                "1:\n"
1355                EX_TABLE(0b, 1b)
1356                : "+r" (cc)
1357                : "r" (fcn_code), "r" (config)
1358                : "cc", "0", "2", "memory"
1359        );
1360
1361        return cc;
1362}
1363
1364static int kvm_s390_apxa_installed(void)
1365{
1366        u8 config[128];
1367        int cc;
1368
1369        if (test_facility(12)) {
1370                cc = kvm_s390_query_ap_config(config);
1371
1372                if (cc)
1373                        pr_err("PQAP(QCI) failed with cc=%d", cc);
1374                else
1375                        return config[0] & 0x40;
1376        }
1377
1378        return 0;
1379}
1380
1381static void kvm_s390_set_crycb_format(struct kvm *kvm)
1382{
1383        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1384
1385        if (kvm_s390_apxa_installed())
1386                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1387        else
1388                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1389}
1390
1391static u64 kvm_s390_get_initial_cpuid(void)
1392{
1393        struct cpuid cpuid;
1394
1395        get_cpu_id(&cpuid);
1396        cpuid.version = 0xff;
1397        return *((u64 *) &cpuid);
1398}
1399
1400static void kvm_s390_crypto_init(struct kvm *kvm)
1401{
1402        if (!test_kvm_facility(kvm, 76))
1403                return;
1404
1405        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1406        kvm_s390_set_crycb_format(kvm);
1407
1408        /* Enable AES/DEA protected key functions by default */
1409        kvm->arch.crypto.aes_kw = 1;
1410        kvm->arch.crypto.dea_kw = 1;
1411        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1412                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1413        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1414                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1415}
1416
1417static void sca_dispose(struct kvm *kvm)
1418{
1419        if (kvm->arch.use_esca)
1420                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1421        else
1422                free_page((unsigned long)(kvm->arch.sca));
1423        kvm->arch.sca = NULL;
1424}
1425
1426int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1427{
1428        gfp_t alloc_flags = GFP_KERNEL;
1429        int i, rc;
1430        char debug_name[16];
1431        static unsigned long sca_offset;
1432
1433        rc = -EINVAL;
1434#ifdef CONFIG_KVM_S390_UCONTROL
1435        if (type & ~KVM_VM_S390_UCONTROL)
1436                goto out_err;
1437        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1438                goto out_err;
1439#else
1440        if (type)
1441                goto out_err;
1442#endif
1443
1444        rc = s390_enable_sie();
1445        if (rc)
1446                goto out_err;
1447
1448        rc = -ENOMEM;
1449
1450        ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1451
1452        kvm->arch.use_esca = 0; /* start with basic SCA */
1453        if (!sclp.has_64bscao)
1454                alloc_flags |= GFP_DMA;
1455        rwlock_init(&kvm->arch.sca_lock);
1456        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1457        if (!kvm->arch.sca)
1458                goto out_err;
1459        spin_lock(&kvm_lock);
1460        sca_offset += 16;
1461        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1462                sca_offset = 0;
1463        kvm->arch.sca = (struct bsca_block *)
1464                        ((char *) kvm->arch.sca + sca_offset);
1465        spin_unlock(&kvm_lock);
1466
1467        sprintf(debug_name, "kvm-%u", current->pid);
1468
1469        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1470        if (!kvm->arch.dbf)
1471                goto out_err;
1472
1473        kvm->arch.sie_page2 =
1474             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1475        if (!kvm->arch.sie_page2)
1476                goto out_err;
1477
1478        /* Populate the facility mask initially. */
1479        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1480               sizeof(S390_lowcore.stfle_fac_list));
1481        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1482                if (i < kvm_s390_fac_list_mask_size())
1483                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1484                else
1485                        kvm->arch.model.fac_mask[i] = 0UL;
1486        }
1487
1488        /* Populate the facility list initially. */
1489        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1490        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1491               S390_ARCH_FAC_LIST_SIZE_BYTE);
1492
1493        set_kvm_facility(kvm->arch.model.fac_mask, 74);
1494        set_kvm_facility(kvm->arch.model.fac_list, 74);
1495
1496        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1497        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1498
1499        kvm_s390_crypto_init(kvm);
1500
1501        spin_lock_init(&kvm->arch.float_int.lock);
1502        for (i = 0; i < FIRQ_LIST_COUNT; i++)
1503                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1504        init_waitqueue_head(&kvm->arch.ipte_wq);
1505        mutex_init(&kvm->arch.ipte_mutex);
1506
1507        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1508        VM_EVENT(kvm, 3, "vm created with type %lu", type);
1509
1510        if (type & KVM_VM_S390_UCONTROL) {
1511                kvm->arch.gmap = NULL;
1512                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1513        } else {
1514                if (sclp.hamax == U64_MAX)
1515                        kvm->arch.mem_limit = TASK_MAX_SIZE;
1516                else
1517                        kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1518                                                    sclp.hamax + 1);
1519                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1520                if (!kvm->arch.gmap)
1521                        goto out_err;
1522                kvm->arch.gmap->private = kvm;
1523                kvm->arch.gmap->pfault_enabled = 0;
1524        }
1525
1526        kvm->arch.css_support = 0;
1527        kvm->arch.use_irqchip = 0;
1528        kvm->arch.epoch = 0;
1529
1530        spin_lock_init(&kvm->arch.start_stop_lock);
1531        kvm_s390_vsie_init(kvm);
1532        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1533
1534        return 0;
1535out_err:
1536        free_page((unsigned long)kvm->arch.sie_page2);
1537        debug_unregister(kvm->arch.dbf);
1538        sca_dispose(kvm);
1539        KVM_EVENT(3, "creation of vm failed: %d", rc);
1540        return rc;
1541}
1542
1543bool kvm_arch_has_vcpu_debugfs(void)
1544{
1545        return false;
1546}
1547
1548int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1549{
1550        return 0;
1551}
1552
1553void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1554{
1555        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1556        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1557        kvm_s390_clear_local_irqs(vcpu);
1558        kvm_clear_async_pf_completion_queue(vcpu);
1559        if (!kvm_is_ucontrol(vcpu->kvm))
1560                sca_del_vcpu(vcpu);
1561
1562        if (kvm_is_ucontrol(vcpu->kvm))
1563                gmap_remove(vcpu->arch.gmap);
1564
1565        if (vcpu->kvm->arch.use_cmma)
1566                kvm_s390_vcpu_unsetup_cmma(vcpu);
1567        free_page((unsigned long)(vcpu->arch.sie_block));
1568
1569        kvm_vcpu_uninit(vcpu);
1570        kmem_cache_free(kvm_vcpu_cache, vcpu);
1571}
1572
1573static void kvm_free_vcpus(struct kvm *kvm)
1574{
1575        unsigned int i;
1576        struct kvm_vcpu *vcpu;
1577
1578        kvm_for_each_vcpu(i, vcpu, kvm)
1579                kvm_arch_vcpu_destroy(vcpu);
1580
1581        mutex_lock(&kvm->lock);
1582        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1583                kvm->vcpus[i] = NULL;
1584
1585        atomic_set(&kvm->online_vcpus, 0);
1586        mutex_unlock(&kvm->lock);
1587}
1588
1589void kvm_arch_destroy_vm(struct kvm *kvm)
1590{
1591        kvm_free_vcpus(kvm);
1592        sca_dispose(kvm);
1593        debug_unregister(kvm->arch.dbf);
1594        free_page((unsigned long)kvm->arch.sie_page2);
1595        if (!kvm_is_ucontrol(kvm))
1596                gmap_remove(kvm->arch.gmap);
1597        kvm_s390_destroy_adapters(kvm);
1598        kvm_s390_clear_float_irqs(kvm);
1599        kvm_s390_vsie_destroy(kvm);
1600        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1601}
1602
1603/* Section: vcpu related */
1604static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1605{
1606        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1607        if (!vcpu->arch.gmap)
1608                return -ENOMEM;
1609        vcpu->arch.gmap->private = vcpu->kvm;
1610
1611        return 0;
1612}
1613
1614static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1615{
1616        if (!kvm_s390_use_sca_entries())
1617                return;
1618        read_lock(&vcpu->kvm->arch.sca_lock);
1619        if (vcpu->kvm->arch.use_esca) {
1620                struct esca_block *sca = vcpu->kvm->arch.sca;
1621
1622                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1623                sca->cpu[vcpu->vcpu_id].sda = 0;
1624        } else {
1625                struct bsca_block *sca = vcpu->kvm->arch.sca;
1626
1627                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1628                sca->cpu[vcpu->vcpu_id].sda = 0;
1629        }
1630        read_unlock(&vcpu->kvm->arch.sca_lock);
1631}
1632
1633static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1634{
1635        if (!kvm_s390_use_sca_entries()) {
1636                struct bsca_block *sca = vcpu->kvm->arch.sca;
1637
1638                /* we still need the basic sca for the ipte control */
1639                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1640                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1641        }
1642        read_lock(&vcpu->kvm->arch.sca_lock);
1643        if (vcpu->kvm->arch.use_esca) {
1644                struct esca_block *sca = vcpu->kvm->arch.sca;
1645
1646                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1647                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1648                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1649                vcpu->arch.sie_block->ecb2 |= 0x04U;
1650                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1651        } else {
1652                struct bsca_block *sca = vcpu->kvm->arch.sca;
1653
1654                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1655                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1656                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1657                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1658        }
1659        read_unlock(&vcpu->kvm->arch.sca_lock);
1660}
1661
1662/* Basic SCA to Extended SCA data copy routines */
1663static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1664{
1665        d->sda = s->sda;
1666        d->sigp_ctrl.c = s->sigp_ctrl.c;
1667        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1668}
1669
1670static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1671{
1672        int i;
1673
1674        d->ipte_control = s->ipte_control;
1675        d->mcn[0] = s->mcn;
1676        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1677                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1678}
1679
1680static int sca_switch_to_extended(struct kvm *kvm)
1681{
1682        struct bsca_block *old_sca = kvm->arch.sca;
1683        struct esca_block *new_sca;
1684        struct kvm_vcpu *vcpu;
1685        unsigned int vcpu_idx;
1686        u32 scaol, scaoh;
1687
1688        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1689        if (!new_sca)
1690                return -ENOMEM;
1691
1692        scaoh = (u32)((u64)(new_sca) >> 32);
1693        scaol = (u32)(u64)(new_sca) & ~0x3fU;
1694
1695        kvm_s390_vcpu_block_all(kvm);
1696        write_lock(&kvm->arch.sca_lock);
1697
1698        sca_copy_b_to_e(new_sca, old_sca);
1699
1700        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1701                vcpu->arch.sie_block->scaoh = scaoh;
1702                vcpu->arch.sie_block->scaol = scaol;
1703                vcpu->arch.sie_block->ecb2 |= 0x04U;
1704        }
1705        kvm->arch.sca = new_sca;
1706        kvm->arch.use_esca = 1;
1707
1708        write_unlock(&kvm->arch.sca_lock);
1709        kvm_s390_vcpu_unblock_all(kvm);
1710
1711        free_page((unsigned long)old_sca);
1712
1713        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1714                 old_sca, kvm->arch.sca);
1715        return 0;
1716}
1717
1718static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1719{
1720        int rc;
1721
1722        if (!kvm_s390_use_sca_entries()) {
1723                if (id < KVM_MAX_VCPUS)
1724                        return true;
1725                return false;
1726        }
1727        if (id < KVM_S390_BSCA_CPU_SLOTS)
1728                return true;
1729        if (!sclp.has_esca || !sclp.has_64bscao)
1730                return false;
1731
1732        mutex_lock(&kvm->lock);
1733        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1734        mutex_unlock(&kvm->lock);
1735
1736        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1737}
1738
1739int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1740{
1741        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1742        kvm_clear_async_pf_completion_queue(vcpu);
1743        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1744                                    KVM_SYNC_GPRS |
1745                                    KVM_SYNC_ACRS |
1746                                    KVM_SYNC_CRS |
1747                                    KVM_SYNC_ARCH0 |
1748                                    KVM_SYNC_PFAULT;
1749        kvm_s390_set_prefix(vcpu, 0);
1750        if (test_kvm_facility(vcpu->kvm, 64))
1751                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1752        /* fprs can be synchronized via vrs, even if the guest has no vx. With
1753         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1754         */
1755        if (MACHINE_HAS_VX)
1756                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1757        else
1758                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1759
1760        if (kvm_is_ucontrol(vcpu->kvm))
1761                return __kvm_ucontrol_vcpu_init(vcpu);
1762
1763        return 0;
1764}
1765
1766/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1767static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1768{
1769        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1770        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1771        vcpu->arch.cputm_start = get_tod_clock_fast();
1772        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1773}
1774
1775/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1776static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1777{
1778        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1779        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1780        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1781        vcpu->arch.cputm_start = 0;
1782        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1783}
1784
1785/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1786static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1787{
1788        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1789        vcpu->arch.cputm_enabled = true;
1790        __start_cpu_timer_accounting(vcpu);
1791}
1792
1793/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1794static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1795{
1796        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1797        __stop_cpu_timer_accounting(vcpu);
1798        vcpu->arch.cputm_enabled = false;
1799}
1800
1801static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1802{
1803        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1804        __enable_cpu_timer_accounting(vcpu);
1805        preempt_enable();
1806}
1807
1808static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1809{
1810        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1811        __disable_cpu_timer_accounting(vcpu);
1812        preempt_enable();
1813}
1814
1815/* set the cpu timer - may only be called from the VCPU thread itself */
1816void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1817{
1818        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1819        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1820        if (vcpu->arch.cputm_enabled)
1821                vcpu->arch.cputm_start = get_tod_clock_fast();
1822        vcpu->arch.sie_block->cputm = cputm;
1823        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1824        preempt_enable();
1825}
1826
1827/* update and get the cpu timer - can also be called from other VCPU threads */
1828__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1829{
1830        unsigned int seq;
1831        __u64 value;
1832
1833        if (unlikely(!vcpu->arch.cputm_enabled))
1834                return vcpu->arch.sie_block->cputm;
1835
1836        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1837        do {
1838                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1839                /*
1840                 * If the writer would ever execute a read in the critical
1841                 * section, e.g. in irq context, we have a deadlock.
1842                 */
1843                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1844                value = vcpu->arch.sie_block->cputm;
1845                /* if cputm_start is 0, accounting is being started/stopped */
1846                if (likely(vcpu->arch.cputm_start))
1847                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1848        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1849        preempt_enable();
1850        return value;
1851}
1852
1853void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1854{
1855
1856        gmap_enable(vcpu->arch.enabled_gmap);
1857        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1858        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1859                __start_cpu_timer_accounting(vcpu);
1860        vcpu->cpu = cpu;
1861}
1862
1863void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1864{
1865        vcpu->cpu = -1;
1866        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1867                __stop_cpu_timer_accounting(vcpu);
1868        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1869        vcpu->arch.enabled_gmap = gmap_get_enabled();
1870        gmap_disable(vcpu->arch.enabled_gmap);
1871
1872}
1873
1874static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1875{
1876        /* this equals initial cpu reset in pop, but we don't switch to ESA */
1877        vcpu->arch.sie_block->gpsw.mask = 0UL;
1878        vcpu->arch.sie_block->gpsw.addr = 0UL;
1879        kvm_s390_set_prefix(vcpu, 0);
1880        kvm_s390_set_cpu_timer(vcpu, 0);
1881        vcpu->arch.sie_block->ckc       = 0UL;
1882        vcpu->arch.sie_block->todpr     = 0;
1883        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1884        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1885        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1886        /* make sure the new fpc will be lazily loaded */
1887        save_fpu_regs();
1888        current->thread.fpu.fpc = 0;
1889        vcpu->arch.sie_block->gbea = 1;
1890        vcpu->arch.sie_block->pp = 0;
1891        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1892        kvm_clear_async_pf_completion_queue(vcpu);
1893        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1894                kvm_s390_vcpu_stop(vcpu);
1895        kvm_s390_clear_local_irqs(vcpu);
1896}
1897
1898void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1899{
1900        mutex_lock(&vcpu->kvm->lock);
1901        preempt_disable();
1902        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1903        preempt_enable();
1904        mutex_unlock(&vcpu->kvm->lock);
1905        if (!kvm_is_ucontrol(vcpu->kvm)) {
1906                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1907                sca_add_vcpu(vcpu);
1908        }
1909        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1910                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1911        /* make vcpu_load load the right gmap on the first trigger */
1912        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1913}
1914
1915static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1916{
1917        if (!test_kvm_facility(vcpu->kvm, 76))
1918                return;
1919
1920        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1921
1922        if (vcpu->kvm->arch.crypto.aes_kw)
1923                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1924        if (vcpu->kvm->arch.crypto.dea_kw)
1925                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1926
1927        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1928}
1929
1930void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1931{
1932        free_page(vcpu->arch.sie_block->cbrlo);
1933        vcpu->arch.sie_block->cbrlo = 0;
1934}
1935
1936int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1937{
1938        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1939        if (!vcpu->arch.sie_block->cbrlo)
1940                return -ENOMEM;
1941
1942        vcpu->arch.sie_block->ecb2 |= 0x80;
1943        vcpu->arch.sie_block->ecb2 &= ~0x08;
1944        return 0;
1945}
1946
1947static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1948{
1949        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1950
1951        vcpu->arch.sie_block->ibc = model->ibc;
1952        if (test_kvm_facility(vcpu->kvm, 7))
1953                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1954}
1955
1956int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1957{
1958        int rc = 0;
1959
1960        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1961                                                    CPUSTAT_SM |
1962                                                    CPUSTAT_STOPPED);
1963
1964        if (test_kvm_facility(vcpu->kvm, 78))
1965                atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1966        else if (test_kvm_facility(vcpu->kvm, 8))
1967                atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1968
1969        kvm_s390_vcpu_setup_model(vcpu);
1970
1971        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1972        if (MACHINE_HAS_ESOP)
1973                vcpu->arch.sie_block->ecb |= 0x02;
1974        if (test_kvm_facility(vcpu->kvm, 9))
1975                vcpu->arch.sie_block->ecb |= 0x04;
1976        if (test_kvm_facility(vcpu->kvm, 73))
1977                vcpu->arch.sie_block->ecb |= 0x10;
1978
1979        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1980                vcpu->arch.sie_block->ecb2 |= 0x08;
1981        if (test_kvm_facility(vcpu->kvm, 130))
1982                vcpu->arch.sie_block->ecb2 |= 0x20;
1983        vcpu->arch.sie_block->eca = 0x1002000U;
1984        if (sclp.has_cei)
1985                vcpu->arch.sie_block->eca |= 0x80000000U;
1986        if (sclp.has_ib)
1987                vcpu->arch.sie_block->eca |= 0x40000000U;
1988        if (sclp.has_siif)
1989                vcpu->arch.sie_block->eca |= 1;
1990        if (sclp.has_sigpif)
1991                vcpu->arch.sie_block->eca |= 0x10000000U;
1992        if (test_kvm_facility(vcpu->kvm, 129)) {
1993                vcpu->arch.sie_block->eca |= 0x00020000;
1994                vcpu->arch.sie_block->ecd |= 0x20000000;
1995        }
1996        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1997        vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1998
1999        if (vcpu->kvm->arch.use_cmma) {
2000                rc = kvm_s390_vcpu_setup_cmma(vcpu);
2001                if (rc)
2002                        return rc;
2003        }
2004        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2005        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2006
2007        kvm_s390_vcpu_crypto_setup(vcpu);
2008
2009        return rc;
2010}
2011
2012struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2013                                      unsigned int id)
2014{
2015        struct kvm_vcpu *vcpu;
2016        struct sie_page *sie_page;
2017        int rc = -EINVAL;
2018
2019        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2020                goto out;
2021
2022        rc = -ENOMEM;
2023
2024        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2025        if (!vcpu)
2026                goto out;
2027
2028        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2029        if (!sie_page)
2030                goto out_free_cpu;
2031
2032        vcpu->arch.sie_block = &sie_page->sie_block;
2033        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2034
2035        /* the real guest size will always be smaller than msl */
2036        vcpu->arch.sie_block->mso = 0;
2037        vcpu->arch.sie_block->msl = sclp.hamax;
2038
2039        vcpu->arch.sie_block->icpua = id;
2040        spin_lock_init(&vcpu->arch.local_int.lock);
2041        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2042        vcpu->arch.local_int.wq = &vcpu->wq;
2043        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2044        seqcount_init(&vcpu->arch.cputm_seqcount);
2045
2046        rc = kvm_vcpu_init(vcpu, kvm, id);
2047        if (rc)
2048                goto out_free_sie_block;
2049        VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2050                 vcpu->arch.sie_block);
2051        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2052
2053        return vcpu;
2054out_free_sie_block:
2055        free_page((unsigned long)(vcpu->arch.sie_block));
2056out_free_cpu:
2057        kmem_cache_free(kvm_vcpu_cache, vcpu);
2058out:
2059        return ERR_PTR(rc);
2060}
2061
2062int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2063{
2064        return kvm_s390_vcpu_has_irq(vcpu, 0);
2065}
2066
2067void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2068{
2069        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2070        exit_sie(vcpu);
2071}
2072
2073void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2074{
2075        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2076}
2077
2078static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2079{
2080        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2081        exit_sie(vcpu);
2082}
2083
2084static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2085{
2086        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2087}
2088
2089/*
2090 * Kick a guest cpu out of SIE and wait until SIE is not running.
2091 * If the CPU is not running (e.g. waiting as idle) the function will
2092 * return immediately. */
2093void exit_sie(struct kvm_vcpu *vcpu)
2094{
2095        atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2096        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2097                cpu_relax();
2098}
2099
2100/* Kick a guest cpu out of SIE to process a request synchronously */
2101void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2102{
2103        kvm_make_request(req, vcpu);
2104        kvm_s390_vcpu_request(vcpu);
2105}
2106
2107static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2108                              unsigned long end)
2109{
2110        struct kvm *kvm = gmap->private;
2111        struct kvm_vcpu *vcpu;
2112        unsigned long prefix;
2113        int i;
2114
2115        if (gmap_is_shadow(gmap))
2116                return;
2117        if (start >= 1UL << 31)
2118                /* We are only interested in prefix pages */
2119                return;
2120        kvm_for_each_vcpu(i, vcpu, kvm) {
2121                /* match against both prefix pages */
2122                prefix = kvm_s390_get_prefix(vcpu);
2123                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2124                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2125                                   start, end);
2126                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2127                }
2128        }
2129}
2130
2131int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2132{
2133        /* kvm common code refers to this, but never calls it */
2134        BUG();
2135        return 0;
2136}
2137
2138static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2139                                           struct kvm_one_reg *reg)
2140{
2141        int r = -EINVAL;
2142
2143        switch (reg->id) {
2144        case KVM_REG_S390_TODPR:
2145                r = put_user(vcpu->arch.sie_block->todpr,
2146                             (u32 __user *)reg->addr);
2147                break;
2148        case KVM_REG_S390_EPOCHDIFF:
2149                r = put_user(vcpu->arch.sie_block->epoch,
2150                             (u64 __user *)reg->addr);
2151                break;
2152        case KVM_REG_S390_CPU_TIMER:
2153                r = put_user(kvm_s390_get_cpu_timer(vcpu),
2154                             (u64 __user *)reg->addr);
2155                break;
2156        case KVM_REG_S390_CLOCK_COMP:
2157                r = put_user(vcpu->arch.sie_block->ckc,
2158                             (u64 __user *)reg->addr);
2159                break;
2160        case KVM_REG_S390_PFTOKEN:
2161                r = put_user(vcpu->arch.pfault_token,
2162                             (u64 __user *)reg->addr);
2163                break;
2164        case KVM_REG_S390_PFCOMPARE:
2165                r = put_user(vcpu->arch.pfault_compare,
2166                             (u64 __user *)reg->addr);
2167                break;
2168        case KVM_REG_S390_PFSELECT:
2169                r = put_user(vcpu->arch.pfault_select,
2170                             (u64 __user *)reg->addr);
2171                break;
2172        case KVM_REG_S390_PP:
2173                r = put_user(vcpu->arch.sie_block->pp,
2174                             (u64 __user *)reg->addr);
2175                break;
2176        case KVM_REG_S390_GBEA:
2177                r = put_user(vcpu->arch.sie_block->gbea,
2178                             (u64 __user *)reg->addr);
2179                break;
2180        default:
2181                break;
2182        }
2183
2184        return r;
2185}
2186
2187static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2188                                           struct kvm_one_reg *reg)
2189{
2190        int r = -EINVAL;
2191        __u64 val;
2192
2193        switch (reg->id) {
2194        case KVM_REG_S390_TODPR:
2195                r = get_user(vcpu->arch.sie_block->todpr,
2196                             (u32 __user *)reg->addr);
2197                break;
2198        case KVM_REG_S390_EPOCHDIFF:
2199                r = get_user(vcpu->arch.sie_block->epoch,
2200                             (u64 __user *)reg->addr);
2201                break;
2202        case KVM_REG_S390_CPU_TIMER:
2203                r = get_user(val, (u64 __user *)reg->addr);
2204                if (!r)
2205                        kvm_s390_set_cpu_timer(vcpu, val);
2206                break;
2207        case KVM_REG_S390_CLOCK_COMP:
2208                r = get_user(vcpu->arch.sie_block->ckc,
2209                             (u64 __user *)reg->addr);
2210                break;
2211        case KVM_REG_S390_PFTOKEN:
2212                r = get_user(vcpu->arch.pfault_token,
2213                             (u64 __user *)reg->addr);
2214                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2215                        kvm_clear_async_pf_completion_queue(vcpu);
2216                break;
2217        case KVM_REG_S390_PFCOMPARE:
2218                r = get_user(vcpu->arch.pfault_compare,
2219                             (u64 __user *)reg->addr);
2220                break;
2221        case KVM_REG_S390_PFSELECT:
2222                r = get_user(vcpu->arch.pfault_select,
2223                             (u64 __user *)reg->addr);
2224                break;
2225        case KVM_REG_S390_PP:
2226                r = get_user(vcpu->arch.sie_block->pp,
2227                             (u64 __user *)reg->addr);
2228                break;
2229        case KVM_REG_S390_GBEA:
2230                r = get_user(vcpu->arch.sie_block->gbea,
2231                             (u64 __user *)reg->addr);
2232                break;
2233        default:
2234                break;
2235        }
2236
2237        return r;
2238}
2239
2240static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2241{
2242        kvm_s390_vcpu_initial_reset(vcpu);
2243        return 0;
2244}
2245
2246int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2247{
2248        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2249        return 0;
2250}
2251
2252int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2253{
2254        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2255        return 0;
2256}
2257
2258int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2259                                  struct kvm_sregs *sregs)
2260{
2261        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2262        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2263        return 0;
2264}
2265
2266int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2267                                  struct kvm_sregs *sregs)
2268{
2269        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2270        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2271        return 0;
2272}
2273
2274int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2275{
2276        if (test_fp_ctl(fpu->fpc))
2277                return -EINVAL;
2278        vcpu->run->s.regs.fpc = fpu->fpc;
2279        if (MACHINE_HAS_VX)
2280                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2281                                 (freg_t *) fpu->fprs);
2282        else
2283                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2284        return 0;
2285}
2286
2287int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2288{
2289        /* make sure we have the latest values */
2290        save_fpu_regs();
2291        if (MACHINE_HAS_VX)
2292                convert_vx_to_fp((freg_t *) fpu->fprs,
2293                                 (__vector128 *) vcpu->run->s.regs.vrs);
2294        else
2295                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2296        fpu->fpc = vcpu->run->s.regs.fpc;
2297        return 0;
2298}
2299
2300static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2301{
2302        int rc = 0;
2303
2304        if (!is_vcpu_stopped(vcpu))
2305                rc = -EBUSY;
2306        else {
2307                vcpu->run->psw_mask = psw.mask;
2308                vcpu->run->psw_addr = psw.addr;
2309        }
2310        return rc;
2311}
2312
2313int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2314                                  struct kvm_translation *tr)
2315{
2316        return -EINVAL; /* not implemented yet */
2317}
2318
2319#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2320                              KVM_GUESTDBG_USE_HW_BP | \
2321                              KVM_GUESTDBG_ENABLE)
2322
2323int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2324                                        struct kvm_guest_debug *dbg)
2325{
2326        int rc = 0;
2327
2328        vcpu->guest_debug = 0;
2329        kvm_s390_clear_bp_data(vcpu);
2330
2331        if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2332                return -EINVAL;
2333        if (!sclp.has_gpere)
2334                return -EINVAL;
2335
2336        if (dbg->control & KVM_GUESTDBG_ENABLE) {
2337                vcpu->guest_debug = dbg->control;
2338                /* enforce guest PER */
2339                atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2340
2341                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2342                        rc = kvm_s390_import_bp_data(vcpu, dbg);
2343        } else {
2344                atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2345                vcpu->arch.guestdbg.last_bp = 0;
2346        }
2347
2348        if (rc) {
2349                vcpu->guest_debug = 0;
2350                kvm_s390_clear_bp_data(vcpu);
2351                atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2352        }
2353
2354        return rc;
2355}
2356
2357int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2358                                    struct kvm_mp_state *mp_state)
2359{
2360        /* CHECK_STOP and LOAD are not supported yet */
2361        return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2362                                       KVM_MP_STATE_OPERATING;
2363}
2364
2365int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2366                                    struct kvm_mp_state *mp_state)
2367{
2368        int rc = 0;
2369
2370        /* user space knows about this interface - let it control the state */
2371        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2372
2373        switch (mp_state->mp_state) {
2374        case KVM_MP_STATE_STOPPED:
2375                kvm_s390_vcpu_stop(vcpu);
2376                break;
2377        case KVM_MP_STATE_OPERATING:
2378                kvm_s390_vcpu_start(vcpu);
2379                break;
2380        case KVM_MP_STATE_LOAD:
2381        case KVM_MP_STATE_CHECK_STOP:
2382                /* fall through - CHECK_STOP and LOAD are not supported yet */
2383        default:
2384                rc = -ENXIO;
2385        }
2386
2387        return rc;
2388}
2389
2390static bool ibs_enabled(struct kvm_vcpu *vcpu)
2391{
2392        return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2393}
2394
2395static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2396{
2397retry:
2398        kvm_s390_vcpu_request_handled(vcpu);
2399        if (!vcpu->requests)
2400                return 0;
2401        /*
2402         * We use MMU_RELOAD just to re-arm the ipte notifier for the
2403         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2404         * This ensures that the ipte instruction for this request has
2405         * already finished. We might race against a second unmapper that
2406         * wants to set the blocking bit. Lets just retry the request loop.
2407         */
2408        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2409                int rc;
2410                rc = gmap_mprotect_notify(vcpu->arch.gmap,
2411                                          kvm_s390_get_prefix(vcpu),
2412                                          PAGE_SIZE * 2, PROT_WRITE);
2413                if (rc) {
2414                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2415                        return rc;
2416                }
2417                goto retry;
2418        }
2419
2420        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2421                vcpu->arch.sie_block->ihcpu = 0xffff;
2422                goto retry;
2423        }
2424
2425        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2426                if (!ibs_enabled(vcpu)) {
2427                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2428                        atomic_or(CPUSTAT_IBS,
2429                                        &vcpu->arch.sie_block->cpuflags);
2430                }
2431                goto retry;
2432        }
2433
2434        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2435                if (ibs_enabled(vcpu)) {
2436                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2437                        atomic_andnot(CPUSTAT_IBS,
2438                                          &vcpu->arch.sie_block->cpuflags);
2439                }
2440                goto retry;
2441        }
2442
2443        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2444                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2445                goto retry;
2446        }
2447
2448        /* nothing to do, just clear the request */
2449        clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2450
2451        return 0;
2452}
2453
2454void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2455{
2456        struct kvm_vcpu *vcpu;
2457        int i;
2458
2459        mutex_lock(&kvm->lock);
2460        preempt_disable();
2461        kvm->arch.epoch = tod - get_tod_clock();
2462        kvm_s390_vcpu_block_all(kvm);
2463        kvm_for_each_vcpu(i, vcpu, kvm)
2464                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2465        kvm_s390_vcpu_unblock_all(kvm);
2466        preempt_enable();
2467        mutex_unlock(&kvm->lock);
2468}
2469
2470/**
2471 * kvm_arch_fault_in_page - fault-in guest page if necessary
2472 * @vcpu: The corresponding virtual cpu
2473 * @gpa: Guest physical address
2474 * @writable: Whether the page should be writable or not
2475 *
2476 * Make sure that a guest page has been faulted-in on the host.
2477 *
2478 * Return: Zero on success, negative error code otherwise.
2479 */
2480long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2481{
2482        return gmap_fault(vcpu->arch.gmap, gpa,
2483                          writable ? FAULT_FLAG_WRITE : 0);
2484}
2485
2486static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2487                                      unsigned long token)
2488{
2489        struct kvm_s390_interrupt inti;
2490        struct kvm_s390_irq irq;
2491
2492        if (start_token) {
2493                irq.u.ext.ext_params2 = token;
2494                irq.type = KVM_S390_INT_PFAULT_INIT;
2495                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2496        } else {
2497                inti.type = KVM_S390_INT_PFAULT_DONE;
2498                inti.parm64 = token;
2499                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2500        }
2501}
2502
2503void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2504                                     struct kvm_async_pf *work)
2505{
2506        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2507        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2508}
2509
2510void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2511                                 struct kvm_async_pf *work)
2512{
2513        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2514        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2515}
2516
2517void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2518                               struct kvm_async_pf *work)
2519{
2520        /* s390 will always inject the page directly */
2521}
2522
2523bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2524{
2525        /*
2526         * s390 will always inject the page directly,
2527         * but we still want check_async_completion to cleanup
2528         */
2529        return true;
2530}
2531
2532static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2533{
2534        hva_t hva;
2535        struct kvm_arch_async_pf arch;
2536        int rc;
2537
2538        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2539                return 0;
2540        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2541            vcpu->arch.pfault_compare)
2542                return 0;
2543        if (psw_extint_disabled(vcpu))
2544                return 0;
2545        if (kvm_s390_vcpu_has_irq(vcpu, 0))
2546                return 0;
2547        if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2548                return 0;
2549        if (!vcpu->arch.gmap->pfault_enabled)
2550                return 0;
2551
2552        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2553        hva += current->thread.gmap_addr & ~PAGE_MASK;
2554        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2555                return 0;
2556
2557        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2558        return rc;
2559}
2560
2561static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2562{
2563        int rc, cpuflags;
2564
2565        /*
2566         * On s390 notifications for arriving pages will be delivered directly
2567         * to the guest but the house keeping for completed pfaults is
2568         * handled outside the worker.
2569         */
2570        kvm_check_async_pf_completion(vcpu);
2571
2572        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2573        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2574
2575        if (need_resched())
2576                schedule();
2577
2578        if (test_cpu_flag(CIF_MCCK_PENDING))
2579                s390_handle_mcck();
2580
2581        if (!kvm_is_ucontrol(vcpu->kvm)) {
2582                rc = kvm_s390_deliver_pending_interrupts(vcpu);
2583                if (rc)
2584                        return rc;
2585        }
2586
2587        rc = kvm_s390_handle_requests(vcpu);
2588        if (rc)
2589                return rc;
2590
2591        if (guestdbg_enabled(vcpu)) {
2592                kvm_s390_backup_guest_per_regs(vcpu);
2593                kvm_s390_patch_guest_per_regs(vcpu);
2594        }
2595
2596        vcpu->arch.sie_block->icptcode = 0;
2597        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2598        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2599        trace_kvm_s390_sie_enter(vcpu, cpuflags);
2600
2601        return 0;
2602}
2603
2604static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2605{
2606        struct kvm_s390_pgm_info pgm_info = {
2607                .code = PGM_ADDRESSING,
2608        };
2609        u8 opcode, ilen;
2610        int rc;
2611
2612        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2613        trace_kvm_s390_sie_fault(vcpu);
2614
2615        /*
2616         * We want to inject an addressing exception, which is defined as a
2617         * suppressing or terminating exception. However, since we came here
2618         * by a DAT access exception, the PSW still points to the faulting
2619         * instruction since DAT exceptions are nullifying. So we've got
2620         * to look up the current opcode to get the length of the instruction
2621         * to be able to forward the PSW.
2622         */
2623        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2624        ilen = insn_length(opcode);
2625        if (rc < 0) {
2626                return rc;
2627        } else if (rc) {
2628                /* Instruction-Fetching Exceptions - we can't detect the ilen.
2629                 * Forward by arbitrary ilc, injection will take care of
2630                 * nullification if necessary.
2631                 */
2632                pgm_info = vcpu->arch.pgm;
2633                ilen = 4;
2634        }
2635        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2636        kvm_s390_forward_psw(vcpu, ilen);
2637        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2638}
2639
2640static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2641{
2642        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2643                   vcpu->arch.sie_block->icptcode);
2644        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2645
2646        if (guestdbg_enabled(vcpu))
2647                kvm_s390_restore_guest_per_regs(vcpu);
2648
2649        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2650        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2651
2652        if (vcpu->arch.sie_block->icptcode > 0) {
2653                int rc = kvm_handle_sie_intercept(vcpu);
2654
2655                if (rc != -EOPNOTSUPP)
2656                        return rc;
2657                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2658                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2659                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2660                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2661                return -EREMOTE;
2662        } else if (exit_reason != -EFAULT) {
2663                vcpu->stat.exit_null++;
2664                return 0;
2665        } else if (kvm_is_ucontrol(vcpu->kvm)) {
2666                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2667                vcpu->run->s390_ucontrol.trans_exc_code =
2668                                                current->thread.gmap_addr;
2669                vcpu->run->s390_ucontrol.pgm_code = 0x10;
2670                return -EREMOTE;
2671        } else if (current->thread.gmap_pfault) {
2672                trace_kvm_s390_major_guest_pfault(vcpu);
2673                current->thread.gmap_pfault = 0;
2674                if (kvm_arch_setup_async_pf(vcpu))
2675                        return 0;
2676                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2677        }
2678        return vcpu_post_run_fault_in_sie(vcpu);
2679}
2680
2681static int __vcpu_run(struct kvm_vcpu *vcpu)
2682{
2683        int rc, exit_reason;
2684
2685        /*
2686         * We try to hold kvm->srcu during most of vcpu_run (except when run-
2687         * ning the guest), so that memslots (and other stuff) are protected
2688         */
2689        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2690
2691        do {
2692                rc = vcpu_pre_run(vcpu);
2693                if (rc)
2694                        break;
2695
2696                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2697                /*
2698                 * As PF_VCPU will be used in fault handler, between
2699                 * guest_enter and guest_exit should be no uaccess.
2700                 */
2701                local_irq_disable();
2702                guest_enter_irqoff();
2703                __disable_cpu_timer_accounting(vcpu);
2704                local_irq_enable();
2705                exit_reason = sie64a(vcpu->arch.sie_block,
2706                                     vcpu->run->s.regs.gprs);
2707                local_irq_disable();
2708                __enable_cpu_timer_accounting(vcpu);
2709                guest_exit_irqoff();
2710                local_irq_enable();
2711                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2712
2713                rc = vcpu_post_run(vcpu, exit_reason);
2714        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2715
2716        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2717        return rc;
2718}
2719
2720static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2721{
2722        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2723        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2724        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2725                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2726        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2727                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2728                /* some control register changes require a tlb flush */
2729                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2730        }
2731        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2732                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2733                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2734                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2735                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2736                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2737        }
2738        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2739                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2740                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2741                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2742                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2743                        kvm_clear_async_pf_completion_queue(vcpu);
2744        }
2745        /*
2746         * If userspace sets the riccb (e.g. after migration) to a valid state,
2747         * we should enable RI here instead of doing the lazy enablement.
2748         */
2749        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2750            test_kvm_facility(vcpu->kvm, 64)) {
2751                struct runtime_instr_cb *riccb =
2752                        (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2753
2754                if (riccb->valid)
2755                        vcpu->arch.sie_block->ecb3 |= 0x01;
2756        }
2757        save_access_regs(vcpu->arch.host_acrs);
2758        restore_access_regs(vcpu->run->s.regs.acrs);
2759        /* save host (userspace) fprs/vrs */
2760        save_fpu_regs();
2761        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2762        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2763        if (MACHINE_HAS_VX)
2764                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2765        else
2766                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2767        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2768        if (test_fp_ctl(current->thread.fpu.fpc))
2769                /* User space provided an invalid FPC, let's clear it */
2770                current->thread.fpu.fpc = 0;
2771
2772        kvm_run->kvm_dirty_regs = 0;
2773}
2774
2775static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2776{
2777        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2778        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2779        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2780        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2781        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2782        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2783        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2784        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2785        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2786        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2787        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2788        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2789        save_access_regs(vcpu->run->s.regs.acrs);
2790        restore_access_regs(vcpu->arch.host_acrs);
2791        /* Save guest register state */
2792        save_fpu_regs();
2793        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2794        /* Restore will be done lazily at return */
2795        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2796        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2797
2798}
2799
2800int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2801{
2802        int rc;
2803        sigset_t sigsaved;
2804
2805        if (kvm_run->immediate_exit)
2806                return -EINTR;
2807
2808        if (guestdbg_exit_pending(vcpu)) {
2809                kvm_s390_prepare_debug_exit(vcpu);
2810                return 0;
2811        }
2812
2813        if (vcpu->sigset_active)
2814                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2815
2816        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2817                kvm_s390_vcpu_start(vcpu);
2818        } else if (is_vcpu_stopped(vcpu)) {
2819                pr_err_ratelimited("can't run stopped vcpu %d\n",
2820                                   vcpu->vcpu_id);
2821                return -EINVAL;
2822        }
2823
2824        sync_regs(vcpu, kvm_run);
2825        enable_cpu_timer_accounting(vcpu);
2826
2827        might_fault();
2828        rc = __vcpu_run(vcpu);
2829
2830        if (signal_pending(current) && !rc) {
2831                kvm_run->exit_reason = KVM_EXIT_INTR;
2832                rc = -EINTR;
2833        }
2834
2835        if (guestdbg_exit_pending(vcpu) && !rc)  {
2836                kvm_s390_prepare_debug_exit(vcpu);
2837                rc = 0;
2838        }
2839
2840        if (rc == -EREMOTE) {
2841                /* userspace support is needed, kvm_run has been prepared */
2842                rc = 0;
2843        }
2844
2845        disable_cpu_timer_accounting(vcpu);
2846        store_regs(vcpu, kvm_run);
2847
2848        if (vcpu->sigset_active)
2849                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2850
2851        vcpu->stat.exit_userspace++;
2852        return rc;
2853}
2854
2855/*
2856 * store status at address
2857 * we use have two special cases:
2858 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2859 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2860 */
2861int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2862{
2863        unsigned char archmode = 1;
2864        freg_t fprs[NUM_FPRS];
2865        unsigned int px;
2866        u64 clkcomp, cputm;
2867        int rc;
2868
2869        px = kvm_s390_get_prefix(vcpu);
2870        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2871                if (write_guest_abs(vcpu, 163, &archmode, 1))
2872                        return -EFAULT;
2873                gpa = 0;
2874        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2875                if (write_guest_real(vcpu, 163, &archmode, 1))
2876                        return -EFAULT;
2877                gpa = px;
2878        } else
2879                gpa -= __LC_FPREGS_SAVE_AREA;
2880
2881        /* manually convert vector registers if necessary */
2882        if (MACHINE_HAS_VX) {
2883                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2884                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2885                                     fprs, 128);
2886        } else {
2887                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2888                                     vcpu->run->s.regs.fprs, 128);
2889        }
2890        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2891                              vcpu->run->s.regs.gprs, 128);
2892        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2893                              &vcpu->arch.sie_block->gpsw, 16);
2894        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2895                              &px, 4);
2896        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2897                              &vcpu->run->s.regs.fpc, 4);
2898        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2899                              &vcpu->arch.sie_block->todpr, 4);
2900        cputm = kvm_s390_get_cpu_timer(vcpu);
2901        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2902                              &cputm, 8);
2903        clkcomp = vcpu->arch.sie_block->ckc >> 8;
2904        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2905                              &clkcomp, 8);
2906        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2907                              &vcpu->run->s.regs.acrs, 64);
2908        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2909                              &vcpu->arch.sie_block->gcr, 128);
2910        return rc ? -EFAULT : 0;
2911}
2912
2913int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2914{
2915        /*
2916         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2917         * switch in the run ioctl. Let's update our copies before we save
2918         * it into the save area
2919         */
2920        save_fpu_regs();
2921        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2922        save_access_regs(vcpu->run->s.regs.acrs);
2923
2924        return kvm_s390_store_status_unloaded(vcpu, addr);
2925}
2926
2927static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2928{
2929        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2930        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2931}
2932
2933static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2934{
2935        unsigned int i;
2936        struct kvm_vcpu *vcpu;
2937
2938        kvm_for_each_vcpu(i, vcpu, kvm) {
2939                __disable_ibs_on_vcpu(vcpu);
2940        }
2941}
2942
2943static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2944{
2945        if (!sclp.has_ibs)
2946                return;
2947        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2948        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2949}
2950
2951void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2952{
2953        int i, online_vcpus, started_vcpus = 0;
2954
2955        if (!is_vcpu_stopped(vcpu))
2956                return;
2957
2958        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2959        /* Only one cpu at a time may enter/leave the STOPPED state. */
2960        spin_lock(&vcpu->kvm->arch.start_stop_lock);
2961        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2962
2963        for (i = 0; i < online_vcpus; i++) {
2964                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2965                        started_vcpus++;
2966        }
2967
2968        if (started_vcpus == 0) {
2969                /* we're the only active VCPU -> speed it up */
2970                __enable_ibs_on_vcpu(vcpu);
2971        } else if (started_vcpus == 1) {
2972                /*
2973                 * As we are starting a second VCPU, we have to disable
2974                 * the IBS facility on all VCPUs to remove potentially
2975                 * oustanding ENABLE requests.
2976                 */
2977                __disable_ibs_on_all_vcpus(vcpu->kvm);
2978        }
2979
2980        atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2981        /*
2982         * Another VCPU might have used IBS while we were offline.
2983         * Let's play safe and flush the VCPU at startup.
2984         */
2985        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2986        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2987        return;
2988}
2989
2990void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2991{
2992        int i, online_vcpus, started_vcpus = 0;
2993        struct kvm_vcpu *started_vcpu = NULL;
2994
2995        if (is_vcpu_stopped(vcpu))
2996                return;
2997
2998        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2999        /* Only one cpu at a time may enter/leave the STOPPED state. */
3000        spin_lock(&vcpu->kvm->arch.start_stop_lock);
3001        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3002
3003        /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3004        kvm_s390_clear_stop_irq(vcpu);
3005
3006        atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3007        __disable_ibs_on_vcpu(vcpu);
3008
3009        for (i = 0; i < online_vcpus; i++) {
3010                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3011                        started_vcpus++;
3012                        started_vcpu = vcpu->kvm->vcpus[i];
3013                }
3014        }
3015
3016        if (started_vcpus == 1) {
3017                /*
3018                 * As we only have one VCPU left, we want to enable the
3019                 * IBS facility for that VCPU to speed it up.
3020                 */
3021                __enable_ibs_on_vcpu(started_vcpu);
3022        }
3023
3024        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3025        return;
3026}
3027
3028static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3029                                     struct kvm_enable_cap *cap)
3030{
3031        int r;
3032
3033        if (cap->flags)
3034                return -EINVAL;
3035
3036        switch (cap->cap) {
3037        case KVM_CAP_S390_CSS_SUPPORT:
3038                if (!vcpu->kvm->arch.css_support) {
3039                        vcpu->kvm->arch.css_support = 1;
3040                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3041                        trace_kvm_s390_enable_css(vcpu->kvm);
3042                }
3043                r = 0;
3044                break;
3045        default:
3046                r = -EINVAL;
3047                break;
3048        }
3049        return r;
3050}
3051
3052static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3053                                  struct kvm_s390_mem_op *mop)
3054{
3055        void __user *uaddr = (void __user *)mop->buf;
3056        void *tmpbuf = NULL;
3057        int r, srcu_idx;
3058        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3059                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
3060
3061        if (mop->flags & ~supported_flags)
3062                return -EINVAL;
3063
3064        if (mop->size > MEM_OP_MAX_SIZE)
3065                return -E2BIG;
3066
3067        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3068                tmpbuf = vmalloc(mop->size);
3069                if (!tmpbuf)
3070                        return -ENOMEM;
3071        }
3072
3073        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3074
3075        switch (mop->op) {
3076        case KVM_S390_MEMOP_LOGICAL_READ:
3077                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3078                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3079                                            mop->size, GACC_FETCH);
3080                        break;
3081                }
3082                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3083                if (r == 0) {
3084                        if (copy_to_user(uaddr, tmpbuf, mop->size))
3085                                r = -EFAULT;
3086                }
3087                break;
3088        case KVM_S390_MEMOP_LOGICAL_WRITE:
3089                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3090                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3091                                            mop->size, GACC_STORE);
3092                        break;
3093                }
3094                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3095                        r = -EFAULT;
3096                        break;
3097                }
3098                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3099                break;
3100        default:
3101                r = -EINVAL;
3102        }
3103
3104        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3105
3106        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3107                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3108
3109        vfree(tmpbuf);
3110        return r;
3111}
3112
3113long kvm_arch_vcpu_ioctl(struct file *filp,
3114                         unsigned int ioctl, unsigned long arg)
3115{
3116        struct kvm_vcpu *vcpu = filp->private_data;
3117        void __user *argp = (void __user *)arg;
3118        int idx;
3119        long r;
3120
3121        switch (ioctl) {
3122        case KVM_S390_IRQ: {
3123                struct kvm_s390_irq s390irq;
3124
3125                r = -EFAULT;
3126                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3127                        break;
3128                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3129                break;
3130        }
3131        case KVM_S390_INTERRUPT: {
3132                struct kvm_s390_interrupt s390int;
3133                struct kvm_s390_irq s390irq;
3134
3135                r = -EFAULT;
3136                if (copy_from_user(&s390int, argp, sizeof(s390int)))
3137                        break;
3138                if (s390int_to_s390irq(&s390int, &s390irq))
3139                        return -EINVAL;
3140                r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3141                break;
3142        }
3143        case KVM_S390_STORE_STATUS:
3144                idx = srcu_read_lock(&vcpu->kvm->srcu);
3145                r = kvm_s390_vcpu_store_status(vcpu, arg);
3146                srcu_read_unlock(&vcpu->kvm->srcu, idx);
3147                break;
3148        case KVM_S390_SET_INITIAL_PSW: {
3149                psw_t psw;
3150
3151                r = -EFAULT;
3152                if (copy_from_user(&psw, argp, sizeof(psw)))
3153                        break;
3154                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3155                break;
3156        }
3157        case KVM_S390_INITIAL_RESET:
3158                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3159                break;
3160        case KVM_SET_ONE_REG:
3161        case KVM_GET_ONE_REG: {
3162                struct kvm_one_reg reg;
3163                r = -EFAULT;
3164                if (copy_from_user(&reg, argp, sizeof(reg)))
3165                        break;
3166                if (ioctl == KVM_SET_ONE_REG)
3167                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3168                else
3169                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3170                break;
3171        }
3172#ifdef CONFIG_KVM_S390_UCONTROL
3173        case KVM_S390_UCAS_MAP: {
3174                struct kvm_s390_ucas_mapping ucasmap;
3175
3176                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3177                        r = -EFAULT;
3178                        break;
3179                }
3180
3181                if (!kvm_is_ucontrol(vcpu->kvm)) {
3182                        r = -EINVAL;
3183                        break;
3184                }
3185
3186                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3187                                     ucasmap.vcpu_addr, ucasmap.length);
3188                break;
3189        }
3190        case KVM_S390_UCAS_UNMAP: {
3191                struct kvm_s390_ucas_mapping ucasmap;
3192
3193                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3194                        r = -EFAULT;
3195                        break;
3196                }
3197
3198                if (!kvm_is_ucontrol(vcpu->kvm)) {
3199                        r = -EINVAL;
3200                        break;
3201                }
3202
3203                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3204                        ucasmap.length);
3205                break;
3206        }
3207#endif
3208        case KVM_S390_VCPU_FAULT: {
3209                r = gmap_fault(vcpu->arch.gmap, arg, 0);
3210                break;
3211        }
3212        case KVM_ENABLE_CAP:
3213        {
3214                struct kvm_enable_cap cap;
3215                r = -EFAULT;
3216                if (copy_from_user(&cap, argp, sizeof(cap)))
3217                        break;
3218                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3219                break;
3220        }
3221        case KVM_S390_MEM_OP: {
3222                struct kvm_s390_mem_op mem_op;
3223
3224                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3225                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3226                else
3227                        r = -EFAULT;
3228                break;
3229        }
3230        case KVM_S390_SET_IRQ_STATE: {
3231                struct kvm_s390_irq_state irq_state;
3232
3233                r = -EFAULT;
3234                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3235                        break;
3236                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3237                    irq_state.len == 0 ||
3238                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3239                        r = -EINVAL;
3240                        break;
3241                }
3242                r = kvm_s390_set_irq_state(vcpu,
3243                                           (void __user *) irq_state.buf,
3244                                           irq_state.len);
3245                break;
3246        }
3247        case KVM_S390_GET_IRQ_STATE: {
3248                struct kvm_s390_irq_state irq_state;
3249
3250                r = -EFAULT;
3251                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3252                        break;
3253                if (irq_state.len == 0) {
3254                        r = -EINVAL;
3255                        break;
3256                }
3257                r = kvm_s390_get_irq_state(vcpu,
3258                                           (__u8 __user *)  irq_state.buf,
3259                                           irq_state.len);
3260                break;
3261        }
3262        default:
3263                r = -ENOTTY;
3264        }
3265        return r;
3266}
3267
3268int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3269{
3270#ifdef CONFIG_KVM_S390_UCONTROL
3271        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3272                 && (kvm_is_ucontrol(vcpu->kvm))) {
3273                vmf->page = virt_to_page(vcpu->arch.sie_block);
3274                get_page(vmf->page);
3275                return 0;
3276        }
3277#endif
3278        return VM_FAULT_SIGBUS;
3279}
3280
3281int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3282                            unsigned long npages)
3283{
3284        return 0;
3285}
3286
3287/* Section: memory related */
3288int kvm_arch_prepare_memory_region(struct kvm *kvm,
3289                                   struct kvm_memory_slot *memslot,
3290                                   const struct kvm_userspace_memory_region *mem,
3291                                   enum kvm_mr_change change)
3292{
3293        /* A few sanity checks. We can have memory slots which have to be
3294           located/ended at a segment boundary (1MB). The memory in userland is
3295           ok to be fragmented into various different vmas. It is okay to mmap()
3296           and munmap() stuff in this slot after doing this call at any time */
3297
3298        if (mem->userspace_addr & 0xffffful)
3299                return -EINVAL;
3300
3301        if (mem->memory_size & 0xffffful)
3302                return -EINVAL;
3303
3304        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3305                return -EINVAL;
3306
3307        return 0;
3308}
3309
3310void kvm_arch_commit_memory_region(struct kvm *kvm,
3311                                const struct kvm_userspace_memory_region *mem,
3312                                const struct kvm_memory_slot *old,
3313                                const struct kvm_memory_slot *new,
3314                                enum kvm_mr_change change)
3315{
3316        int rc;
3317
3318        /* If the basics of the memslot do not change, we do not want
3319         * to update the gmap. Every update causes several unnecessary
3320         * segment translation exceptions. This is usually handled just
3321         * fine by the normal fault handler + gmap, but it will also
3322         * cause faults on the prefix page of running guest CPUs.
3323         */
3324        if (old->userspace_addr == mem->userspace_addr &&
3325            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3326            old->npages * PAGE_SIZE == mem->memory_size)
3327                return;
3328
3329        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3330                mem->guest_phys_addr, mem->memory_size);
3331        if (rc)
3332                pr_warn("failed to commit memory region\n");
3333        return;
3334}
3335
3336static inline unsigned long nonhyp_mask(int i)
3337{
3338        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3339
3340        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3341}
3342
3343void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3344{
3345        vcpu->valid_wakeup = false;
3346}
3347
3348static int __init kvm_s390_init(void)
3349{
3350        int i;
3351
3352        if (!sclp.has_sief2) {
3353                pr_info("SIE not available\n");
3354                return -ENODEV;
3355        }
3356
3357        for (i = 0; i < 16; i++)
3358                kvm_s390_fac_list_mask[i] |=
3359                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3360
3361        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3362}
3363
3364static void __exit kvm_s390_exit(void)
3365{
3366        kvm_exit();
3367}
3368
3369module_init(kvm_s390_init);
3370module_exit(kvm_s390_exit);
3371
3372/*
3373 * Enable autoloading of the kvm module.
3374 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3375 * since x86 takes a different approach.
3376 */
3377#include <linux/miscdevice.h>
3378MODULE_ALIAS_MISCDEV(KVM_MINOR);
3379MODULE_ALIAS("devname:kvm");
3380