linux/arch/s390/kvm/kvm-s390.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2018
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11 *               Jason J. Herne <jjherne@us.ibm.com>
  12 */
  13
  14#define KMSG_COMPONENT "kvm-s390"
  15#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17#include <linux/compiler.h>
  18#include <linux/err.h>
  19#include <linux/fs.h>
  20#include <linux/hrtimer.h>
  21#include <linux/init.h>
  22#include <linux/kvm.h>
  23#include <linux/kvm_host.h>
  24#include <linux/mman.h>
  25#include <linux/module.h>
  26#include <linux/moduleparam.h>
  27#include <linux/random.h>
  28#include <linux/slab.h>
  29#include <linux/timer.h>
  30#include <linux/vmalloc.h>
  31#include <linux/bitmap.h>
  32#include <linux/sched/signal.h>
  33#include <linux/string.h>
  34
  35#include <asm/asm-offsets.h>
  36#include <asm/lowcore.h>
  37#include <asm/stp.h>
  38#include <asm/pgtable.h>
  39#include <asm/gmap.h>
  40#include <asm/nmi.h>
  41#include <asm/switch_to.h>
  42#include <asm/isc.h>
  43#include <asm/sclp.h>
  44#include <asm/cpacf.h>
  45#include <asm/timex.h>
  46#include <asm/ap.h>
  47#include "kvm-s390.h"
  48#include "gaccess.h"
  49
  50#define CREATE_TRACE_POINTS
  51#include "trace.h"
  52#include "trace-s390.h"
  53
  54#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  55#define LOCAL_IRQS 32
  56#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  57                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  58
  59#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  60#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
  61
  62struct kvm_stats_debugfs_item debugfs_entries[] = {
  63        { "userspace_handled", VCPU_STAT(exit_userspace) },
  64        { "exit_null", VCPU_STAT(exit_null) },
  65        { "exit_validity", VCPU_STAT(exit_validity) },
  66        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  67        { "exit_external_request", VCPU_STAT(exit_external_request) },
  68        { "exit_io_request", VCPU_STAT(exit_io_request) },
  69        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  70        { "exit_instruction", VCPU_STAT(exit_instruction) },
  71        { "exit_pei", VCPU_STAT(exit_pei) },
  72        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  73        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  74        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  75        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  76        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  77        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  78        { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
  79        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  80        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  81        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  82        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  83        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  84        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
  85        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
  86        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  87        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  88        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  89        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
  90        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  91        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  92        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  93        { "deliver_program", VCPU_STAT(deliver_program) },
  94        { "deliver_io", VCPU_STAT(deliver_io) },
  95        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
  96        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  97        { "inject_ckc", VCPU_STAT(inject_ckc) },
  98        { "inject_cputm", VCPU_STAT(inject_cputm) },
  99        { "inject_external_call", VCPU_STAT(inject_external_call) },
 100        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 101        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 102        { "inject_io", VM_STAT(inject_io) },
 103        { "inject_mchk", VCPU_STAT(inject_mchk) },
 104        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 105        { "inject_program", VCPU_STAT(inject_program) },
 106        { "inject_restart", VCPU_STAT(inject_restart) },
 107        { "inject_service_signal", VM_STAT(inject_service_signal) },
 108        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 109        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 110        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 111        { "inject_virtio", VM_STAT(inject_virtio) },
 112        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 113        { "instruction_gs", VCPU_STAT(instruction_gs) },
 114        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 115        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 116        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 117        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 118        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 119        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 120        { "instruction_sck", VCPU_STAT(instruction_sck) },
 121        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 122        { "instruction_spx", VCPU_STAT(instruction_spx) },
 123        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 124        { "instruction_stap", VCPU_STAT(instruction_stap) },
 125        { "instruction_iske", VCPU_STAT(instruction_iske) },
 126        { "instruction_ri", VCPU_STAT(instruction_ri) },
 127        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 128        { "instruction_sske", VCPU_STAT(instruction_sske) },
 129        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 130        { "instruction_essa", VCPU_STAT(instruction_essa) },
 131        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 132        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 133        { "instruction_tb", VCPU_STAT(instruction_tb) },
 134        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 135        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 136        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 137        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 138        { "instruction_sie", VCPU_STAT(instruction_sie) },
 139        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 140        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 141        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 142        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 143        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 144        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 145        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 146        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 147        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 148        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 149        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 150        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 151        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 152        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 153        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 154        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 155        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 156        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 157        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 158        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 159        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 160        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 161        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 162        { NULL }
 163};
 164
 165struct kvm_s390_tod_clock_ext {
 166        __u8 epoch_idx;
 167        __u64 tod;
 168        __u8 reserved[7];
 169} __packed;
 170
 171/* allow nested virtualization in KVM (if enabled by user space) */
 172static int nested;
 173module_param(nested, int, S_IRUGO);
 174MODULE_PARM_DESC(nested, "Nested virtualization support");
 175
 176/* allow 1m huge page guest backing, if !nested */
 177static int hpage;
 178module_param(hpage, int, 0444);
 179MODULE_PARM_DESC(hpage, "1m huge page backing support");
 180
 181/* maximum percentage of steal time for polling.  >100 is treated like 100 */
 182static u8 halt_poll_max_steal = 10;
 183module_param(halt_poll_max_steal, byte, 0644);
 184MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 185
 186/*
 187 * For now we handle at most 16 double words as this is what the s390 base
 188 * kernel handles and stores in the prefix page. If we ever need to go beyond
 189 * this, this requires changes to code, but the external uapi can stay.
 190 */
 191#define SIZE_INTERNAL 16
 192
 193/*
 194 * Base feature mask that defines default mask for facilities. Consists of the
 195 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 196 */
 197static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 198/*
 199 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 200 * and defines the facilities that can be enabled via a cpu model.
 201 */
 202static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 203
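/*
 * Number of 64-bit facility words KVM handles internally. The build-time
 * checks guarantee this never exceeds the architectural mask/list sizes
 * or the stfle facility list kept in the lowcore.
 */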
 204static unsigned long kvm_s390_fac_size(void)
 205{
 206        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 207        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 208        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 209                sizeof(S390_lowcore.stfle_fac_list));
 210
 211        return SIZE_INTERNAL;
 212}
 213
 214/* available cpu features supported by kvm */
 215static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 216/* available subfunctions indicated via query / "test bit" */
 217static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 218
 219static struct gmap_notifier gmap_notifier;
 220static struct gmap_notifier vsie_gmap_notifier;
 221debug_info_t *kvm_s390_dbf;
 222
 223/* Section: not file related */
 224int kvm_arch_hardware_enable(void)
 225{
 226        /* every s390 is virtualization enabled ;-) */
 227        return 0;
 228}
 229
 230int kvm_arch_check_processor_compat(void)
 231{
 232        return 0;
 233}
 234
 235static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 236                              unsigned long end);
 237
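/*
 * Compensate a host TOD change of +delta for one SIE control block: the
 * guest epoch is adjusted by -delta, and with the multiple-epoch facility
 * (ECD_MEF) the epoch index is carried accordingly, so the TOD observed
 * by the guest does not change.
 */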
 238static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 239{
 240        u8 delta_idx = 0;
 241
 242        /*
  243         * The TOD jumps by delta; we have to compensate for this by adding
 244         * -delta to the epoch.
 245         */
 246        delta = -delta;
 247
 248        /* sign-extension - we're adding to signed values below */
 249        if ((s64)delta < 0)
 250                delta_idx = -1;
 251
 252        scb->epoch += delta;
 253        if (scb->ecd & ECD_MEF) {
 254                scb->epdx += delta_idx;
 255                if (scb->epoch < delta)
 256                        scb->epdx += 1;
 257        }
 258}
 259
 260/*
 261 * This callback is executed during stop_machine(). All CPUs are therefore
 262 * temporarily stopped. In order not to change guest behavior, we have to
 263 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 264 * so a CPU won't be stopped while calculating with the epoch.
 265 */
 266static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 267                          void *v)
 268{
 269        struct kvm *kvm;
 270        struct kvm_vcpu *vcpu;
 271        int i;
 272        unsigned long long *delta = v;
 273
 274        list_for_each_entry(kvm, &vm_list, vm_list) {
 275                kvm_for_each_vcpu(i, vcpu, kvm) {
 276                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 277                        if (i == 0) {
 278                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 279                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 280                        }
 281                        if (vcpu->arch.cputm_enabled)
 282                                vcpu->arch.cputm_start += *delta;
 283                        if (vcpu->arch.vsie_block)
 284                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
 285                                                   *delta);
 286                }
 287        }
 288        return NOTIFY_OK;
 289}
 290
 291static struct notifier_block kvm_clock_notifier = {
 292        .notifier_call = kvm_clock_sync,
 293};
 294
 295int kvm_arch_hardware_setup(void)
 296{
 297        gmap_notifier.notifier_call = kvm_gmap_notifier;
 298        gmap_register_pte_notifier(&gmap_notifier);
 299        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 300        gmap_register_pte_notifier(&vsie_gmap_notifier);
 301        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 302                                       &kvm_clock_notifier);
 303        return 0;
 304}
 305
 306void kvm_arch_hardware_unsetup(void)
 307{
 308        gmap_unregister_pte_notifier(&gmap_notifier);
 309        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 310        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 311                                         &kvm_clock_notifier);
 312}
 313
 314static void allow_cpu_feat(unsigned long nr)
 315{
 316        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 317}
 318
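/*
 * Use the "test bit" form of PERFORM LOCKED OPERATION (bit 0x100 set in
 * r0) to check whether PLO function code @nr is installed; returns 1 if
 * the resulting condition code is 0.
 */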
 319static inline int plo_test_bit(unsigned char nr)
 320{
 321        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 322        int cc;
 323
 324        asm volatile(
 325                /* Parameter registers are ignored for "test bit" */
 326                "       plo     0,0,0,0(0)\n"
 327                "       ipm     %0\n"
 328                "       srl     %0,28\n"
 329                : "=d" (cc)
 330                : "d" (r0)
 331                : "cc");
 332        return cc == 0;
 333}
 334
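/*
 * Execute the query function (function code 0 in r0) of the instruction
 * given by @opcode and store the 32-byte mask of supported subfunctions
 * in the buffer addressed by r1.
 */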
 335static inline void __insn32_query(unsigned int opcode, u8 query[32])
 336{
 337        register unsigned long r0 asm("0") = 0; /* query function */
 338        register unsigned long r1 asm("1") = (unsigned long) query;
 339
 340        asm volatile(
 341                /* Parameter regs are ignored */
 342                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 343                : "=m" (*query)
 344                : "d" (r0), "a" (r1), [opc] "i" (opcode)
 345                : "cc");
 346}
 347
 348#define INSN_SORTL 0xb938
 349#define INSN_DFLTCC 0xb939
 350
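/*
 * Probe, once at module load, which PLO functions, CPACF subfunctions and
 * SIE features the host provides, and record them so they can be offered
 * to guests via the CPU model attributes. SIE specific features are only
 * advertised if nested virtualization is enabled.
 */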
 351static void kvm_s390_cpu_feat_init(void)
 352{
 353        int i;
 354
 355        for (i = 0; i < 256; ++i) {
 356                if (plo_test_bit(i))
 357                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 358        }
 359
 360        if (test_facility(28)) /* TOD-clock steering */
 361                ptff(kvm_s390_available_subfunc.ptff,
 362                     sizeof(kvm_s390_available_subfunc.ptff),
 363                     PTFF_QAF);
 364
 365        if (test_facility(17)) { /* MSA */
 366                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 367                              kvm_s390_available_subfunc.kmac);
 368                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 369                              kvm_s390_available_subfunc.kmc);
 370                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 371                              kvm_s390_available_subfunc.km);
 372                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 373                              kvm_s390_available_subfunc.kimd);
 374                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 375                              kvm_s390_available_subfunc.klmd);
 376        }
 377        if (test_facility(76)) /* MSA3 */
 378                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 379                              kvm_s390_available_subfunc.pckmo);
 380        if (test_facility(77)) { /* MSA4 */
 381                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 382                              kvm_s390_available_subfunc.kmctr);
 383                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 384                              kvm_s390_available_subfunc.kmf);
 385                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 386                              kvm_s390_available_subfunc.kmo);
 387                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 388                              kvm_s390_available_subfunc.pcc);
 389        }
 390        if (test_facility(57)) /* MSA5 */
 391                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 392                              kvm_s390_available_subfunc.ppno);
 393
 394        if (test_facility(146)) /* MSA8 */
 395                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 396                              kvm_s390_available_subfunc.kma);
 397
 398        if (test_facility(155)) /* MSA9 */
 399                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 400                              kvm_s390_available_subfunc.kdsa);
 401
 402        if (test_facility(150)) /* SORTL */
 403                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 404
 405        if (test_facility(151)) /* DFLTCC */
 406                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 407
 408        if (MACHINE_HAS_ESOP)
 409                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 410        /*
 411         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 412         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 413         */
 414        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 415            !test_facility(3) || !nested)
 416                return;
 417        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 418        if (sclp.has_64bscao)
 419                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 420        if (sclp.has_siif)
 421                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 422        if (sclp.has_gpere)
 423                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 424        if (sclp.has_gsls)
 425                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 426        if (sclp.has_ib)
 427                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 428        if (sclp.has_cei)
 429                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 430        if (sclp.has_ibs)
 431                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 432        if (sclp.has_kss)
 433                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 434        /*
 435         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 436         * all skey handling functions read/set the skey from the PGSTE
 437         * instead of the real storage key.
 438         *
 439         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
  440         * pages be detected as preserved although they are resident.
 441         *
 442         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 443         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 444         *
 445         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 446         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 447         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 448         *
 449         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 450         * cannot easily shadow the SCA because of the ipte lock.
 451         */
 452}
 453
 454int kvm_arch_init(void *opaque)
 455{
 456        int rc;
 457
 458        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 459        if (!kvm_s390_dbf)
 460                return -ENOMEM;
 461
 462        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 463                rc = -ENOMEM;
 464                goto out_debug_unreg;
 465        }
 466
 467        kvm_s390_cpu_feat_init();
 468
 469        /* Register floating interrupt controller interface. */
 470        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 471        if (rc) {
 472                pr_err("A FLIC registration call failed with rc=%d\n", rc);
 473                goto out_debug_unreg;
 474        }
 475
 476        rc = kvm_s390_gib_init(GAL_ISC);
 477        if (rc)
 478                goto out_gib_destroy;
 479
 480        return 0;
 481
 482out_gib_destroy:
 483        kvm_s390_gib_destroy();
 484out_debug_unreg:
 485        debug_unregister(kvm_s390_dbf);
 486        return rc;
 487}
 488
 489void kvm_arch_exit(void)
 490{
 491        kvm_s390_gib_destroy();
 492        debug_unregister(kvm_s390_dbf);
 493}
 494
 495/* Section: device related */
 496long kvm_arch_dev_ioctl(struct file *filp,
 497                        unsigned int ioctl, unsigned long arg)
 498{
 499        if (ioctl == KVM_S390_ENABLE_SIE)
 500                return s390_enable_sie();
 501        return -EINVAL;
 502}
 503
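/*
 * Report whether (and to which extent) a KVM capability is supported for
 * this VM. A sketch of how userspace might query it (hypothetical snippet,
 * not part of this file; assumes an open VM file descriptor vm_fd):
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// r == 0: unsupported; for this capability a positive value is the
 *	// maximum transfer size for KVM_S390_MEM_OP (MEM_OP_MAX_SIZE)
 */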
 504int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 505{
 506        int r;
 507
 508        switch (ext) {
 509        case KVM_CAP_S390_PSW:
 510        case KVM_CAP_S390_GMAP:
 511        case KVM_CAP_SYNC_MMU:
 512#ifdef CONFIG_KVM_S390_UCONTROL
 513        case KVM_CAP_S390_UCONTROL:
 514#endif
 515        case KVM_CAP_ASYNC_PF:
 516        case KVM_CAP_SYNC_REGS:
 517        case KVM_CAP_ONE_REG:
 518        case KVM_CAP_ENABLE_CAP:
 519        case KVM_CAP_S390_CSS_SUPPORT:
 520        case KVM_CAP_IOEVENTFD:
 521        case KVM_CAP_DEVICE_CTRL:
 522        case KVM_CAP_S390_IRQCHIP:
 523        case KVM_CAP_VM_ATTRIBUTES:
 524        case KVM_CAP_MP_STATE:
 525        case KVM_CAP_IMMEDIATE_EXIT:
 526        case KVM_CAP_S390_INJECT_IRQ:
 527        case KVM_CAP_S390_USER_SIGP:
 528        case KVM_CAP_S390_USER_STSI:
 529        case KVM_CAP_S390_SKEYS:
 530        case KVM_CAP_S390_IRQ_STATE:
 531        case KVM_CAP_S390_USER_INSTR0:
 532        case KVM_CAP_S390_CMMA_MIGRATION:
 533        case KVM_CAP_S390_AIS:
 534        case KVM_CAP_S390_AIS_MIGRATION:
 535                r = 1;
 536                break;
 537        case KVM_CAP_S390_HPAGE_1M:
 538                r = 0;
 539                if (hpage && !kvm_is_ucontrol(kvm))
 540                        r = 1;
 541                break;
 542        case KVM_CAP_S390_MEM_OP:
 543                r = MEM_OP_MAX_SIZE;
 544                break;
 545        case KVM_CAP_NR_VCPUS:
 546        case KVM_CAP_MAX_VCPUS:
 547        case KVM_CAP_MAX_VCPU_ID:
 548                r = KVM_S390_BSCA_CPU_SLOTS;
 549                if (!kvm_s390_use_sca_entries())
 550                        r = KVM_MAX_VCPUS;
 551                else if (sclp.has_esca && sclp.has_64bscao)
 552                        r = KVM_S390_ESCA_CPU_SLOTS;
 553                break;
 554        case KVM_CAP_S390_COW:
 555                r = MACHINE_HAS_ESOP;
 556                break;
 557        case KVM_CAP_S390_VECTOR_REGISTERS:
 558                r = MACHINE_HAS_VX;
 559                break;
 560        case KVM_CAP_S390_RI:
 561                r = test_facility(64);
 562                break;
 563        case KVM_CAP_S390_GS:
 564                r = test_facility(133);
 565                break;
 566        case KVM_CAP_S390_BPB:
 567                r = test_facility(82);
 568                break;
 569        default:
 570                r = 0;
 571        }
 572        return r;
 573}
 574
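/*
 * Transfer dirty bits collected in the gmap into the memslot's dirty
 * bitmap, walking the slot one segment (_PAGE_ENTRIES pages) at a time.
 * The loop bails out on a fatal signal and reschedules between segments.
 */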
 575static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 576                                    struct kvm_memory_slot *memslot)
 577{
 578        int i;
 579        gfn_t cur_gfn, last_gfn;
 580        unsigned long gaddr, vmaddr;
 581        struct gmap *gmap = kvm->arch.gmap;
 582        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 583
 584        /* Loop over all guest segments */
 585        cur_gfn = memslot->base_gfn;
 586        last_gfn = memslot->base_gfn + memslot->npages;
 587        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 588                gaddr = gfn_to_gpa(cur_gfn);
 589                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 590                if (kvm_is_error_hva(vmaddr))
 591                        continue;
 592
 593                bitmap_zero(bitmap, _PAGE_ENTRIES);
 594                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 595                for (i = 0; i < _PAGE_ENTRIES; i++) {
 596                        if (test_bit(i, bitmap))
 597                                mark_page_dirty(kvm, cur_gfn + i);
 598                }
 599
 600                if (fatal_signal_pending(current))
 601                        return;
 602                cond_resched();
 603        }
 604}
 605
 606/* Section: vm related */
 607static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 608
 609/*
 610 * Get (and clear) the dirty memory log for a memory slot.
 611 */
 612int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 613                               struct kvm_dirty_log *log)
 614{
 615        int r;
 616        unsigned long n;
 617        struct kvm_memslots *slots;
 618        struct kvm_memory_slot *memslot;
 619        int is_dirty = 0;
 620
 621        if (kvm_is_ucontrol(kvm))
 622                return -EINVAL;
 623
 624        mutex_lock(&kvm->slots_lock);
 625
 626        r = -EINVAL;
 627        if (log->slot >= KVM_USER_MEM_SLOTS)
 628                goto out;
 629
 630        slots = kvm_memslots(kvm);
 631        memslot = id_to_memslot(slots, log->slot);
 632        r = -ENOENT;
 633        if (!memslot->dirty_bitmap)
 634                goto out;
 635
 636        kvm_s390_sync_dirty_log(kvm, memslot);
 637        r = kvm_get_dirty_log(kvm, log, &is_dirty);
 638        if (r)
 639                goto out;
 640
 641        /* Clear the dirty log */
 642        if (is_dirty) {
 643                n = kvm_dirty_bitmap_bytes(memslot);
 644                memset(memslot->dirty_bitmap, 0, n);
 645        }
 646        r = 0;
 647out:
 648        mutex_unlock(&kvm->slots_lock);
 649        return r;
 650}
 651
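/*
 * Request KVM_REQ_ICPT_OPEREXC on every vcpu so that operation exceptions
 * are intercepted, e.g. after userspace asked to handle instruction 0
 * itself (KVM_CAP_S390_USER_INSTR0).
 */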
 652static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 653{
 654        unsigned int i;
 655        struct kvm_vcpu *vcpu;
 656
 657        kvm_for_each_vcpu(i, vcpu, kvm) {
 658                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 659        }
 660}
 661
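/*
 * Handle KVM_ENABLE_CAP on the VM file descriptor. A sketch of the
 * userspace side (hypothetical snippet, not part of this file; assumes an
 * open VM file descriptor vm_fd):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		// the capability could not be enabled
 */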
 662int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 663{
 664        int r;
 665
 666        if (cap->flags)
 667                return -EINVAL;
 668
 669        switch (cap->cap) {
 670        case KVM_CAP_S390_IRQCHIP:
 671                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 672                kvm->arch.use_irqchip = 1;
 673                r = 0;
 674                break;
 675        case KVM_CAP_S390_USER_SIGP:
 676                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 677                kvm->arch.user_sigp = 1;
 678                r = 0;
 679                break;
 680        case KVM_CAP_S390_VECTOR_REGISTERS:
 681                mutex_lock(&kvm->lock);
 682                if (kvm->created_vcpus) {
 683                        r = -EBUSY;
 684                } else if (MACHINE_HAS_VX) {
 685                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 686                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 687                        if (test_facility(134)) {
 688                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 689                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 690                        }
 691                        if (test_facility(135)) {
 692                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 693                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 694                        }
 695                        if (test_facility(148)) {
 696                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
 697                                set_kvm_facility(kvm->arch.model.fac_list, 148);
 698                        }
 699                        if (test_facility(152)) {
 700                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
 701                                set_kvm_facility(kvm->arch.model.fac_list, 152);
 702                        }
 703                        r = 0;
 704                } else
 705                        r = -EINVAL;
 706                mutex_unlock(&kvm->lock);
 707                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 708                         r ? "(not available)" : "(success)");
 709                break;
 710        case KVM_CAP_S390_RI:
 711                r = -EINVAL;
 712                mutex_lock(&kvm->lock);
 713                if (kvm->created_vcpus) {
 714                        r = -EBUSY;
 715                } else if (test_facility(64)) {
 716                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 717                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 718                        r = 0;
 719                }
 720                mutex_unlock(&kvm->lock);
 721                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 722                         r ? "(not available)" : "(success)");
 723                break;
 724        case KVM_CAP_S390_AIS:
 725                mutex_lock(&kvm->lock);
 726                if (kvm->created_vcpus) {
 727                        r = -EBUSY;
 728                } else {
 729                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
 730                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 731                        r = 0;
 732                }
 733                mutex_unlock(&kvm->lock);
 734                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 735                         r ? "(not available)" : "(success)");
 736                break;
 737        case KVM_CAP_S390_GS:
 738                r = -EINVAL;
 739                mutex_lock(&kvm->lock);
 740                if (kvm->created_vcpus) {
 741                        r = -EBUSY;
 742                } else if (test_facility(133)) {
 743                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
 744                        set_kvm_facility(kvm->arch.model.fac_list, 133);
 745                        r = 0;
 746                }
 747                mutex_unlock(&kvm->lock);
 748                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 749                         r ? "(not available)" : "(success)");
 750                break;
 751        case KVM_CAP_S390_HPAGE_1M:
 752                mutex_lock(&kvm->lock);
 753                if (kvm->created_vcpus)
 754                        r = -EBUSY;
 755                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 756                        r = -EINVAL;
 757                else {
 758                        r = 0;
 759                        down_write(&kvm->mm->mmap_sem);
 760                        kvm->mm->context.allow_gmap_hpage_1m = 1;
 761                        up_write(&kvm->mm->mmap_sem);
 762                        /*
 763                         * We might have to create fake 4k page
 764                         * tables. To avoid that the hardware works on
 765                         * stale PGSTEs, we emulate these instructions.
 766                         */
 767                        kvm->arch.use_skf = 0;
 768                        kvm->arch.use_pfmfi = 0;
 769                }
 770                mutex_unlock(&kvm->lock);
 771                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 772                         r ? "(not available)" : "(success)");
 773                break;
 774        case KVM_CAP_S390_USER_STSI:
 775                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 776                kvm->arch.user_stsi = 1;
 777                r = 0;
 778                break;
 779        case KVM_CAP_S390_USER_INSTR0:
 780                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 781                kvm->arch.user_instr0 = 1;
 782                icpt_operexc_on_all_vcpus(kvm);
 783                r = 0;
 784                break;
 785        default:
 786                r = -EINVAL;
 787                break;
 788        }
 789        return r;
 790}
 791
 792static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 793{
 794        int ret;
 795
 796        switch (attr->attr) {
 797        case KVM_S390_VM_MEM_LIMIT_SIZE:
 798                ret = 0;
 799                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 800                         kvm->arch.mem_limit);
 801                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 802                        ret = -EFAULT;
 803                break;
 804        default:
 805                ret = -ENXIO;
 806                break;
 807        }
 808        return ret;
 809}
 810
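/*
 * Handle the KVM_S390_VM_MEM_CTRL attribute group: enable CMMA, reset the
 * CMMA state, or set a new guest memory limit (which recreates the gmap).
 */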
 811static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 812{
 813        int ret;
 814        unsigned int idx;
 815        switch (attr->attr) {
 816        case KVM_S390_VM_MEM_ENABLE_CMMA:
 817                ret = -ENXIO;
 818                if (!sclp.has_cmma)
 819                        break;
 820
 821                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 822                mutex_lock(&kvm->lock);
 823                if (kvm->created_vcpus)
 824                        ret = -EBUSY;
 825                else if (kvm->mm->context.allow_gmap_hpage_1m)
 826                        ret = -EINVAL;
 827                else {
 828                        kvm->arch.use_cmma = 1;
 829                        /* Not compatible with cmma. */
 830                        kvm->arch.use_pfmfi = 0;
 831                        ret = 0;
 832                }
 833                mutex_unlock(&kvm->lock);
 834                break;
 835        case KVM_S390_VM_MEM_CLR_CMMA:
 836                ret = -ENXIO;
 837                if (!sclp.has_cmma)
 838                        break;
 839                ret = -EINVAL;
 840                if (!kvm->arch.use_cmma)
 841                        break;
 842
 843                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 844                mutex_lock(&kvm->lock);
 845                idx = srcu_read_lock(&kvm->srcu);
 846                s390_reset_cmma(kvm->arch.gmap->mm);
 847                srcu_read_unlock(&kvm->srcu, idx);
 848                mutex_unlock(&kvm->lock);
 849                ret = 0;
 850                break;
 851        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 852                unsigned long new_limit;
 853
 854                if (kvm_is_ucontrol(kvm))
 855                        return -EINVAL;
 856
 857                if (get_user(new_limit, (u64 __user *)attr->addr))
 858                        return -EFAULT;
 859
 860                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 861                    new_limit > kvm->arch.mem_limit)
 862                        return -E2BIG;
 863
 864                if (!new_limit)
 865                        return -EINVAL;
 866
 867                /* gmap_create takes last usable address */
 868                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 869                        new_limit -= 1;
 870
 871                ret = -EBUSY;
 872                mutex_lock(&kvm->lock);
 873                if (!kvm->created_vcpus) {
 874                        /* gmap_create will round the limit up */
 875                        struct gmap *new = gmap_create(current->mm, new_limit);
 876
 877                        if (!new) {
 878                                ret = -ENOMEM;
 879                        } else {
 880                                gmap_remove(kvm->arch.gmap);
 881                                new->private = kvm;
 882                                kvm->arch.gmap = new;
 883                                ret = 0;
 884                        }
 885                }
 886                mutex_unlock(&kvm->lock);
 887                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 888                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 889                         (void *) kvm->arch.gmap->asce);
 890                break;
 891        }
 892        default:
 893                ret = -ENXIO;
 894                break;
 895        }
 896        return ret;
 897}
 898
 899static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 900
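/*
 * Propagate a changed crypto configuration to all vcpus: block them,
 * rebuild each vcpu's crypto control block setup and request a VSIE
 * restart so shadow crycbs are recreated, then unblock.
 */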
 901void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 902{
 903        struct kvm_vcpu *vcpu;
 904        int i;
 905
 906        kvm_s390_vcpu_block_all(kvm);
 907
 908        kvm_for_each_vcpu(i, vcpu, kvm) {
 909                kvm_s390_vcpu_crypto_setup(vcpu);
 910                /* recreate the shadow crycb by leaving the VSIE handler */
 911                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 912        }
 913
 914        kvm_s390_vcpu_unblock_all(kvm);
 915}
 916
 917static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 918{
 919        mutex_lock(&kvm->lock);
 920        switch (attr->attr) {
 921        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 922                if (!test_kvm_facility(kvm, 76)) {
 923                        mutex_unlock(&kvm->lock);
 924                        return -EINVAL;
 925                }
 926                get_random_bytes(
 927                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 928                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 929                kvm->arch.crypto.aes_kw = 1;
 930                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 931                break;
 932        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 933                if (!test_kvm_facility(kvm, 76)) {
 934                        mutex_unlock(&kvm->lock);
 935                        return -EINVAL;
 936                }
 937                get_random_bytes(
 938                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 939                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 940                kvm->arch.crypto.dea_kw = 1;
 941                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 942                break;
 943        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 944                if (!test_kvm_facility(kvm, 76)) {
 945                        mutex_unlock(&kvm->lock);
 946                        return -EINVAL;
 947                }
 948                kvm->arch.crypto.aes_kw = 0;
 949                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 950                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 951                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 952                break;
 953        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 954                if (!test_kvm_facility(kvm, 76)) {
 955                        mutex_unlock(&kvm->lock);
 956                        return -EINVAL;
 957                }
 958                kvm->arch.crypto.dea_kw = 0;
 959                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 960                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 961                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 962                break;
 963        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 964                if (!ap_instructions_available()) {
 965                        mutex_unlock(&kvm->lock);
 966                        return -EOPNOTSUPP;
 967                }
 968                kvm->arch.crypto.apie = 1;
 969                break;
 970        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 971                if (!ap_instructions_available()) {
 972                        mutex_unlock(&kvm->lock);
 973                        return -EOPNOTSUPP;
 974                }
 975                kvm->arch.crypto.apie = 0;
 976                break;
 977        default:
 978                mutex_unlock(&kvm->lock);
 979                return -ENXIO;
 980        }
 981
 982        kvm_s390_vcpu_crypto_reset_all(kvm);
 983        mutex_unlock(&kvm->lock);
 984        return 0;
 985}
 986
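/* Make the given synchronous request pending on every vcpu of the VM. */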
 987static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 988{
 989        int cx;
 990        struct kvm_vcpu *vcpu;
 991
 992        kvm_for_each_vcpu(cx, vcpu, kvm)
 993                kvm_s390_sync_request(req, vcpu);
 994}
 995
 996/*
 997 * Must be called with kvm->srcu held to avoid races on memslots, and with
 998 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 999 */
1000static int kvm_s390_vm_start_migration(struct kvm *kvm)
1001{
1002        struct kvm_memory_slot *ms;
1003        struct kvm_memslots *slots;
1004        unsigned long ram_pages = 0;
1005        int slotnr;
1006
1007        /* migration mode already enabled */
1008        if (kvm->arch.migration_mode)
1009                return 0;
1010        slots = kvm_memslots(kvm);
1011        if (!slots || !slots->used_slots)
1012                return -EINVAL;
1013
1014        if (!kvm->arch.use_cmma) {
1015                kvm->arch.migration_mode = 1;
1016                return 0;
1017        }
1018        /* mark all the pages in active slots as dirty */
1019        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1020                ms = slots->memslots + slotnr;
1021                if (!ms->dirty_bitmap)
1022                        return -EINVAL;
1023                /*
1024                 * The second half of the bitmap is only used on x86,
1025                 * and would be wasted otherwise, so we put it to good
1026                 * use here to keep track of the state of the storage
1027                 * attributes.
1028                 */
1029                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1030                ram_pages += ms->npages;
1031        }
1032        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1033        kvm->arch.migration_mode = 1;
1034        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1035        return 0;
1036}
1037
1038/*
1039 * Must be called with kvm->slots_lock to avoid races with ourselves and
1040 * kvm_s390_vm_start_migration.
1041 */
1042static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1043{
1044        /* migration mode already disabled */
1045        if (!kvm->arch.migration_mode)
1046                return 0;
1047        kvm->arch.migration_mode = 0;
1048        if (kvm->arch.use_cmma)
1049                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1050        return 0;
1051}
1052
1053static int kvm_s390_vm_set_migration(struct kvm *kvm,
1054                                     struct kvm_device_attr *attr)
1055{
1056        int res = -ENXIO;
1057
1058        mutex_lock(&kvm->slots_lock);
1059        switch (attr->attr) {
1060        case KVM_S390_VM_MIGRATION_START:
1061                res = kvm_s390_vm_start_migration(kvm);
1062                break;
1063        case KVM_S390_VM_MIGRATION_STOP:
1064                res = kvm_s390_vm_stop_migration(kvm);
1065                break;
1066        default:
1067                break;
1068        }
1069        mutex_unlock(&kvm->slots_lock);
1070
1071        return res;
1072}
1073
1074static int kvm_s390_vm_get_migration(struct kvm *kvm,
1075                                     struct kvm_device_attr *attr)
1076{
1077        u64 mig = kvm->arch.migration_mode;
1078
1079        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1080                return -ENXIO;
1081
1082        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1083                return -EFAULT;
1084        return 0;
1085}
1086
1087static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1088{
1089        struct kvm_s390_vm_tod_clock gtod;
1090
1091        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1092                return -EFAULT;
1093
1094        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1095                return -EINVAL;
1096        kvm_s390_set_tod_clock(kvm, &gtod);
1097
1098        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1099                gtod.epoch_idx, gtod.tod);
1100
1101        return 0;
1102}
1103
1104static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1105{
1106        u8 gtod_high;
1107
1108        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1109                                           sizeof(gtod_high)))
1110                return -EFAULT;
1111
1112        if (gtod_high != 0)
1113                return -EINVAL;
1114        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1115
1116        return 0;
1117}
1118
1119static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1120{
1121        struct kvm_s390_vm_tod_clock gtod = { 0 };
1122
1123        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1124                           sizeof(gtod.tod)))
1125                return -EFAULT;
1126
1127        kvm_s390_set_tod_clock(kvm, &gtod);
1128        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1129        return 0;
1130}
1131
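/*
 * Dispatch the KVM_S390_VM_TOD attribute group. A sketch of how userspace
 * might set the extended TOD clock (hypothetical snippet, not part of this
 * file; assumes an open VM file descriptor vm_fd and a TOD value tod):
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */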
1132static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1133{
1134        int ret;
1135
1136        if (attr->flags)
1137                return -EINVAL;
1138
1139        switch (attr->attr) {
1140        case KVM_S390_VM_TOD_EXT:
1141                ret = kvm_s390_set_tod_ext(kvm, attr);
1142                break;
1143        case KVM_S390_VM_TOD_HIGH:
1144                ret = kvm_s390_set_tod_high(kvm, attr);
1145                break;
1146        case KVM_S390_VM_TOD_LOW:
1147                ret = kvm_s390_set_tod_low(kvm, attr);
1148                break;
1149        default:
1150                ret = -ENXIO;
1151                break;
1152        }
1153        return ret;
1154}
1155
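/*
 * Compute the guest-visible TOD: host TOD plus the VM's epoch, carrying
 * into the epoch index when the addition wraps (multiple-epoch facility,
 * facility 139). Runs with preemption disabled so the value is consistent.
 */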
1156static void kvm_s390_get_tod_clock(struct kvm *kvm,
1157                                   struct kvm_s390_vm_tod_clock *gtod)
1158{
1159        struct kvm_s390_tod_clock_ext htod;
1160
1161        preempt_disable();
1162
1163        get_tod_clock_ext((char *)&htod);
1164
1165        gtod->tod = htod.tod + kvm->arch.epoch;
1166        gtod->epoch_idx = 0;
1167        if (test_kvm_facility(kvm, 139)) {
1168                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1169                if (gtod->tod < htod.tod)
1170                        gtod->epoch_idx += 1;
1171        }
1172
1173        preempt_enable();
1174}
1175
1176static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1177{
1178        struct kvm_s390_vm_tod_clock gtod;
1179
1180        memset(&gtod, 0, sizeof(gtod));
1181        kvm_s390_get_tod_clock(kvm, &gtod);
1182        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1183                return -EFAULT;
1184
1185        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1186                gtod.epoch_idx, gtod.tod);
1187        return 0;
1188}
1189
1190static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1191{
1192        u8 gtod_high = 0;
1193
1194        if (copy_to_user((void __user *)attr->addr, &gtod_high,
1195                                         sizeof(gtod_high)))
1196                return -EFAULT;
1197        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1198
1199        return 0;
1200}
1201
1202static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1203{
1204        u64 gtod;
1205
1206        gtod = kvm_s390_get_tod_clock_fast(kvm);
1207        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1208                return -EFAULT;
1209        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1210
1211        return 0;
1212}
1213
1214static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1215{
1216        int ret;
1217
1218        if (attr->flags)
1219                return -EINVAL;
1220
1221        switch (attr->attr) {
1222        case KVM_S390_VM_TOD_EXT:
1223                ret = kvm_s390_get_tod_ext(kvm, attr);
1224                break;
1225        case KVM_S390_VM_TOD_HIGH:
1226                ret = kvm_s390_get_tod_high(kvm, attr);
1227                break;
1228        case KVM_S390_VM_TOD_LOW:
1229                ret = kvm_s390_get_tod_low(kvm, attr);
1230                break;
1231        default:
1232                ret = -ENXIO;
1233                break;
1234        }
1235        return ret;
1236}
1237
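/*
 * Set the guest CPU model (cpuid, IBC, facility list) from userspace,
 * clamping the requested IBC to the range supported by the machine.
 * Only possible as long as no vcpu has been created yet.
 */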
1238static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1239{
1240        struct kvm_s390_vm_cpu_processor *proc;
1241        u16 lowest_ibc, unblocked_ibc;
1242        int ret = 0;
1243
1244        mutex_lock(&kvm->lock);
1245        if (kvm->created_vcpus) {
1246                ret = -EBUSY;
1247                goto out;
1248        }
1249        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1250        if (!proc) {
1251                ret = -ENOMEM;
1252                goto out;
1253        }
1254        if (!copy_from_user(proc, (void __user *)attr->addr,
1255                            sizeof(*proc))) {
1256                kvm->arch.model.cpuid = proc->cpuid;
1257                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1258                unblocked_ibc = sclp.ibc & 0xfff;
1259                if (lowest_ibc && proc->ibc) {
1260                        if (proc->ibc > unblocked_ibc)
1261                                kvm->arch.model.ibc = unblocked_ibc;
1262                        else if (proc->ibc < lowest_ibc)
1263                                kvm->arch.model.ibc = lowest_ibc;
1264                        else
1265                                kvm->arch.model.ibc = proc->ibc;
1266                }
1267                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1268                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1269                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1270                         kvm->arch.model.ibc,
1271                         kvm->arch.model.cpuid);
1272                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1273                         kvm->arch.model.fac_list[0],
1274                         kvm->arch.model.fac_list[1],
1275                         kvm->arch.model.fac_list[2]);
1276        } else
1277                ret = -EFAULT;
1278        kfree(proc);
1279out:
1280        mutex_unlock(&kvm->lock);
1281        return ret;
1282}
1283
1284static int kvm_s390_set_processor_feat(struct kvm *kvm,
1285                                       struct kvm_device_attr *attr)
1286{
1287        struct kvm_s390_vm_cpu_feat data;
1288
1289        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1290                return -EFAULT;
1291        if (!bitmap_subset((unsigned long *) data.feat,
1292                           kvm_s390_available_cpu_feat,
1293                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1294                return -EINVAL;
1295
1296        mutex_lock(&kvm->lock);
1297        if (kvm->created_vcpus) {
1298                mutex_unlock(&kvm->lock);
1299                return -EBUSY;
1300        }
1301        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1302                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1303        mutex_unlock(&kvm->lock);
1304        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1305                         data.feat[0],
1306                         data.feat[1],
1307                         data.feat[2]);
1308        return 0;
1309}
1310
1311static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1312                                          struct kvm_device_attr *attr)
1313{
1314        mutex_lock(&kvm->lock);
1315        if (kvm->created_vcpus) {
1316                mutex_unlock(&kvm->lock);
1317                return -EBUSY;
1318        }
1319
1320        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1321                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1322                mutex_unlock(&kvm->lock);
1323                return -EFAULT;
1324        }
1325        mutex_unlock(&kvm->lock);
1326
1327        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1328                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1329                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1330                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1331                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1332        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1333                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1334                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1335        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1336                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1337                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1338        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1339                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1340                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1341        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1342                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1343                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1344        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1345                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1346                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1347        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1348                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1349                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1350        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1351                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1352                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1353        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1354                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1355                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1356        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1357                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1358                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1359        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1360                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1361                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1362        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1363                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1364                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1365        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1366                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1367                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1368        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1369                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1370                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1371        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1372                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1373                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1374        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1375                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1376                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1377                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1378                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1379        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1380                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1381                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1382                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1383                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1384
1385        return 0;
1386}
1387
1388static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1389{
1390        int ret = -ENXIO;
1391
1392        switch (attr->attr) {
1393        case KVM_S390_VM_CPU_PROCESSOR:
1394                ret = kvm_s390_set_processor(kvm, attr);
1395                break;
1396        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1397                ret = kvm_s390_set_processor_feat(kvm, attr);
1398                break;
1399        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1400                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1401                break;
1402        }
1403        return ret;
1404}
1405
1406static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1407{
1408        struct kvm_s390_vm_cpu_processor *proc;
1409        int ret = 0;
1410
1411        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1412        if (!proc) {
1413                ret = -ENOMEM;
1414                goto out;
1415        }
1416        proc->cpuid = kvm->arch.model.cpuid;
1417        proc->ibc = kvm->arch.model.ibc;
1418        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1419               S390_ARCH_FAC_LIST_SIZE_BYTE);
1420        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1421                 kvm->arch.model.ibc,
1422                 kvm->arch.model.cpuid);
1423        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1424                 kvm->arch.model.fac_list[0],
1425                 kvm->arch.model.fac_list[1],
1426                 kvm->arch.model.fac_list[2]);
1427        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1428                ret = -EFAULT;
1429        kfree(proc);
1430out:
1431        return ret;
1432}
1433
1434static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1435{
1436        struct kvm_s390_vm_cpu_machine *mach;
1437        int ret = 0;
1438
1439        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1440        if (!mach) {
1441                ret = -ENOMEM;
1442                goto out;
1443        }
1444        get_cpu_id((struct cpuid *) &mach->cpuid);
1445        mach->ibc = sclp.ibc;
1446        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1447               S390_ARCH_FAC_LIST_SIZE_BYTE);
1448        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1449               sizeof(S390_lowcore.stfle_fac_list));
1450        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1451                 kvm->arch.model.ibc,
1452                 kvm->arch.model.cpuid);
1453        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1454                 mach->fac_mask[0],
1455                 mach->fac_mask[1],
1456                 mach->fac_mask[2]);
1457        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1458                 mach->fac_list[0],
1459                 mach->fac_list[1],
1460                 mach->fac_list[2]);
1461        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1462                ret = -EFAULT;
1463        kfree(mach);
1464out:
1465        return ret;
1466}
1467
1468static int kvm_s390_get_processor_feat(struct kvm *kvm,
1469                                       struct kvm_device_attr *attr)
1470{
1471        struct kvm_s390_vm_cpu_feat data;
1472
1473        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1474                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1475        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1476                return -EFAULT;
1477        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1478                         data.feat[0],
1479                         data.feat[1],
1480                         data.feat[2]);
1481        return 0;
1482}
1483
1484static int kvm_s390_get_machine_feat(struct kvm *kvm,
1485                                     struct kvm_device_attr *attr)
1486{
1487        struct kvm_s390_vm_cpu_feat data;
1488
1489        bitmap_copy((unsigned long *) data.feat,
1490                    kvm_s390_available_cpu_feat,
1491                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1492        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1493                return -EFAULT;
1494        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1495                         data.feat[0],
1496                         data.feat[1],
1497                         data.feat[2]);
1498        return 0;
1499}
1500
1501static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1502                                          struct kvm_device_attr *attr)
1503{
1504        if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1505            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1506                return -EFAULT;
1507
1508        VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1509                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1510                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1511                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1512                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1513        VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1514                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1515                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1516        VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1517                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1518                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1519        VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1520                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1521                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1522        VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1523                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1524                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1525        VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1526                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1527                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1528        VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1529                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1530                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1531        VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1532                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1533                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1534        VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1535                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1536                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1537        VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1538                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1539                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1540        VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1541                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1542                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1543        VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1544                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1545                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1546        VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1547                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1548                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1549        VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1550                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1551                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1552        VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1553                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1554                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1555        VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1556                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1557                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1558                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1559                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1560        VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1561                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1562                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1563                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1564                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1565
1566        return 0;
1567}
1568
1569static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1570                                        struct kvm_device_attr *attr)
1571{
1572        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1573            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1574                return -EFAULT;
1575
1576        VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1577                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1578                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1579                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1580                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1581        VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1582                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1583                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1584        VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1585                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1586                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1587        VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1588                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1589                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1590        VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1591                 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1592                 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1593        VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1594                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1595                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1596        VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1597                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1598                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1599        VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1600                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1601                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1602        VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1603                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1604                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1605        VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1606                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1607                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1608        VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1609                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1610                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1611        VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1612                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1613                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1614        VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1615                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1616                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1617        VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1618                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1619                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1620        VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1621                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1622                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1623        VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1624                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1625                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1626                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1627                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1628        VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1629                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1630                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1631                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1632                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1633
1634        return 0;
1635}
1636
1637static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1638{
1639        int ret = -ENXIO;
1640
1641        switch (attr->attr) {
1642        case KVM_S390_VM_CPU_PROCESSOR:
1643                ret = kvm_s390_get_processor(kvm, attr);
1644                break;
1645        case KVM_S390_VM_CPU_MACHINE:
1646                ret = kvm_s390_get_machine(kvm, attr);
1647                break;
1648        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1649                ret = kvm_s390_get_processor_feat(kvm, attr);
1650                break;
1651        case KVM_S390_VM_CPU_MACHINE_FEAT:
1652                ret = kvm_s390_get_machine_feat(kvm, attr);
1653                break;
1654        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1655                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1656                break;
1657        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1658                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1659                break;
1660        }
1661        return ret;
1662}
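
/*
 * Illustrative sketch (userspace side, assumptions: "vm_fd" is an already
 * created VM file descriptor): this is how a VMM would typically reach the
 * CPU-model handlers above, using only the documented KVM_HAS_DEVICE_ATTR /
 * KVM_GET_DEVICE_ATTR vm ioctls and the uapi structures.
 */
#if 0	/* example only, kept out of the build */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int query_machine_subfuncs(int vm_fd, struct kvm_s390_vm_cpu_subfunc *sf)
{
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_CPU_MODEL,
                .attr  = KVM_S390_VM_CPU_MACHINE_SUBFUNC,
                .addr  = (__u64)(unsigned long)sf,
        };

        /* a non-zero result here means the attribute is not offered (-ENXIO) */
        if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
                return -1;
        return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
}
#endif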
1663
1664static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1665{
1666        int ret;
1667
1668        switch (attr->group) {
1669        case KVM_S390_VM_MEM_CTRL:
1670                ret = kvm_s390_set_mem_control(kvm, attr);
1671                break;
1672        case KVM_S390_VM_TOD:
1673                ret = kvm_s390_set_tod(kvm, attr);
1674                break;
1675        case KVM_S390_VM_CPU_MODEL:
1676                ret = kvm_s390_set_cpu_model(kvm, attr);
1677                break;
1678        case KVM_S390_VM_CRYPTO:
1679                ret = kvm_s390_vm_set_crypto(kvm, attr);
1680                break;
1681        case KVM_S390_VM_MIGRATION:
1682                ret = kvm_s390_vm_set_migration(kvm, attr);
1683                break;
1684        default:
1685                ret = -ENXIO;
1686                break;
1687        }
1688
1689        return ret;
1690}
1691
1692static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1693{
1694        int ret;
1695
1696        switch (attr->group) {
1697        case KVM_S390_VM_MEM_CTRL:
1698                ret = kvm_s390_get_mem_control(kvm, attr);
1699                break;
1700        case KVM_S390_VM_TOD:
1701                ret = kvm_s390_get_tod(kvm, attr);
1702                break;
1703        case KVM_S390_VM_CPU_MODEL:
1704                ret = kvm_s390_get_cpu_model(kvm, attr);
1705                break;
1706        case KVM_S390_VM_MIGRATION:
1707                ret = kvm_s390_vm_get_migration(kvm, attr);
1708                break;
1709        default:
1710                ret = -ENXIO;
1711                break;
1712        }
1713
1714        return ret;
1715}
1716
1717static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1718{
1719        int ret;
1720
1721        switch (attr->group) {
1722        case KVM_S390_VM_MEM_CTRL:
1723                switch (attr->attr) {
1724                case KVM_S390_VM_MEM_ENABLE_CMMA:
1725                case KVM_S390_VM_MEM_CLR_CMMA:
1726                        ret = sclp.has_cmma ? 0 : -ENXIO;
1727                        break;
1728                case KVM_S390_VM_MEM_LIMIT_SIZE:
1729                        ret = 0;
1730                        break;
1731                default:
1732                        ret = -ENXIO;
1733                        break;
1734                }
1735                break;
1736        case KVM_S390_VM_TOD:
1737                switch (attr->attr) {
1738                case KVM_S390_VM_TOD_LOW:
1739                case KVM_S390_VM_TOD_HIGH:
1740                        ret = 0;
1741                        break;
1742                default:
1743                        ret = -ENXIO;
1744                        break;
1745                }
1746                break;
1747        case KVM_S390_VM_CPU_MODEL:
1748                switch (attr->attr) {
1749                case KVM_S390_VM_CPU_PROCESSOR:
1750                case KVM_S390_VM_CPU_MACHINE:
1751                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1752                case KVM_S390_VM_CPU_MACHINE_FEAT:
1753                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1754                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1755                        ret = 0;
1756                        break;
1757                default:
1758                        ret = -ENXIO;
1759                        break;
1760                }
1761                break;
1762        case KVM_S390_VM_CRYPTO:
1763                switch (attr->attr) {
1764                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1765                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1766                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1767                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1768                        ret = 0;
1769                        break;
1770                case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1771                case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1772                        ret = ap_instructions_available() ? 0 : -ENXIO;
1773                        break;
1774                default:
1775                        ret = -ENXIO;
1776                        break;
1777                }
1778                break;
1779        case KVM_S390_VM_MIGRATION:
1780                ret = 0;
1781                break;
1782        default:
1783                ret = -ENXIO;
1784                break;
1785        }
1786
1787        return ret;
1788}
1789
1790static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1791{
1792        uint8_t *keys;
1793        uint64_t hva;
1794        int srcu_idx, i, r = 0;
1795
1796        if (args->flags != 0)
1797                return -EINVAL;
1798
1799        /* Is this guest using storage keys? */
1800        if (!mm_uses_skeys(current->mm))
1801                return KVM_S390_GET_SKEYS_NONE;
1802
1803        /* Enforce sane limit on memory allocation */
1804        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1805                return -EINVAL;
1806
1807        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1808        if (!keys)
1809                return -ENOMEM;
1810
1811        down_read(&current->mm->mmap_sem);
1812        srcu_idx = srcu_read_lock(&kvm->srcu);
1813        for (i = 0; i < args->count; i++) {
1814                hva = gfn_to_hva(kvm, args->start_gfn + i);
1815                if (kvm_is_error_hva(hva)) {
1816                        r = -EFAULT;
1817                        break;
1818                }
1819
1820                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1821                if (r)
1822                        break;
1823        }
1824        srcu_read_unlock(&kvm->srcu, srcu_idx);
1825        up_read(&current->mm->mmap_sem);
1826
1827        if (!r) {
1828                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1829                                 sizeof(uint8_t) * args->count);
1830                if (r)
1831                        r = -EFAULT;
1832        }
1833
1834        kvfree(keys);
1835        return r;
1836}
1837
1838static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839{
1840        uint8_t *keys;
1841        uint64_t hva;
1842        int srcu_idx, i, r = 0;
1843        bool unlocked;
1844
1845        if (args->flags != 0)
1846                return -EINVAL;
1847
1848        /* Enforce sane limit on memory allocation */
1849        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1850                return -EINVAL;
1851
1852        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1853        if (!keys)
1854                return -ENOMEM;
1855
1856        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1857                           sizeof(uint8_t) * args->count);
1858        if (r) {
1859                r = -EFAULT;
1860                goto out;
1861        }
1862
1863        /* Enable storage key handling for the guest */
1864        r = s390_enable_skey();
1865        if (r)
1866                goto out;
1867
1868        i = 0;
1869        down_read(&current->mm->mmap_sem);
1870        srcu_idx = srcu_read_lock(&kvm->srcu);
1871        while (i < args->count) {
1872                unlocked = false;
1873                hva = gfn_to_hva(kvm, args->start_gfn + i);
1874                if (kvm_is_error_hva(hva)) {
1875                        r = -EFAULT;
1876                        break;
1877                }
1878
1879                /* Lowest order bit is reserved */
1880                if (keys[i] & 0x01) {
1881                        r = -EINVAL;
1882                        break;
1883                }
1884
1885                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1886                if (r) {
1887                        r = fixup_user_fault(current, current->mm, hva,
1888                                             FAULT_FLAG_WRITE, &unlocked);
1889                        if (r)
1890                                break;
1891                }
1892                if (!r)
1893                        i++;
1894        }
1895        srcu_read_unlock(&kvm->srcu, srcu_idx);
1896        up_read(&current->mm->mmap_sem);
1897out:
1898        kvfree(keys);
1899        return r;
1900}
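
/*
 * Illustrative sketch (userspace side, assumptions: "vm_fd" and "buf" come
 * from the caller): fetching guest storage keys through the
 * KVM_S390_GET_SKEYS vm ioctl handled above.  A positive return of
 * KVM_S390_GET_SKEYS_NONE means the guest never enabled storage keys.
 */
#if 0	/* example only, kept out of the build */
static int read_storage_keys(int vm_fd, __u64 start_gfn, __u64 count, __u8 *buf)
{
        struct kvm_s390_skeys args = {
                .start_gfn     = start_gfn,
                .count         = count,	/* 1 .. KVM_S390_SKEYS_MAX */
                .skeydata_addr = (__u64)(unsigned long)buf,
                .flags         = 0,		/* no flags are defined */
        };

        return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
}
#endif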
1901
1902/*
1903 * Base address and length must be sent at the start of each block, therefore
1904 * it's cheaper to send some clean data, as long as it's less than the size of
1905 * two longs.
1906 */
1907#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1908/* use the same upper bound as for the storage key ioctls, for consistency */
1909#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1910
1911/*
1912 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1913 * address falls in a hole. In that case the index of one of the memslots
1914 * bordering the hole is returned.
1915 */
1916static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1917{
1918        int start = 0, end = slots->used_slots;
1919        int slot = atomic_read(&slots->lru_slot);
1920        struct kvm_memory_slot *memslots = slots->memslots;
1921
1922        if (gfn >= memslots[slot].base_gfn &&
1923            gfn < memslots[slot].base_gfn + memslots[slot].npages)
1924                return slot;
1925
1926        while (start < end) {
1927                slot = start + (end - start) / 2;
1928
1929                if (gfn >= memslots[slot].base_gfn)
1930                        end = slot;
1931                else
1932                        start = slot + 1;
1933        }
1934
1935        if (gfn >= memslots[start].base_gfn &&
1936            gfn < memslots[start].base_gfn + memslots[start].npages) {
1937                atomic_set(&slots->lru_slot, start);
1938        }
1939
1940        return start;
1941}
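
/*
 * Note: like search_memslots(), the binary search above relies on the memslot
 * array being sorted by descending base_gfn, i.e. memslots[0] covers the
 * highest guest frame numbers.  kvm_s390_get_cmma() below uses the same
 * invariant when it computes mem_end from memslots[0].
 */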
1942
1943static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1944                              u8 *res, unsigned long bufsize)
1945{
1946        unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1947
1948        args->count = 0;
1949        while (args->count < bufsize) {
1950                hva = gfn_to_hva(kvm, cur_gfn);
1951                /*
1952                 * We return an error if the first value was invalid, but we
1953                 * return successfully if at least one value was copied.
1954                 */
1955                if (kvm_is_error_hva(hva))
1956                        return args->count ? 0 : -EFAULT;
1957                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1958                        pgstev = 0;
1959                res[args->count++] = (pgstev >> 24) & 0x43;
1960                cur_gfn++;
1961        }
1962
1963        return 0;
1964}
1965
1966static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1967                                              unsigned long cur_gfn)
1968{
1969        int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1970        struct kvm_memory_slot *ms = slots->memslots + slotidx;
1971        unsigned long ofs = cur_gfn - ms->base_gfn;
1972
1973        if (ms->base_gfn + ms->npages <= cur_gfn) {
1974                slotidx--;
1975                /* If we are above the highest slot, wrap around */
1976                if (slotidx < 0)
1977                        slotidx = slots->used_slots - 1;
1978
1979                ms = slots->memslots + slotidx;
1980                ofs = 0;
1981        }
1982        ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1983        while ((slotidx > 0) && (ofs >= ms->npages)) {
1984                slotidx--;
1985                ms = slots->memslots + slotidx;
1986                ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1987        }
1988        return ms->base_gfn + ofs;
1989}
1990
1991static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1992                             u8 *res, unsigned long bufsize)
1993{
1994        unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1995        struct kvm_memslots *slots = kvm_memslots(kvm);
1996        struct kvm_memory_slot *ms;
1997
1998        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1999        ms = gfn_to_memslot(kvm, cur_gfn);
2000        args->count = 0;
2001        args->start_gfn = cur_gfn;
2002        if (!ms)
2003                return 0;
2004        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2005        mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2006
2007        while (args->count < bufsize) {
2008                hva = gfn_to_hva(kvm, cur_gfn);
2009                if (kvm_is_error_hva(hva))
2010                        return 0;
2011                /* Decrement only if we actually flipped the bit to 0 */
2012                if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2013                        atomic64_dec(&kvm->arch.cmma_dirty_pages);
2014                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2015                        pgstev = 0;
2016                /* Save the value */
2017                res[args->count++] = (pgstev >> 24) & 0x43;
2018                /* If the next bit is too far away, stop. */
2019                if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2020                        return 0;
2021                /* If we reached the previous "next", find the next one */
2022                if (cur_gfn == next_gfn)
2023                        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2024                /* Reached the end of memory or of the buffer, stop */
2025                if ((next_gfn >= mem_end) ||
2026                    (next_gfn - args->start_gfn >= bufsize))
2027                        return 0;
2028                cur_gfn++;
2029                /* Reached the end of the current memslot, take the next one. */
2030                if (cur_gfn - ms->base_gfn >= ms->npages) {
2031                        ms = gfn_to_memslot(kvm, cur_gfn);
2032                        if (!ms)
2033                                return 0;
2034                }
2035        }
2036        return 0;
2037}
2038
2039/*
2040 * This function searches for the next page with dirty CMMA attributes, and
2041 * saves the attributes in the buffer up to either the end of the buffer or
2042 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2043 * no trailing clean bytes are saved.
2044 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2045 * output buffer will indicate 0 as length.
2046 */
2047static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2048                                  struct kvm_s390_cmma_log *args)
2049{
2050        unsigned long bufsize;
2051        int srcu_idx, peek, ret;
2052        u8 *values;
2053
2054        if (!kvm->arch.use_cmma)
2055                return -ENXIO;
2056        /* Invalid/unsupported flags were specified */
2057        if (args->flags & ~KVM_S390_CMMA_PEEK)
2058                return -EINVAL;
2059        /* Migration mode query, and we are not doing a migration */
2060        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2061        if (!peek && !kvm->arch.migration_mode)
2062                return -EINVAL;
2063        /* CMMA is disabled or was not used, or the buffer has length zero */
2064        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2065        if (!bufsize || !kvm->mm->context.uses_cmm) {
2066                memset(args, 0, sizeof(*args));
2067                return 0;
2068        }
2069        /* We are not peeking, and there are no dirty pages */
2070        if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2071                memset(args, 0, sizeof(*args));
2072                return 0;
2073        }
2074
2075        values = vmalloc(bufsize);
2076        if (!values)
2077                return -ENOMEM;
2078
2079        down_read(&kvm->mm->mmap_sem);
2080        srcu_idx = srcu_read_lock(&kvm->srcu);
2081        if (peek)
2082                ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2083        else
2084                ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2085        srcu_read_unlock(&kvm->srcu, srcu_idx);
2086        up_read(&kvm->mm->mmap_sem);
2087
2088        if (kvm->arch.migration_mode)
2089                args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2090        else
2091                args->remaining = 0;
2092
2093        if (copy_to_user((void __user *)args->values, values, args->count))
2094                ret = -EFAULT;
2095
2096        vfree(values);
2097        return ret;
2098}
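
/*
 * Illustrative sketch (userspace side, assumptions: "vm_fd" and "values" come
 * from the caller): peeking at CMMA values through the KVM_S390_GET_CMMA_BITS
 * vm ioctl above.  With KVM_S390_CMMA_PEEK no migration mode is required and
 * the dirty-tracking path is bypassed.
 */
#if 0	/* example only, kept out of the build */
static int peek_cmma(int vm_fd, __u64 start_gfn, __u32 count, __u8 *values)
{
        struct kvm_s390_cmma_log args = {
                .start_gfn = start_gfn,
                .count     = count,	/* capped to KVM_S390_CMMA_SIZE_MAX */
                .flags     = KVM_S390_CMMA_PEEK,
                .values    = (__u64)(unsigned long)values,
        };
        int ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &args);

        /* on success, args.count holds the number of values actually written */
        return ret ? ret : (int)args.count;
}
#endif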
2099
2100/*
2101 * This function sets the CMMA attributes for the given pages. If the input
2102 * buffer has zero length, no action is taken, otherwise the attributes are
2103 * set and the mm->context.uses_cmm flag is set.
2104 */
2105static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2106                                  const struct kvm_s390_cmma_log *args)
2107{
2108        unsigned long hva, mask, pgstev, i;
2109        uint8_t *bits;
2110        int srcu_idx, r = 0;
2111
2112        mask = args->mask;
2113
2114        if (!kvm->arch.use_cmma)
2115                return -ENXIO;
2116        /* invalid/unsupported flags */
2117        if (args->flags != 0)
2118                return -EINVAL;
2119        /* Enforce sane limit on memory allocation */
2120        if (args->count > KVM_S390_CMMA_SIZE_MAX)
2121                return -EINVAL;
2122        /* Nothing to do */
2123        if (args->count == 0)
2124                return 0;
2125
2126        bits = vmalloc(array_size(sizeof(*bits), args->count));
2127        if (!bits)
2128                return -ENOMEM;
2129
2130        r = copy_from_user(bits, (void __user *)args->values, args->count);
2131        if (r) {
2132                r = -EFAULT;
2133                goto out;
2134        }
2135
2136        down_read(&kvm->mm->mmap_sem);
2137        srcu_idx = srcu_read_lock(&kvm->srcu);
2138        for (i = 0; i < args->count; i++) {
2139                hva = gfn_to_hva(kvm, args->start_gfn + i);
2140                if (kvm_is_error_hva(hva)) {
2141                        r = -EFAULT;
2142                        break;
2143                }
2144
2145                pgstev = bits[i];
2146                pgstev = pgstev << 24;
2147                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2148                set_pgste_bits(kvm->mm, hva, mask, pgstev);
2149        }
2150        srcu_read_unlock(&kvm->srcu, srcu_idx);
2151        up_read(&kvm->mm->mmap_sem);
2152
2153        if (!kvm->mm->context.uses_cmm) {
2154                down_write(&kvm->mm->mmap_sem);
2155                kvm->mm->context.uses_cmm = 1;
2156                up_write(&kvm->mm->mmap_sem);
2157        }
2158out:
2159        vfree(bits);
2160        return r;
2161}
2162
2163long kvm_arch_vm_ioctl(struct file *filp,
2164                       unsigned int ioctl, unsigned long arg)
2165{
2166        struct kvm *kvm = filp->private_data;
2167        void __user *argp = (void __user *)arg;
2168        struct kvm_device_attr attr;
2169        int r;
2170
2171        switch (ioctl) {
2172        case KVM_S390_INTERRUPT: {
2173                struct kvm_s390_interrupt s390int;
2174
2175                r = -EFAULT;
2176                if (copy_from_user(&s390int, argp, sizeof(s390int)))
2177                        break;
2178                r = kvm_s390_inject_vm(kvm, &s390int);
2179                break;
2180        }
2181        case KVM_CREATE_IRQCHIP: {
2182                struct kvm_irq_routing_entry routing;
2183
2184                r = -EINVAL;
2185                if (kvm->arch.use_irqchip) {
2186                        /* Set up dummy routing. */
2187                        memset(&routing, 0, sizeof(routing));
2188                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2189                }
2190                break;
2191        }
2192        case KVM_SET_DEVICE_ATTR: {
2193                r = -EFAULT;
2194                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2195                        break;
2196                r = kvm_s390_vm_set_attr(kvm, &attr);
2197                break;
2198        }
2199        case KVM_GET_DEVICE_ATTR: {
2200                r = -EFAULT;
2201                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2202                        break;
2203                r = kvm_s390_vm_get_attr(kvm, &attr);
2204                break;
2205        }
2206        case KVM_HAS_DEVICE_ATTR: {
2207                r = -EFAULT;
2208                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2209                        break;
2210                r = kvm_s390_vm_has_attr(kvm, &attr);
2211                break;
2212        }
2213        case KVM_S390_GET_SKEYS: {
2214                struct kvm_s390_skeys args;
2215
2216                r = -EFAULT;
2217                if (copy_from_user(&args, argp,
2218                                   sizeof(struct kvm_s390_skeys)))
2219                        break;
2220                r = kvm_s390_get_skeys(kvm, &args);
2221                break;
2222        }
2223        case KVM_S390_SET_SKEYS: {
2224                struct kvm_s390_skeys args;
2225
2226                r = -EFAULT;
2227                if (copy_from_user(&args, argp,
2228                                   sizeof(struct kvm_s390_skeys)))
2229                        break;
2230                r = kvm_s390_set_skeys(kvm, &args);
2231                break;
2232        }
2233        case KVM_S390_GET_CMMA_BITS: {
2234                struct kvm_s390_cmma_log args;
2235
2236                r = -EFAULT;
2237                if (copy_from_user(&args, argp, sizeof(args)))
2238                        break;
2239                mutex_lock(&kvm->slots_lock);
2240                r = kvm_s390_get_cmma_bits(kvm, &args);
2241                mutex_unlock(&kvm->slots_lock);
2242                if (!r) {
2243                        r = copy_to_user(argp, &args, sizeof(args));
2244                        if (r)
2245                                r = -EFAULT;
2246                }
2247                break;
2248        }
2249        case KVM_S390_SET_CMMA_BITS: {
2250                struct kvm_s390_cmma_log args;
2251
2252                r = -EFAULT;
2253                if (copy_from_user(&args, argp, sizeof(args)))
2254                        break;
2255                mutex_lock(&kvm->slots_lock);
2256                r = kvm_s390_set_cmma_bits(kvm, &args);
2257                mutex_unlock(&kvm->slots_lock);
2258                break;
2259        }
2260        default:
2261                r = -ENOTTY;
2262        }
2263
2264        return r;
2265}
2266
2267static int kvm_s390_apxa_installed(void)
2268{
2269        struct ap_config_info info;
2270
2271        if (ap_instructions_available()) {
2272                if (ap_qci(&info) == 0)
2273                        return info.apxa;
2274        }
2275
2276        return 0;
2277}
2278
2279/*
2280 * The format of the crypto control block (CRYCB) is specified in the 3 low
2281 * order bits of the CRYCB designation (CRYCBD) field as follows:
2282 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2283 *           AP extended addressing (APXA) facility are installed.
2284 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2285 * Format 2: Both the APXA and MSAX3 facilities are installed.
2286 */
2287static void kvm_s390_set_crycb_format(struct kvm *kvm)
2288{
2289        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2290
2291        /* Clear the CRYCB format bits - i.e., set format 0 by default */
2292        kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2293
2294        /* Check whether MSAX3 is installed */
2295        if (!test_kvm_facility(kvm, 76))
2296                return;
2297
2298        if (kvm_s390_apxa_installed())
2299                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2300        else
2301                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2302}
2303
2304void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2305                               unsigned long *aqm, unsigned long *adm)
2306{
2307        struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2308
2309        mutex_lock(&kvm->lock);
2310        kvm_s390_vcpu_block_all(kvm);
2311
2312        switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2313        case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2314                memcpy(crycb->apcb1.apm, apm, 32);
2315                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2316                         apm[0], apm[1], apm[2], apm[3]);
2317                memcpy(crycb->apcb1.aqm, aqm, 32);
2318                VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2319                         aqm[0], aqm[1], aqm[2], aqm[3]);
2320                memcpy(crycb->apcb1.adm, adm, 32);
2321                VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2322                         adm[0], adm[1], adm[2], adm[3]);
2323                break;
2324        case CRYCB_FORMAT1:
2325        case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2326                memcpy(crycb->apcb0.apm, apm, 8);
2327                memcpy(crycb->apcb0.aqm, aqm, 2);
2328                memcpy(crycb->apcb0.adm, adm, 2);
2329                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2330                         apm[0], *((unsigned short *)aqm),
2331                         *((unsigned short *)adm));
2332                break;
2333        default:        /* Cannot happen */
2334                break;
2335        }
2336
2337        /* recreate the shadow crycb for each vcpu */
2338        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2339        kvm_s390_vcpu_unblock_all(kvm);
2340        mutex_unlock(&kvm->lock);
2341}
2342EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2343
2344void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2345{
2346        mutex_lock(&kvm->lock);
2347        kvm_s390_vcpu_block_all(kvm);
2348
2349        memset(&kvm->arch.crypto.crycb->apcb0, 0,
2350               sizeof(kvm->arch.crypto.crycb->apcb0));
2351        memset(&kvm->arch.crypto.crycb->apcb1, 0,
2352               sizeof(kvm->arch.crypto.crycb->apcb1));
2353
2354        VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2355        /* recreate the shadow crycb for each vcpu */
2356        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2357        kvm_s390_vcpu_unblock_all(kvm);
2358        mutex_unlock(&kvm->lock);
2359}
2360EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
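
/*
 * Note: the two exports above are intended for the AP pass-through (vfio_ap)
 * driver, which uses them to plug the adapter/domain masks of an assigned
 * crypto matrix into the guest's CRYCB and to clear them again when the
 * mediated device is released.
 */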
2361
2362static u64 kvm_s390_get_initial_cpuid(void)
2363{
2364        struct cpuid cpuid;
2365
2366        get_cpu_id(&cpuid);
2367        cpuid.version = 0xff;
2368        return *((u64 *) &cpuid);
2369}
2370
2371static void kvm_s390_crypto_init(struct kvm *kvm)
2372{
2373        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2374        kvm_s390_set_crycb_format(kvm);
2375
2376        if (!test_kvm_facility(kvm, 76))
2377                return;
2378
2379        /* Enable AES/DEA protected key functions by default */
2380        kvm->arch.crypto.aes_kw = 1;
2381        kvm->arch.crypto.dea_kw = 1;
2382        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2383                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2384        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2385                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2386}
2387
2388static void sca_dispose(struct kvm *kvm)
2389{
2390        if (kvm->arch.use_esca)
2391                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2392        else
2393                free_page((unsigned long)(kvm->arch.sca));
2394        kvm->arch.sca = NULL;
2395}
2396
2397int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2398{
2399        gfp_t alloc_flags = GFP_KERNEL;
2400        int i, rc;
2401        char debug_name[16];
2402        static unsigned long sca_offset;
2403
2404        rc = -EINVAL;
2405#ifdef CONFIG_KVM_S390_UCONTROL
2406        if (type & ~KVM_VM_S390_UCONTROL)
2407                goto out_err;
2408        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2409                goto out_err;
2410#else
2411        if (type)
2412                goto out_err;
2413#endif
2414
2415        rc = s390_enable_sie();
2416        if (rc)
2417                goto out_err;
2418
2419        rc = -ENOMEM;
2420
2421        if (!sclp.has_64bscao)
2422                alloc_flags |= GFP_DMA;
2423        rwlock_init(&kvm->arch.sca_lock);
2424        /* start with basic SCA */
2425        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2426        if (!kvm->arch.sca)
2427                goto out_err;
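        /*
         * Note: sca_offset staggers the basic SCA of successive VMs in
         * 16-byte steps within the freshly allocated page, wrapping before
         * the block would cross the page boundary, so the SCAs of many small
         * VMs do not all start at the same offset (and cache lines).
         */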
2428        mutex_lock(&kvm_lock);
2429        sca_offset += 16;
2430        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2431                sca_offset = 0;
2432        kvm->arch.sca = (struct bsca_block *)
2433                        ((char *) kvm->arch.sca + sca_offset);
2434        mutex_unlock(&kvm_lock);
2435
2436        sprintf(debug_name, "kvm-%u", current->pid);
2437
2438        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2439        if (!kvm->arch.dbf)
2440                goto out_err;
2441
2442        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2443        kvm->arch.sie_page2 =
2444             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2445        if (!kvm->arch.sie_page2)
2446                goto out_err;
2447
2448        kvm->arch.sie_page2->kvm = kvm;
2449        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2450
2451        for (i = 0; i < kvm_s390_fac_size(); i++) {
2452                kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2453                                              (kvm_s390_fac_base[i] |
2454                                               kvm_s390_fac_ext[i]);
2455                kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2456                                              kvm_s390_fac_base[i];
2457        }
2458        kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2459
2460        /* we are always in czam mode - even on pre z14 machines */
2461        set_kvm_facility(kvm->arch.model.fac_mask, 138);
2462        set_kvm_facility(kvm->arch.model.fac_list, 138);
2463        /* we emulate STHYI in kvm */
2464        set_kvm_facility(kvm->arch.model.fac_mask, 74);
2465        set_kvm_facility(kvm->arch.model.fac_list, 74);
2466        if (MACHINE_HAS_TLB_GUEST) {
2467                set_kvm_facility(kvm->arch.model.fac_mask, 147);
2468                set_kvm_facility(kvm->arch.model.fac_list, 147);
2469        }
2470
2471        if (css_general_characteristics.aiv && test_facility(65))
2472                set_kvm_facility(kvm->arch.model.fac_mask, 65);
2473
2474        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2475        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2476
2477        kvm_s390_crypto_init(kvm);
2478
2479        mutex_init(&kvm->arch.float_int.ais_lock);
2480        spin_lock_init(&kvm->arch.float_int.lock);
2481        for (i = 0; i < FIRQ_LIST_COUNT; i++)
2482                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2483        init_waitqueue_head(&kvm->arch.ipte_wq);
2484        mutex_init(&kvm->arch.ipte_mutex);
2485
2486        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2487        VM_EVENT(kvm, 3, "vm created with type %lu", type);
2488
2489        if (type & KVM_VM_S390_UCONTROL) {
2490                kvm->arch.gmap = NULL;
2491                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2492        } else {
2493                if (sclp.hamax == U64_MAX)
2494                        kvm->arch.mem_limit = TASK_SIZE_MAX;
2495                else
2496                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2497                                                    sclp.hamax + 1);
2498                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2499                if (!kvm->arch.gmap)
2500                        goto out_err;
2501                kvm->arch.gmap->private = kvm;
2502                kvm->arch.gmap->pfault_enabled = 0;
2503        }
2504
2505        kvm->arch.use_pfmfi = sclp.has_pfmfi;
2506        kvm->arch.use_skf = sclp.has_skey;
2507        spin_lock_init(&kvm->arch.start_stop_lock);
2508        kvm_s390_vsie_init(kvm);
2509        kvm_s390_gisa_init(kvm);
2510        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2511
2512        return 0;
2513out_err:
2514        free_page((unsigned long)kvm->arch.sie_page2);
2515        debug_unregister(kvm->arch.dbf);
2516        sca_dispose(kvm);
2517        KVM_EVENT(3, "creation of vm failed: %d", rc);
2518        return rc;
2519}
2520
2521void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2522{
2523        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2524        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2525        kvm_s390_clear_local_irqs(vcpu);
2526        kvm_clear_async_pf_completion_queue(vcpu);
2527        if (!kvm_is_ucontrol(vcpu->kvm))
2528                sca_del_vcpu(vcpu);
2529
2530        if (kvm_is_ucontrol(vcpu->kvm))
2531                gmap_remove(vcpu->arch.gmap);
2532
2533        if (vcpu->kvm->arch.use_cmma)
2534                kvm_s390_vcpu_unsetup_cmma(vcpu);
2535        free_page((unsigned long)(vcpu->arch.sie_block));
2536
2537        kvm_vcpu_uninit(vcpu);
2538        kmem_cache_free(kvm_vcpu_cache, vcpu);
2539}
2540
2541static void kvm_free_vcpus(struct kvm *kvm)
2542{
2543        unsigned int i;
2544        struct kvm_vcpu *vcpu;
2545
2546        kvm_for_each_vcpu(i, vcpu, kvm)
2547                kvm_arch_vcpu_destroy(vcpu);
2548
2549        mutex_lock(&kvm->lock);
2550        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2551                kvm->vcpus[i] = NULL;
2552
2553        atomic_set(&kvm->online_vcpus, 0);
2554        mutex_unlock(&kvm->lock);
2555}
2556
2557void kvm_arch_destroy_vm(struct kvm *kvm)
2558{
2559        kvm_free_vcpus(kvm);
2560        sca_dispose(kvm);
2561        debug_unregister(kvm->arch.dbf);
2562        kvm_s390_gisa_destroy(kvm);
2563        free_page((unsigned long)kvm->arch.sie_page2);
2564        if (!kvm_is_ucontrol(kvm))
2565                gmap_remove(kvm->arch.gmap);
2566        kvm_s390_destroy_adapters(kvm);
2567        kvm_s390_clear_float_irqs(kvm);
2568        kvm_s390_vsie_destroy(kvm);
2569        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2570}
2571
2572/* Section: vcpu related */
2573static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2574{
2575        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2576        if (!vcpu->arch.gmap)
2577                return -ENOMEM;
2578        vcpu->arch.gmap->private = vcpu->kvm;
2579
2580        return 0;
2581}
2582
2583static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2584{
2585        if (!kvm_s390_use_sca_entries())
2586                return;
2587        read_lock(&vcpu->kvm->arch.sca_lock);
2588        if (vcpu->kvm->arch.use_esca) {
2589                struct esca_block *sca = vcpu->kvm->arch.sca;
2590
2591                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2592                sca->cpu[vcpu->vcpu_id].sda = 0;
2593        } else {
2594                struct bsca_block *sca = vcpu->kvm->arch.sca;
2595
2596                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2597                sca->cpu[vcpu->vcpu_id].sda = 0;
2598        }
2599        read_unlock(&vcpu->kvm->arch.sca_lock);
2600}
2601
2602static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2603{
2604        if (!kvm_s390_use_sca_entries()) {
2605                struct bsca_block *sca = vcpu->kvm->arch.sca;
2606
2607                /* we still need the basic sca for the ipte control */
2608                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2609                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2610                return;
2611        }
2612        read_lock(&vcpu->kvm->arch.sca_lock);
2613        if (vcpu->kvm->arch.use_esca) {
2614                struct esca_block *sca = vcpu->kvm->arch.sca;
2615
2616                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2617                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2618                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2619                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2620                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2621        } else {
2622                struct bsca_block *sca = vcpu->kvm->arch.sca;
2623
2624                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2625                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2626                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2627                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2628        }
2629        read_unlock(&vcpu->kvm->arch.sca_lock);
2630}
2631
2632/* Basic SCA to Extended SCA data copy routines */
2633static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2634{
2635        d->sda = s->sda;
2636        d->sigp_ctrl.c = s->sigp_ctrl.c;
2637        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2638}
2639
2640static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2641{
2642        int i;
2643
2644        d->ipte_control = s->ipte_control;
2645        d->mcn[0] = s->mcn;
2646        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2647                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2648}
2649
2650static int sca_switch_to_extended(struct kvm *kvm)
2651{
2652        struct bsca_block *old_sca = kvm->arch.sca;
2653        struct esca_block *new_sca;
2654        struct kvm_vcpu *vcpu;
2655        unsigned int vcpu_idx;
2656        u32 scaol, scaoh;
2657
2658        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2659        if (!new_sca)
2660                return -ENOMEM;
2661
2662        scaoh = (u32)((u64)(new_sca) >> 32);
2663        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2664
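        /*
         * Note: every VCPU is kicked out of SIE and the sca_lock is held for
         * writing while the SCA origin in each SIE control block is rewritten,
         * so no VCPU can run against a stale pointer to the old basic SCA.
         */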
2665        kvm_s390_vcpu_block_all(kvm);
2666        write_lock(&kvm->arch.sca_lock);
2667
2668        sca_copy_b_to_e(new_sca, old_sca);
2669
2670        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2671                vcpu->arch.sie_block->scaoh = scaoh;
2672                vcpu->arch.sie_block->scaol = scaol;
2673                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2674        }
2675        kvm->arch.sca = new_sca;
2676        kvm->arch.use_esca = 1;
2677
2678        write_unlock(&kvm->arch.sca_lock);
2679        kvm_s390_vcpu_unblock_all(kvm);
2680
2681        free_page((unsigned long)old_sca);
2682
2683        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2684                 old_sca, kvm->arch.sca);
2685        return 0;
2686}
2687
2688static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2689{
2690        int rc;
2691
2692        if (!kvm_s390_use_sca_entries()) {
2693                if (id < KVM_MAX_VCPUS)
2694                        return true;
2695                return false;
2696        }
2697        if (id < KVM_S390_BSCA_CPU_SLOTS)
2698                return true;
2699        if (!sclp.has_esca || !sclp.has_64bscao)
2700                return false;
2701
2702        mutex_lock(&kvm->lock);
2703        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2704        mutex_unlock(&kvm->lock);
2705
2706        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2707}
2708
2709int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2710{
2711        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2712        kvm_clear_async_pf_completion_queue(vcpu);
2713        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2714                                    KVM_SYNC_GPRS |
2715                                    KVM_SYNC_ACRS |
2716                                    KVM_SYNC_CRS |
2717                                    KVM_SYNC_ARCH0 |
2718                                    KVM_SYNC_PFAULT;
2719        kvm_s390_set_prefix(vcpu, 0);
2720        if (test_kvm_facility(vcpu->kvm, 64))
2721                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2722        if (test_kvm_facility(vcpu->kvm, 82))
2723                vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2724        if (test_kvm_facility(vcpu->kvm, 133))
2725                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2726        if (test_kvm_facility(vcpu->kvm, 156))
2727                vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2728        /* fprs can be synchronized via vrs, even if the guest has no vx. With
2729         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2730         */
2731        if (MACHINE_HAS_VX)
2732                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2733        else
2734                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2735
2736        if (kvm_is_ucontrol(vcpu->kvm))
2737                return __kvm_ucontrol_vcpu_init(vcpu);
2738
2739        return 0;
2740}
2741
2742/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2743static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2744{
2745        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2746        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2747        vcpu->arch.cputm_start = get_tod_clock_fast();
2748        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2749}
2750
2751/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2752static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2753{
2754        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2755        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2756        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2757        vcpu->arch.cputm_start = 0;
2758        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2759}
2760
2761/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2762static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2763{
2764        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2765        vcpu->arch.cputm_enabled = true;
2766        __start_cpu_timer_accounting(vcpu);
2767}
2768
2769/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2770static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2771{
2772        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2773        __stop_cpu_timer_accounting(vcpu);
2774        vcpu->arch.cputm_enabled = false;
2775}
2776
2777static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2778{
2779        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2780        __enable_cpu_timer_accounting(vcpu);
2781        preempt_enable();
2782}
2783
2784static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2785{
2786        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2787        __disable_cpu_timer_accounting(vcpu);
2788        preempt_enable();
2789}
2790
2791/* set the cpu timer - may only be called from the VCPU thread itself */
2792void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2793{
2794        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2795        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2796        if (vcpu->arch.cputm_enabled)
2797                vcpu->arch.cputm_start = get_tod_clock_fast();
2798        vcpu->arch.sie_block->cputm = cputm;
2799        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2800        preempt_enable();
2801}
2802
2803/* update and get the cpu timer - can also be called from other VCPU threads */
2804__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2805{
2806        unsigned int seq;
2807        __u64 value;
2808
2809        if (unlikely(!vcpu->arch.cputm_enabled))
2810                return vcpu->arch.sie_block->cputm;
2811
2812        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2813        do {
2814                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2815                /*
2816                 * If the writer would ever execute a read in the critical
2817                 * section, e.g. in irq context, we have a deadlock.
2818                 */
2819                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2820                value = vcpu->arch.sie_block->cputm;
2821                /* if cputm_start is 0, accounting is being started/stopped */
2822                if (likely(vcpu->arch.cputm_start))
2823                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2824        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2825        preempt_enable();
2826        return value;
2827}
2828
2829void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2830{
2831
2832        gmap_enable(vcpu->arch.enabled_gmap);
2833        kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2834        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2835                __start_cpu_timer_accounting(vcpu);
2836        vcpu->cpu = cpu;
2837}
2838
2839void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2840{
2841        vcpu->cpu = -1;
2842        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2843                __stop_cpu_timer_accounting(vcpu);
2844        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2845        vcpu->arch.enabled_gmap = gmap_get_enabled();
2846        gmap_disable(vcpu->arch.enabled_gmap);
2847
2848}
2849
2850static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2851{
2852        /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2853        vcpu->arch.sie_block->gpsw.mask = 0UL;
2854        vcpu->arch.sie_block->gpsw.addr = 0UL;
2855        kvm_s390_set_prefix(vcpu, 0);
2856        kvm_s390_set_cpu_timer(vcpu, 0);
2857        vcpu->arch.sie_block->ckc       = 0UL;
2858        vcpu->arch.sie_block->todpr     = 0;
2859        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2860        vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2861                                        CR0_INTERRUPT_KEY_SUBMASK |
2862                                        CR0_MEASUREMENT_ALERT_SUBMASK;
2863        vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2864                                        CR14_UNUSED_33 |
2865                                        CR14_EXTERNAL_DAMAGE_SUBMASK;
2866        /* make sure the new fpc will be lazily loaded */
2867        save_fpu_regs();
2868        current->thread.fpu.fpc = 0;
2869        vcpu->arch.sie_block->gbea = 1;
2870        vcpu->arch.sie_block->pp = 0;
2871        vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2872        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2873        kvm_clear_async_pf_completion_queue(vcpu);
2874        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2875                kvm_s390_vcpu_stop(vcpu);
2876        kvm_s390_clear_local_irqs(vcpu);
2877}
2878
2879void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2880{
2881        mutex_lock(&vcpu->kvm->lock);
2882        preempt_disable();
2883        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2884        vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2885        preempt_enable();
2886        mutex_unlock(&vcpu->kvm->lock);
2887        if (!kvm_is_ucontrol(vcpu->kvm)) {
2888                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2889                sca_add_vcpu(vcpu);
2890        }
2891        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2892                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2893        /* make vcpu_load load the right gmap on the first trigger */
2894        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2895}
2896
2897static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2898{
2899        if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2900            test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2901                return true;
2902        return false;
2903}
2904
2905static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2906{
2907        /* At least one ECC subfunction must be present */
2908        return kvm_has_pckmo_subfunc(kvm, 32) ||
2909               kvm_has_pckmo_subfunc(kvm, 33) ||
2910               kvm_has_pckmo_subfunc(kvm, 34) ||
2911               kvm_has_pckmo_subfunc(kvm, 40) ||
2912               kvm_has_pckmo_subfunc(kvm, 41);
2913
2914}
2915
2916static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2917{
2918        /*
2919         * If the AP instructions are not being interpreted and the MSAX3
2920         * facility is not configured for the guest, there is nothing to set up.
2921         */
2922        if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2923                return;
2924
2925        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2926        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2927        vcpu->arch.sie_block->eca &= ~ECA_APIE;
2928        vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2929
2930        if (vcpu->kvm->arch.crypto.apie)
2931                vcpu->arch.sie_block->eca |= ECA_APIE;
2932
2933        /* Set up protected key support */
2934        if (vcpu->kvm->arch.crypto.aes_kw) {
2935                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2936                /* ECC keys are also wrapped with the AES wrapping key */
2937                if (kvm_has_pckmo_ecc(vcpu->kvm))
2938                        vcpu->arch.sie_block->ecd |= ECD_ECC;
2939        }
2940
2941        if (vcpu->kvm->arch.crypto.dea_kw)
2942                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2943}
2944
2945void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2946{
2947        free_page(vcpu->arch.sie_block->cbrlo);
2948        vcpu->arch.sie_block->cbrlo = 0;
2949}
2950
2951int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2952{
2953        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2954        if (!vcpu->arch.sie_block->cbrlo)
2955                return -ENOMEM;
2956        return 0;
2957}
2958
2959static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2960{
2961        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2962
2963        vcpu->arch.sie_block->ibc = model->ibc;
2964        if (test_kvm_facility(vcpu->kvm, 7))
2965                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2966}
2967
2968int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2969{
2970        int rc = 0;
2971
2972        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2973                                                    CPUSTAT_SM |
2974                                                    CPUSTAT_STOPPED);
2975
2976        if (test_kvm_facility(vcpu->kvm, 78))
2977                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2978        else if (test_kvm_facility(vcpu->kvm, 8))
2979                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2980
2981        kvm_s390_vcpu_setup_model(vcpu);
2982
2983        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2984        if (MACHINE_HAS_ESOP)
2985                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2986        if (test_kvm_facility(vcpu->kvm, 9))
2987                vcpu->arch.sie_block->ecb |= ECB_SRSI;
2988        if (test_kvm_facility(vcpu->kvm, 73))
2989                vcpu->arch.sie_block->ecb |= ECB_TE;
2990
2991        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2992                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2993        if (test_kvm_facility(vcpu->kvm, 130))
2994                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2995        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2996        if (sclp.has_cei)
2997                vcpu->arch.sie_block->eca |= ECA_CEI;
2998        if (sclp.has_ib)
2999                vcpu->arch.sie_block->eca |= ECA_IB;
3000        if (sclp.has_siif)
3001                vcpu->arch.sie_block->eca |= ECA_SII;
3002        if (sclp.has_sigpif)
3003                vcpu->arch.sie_block->eca |= ECA_SIGPI;
3004        if (test_kvm_facility(vcpu->kvm, 129)) {
3005                vcpu->arch.sie_block->eca |= ECA_VX;
3006                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3007        }
3008        if (test_kvm_facility(vcpu->kvm, 139))
3009                vcpu->arch.sie_block->ecd |= ECD_MEF;
3010        if (test_kvm_facility(vcpu->kvm, 156))
3011                vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3012        if (vcpu->arch.sie_block->gd) {
3013                vcpu->arch.sie_block->eca |= ECA_AIV;
3014                VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3015                           vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3016        }
3017        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3018                                        | SDNXC;
3019        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3020
3021        if (sclp.has_kss)
3022                kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3023        else
3024                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3025
3026        if (vcpu->kvm->arch.use_cmma) {
3027                rc = kvm_s390_vcpu_setup_cmma(vcpu);
3028                if (rc)
3029                        return rc;
3030        }
3031        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3032        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3033
3034        vcpu->arch.sie_block->hpid = HPID_KVM;
3035
3036        kvm_s390_vcpu_crypto_setup(vcpu);
3037
3038        return rc;
3039}
3040
3041struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3042                                      unsigned int id)
3043{
3044        struct kvm_vcpu *vcpu;
3045        struct sie_page *sie_page;
3046        int rc = -EINVAL;
3047
3048        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3049                goto out;
3050
3051        rc = -ENOMEM;
3052
3053        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3054        if (!vcpu)
3055                goto out;
3056
3057        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3058        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3059        if (!sie_page)
3060                goto out_free_cpu;
3061
3062        vcpu->arch.sie_block = &sie_page->sie_block;
3063        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3064
3065        /* the real guest size will always be smaller than msl */
3066        vcpu->arch.sie_block->mso = 0;
3067        vcpu->arch.sie_block->msl = sclp.hamax;
3068
3069        vcpu->arch.sie_block->icpua = id;
3070        spin_lock_init(&vcpu->arch.local_int.lock);
3071        vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3072        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3073                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3074        seqcount_init(&vcpu->arch.cputm_seqcount);
3075
3076        rc = kvm_vcpu_init(vcpu, kvm, id);
3077        if (rc)
3078                goto out_free_sie_block;
3079        VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3080                 vcpu->arch.sie_block);
3081        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3082
3083        return vcpu;
3084out_free_sie_block:
3085        free_page((unsigned long)(vcpu->arch.sie_block));
3086out_free_cpu:
3087        kmem_cache_free(kvm_vcpu_cache, vcpu);
3088out:
3089        return ERR_PTR(rc);
3090}
3091
3092int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3093{
3094        return kvm_s390_vcpu_has_irq(vcpu, 0);
3095}
3096
3097bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3098{
3099        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3100}
3101
3102void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3103{
3104        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3105        exit_sie(vcpu);
3106}
3107
3108void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3109{
3110        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3111}
3112
3113static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3114{
3115        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3116        exit_sie(vcpu);
3117}
3118
3119bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3120{
3121        return atomic_read(&vcpu->arch.sie_block->prog20) &
3122               (PROG_BLOCK_SIE | PROG_REQUEST);
3123}
3124
3125static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3126{
3127        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3128}
3129
3130/*
3131 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3132 * If the CPU is not running (e.g. waiting as idle) the function will
3133 * return immediately. */
3134void exit_sie(struct kvm_vcpu *vcpu)
3135{
3136        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3137        kvm_s390_vsie_kick(vcpu);
3138        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3139                cpu_relax();
3140}
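
/*
 * Rough sketch of the kick protocol implemented by the helpers above:
 *
 *	kvm_s390_vcpu_block()/_request()  set a blocking bit in prog20
 *	exit_sie()                        raises a STOP interrupt, kicks vSIE
 *	SIE exits; the entry path checks prog20 and stays out of SIE
 *	exit_sie()                        spins until prog0c & PROG_IN_SIE clears
 *
 * so once exit_sie() returns, the VCPU will not execute guest instructions
 * until the blocking/request bits are cleared again.
 */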
3141
3142/* Kick a guest cpu out of SIE to process a request synchronously */
3143void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3144{
3145        kvm_make_request(req, vcpu);
3146        kvm_s390_vcpu_request(vcpu);
3147}
3148
3149static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3150                              unsigned long end)
3151{
3152        struct kvm *kvm = gmap->private;
3153        struct kvm_vcpu *vcpu;
3154        unsigned long prefix;
3155        int i;
3156
3157        if (gmap_is_shadow(gmap))
3158                return;
3159        if (start >= 1UL << 31)
3160                /* We are only interested in prefix pages */
3161                return;
3162        kvm_for_each_vcpu(i, vcpu, kvm) {
3163                /* match against both prefix pages */
3164                prefix = kvm_s390_get_prefix(vcpu);
3165                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3166                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3167                                   start, end);
3168                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3169                }
3170        }
3171}
3172
3173bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3174{
3175        /* do not poll with more than halt_poll_max_steal percent of steal time */
3176        if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3177            halt_poll_max_steal) {
3178                vcpu->stat.halt_no_poll_steal++;
3179                return true;
3180        }
3181        return false;
3182}
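
/*
 * Worked example for the check above (assuming HZ=100, i.e. TICK_USEC of
 * 10000, and halt_poll_max_steal=10): avg_steal_timer is in TOD-format
 * units (4096 per microsecond), so polling is skipped once
 *
 *	avg_steal_timer * 100 / (10000 << 12) >= 10
 *
 * i.e. once the average steal time reaches 1 ms, 10% of a 10 ms tick.
 */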
3183
3184int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3185{
3186        /* kvm common code refers to this, but never calls it */
3187        BUG();
3188        return 0;
3189}
3190
3191static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3192                                           struct kvm_one_reg *reg)
3193{
3194        int r = -EINVAL;
3195
3196        switch (reg->id) {
3197        case KVM_REG_S390_TODPR:
3198                r = put_user(vcpu->arch.sie_block->todpr,
3199                             (u32 __user *)reg->addr);
3200                break;
3201        case KVM_REG_S390_EPOCHDIFF:
3202                r = put_user(vcpu->arch.sie_block->epoch,
3203                             (u64 __user *)reg->addr);
3204                break;
3205        case KVM_REG_S390_CPU_TIMER:
3206                r = put_user(kvm_s390_get_cpu_timer(vcpu),
3207                             (u64 __user *)reg->addr);
3208                break;
3209        case KVM_REG_S390_CLOCK_COMP:
3210                r = put_user(vcpu->arch.sie_block->ckc,
3211                             (u64 __user *)reg->addr);
3212                break;
3213        case KVM_REG_S390_PFTOKEN:
3214                r = put_user(vcpu->arch.pfault_token,
3215                             (u64 __user *)reg->addr);
3216                break;
3217        case KVM_REG_S390_PFCOMPARE:
3218                r = put_user(vcpu->arch.pfault_compare,
3219                             (u64 __user *)reg->addr);
3220                break;
3221        case KVM_REG_S390_PFSELECT:
3222                r = put_user(vcpu->arch.pfault_select,
3223                             (u64 __user *)reg->addr);
3224                break;
3225        case KVM_REG_S390_PP:
3226                r = put_user(vcpu->arch.sie_block->pp,
3227                             (u64 __user *)reg->addr);
3228                break;
3229        case KVM_REG_S390_GBEA:
3230                r = put_user(vcpu->arch.sie_block->gbea,
3231                             (u64 __user *)reg->addr);
3232                break;
3233        default:
3234                break;
3235        }
3236
3237        return r;
3238}
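
/*
 * Userspace sketch (error handling omitted) of the one-reg interface
 * served above, given an open VCPU file descriptor vcpu_fd:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *
 * The same layout with KVM_SET_ONE_REG feeds the setter below.
 */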
3239
3240static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3241                                           struct kvm_one_reg *reg)
3242{
3243        int r = -EINVAL;
3244        __u64 val;
3245
3246        switch (reg->id) {
3247        case KVM_REG_S390_TODPR:
3248                r = get_user(vcpu->arch.sie_block->todpr,
3249                             (u32 __user *)reg->addr);
3250                break;
3251        case KVM_REG_S390_EPOCHDIFF:
3252                r = get_user(vcpu->arch.sie_block->epoch,
3253                             (u64 __user *)reg->addr);
3254                break;
3255        case KVM_REG_S390_CPU_TIMER:
3256                r = get_user(val, (u64 __user *)reg->addr);
3257                if (!r)
3258                        kvm_s390_set_cpu_timer(vcpu, val);
3259                break;
3260        case KVM_REG_S390_CLOCK_COMP:
3261                r = get_user(vcpu->arch.sie_block->ckc,
3262                             (u64 __user *)reg->addr);
3263                break;
3264        case KVM_REG_S390_PFTOKEN:
3265                r = get_user(vcpu->arch.pfault_token,
3266                             (u64 __user *)reg->addr);
3267                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3268                        kvm_clear_async_pf_completion_queue(vcpu);
3269                break;
3270        case KVM_REG_S390_PFCOMPARE:
3271                r = get_user(vcpu->arch.pfault_compare,
3272                             (u64 __user *)reg->addr);
3273                break;
3274        case KVM_REG_S390_PFSELECT:
3275                r = get_user(vcpu->arch.pfault_select,
3276                             (u64 __user *)reg->addr);
3277                break;
3278        case KVM_REG_S390_PP:
3279                r = get_user(vcpu->arch.sie_block->pp,
3280                             (u64 __user *)reg->addr);
3281                break;
3282        case KVM_REG_S390_GBEA:
3283                r = get_user(vcpu->arch.sie_block->gbea,
3284                             (u64 __user *)reg->addr);
3285                break;
3286        default:
3287                break;
3288        }
3289
3290        return r;
3291}
3292
3293static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3294{
3295        kvm_s390_vcpu_initial_reset(vcpu);
3296        return 0;
3297}
3298
3299int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3300{
3301        vcpu_load(vcpu);
3302        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3303        vcpu_put(vcpu);
3304        return 0;
3305}
3306
3307int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3308{
3309        vcpu_load(vcpu);
3310        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3311        vcpu_put(vcpu);
3312        return 0;
3313}
3314
3315int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3316                                  struct kvm_sregs *sregs)
3317{
3318        vcpu_load(vcpu);
3319
3320        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3321        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3322
3323        vcpu_put(vcpu);
3324        return 0;
3325}
3326
3327int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3328                                  struct kvm_sregs *sregs)
3329{
3330        vcpu_load(vcpu);
3331
3332        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3333        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3334
3335        vcpu_put(vcpu);
3336        return 0;
3337}
3338
3339int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3340{
3341        int ret = 0;
3342
3343        vcpu_load(vcpu);
3344
3345        if (test_fp_ctl(fpu->fpc)) {
3346                ret = -EINVAL;
3347                goto out;
3348        }
3349        vcpu->run->s.regs.fpc = fpu->fpc;
3350        if (MACHINE_HAS_VX)
3351                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3352                                 (freg_t *) fpu->fprs);
3353        else
3354                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3355
3356out:
3357        vcpu_put(vcpu);
3358        return ret;
3359}
3360
3361int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3362{
3363        vcpu_load(vcpu);
3364
3365        /* make sure we have the latest values */
3366        save_fpu_regs();
3367        if (MACHINE_HAS_VX)
3368                convert_vx_to_fp((freg_t *) fpu->fprs,
3369                                 (__vector128 *) vcpu->run->s.regs.vrs);
3370        else
3371                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3372        fpu->fpc = vcpu->run->s.regs.fpc;
3373
3374        vcpu_put(vcpu);
3375        return 0;
3376}
3377
3378static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3379{
3380        int rc = 0;
3381
3382        if (!is_vcpu_stopped(vcpu))
3383                rc = -EBUSY;
3384        else {
3385                vcpu->run->psw_mask = psw.mask;
3386                vcpu->run->psw_addr = psw.addr;
3387        }
3388        return rc;
3389}
3390
3391int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3392                                  struct kvm_translation *tr)
3393{
3394        return -EINVAL; /* not implemented yet */
3395}
3396
3397#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3398                              KVM_GUESTDBG_USE_HW_BP | \
3399                              KVM_GUESTDBG_ENABLE)
3400
3401int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3402                                        struct kvm_guest_debug *dbg)
3403{
3404        int rc = 0;
3405
3406        vcpu_load(vcpu);
3407
3408        vcpu->guest_debug = 0;
3409        kvm_s390_clear_bp_data(vcpu);
3410
3411        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3412                rc = -EINVAL;
3413                goto out;
3414        }
3415        if (!sclp.has_gpere) {
3416                rc = -EINVAL;
3417                goto out;
3418        }
3419
3420        if (dbg->control & KVM_GUESTDBG_ENABLE) {
3421                vcpu->guest_debug = dbg->control;
3422                /* enforce guest PER */
3423                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3424
3425                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3426                        rc = kvm_s390_import_bp_data(vcpu, dbg);
3427        } else {
3428                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3429                vcpu->arch.guestdbg.last_bp = 0;
3430        }
3431
3432        if (rc) {
3433                vcpu->guest_debug = 0;
3434                kvm_s390_clear_bp_data(vcpu);
3435                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3436        }
3437
3438out:
3439        vcpu_put(vcpu);
3440        return rc;
3441}
3442
3443int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3444                                    struct kvm_mp_state *mp_state)
3445{
3446        int ret;
3447
3448        vcpu_load(vcpu);
3449
3450        /* CHECK_STOP and LOAD are not supported yet */
3451        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3452                                      KVM_MP_STATE_OPERATING;
3453
3454        vcpu_put(vcpu);
3455        return ret;
3456}
3457
3458int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3459                                    struct kvm_mp_state *mp_state)
3460{
3461        int rc = 0;
3462
3463        vcpu_load(vcpu);
3464
3465        /* user space knows about this interface - let it control the state */
3466        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3467
3468        switch (mp_state->mp_state) {
3469        case KVM_MP_STATE_STOPPED:
3470                kvm_s390_vcpu_stop(vcpu);
3471                break;
3472        case KVM_MP_STATE_OPERATING:
3473                kvm_s390_vcpu_start(vcpu);
3474                break;
3475        case KVM_MP_STATE_LOAD:
3476        case KVM_MP_STATE_CHECK_STOP:
3477                /* fall through - CHECK_STOP and LOAD are not supported yet */
3478        default:
3479                rc = -ENXIO;
3480        }
3481
3482        vcpu_put(vcpu);
3483        return rc;
3484}
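
/*
 * Userspace sketch of the mp_state interface above, given a VCPU file
 * descriptor vcpu_fd:
 *
 *	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
 *
 * Issuing KVM_SET_MP_STATE also flags user_cpu_state_ctrl, i.e. from then
 * on userspace is responsible for the stopped/operating state.
 */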
3485
3486static bool ibs_enabled(struct kvm_vcpu *vcpu)
3487{
3488        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3489}
3490
3491static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3492{
3493retry:
3494        kvm_s390_vcpu_request_handled(vcpu);
3495        if (!kvm_request_pending(vcpu))
3496                return 0;
3497        /*
3498         * We use MMU_RELOAD just to re-arm the ipte notifier for the
3499         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3500         * This ensures that the ipte instruction for this request has
3501         * already finished. We might race against a second unmapper that
3502         * wants to set the blocking bit. Let's just retry the request loop.
3503         */
3504        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3505                int rc;
3506                rc = gmap_mprotect_notify(vcpu->arch.gmap,
3507                                          kvm_s390_get_prefix(vcpu),
3508                                          PAGE_SIZE * 2, PROT_WRITE);
3509                if (rc) {
3510                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3511                        return rc;
3512                }
3513                goto retry;
3514        }
3515
3516        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3517                vcpu->arch.sie_block->ihcpu = 0xffff;
3518                goto retry;
3519        }
3520
3521        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3522                if (!ibs_enabled(vcpu)) {
3523                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3524                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3525                }
3526                goto retry;
3527        }
3528
3529        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3530                if (ibs_enabled(vcpu)) {
3531                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3532                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3533                }
3534                goto retry;
3535        }
3536
3537        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3538                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3539                goto retry;
3540        }
3541
3542        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3543                /*
3544                 * Disable CMM virtualization; we will emulate the ESSA
3545                 * instruction manually, in order to provide the additional
3546                 * functionality needed for live migration.
3547                 */
3548                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3549                goto retry;
3550        }
3551
3552        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3553                /*
3554                 * Re-enable CMM virtualization if CMMA is available and
3555                 * CMM has been used.
3556                 */
3557                if ((vcpu->kvm->arch.use_cmma) &&
3558                    (vcpu->kvm->mm->context.uses_cmm))
3559                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3560                goto retry;
3561        }
3562
3563        /* nothing to do, just clear the request */
3564        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3565        /* we left the vsie handler, nothing to do, just clear the request */
3566        kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3567
3568        return 0;
3569}
3570
3571void kvm_s390_set_tod_clock(struct kvm *kvm,
3572                            const struct kvm_s390_vm_tod_clock *gtod)
3573{
3574        struct kvm_vcpu *vcpu;
3575        struct kvm_s390_tod_clock_ext htod;
3576        int i;
3577
3578        mutex_lock(&kvm->lock);
3579        preempt_disable();
3580
3581        get_tod_clock_ext((char *)&htod);
3582
3583        kvm->arch.epoch = gtod->tod - htod.tod;
3584        kvm->arch.epdx = 0;
3585        if (test_kvm_facility(kvm, 139)) {
3586                kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3587                if (kvm->arch.epoch > gtod->tod)
3588                        kvm->arch.epdx -= 1;
3589        }
3590
3591        kvm_s390_vcpu_block_all(kvm);
3592        kvm_for_each_vcpu(i, vcpu, kvm) {
3593                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3594                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3595        }
3596
3597        kvm_s390_vcpu_unblock_all(kvm);
3598        preempt_enable();
3599        mutex_unlock(&kvm->lock);
3600}
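
/*
 * Illustrative example of the epoch computation above (made-up values):
 * with a guest TOD of 0x100 and a host TOD of 0x300,
 *
 *	epoch = 0x100 - 0x300	(wraps modulo 2^64)
 *
 * so epoch > gtod->tod, and on machines with the multi-epoch facility the
 * borrow is propagated into epdx by the "epdx -= 1" above.
 */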
3601
3602/**
3603 * kvm_arch_fault_in_page - fault-in guest page if necessary
3604 * @vcpu: The corresponding virtual cpu
3605 * @gpa: Guest physical address
3606 * @writable: Whether the page should be writable or not
3607 *
3608 * Make sure that a guest page has been faulted-in on the host.
3609 *
3610 * Return: Zero on success, negative error code otherwise.
3611 */
3612long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3613{
3614        return gmap_fault(vcpu->arch.gmap, gpa,
3615                          writable ? FAULT_FLAG_WRITE : 0);
3616}
3617
3618static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3619                                      unsigned long token)
3620{
3621        struct kvm_s390_interrupt inti;
3622        struct kvm_s390_irq irq;
3623
3624        if (start_token) {
3625                irq.u.ext.ext_params2 = token;
3626                irq.type = KVM_S390_INT_PFAULT_INIT;
3627                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3628        } else {
3629                inti.type = KVM_S390_INT_PFAULT_DONE;
3630                inti.parm64 = token;
3631                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3632        }
3633}
3634
3635void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3636                                     struct kvm_async_pf *work)
3637{
3638        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3639        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3640}
3641
3642void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3643                                 struct kvm_async_pf *work)
3644{
3645        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3646        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3647}
3648
3649void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3650                               struct kvm_async_pf *work)
3651{
3652        /* s390 will always inject the page directly */
3653}
3654
3655bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3656{
3657        /*
3658         * s390 will always inject the page directly,
3659         * but we still want check_async_completion to clean up
3660         */
3661        return true;
3662}
3663
3664static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3665{
3666        hva_t hva;
3667        struct kvm_arch_async_pf arch;
3668        int rc;
3669
3670        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3671                return 0;
3672        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3673            vcpu->arch.pfault_compare)
3674                return 0;
3675        if (psw_extint_disabled(vcpu))
3676                return 0;
3677        if (kvm_s390_vcpu_has_irq(vcpu, 0))
3678                return 0;
3679        if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3680                return 0;
3681        if (!vcpu->arch.gmap->pfault_enabled)
3682                return 0;
3683
3684        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3685        hva += current->thread.gmap_addr & ~PAGE_MASK;
3686        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3687                return 0;
3688
3689        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3690        return rc;
3691}
3692
3693static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3694{
3695        int rc, cpuflags;
3696
3697        /*
3698         * On s390 notifications for arriving pages will be delivered directly
3699         * to the guest, but the housekeeping for completed pfaults is
3700         * handled outside the worker.
3701         */
3702        kvm_check_async_pf_completion(vcpu);
3703
3704        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3705        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3706
3707        if (need_resched())
3708                schedule();
3709
3710        if (test_cpu_flag(CIF_MCCK_PENDING))
3711                s390_handle_mcck();
3712
3713        if (!kvm_is_ucontrol(vcpu->kvm)) {
3714                rc = kvm_s390_deliver_pending_interrupts(vcpu);
3715                if (rc)
3716                        return rc;
3717        }
3718
3719        rc = kvm_s390_handle_requests(vcpu);
3720        if (rc)
3721                return rc;
3722
3723        if (guestdbg_enabled(vcpu)) {
3724                kvm_s390_backup_guest_per_regs(vcpu);
3725                kvm_s390_patch_guest_per_regs(vcpu);
3726        }
3727
3728        clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3729
3730        vcpu->arch.sie_block->icptcode = 0;
3731        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3732        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3733        trace_kvm_s390_sie_enter(vcpu, cpuflags);
3734
3735        return 0;
3736}
3737
3738static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3739{
3740        struct kvm_s390_pgm_info pgm_info = {
3741                .code = PGM_ADDRESSING,
3742        };
3743        u8 opcode, ilen;
3744        int rc;
3745
3746        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3747        trace_kvm_s390_sie_fault(vcpu);
3748
3749        /*
3750         * We want to inject an addressing exception, which is defined as a
3751         * suppressing or terminating exception. However, since we came here
3752         * by a DAT access exception, the PSW still points to the faulting
3753         * instruction since DAT exceptions are nullifying. So we've got
3754         * to look up the current opcode to get the length of the instruction
3755         * to be able to forward the PSW.
3756         */
3757        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3758        ilen = insn_length(opcode);
3759        if (rc < 0) {
3760                return rc;
3761        } else if (rc) {
3762                /* Instruction-Fetching Exceptions - we can't detect the ilen.
3763                 * Forward by arbitrary ilc, injection will take care of
3764                 * nullification if necessary.
3765                 */
3766                pgm_info = vcpu->arch.pgm;
3767                ilen = 4;
3768        }
3769        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3770        kvm_s390_forward_psw(vcpu, ilen);
3771        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3772}
3773
3774static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3775{
3776        struct mcck_volatile_info *mcck_info;
3777        struct sie_page *sie_page;
3778
3779        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3780                   vcpu->arch.sie_block->icptcode);
3781        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3782
3783        if (guestdbg_enabled(vcpu))
3784                kvm_s390_restore_guest_per_regs(vcpu);
3785
3786        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3787        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3788
3789        if (exit_reason == -EINTR) {
3790                VCPU_EVENT(vcpu, 3, "%s", "machine check");
3791                sie_page = container_of(vcpu->arch.sie_block,
3792                                        struct sie_page, sie_block);
3793                mcck_info = &sie_page->mcck_info;
3794                kvm_s390_reinject_machine_check(vcpu, mcck_info);
3795                return 0;
3796        }
3797
3798        if (vcpu->arch.sie_block->icptcode > 0) {
3799                int rc = kvm_handle_sie_intercept(vcpu);
3800
3801                if (rc != -EOPNOTSUPP)
3802                        return rc;
3803                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3804                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3805                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3806                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3807                return -EREMOTE;
3808        } else if (exit_reason != -EFAULT) {
3809                vcpu->stat.exit_null++;
3810                return 0;
3811        } else if (kvm_is_ucontrol(vcpu->kvm)) {
3812                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3813                vcpu->run->s390_ucontrol.trans_exc_code =
3814                                                current->thread.gmap_addr;
3815                vcpu->run->s390_ucontrol.pgm_code = 0x10;
3816                return -EREMOTE;
3817        } else if (current->thread.gmap_pfault) {
3818                trace_kvm_s390_major_guest_pfault(vcpu);
3819                current->thread.gmap_pfault = 0;
3820                if (kvm_arch_setup_async_pf(vcpu))
3821                        return 0;
3822                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3823        }
3824        return vcpu_post_run_fault_in_sie(vcpu);
3825}
3826
3827static int __vcpu_run(struct kvm_vcpu *vcpu)
3828{
3829        int rc, exit_reason;
3830
3831        /*
3832         * We try to hold kvm->srcu during most of vcpu_run (except when
3833         * running the guest), so that memslots (and other stuff) are protected
3834         */
3835        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3836
3837        do {
3838                rc = vcpu_pre_run(vcpu);
3839                if (rc)
3840                        break;
3841
3842                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3843                /*
3844                 * As PF_VCPU will be used in the fault handler, there must be
3845                 * no uaccess between guest_enter and guest_exit.
3846                 */
3847                local_irq_disable();
3848                guest_enter_irqoff();
3849                __disable_cpu_timer_accounting(vcpu);
3850                local_irq_enable();
3851                exit_reason = sie64a(vcpu->arch.sie_block,
3852                                     vcpu->run->s.regs.gprs);
3853                local_irq_disable();
3854                __enable_cpu_timer_accounting(vcpu);
3855                guest_exit_irqoff();
3856                local_irq_enable();
3857                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3858
3859                rc = vcpu_post_run(vcpu, exit_reason);
3860        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3861
3862        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3863        return rc;
3864}
3865
3866static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3867{
3868        struct runtime_instr_cb *riccb;
3869        struct gs_cb *gscb;
3870
3871        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3872        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3873        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3874        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3875        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3876                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3877        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3878                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3879                /* some control register changes require a tlb flush */
3880                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3881        }
3882        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3883                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3884                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3885                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3886                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3887                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3888        }
3889        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3890                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3891                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3892                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3893                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3894                        kvm_clear_async_pf_completion_queue(vcpu);
3895        }
3896        /*
3897         * If userspace sets the riccb (e.g. after migration) to a valid state,
3898         * we should enable RI here instead of doing the lazy enablement.
3899         */
3900        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3901            test_kvm_facility(vcpu->kvm, 64) &&
3902            riccb->v &&
3903            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3904                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3905                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3906        }
3907        /*
3908         * If userspace sets the gscb (e.g. after migration) to non-zero,
3909         * we should enable GS here instead of doing the lazy enablement.
3910         */
3911        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3912            test_kvm_facility(vcpu->kvm, 133) &&
3913            gscb->gssm &&
3914            !vcpu->arch.gs_enabled) {
3915                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3916                vcpu->arch.sie_block->ecb |= ECB_GS;
3917                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3918                vcpu->arch.gs_enabled = 1;
3919        }
3920        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3921            test_kvm_facility(vcpu->kvm, 82)) {
3922                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3923                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3924        }
3925        save_access_regs(vcpu->arch.host_acrs);
3926        restore_access_regs(vcpu->run->s.regs.acrs);
3927        /* save host (userspace) fprs/vrs */
3928        save_fpu_regs();
3929        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3930        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3931        if (MACHINE_HAS_VX)
3932                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3933        else
3934                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3935        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3936        if (test_fp_ctl(current->thread.fpu.fpc))
3937                /* User space provided an invalid FPC, let's clear it */
3938                current->thread.fpu.fpc = 0;
3939        if (MACHINE_HAS_GS) {
3940                preempt_disable();
3941                __ctl_set_bit(2, 4);
3942                if (current->thread.gs_cb) {
3943                        vcpu->arch.host_gscb = current->thread.gs_cb;
3944                        save_gs_cb(vcpu->arch.host_gscb);
3945                }
3946                if (vcpu->arch.gs_enabled) {
3947                        current->thread.gs_cb = (struct gs_cb *)
3948                                                &vcpu->run->s.regs.gscb;
3949                        restore_gs_cb(current->thread.gs_cb);
3950                }
3951                preempt_enable();
3952        }
3953        /* SIE will load etoken directly from SDNX and therefore kvm_run */
3954
3955        kvm_run->kvm_dirty_regs = 0;
3956}
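
/*
 * Example of the register sync protocol consumed above (userspace sketch):
 * to change the guest prefix before the next KVM_RUN, userspace sets
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *
 * where new_prefix is a placeholder; sync_regs() then picks the value up
 * on entry and clears kvm_dirty_regs.
 */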
3957
3958static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3959{
3960        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3961        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3962        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3963        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3964        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3965        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3966        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3967        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3968        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3969        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3970        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3971        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3972        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3973        save_access_regs(vcpu->run->s.regs.acrs);
3974        restore_access_regs(vcpu->arch.host_acrs);
3975        /* Save guest register state */
3976        save_fpu_regs();
3977        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3978        /* Restore will be done lazily at return */
3979        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3980        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3981        if (MACHINE_HAS_GS) {
3982                __ctl_set_bit(2, 4);
3983                if (vcpu->arch.gs_enabled)
3984                        save_gs_cb(current->thread.gs_cb);
3985                preempt_disable();
3986                current->thread.gs_cb = vcpu->arch.host_gscb;
3987                restore_gs_cb(vcpu->arch.host_gscb);
3988                preempt_enable();
3989                if (!vcpu->arch.host_gscb)
3990                        __ctl_clear_bit(2, 4);
3991                vcpu->arch.host_gscb = NULL;
3992        }
3993        /* SIE will save etoken directly into SDNX and therefore kvm_run */
3994}
3995
3996int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3997{
3998        int rc;
3999
4000        if (kvm_run->immediate_exit)
4001                return -EINTR;
4002
4003        vcpu_load(vcpu);
4004
4005        if (guestdbg_exit_pending(vcpu)) {
4006                kvm_s390_prepare_debug_exit(vcpu);
4007                rc = 0;
4008                goto out;
4009        }
4010
4011        kvm_sigset_activate(vcpu);
4012
4013        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4014                kvm_s390_vcpu_start(vcpu);
4015        } else if (is_vcpu_stopped(vcpu)) {
4016                pr_err_ratelimited("can't run stopped vcpu %d\n",
4017                                   vcpu->vcpu_id);
4018                rc = -EINVAL;
4019                goto out;
4020        }
4021
4022        sync_regs(vcpu, kvm_run);
4023        enable_cpu_timer_accounting(vcpu);
4024
4025        might_fault();
4026        rc = __vcpu_run(vcpu);
4027
4028        if (signal_pending(current) && !rc) {
4029                kvm_run->exit_reason = KVM_EXIT_INTR;
4030                rc = -EINTR;
4031        }
4032
4033        if (guestdbg_exit_pending(vcpu) && !rc)  {
4034                kvm_s390_prepare_debug_exit(vcpu);
4035                rc = 0;
4036        }
4037
4038        if (rc == -EREMOTE) {
4039                /* userspace support is needed, kvm_run has been prepared */
4040                rc = 0;
4041        }
4042
4043        disable_cpu_timer_accounting(vcpu);
4044        store_regs(vcpu, kvm_run);
4045
4046        kvm_sigset_deactivate(vcpu);
4047
4048        vcpu->stat.exit_userspace++;
4049out:
4050        vcpu_put(vcpu);
4051        return rc;
4052}
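
/*
 * Userspace sketch of the run loop driving the ioctl above, given vcpu_fd
 * and the mmap()ed struct kvm_run *run (handle_sieic() is a hypothetical
 * helper):
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);
 *	}
 *
 * A -EREMOTE from __vcpu_run() is converted to 0 above, so intercepts that
 * need userspace show up as a successful KVM_RUN with a filled exit_reason.
 */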
4053
4054/*
4055 * store status at address
4056 * we have two special cases:
4057 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4058 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4059 */
4060int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4061{
4062        unsigned char archmode = 1;
4063        freg_t fprs[NUM_FPRS];
4064        unsigned int px;
4065        u64 clkcomp, cputm;
4066        int rc;
4067
4068        px = kvm_s390_get_prefix(vcpu);
4069        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4070                if (write_guest_abs(vcpu, 163, &archmode, 1))
4071                        return -EFAULT;
4072                gpa = 0;
4073        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4074                if (write_guest_real(vcpu, 163, &archmode, 1))
4075                        return -EFAULT;
4076                gpa = px;
4077        } else
4078                gpa -= __LC_FPREGS_SAVE_AREA;
4079
4080        /* manually convert vector registers if necessary */
4081        if (MACHINE_HAS_VX) {
4082                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4083                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4084                                     fprs, 128);
4085        } else {
4086                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4087                                     vcpu->run->s.regs.fprs, 128);
4088        }
4089        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4090                              vcpu->run->s.regs.gprs, 128);
4091        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4092                              &vcpu->arch.sie_block->gpsw, 16);
4093        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4094                              &px, 4);
4095        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4096                              &vcpu->run->s.regs.fpc, 4);
4097        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4098                              &vcpu->arch.sie_block->todpr, 4);
4099        cputm = kvm_s390_get_cpu_timer(vcpu);
4100        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4101                              &cputm, 8);
4102        clkcomp = vcpu->arch.sie_block->ckc >> 8;
4103        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4104                              &clkcomp, 8);
4105        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4106                              &vcpu->run->s.regs.acrs, 64);
4107        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4108                              &vcpu->arch.sie_block->gcr, 128);
4109        return rc ? -EFAULT : 0;
4110}
4111
4112int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4113{
4114        /*
4115         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4116         * switch in the run ioctl. Let's update our copies before we save
4117         * them into the save area.
4118         */
4119        save_fpu_regs();
4120        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4121        save_access_regs(vcpu->run->s.regs.acrs);
4122
4123        return kvm_s390_store_status_unloaded(vcpu, addr);
4124}
4125
4126static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4127{
4128        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4129        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4130}
4131
4132static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4133{
4134        unsigned int i;
4135        struct kvm_vcpu *vcpu;
4136
4137        kvm_for_each_vcpu(i, vcpu, kvm) {
4138                __disable_ibs_on_vcpu(vcpu);
4139        }
4140}
4141
4142static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4143{
4144        if (!sclp.has_ibs)
4145                return;
4146        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4147        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4148}
4149
4150void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4151{
4152        int i, online_vcpus, started_vcpus = 0;
4153
4154        if (!is_vcpu_stopped(vcpu))
4155                return;
4156
4157        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4158        /* Only one cpu at a time may enter/leave the STOPPED state. */
4159        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4160        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4161
4162        for (i = 0; i < online_vcpus; i++) {
4163                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4164                        started_vcpus++;
4165        }
4166
4167        if (started_vcpus == 0) {
4168                /* we're the only active VCPU -> speed it up */
4169                __enable_ibs_on_vcpu(vcpu);
4170        } else if (started_vcpus == 1) {
4171                /*
4172                 * As we are starting a second VCPU, we have to disable
4173                 * the IBS facility on all VCPUs to remove potentially
4174                 * outstanding ENABLE requests.
4175                 */
4176                __disable_ibs_on_all_vcpus(vcpu->kvm);
4177        }
4178
4179        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4180        /*
4181         * Another VCPU might have used IBS while we were offline.
4182         * Let's play safe and flush the VCPU at startup.
4183         */
4184        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4185        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4186        return;
4187}
4188
4189void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4190{
4191        int i, online_vcpus, started_vcpus = 0;
4192        struct kvm_vcpu *started_vcpu = NULL;
4193
4194        if (is_vcpu_stopped(vcpu))
4195                return;
4196
4197        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4198        /* Only one cpu at a time may enter/leave the STOPPED state. */
4199        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4200        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4201
4202        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4203        kvm_s390_clear_stop_irq(vcpu);
4204
4205        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4206        __disable_ibs_on_vcpu(vcpu);
4207
4208        for (i = 0; i < online_vcpus; i++) {
4209                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4210                        started_vcpus++;
4211                        started_vcpu = vcpu->kvm->vcpus[i];
4212                }
4213        }
4214
4215        if (started_vcpus == 1) {
4216                /*
4217                 * As we only have one VCPU left, we want to enable the
4218                 * IBS facility for that VCPU to speed it up.
4219                 */
4220                __enable_ibs_on_vcpu(started_vcpu);
4221        }
4222
4223        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4224        return;
4225}
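
/*
 * Taken together, kvm_s390_vcpu_start() and kvm_s390_vcpu_stop() keep
 * the IBS facility enabled only while exactly one VCPU is started: the
 * last running VCPU gets IBS enabled to speed it up, and as soon as a
 * second VCPU is started IBS is disabled on all VCPUs again.
 */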
4226
4227static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4228                                     struct kvm_enable_cap *cap)
4229{
4230        int r;
4231
4232        if (cap->flags)
4233                return -EINVAL;
4234
4235        switch (cap->cap) {
4236        case KVM_CAP_S390_CSS_SUPPORT:
4237                if (!vcpu->kvm->arch.css_support) {
4238                        vcpu->kvm->arch.css_support = 1;
4239                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4240                        trace_kvm_s390_enable_css(vcpu->kvm);
4241                }
4242                r = 0;
4243                break;
4244        default:
4245                r = -EINVAL;
4246                break;
4247        }
4248        return r;
4249}
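
/*
 * Illustrative userspace sketch (assumes a vcpu fd and <linux/kvm.h>):
 * enabling KVM_CAP_S390_CSS_SUPPORT, the only capability accepted by
 * kvm_vcpu_ioctl_enable_cap() above, so that channel I/O instructions
 * are handled in userspace:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */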
4250
4251static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4252                                  struct kvm_s390_mem_op *mop)
4253{
4254        void __user *uaddr = (void __user *)mop->buf;
4255        void *tmpbuf = NULL;
4256        int r, srcu_idx;
4257        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4258                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
4259
4260        if (mop->flags & ~supported_flags)
4261                return -EINVAL;
4262
4263        if (mop->size > MEM_OP_MAX_SIZE)
4264                return -E2BIG;
4265
4266        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4267                tmpbuf = vmalloc(mop->size);
4268                if (!tmpbuf)
4269                        return -ENOMEM;
4270        }
4271
4272        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4273
4274        switch (mop->op) {
4275        case KVM_S390_MEMOP_LOGICAL_READ:
4276                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4277                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4278                                            mop->size, GACC_FETCH);
4279                        break;
4280                }
4281                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4282                if (r == 0) {
4283                        if (copy_to_user(uaddr, tmpbuf, mop->size))
4284                                r = -EFAULT;
4285                }
4286                break;
4287        case KVM_S390_MEMOP_LOGICAL_WRITE:
4288                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4289                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4290                                            mop->size, GACC_STORE);
4291                        break;
4292                }
4293                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4294                        r = -EFAULT;
4295                        break;
4296                }
4297                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4298                break;
4299        default:
4300                r = -EINVAL;
4301        }
4302
4303        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4304
4305        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4306                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4307
4308        vfree(tmpbuf);
4309        return r;
4310}
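
/*
 * Illustrative userspace sketch (assumes a vcpu fd and <linux/kvm.h>):
 * using the check-only mode of kvm_s390_guest_mem_op() above to test
 * whether a guest logical address range is readable without copying
 * any data:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar    = 0,
 *		.flags = KVM_S390_MEMOP_F_CHECK_ONLY,
 *	};
 *	int r = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * Dropping KVM_S390_MEMOP_F_CHECK_ONLY and pointing op.buf at a
 * userspace buffer makes the same call transfer up to MEM_OP_MAX_SIZE
 * bytes.
 */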
4311
4312long kvm_arch_vcpu_async_ioctl(struct file *filp,
4313                               unsigned int ioctl, unsigned long arg)
4314{
4315        struct kvm_vcpu *vcpu = filp->private_data;
4316        void __user *argp = (void __user *)arg;
4317
4318        switch (ioctl) {
4319        case KVM_S390_IRQ: {
4320                struct kvm_s390_irq s390irq;
4321
4322                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4323                        return -EFAULT;
4324                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4325        }
4326        case KVM_S390_INTERRUPT: {
4327                struct kvm_s390_interrupt s390int;
4328                struct kvm_s390_irq s390irq = {};
4329
4330                if (copy_from_user(&s390int, argp, sizeof(s390int)))
4331                        return -EFAULT;
4332                if (s390int_to_s390irq(&s390int, &s390irq))
4333                        return -EINVAL;
4334                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4335        }
4336        }
4337        return -ENOIOCTLCMD;
4338}
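
/*
 * Illustrative userspace sketch (assumes a vcpu fd and <linux/kvm.h>):
 * injecting a restart interrupt through the async KVM_S390_IRQ path
 * above; KVM_S390_RESTART carries no additional payload:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_RESTART,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
 *		perror("KVM_S390_IRQ");
 */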
4339
4340long kvm_arch_vcpu_ioctl(struct file *filp,
4341                         unsigned int ioctl, unsigned long arg)
4342{
4343        struct kvm_vcpu *vcpu = filp->private_data;
4344        void __user *argp = (void __user *)arg;
4345        int idx;
4346        long r;
4347
4348        vcpu_load(vcpu);
4349
4350        switch (ioctl) {
4351        case KVM_S390_STORE_STATUS:
4352                idx = srcu_read_lock(&vcpu->kvm->srcu);
4353                r = kvm_s390_vcpu_store_status(vcpu, arg);
4354                srcu_read_unlock(&vcpu->kvm->srcu, idx);
4355                break;
4356        case KVM_S390_SET_INITIAL_PSW: {
4357                psw_t psw;
4358
4359                r = -EFAULT;
4360                if (copy_from_user(&psw, argp, sizeof(psw)))
4361                        break;
4362                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4363                break;
4364        }
4365        case KVM_S390_INITIAL_RESET:
4366                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4367                break;
4368        case KVM_SET_ONE_REG:
4369        case KVM_GET_ONE_REG: {
4370                struct kvm_one_reg reg;
4371                r = -EFAULT;
4372                if (copy_from_user(&reg, argp, sizeof(reg)))
4373                        break;
4374                if (ioctl == KVM_SET_ONE_REG)
4375                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4376                else
4377                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4378                break;
4379        }
4380#ifdef CONFIG_KVM_S390_UCONTROL
4381        case KVM_S390_UCAS_MAP: {
4382                struct kvm_s390_ucas_mapping ucasmap;
4383
4384                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4385                        r = -EFAULT;
4386                        break;
4387                }
4388
4389                if (!kvm_is_ucontrol(vcpu->kvm)) {
4390                        r = -EINVAL;
4391                        break;
4392                }
4393
4394                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4395                                     ucasmap.vcpu_addr, ucasmap.length);
4396                break;
4397        }
4398        case KVM_S390_UCAS_UNMAP: {
4399                struct kvm_s390_ucas_mapping ucasmap;
4400
4401                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4402                        r = -EFAULT;
4403                        break;
4404                }
4405
4406                if (!kvm_is_ucontrol(vcpu->kvm)) {
4407                        r = -EINVAL;
4408                        break;
4409                }
4410
4411                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4412                        ucasmap.length);
4413                break;
4414        }
4415#endif
4416        case KVM_S390_VCPU_FAULT: {
4417                r = gmap_fault(vcpu->arch.gmap, arg, 0);
4418                break;
4419        }
4420        case KVM_ENABLE_CAP:
4421        {
4422                struct kvm_enable_cap cap;
4423                r = -EFAULT;
4424                if (copy_from_user(&cap, argp, sizeof(cap)))
4425                        break;
4426                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4427                break;
4428        }
4429        case KVM_S390_MEM_OP: {
4430                struct kvm_s390_mem_op mem_op;
4431
4432                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4433                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4434                else
4435                        r = -EFAULT;
4436                break;
4437        }
4438        case KVM_S390_SET_IRQ_STATE: {
4439                struct kvm_s390_irq_state irq_state;
4440
4441                r = -EFAULT;
4442                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4443                        break;
4444                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4445                    irq_state.len == 0 ||
4446                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4447                        r = -EINVAL;
4448                        break;
4449                }
4450                /* do not use irq_state.flags, it will break old QEMUs */
4451                r = kvm_s390_set_irq_state(vcpu,
4452                                           (void __user *) irq_state.buf,
4453                                           irq_state.len);
4454                break;
4455        }
4456        case KVM_S390_GET_IRQ_STATE: {
4457                struct kvm_s390_irq_state irq_state;
4458
4459                r = -EFAULT;
4460                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4461                        break;
4462                if (irq_state.len == 0) {
4463                        r = -EINVAL;
4464                        break;
4465                }
4466                /* do not use irq_state.flags, it will break old QEMUs */
4467                r = kvm_s390_get_irq_state(vcpu,
4468                                           (__u8 __user *)  irq_state.buf,
4469                                           irq_state.len);
4470                break;
4471        }
4472        default:
4473                r = -ENOTTY;
4474        }
4475
4476        vcpu_put(vcpu);
4477        return r;
4478}
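
/*
 * Illustrative userspace sketch (assumes a vcpu fd and <linux/kvm.h>):
 * saving the pending local interrupts via KVM_S390_GET_IRQ_STATE, as a
 * migration path would, into a buffer that is a multiple of
 * sizeof(struct kvm_s390_irq); a non-negative return value is the
 * number of bytes stored, and the same buffer can later be replayed
 * with KVM_S390_SET_IRQ_STATE:
 *
 *	struct kvm_s390_irq irqs[64];
 *	struct kvm_s390_irq_state state = {
 *		.buf = (__u64)(unsigned long)irqs,
 *		.len = sizeof(irqs),
 *	};
 *	int n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &state);
 */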
4479
4480vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4481{
4482#ifdef CONFIG_KVM_S390_UCONTROL
4483        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4484                 && (kvm_is_ucontrol(vcpu->kvm))) {
4485                vmf->page = virt_to_page(vcpu->arch.sie_block);
4486                get_page(vmf->page);
4487                return 0;
4488        }
4489#endif
4490        return VM_FAULT_SIGBUS;
4491}
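
/*
 * Illustrative userspace sketch (assumes a vcpu fd of a user-controlled
 * VM, <linux/kvm.h> and <sys/mman.h>): the fault handler above backs an
 * mmap() of the vcpu fd at page offset KVM_S390_SIE_PAGE_OFFSET with
 * the page holding the SIE control block:
 *
 *	void *sie_block = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			       MAP_SHARED, vcpu_fd,
 *			       KVM_S390_SIE_PAGE_OFFSET * 4096);
 */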
4492
4493int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4494                            unsigned long npages)
4495{
4496        return 0;
4497}
4498
4499/* Section: memory related */
4500int kvm_arch_prepare_memory_region(struct kvm *kvm,
4501                                   struct kvm_memory_slot *memslot,
4502                                   const struct kvm_userspace_memory_region *mem,
4503                                   enum kvm_mr_change change)
4504{
4505        /* A few sanity checks. Memory slots have to start and end on a
4506           segment boundary (1MB). The backing memory in userspace may be
4507           fragmented across several different vmas, and it is fine to mmap()
4508           and munmap() parts of this slot at any time after this call. */
4509
4510        if (mem->userspace_addr & 0xffffful)
4511                return -EINVAL;
4512
4513        if (mem->memory_size & 0xffffful)
4514                return -EINVAL;
4515
4516        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4517                return -EINVAL;
4518
4519        return 0;
4520}
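
/*
 * Illustrative userspace sketch (assumes a VM fd, <linux/kvm.h> and a
 * 1 MB aligned backing mapping "backing"): registering guest memory
 * that passes the checks above; userspace_addr and memory_size must be
 * multiples of 1 MB and the slot has to fit below the VM's memory
 * limit:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256UL << 20,
 *		.userspace_addr = (__u64)(unsigned long)backing,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *		perror("KVM_SET_USER_MEMORY_REGION");
 */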
4521
4522void kvm_arch_commit_memory_region(struct kvm *kvm,
4523                                const struct kvm_userspace_memory_region *mem,
4524                                const struct kvm_memory_slot *old,
4525                                const struct kvm_memory_slot *new,
4526                                enum kvm_mr_change change)
4527{
4528        int rc = 0;
4529
4530        switch (change) {
4531        case KVM_MR_DELETE:
4532                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4533                                        old->npages * PAGE_SIZE);
4534                break;
4535        case KVM_MR_MOVE:
4536                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4537                                        old->npages * PAGE_SIZE);
4538                if (rc)
4539                        break;
4540                /* FALLTHROUGH */
4541        case KVM_MR_CREATE:
4542                rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4543                                      mem->guest_phys_addr, mem->memory_size);
4544                break;
4545        case KVM_MR_FLAGS_ONLY:
4546                break;
4547        default:
4548                WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4549        }
4550        if (rc)
4551                pr_warn("failed to commit memory region\n");
4552        return;
4553}
4554
4555static inline unsigned long nonhyp_mask(int i)
4556{
4557        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4558
4559        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4560}
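
/*
 * nonhyp_mask() above extracts the 2-bit indication for facility
 * doubleword i from sclp.hmfai and clears another 16 facility bits per
 * indicator step, in addition to the 16 most significant bits that are
 * never passed through; kvm_s390_init() below uses the result to limit
 * which host STFLE facilities end up in kvm_s390_fac_base.
 */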
4561
4562void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4563{
4564        vcpu->valid_wakeup = false;
4565}
4566
4567static int __init kvm_s390_init(void)
4568{
4569        int i;
4570
4571        if (!sclp.has_sief2) {
4572                pr_info("SIE is not available\n");
4573                return -ENODEV;
4574        }
4575
4576        if (nested && hpage) {
4577                pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4578                return -EINVAL;
4579        }
4580
4581        for (i = 0; i < 16; i++)
4582                kvm_s390_fac_base[i] |=
4583                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4584
4585        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4586}
4587
4588static void __exit kvm_s390_exit(void)
4589{
4590        kvm_exit();
4591}
4592
4593module_init(kvm_s390_init);
4594module_exit(kvm_s390_exit);
4595
4596/*
4597 * Enable autoloading of the kvm module.
4598 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4599 * since x86 takes a different approach.
4600 */
4601#include <linux/miscdevice.h>
4602MODULE_ALIAS_MISCDEV(KVM_MINOR);
4603MODULE_ALIAS("devname:kvm");
4604