linux/arch/s390/kvm/kvm-s390.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * hosting IBM Z kernel virtual machines (s390x)
   4 *
   5 * Copyright IBM Corp. 2008, 2020
   6 *
   7 *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8 *               Christian Borntraeger <borntraeger@de.ibm.com>
   9 *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11 *               Jason J. Herne <jjherne@us.ibm.com>
  12 */
  13
  14#define KMSG_COMPONENT "kvm-s390"
  15#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17#include <linux/compiler.h>
  18#include <linux/err.h>
  19#include <linux/fs.h>
  20#include <linux/hrtimer.h>
  21#include <linux/init.h>
  22#include <linux/kvm.h>
  23#include <linux/kvm_host.h>
  24#include <linux/mman.h>
  25#include <linux/module.h>
  26#include <linux/moduleparam.h>
  27#include <linux/random.h>
  28#include <linux/slab.h>
  29#include <linux/timer.h>
  30#include <linux/vmalloc.h>
  31#include <linux/bitmap.h>
  32#include <linux/sched/signal.h>
  33#include <linux/string.h>
  34#include <linux/pgtable.h>
  35
  36#include <asm/asm-offsets.h>
  37#include <asm/lowcore.h>
  38#include <asm/stp.h>
  39#include <asm/gmap.h>
  40#include <asm/nmi.h>
  41#include <asm/switch_to.h>
  42#include <asm/isc.h>
  43#include <asm/sclp.h>
  44#include <asm/cpacf.h>
  45#include <asm/timex.h>
  46#include <asm/ap.h>
  47#include <asm/uv.h>
  48#include <asm/fpu/api.h>
  49#include "kvm-s390.h"
  50#include "gaccess.h"
  51
  52#define CREATE_TRACE_POINTS
  53#include "trace.h"
  54#include "trace-s390.h"
  55
  56#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57#define LOCAL_IRQS 32
  58#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
  62        KVM_GENERIC_VM_STATS(),
  63        STATS_DESC_COUNTER(VM, inject_io),
  64        STATS_DESC_COUNTER(VM, inject_float_mchk),
  65        STATS_DESC_COUNTER(VM, inject_pfault_done),
  66        STATS_DESC_COUNTER(VM, inject_service_signal),
  67        STATS_DESC_COUNTER(VM, inject_virtio)
  68};
  69static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
  70                sizeof(struct kvm_vm_stat) / sizeof(u64));
  71
  72const struct kvm_stats_header kvm_vm_stats_header = {
  73        .name_size = KVM_STATS_NAME_SIZE,
  74        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
  75        .id_offset = sizeof(struct kvm_stats_header),
  76        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
  77        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
  78                       sizeof(kvm_vm_stats_desc),
  79};
  80
  81const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
  82        KVM_GENERIC_VCPU_STATS(),
  83        STATS_DESC_COUNTER(VCPU, exit_userspace),
  84        STATS_DESC_COUNTER(VCPU, exit_null),
  85        STATS_DESC_COUNTER(VCPU, exit_external_request),
  86        STATS_DESC_COUNTER(VCPU, exit_io_request),
  87        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
  88        STATS_DESC_COUNTER(VCPU, exit_stop_request),
  89        STATS_DESC_COUNTER(VCPU, exit_validity),
  90        STATS_DESC_COUNTER(VCPU, exit_instruction),
  91        STATS_DESC_COUNTER(VCPU, exit_pei),
  92        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
  93        STATS_DESC_COUNTER(VCPU, instruction_lctl),
  94        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
  95        STATS_DESC_COUNTER(VCPU, instruction_stctl),
  96        STATS_DESC_COUNTER(VCPU, instruction_stctg),
  97        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
  98        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
  99        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
 100        STATS_DESC_COUNTER(VCPU, deliver_ckc),
 101        STATS_DESC_COUNTER(VCPU, deliver_cputm),
 102        STATS_DESC_COUNTER(VCPU, deliver_external_call),
 103        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
 104        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
 105        STATS_DESC_COUNTER(VCPU, deliver_virtio),
 106        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
 107        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
 108        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
 109        STATS_DESC_COUNTER(VCPU, deliver_program),
 110        STATS_DESC_COUNTER(VCPU, deliver_io),
 111        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
 112        STATS_DESC_COUNTER(VCPU, exit_wait_state),
 113        STATS_DESC_COUNTER(VCPU, inject_ckc),
 114        STATS_DESC_COUNTER(VCPU, inject_cputm),
 115        STATS_DESC_COUNTER(VCPU, inject_external_call),
 116        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
 117        STATS_DESC_COUNTER(VCPU, inject_mchk),
 118        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
 119        STATS_DESC_COUNTER(VCPU, inject_program),
 120        STATS_DESC_COUNTER(VCPU, inject_restart),
 121        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
 122        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
 123        STATS_DESC_COUNTER(VCPU, instruction_epsw),
 124        STATS_DESC_COUNTER(VCPU, instruction_gs),
 125        STATS_DESC_COUNTER(VCPU, instruction_io_other),
 126        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 127        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
 128        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 129        STATS_DESC_COUNTER(VCPU, instruction_ptff),
 130        STATS_DESC_COUNTER(VCPU, instruction_sck),
 131        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
 132        STATS_DESC_COUNTER(VCPU, instruction_stidp),
 133        STATS_DESC_COUNTER(VCPU, instruction_spx),
 134        STATS_DESC_COUNTER(VCPU, instruction_stpx),
 135        STATS_DESC_COUNTER(VCPU, instruction_stap),
 136        STATS_DESC_COUNTER(VCPU, instruction_iske),
 137        STATS_DESC_COUNTER(VCPU, instruction_ri),
 138        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
 139        STATS_DESC_COUNTER(VCPU, instruction_sske),
 140        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
 141        STATS_DESC_COUNTER(VCPU, instruction_stsi),
 142        STATS_DESC_COUNTER(VCPU, instruction_stfl),
 143        STATS_DESC_COUNTER(VCPU, instruction_tb),
 144        STATS_DESC_COUNTER(VCPU, instruction_tpi),
 145        STATS_DESC_COUNTER(VCPU, instruction_tprot),
 146        STATS_DESC_COUNTER(VCPU, instruction_tsch),
 147        STATS_DESC_COUNTER(VCPU, instruction_sie),
 148        STATS_DESC_COUNTER(VCPU, instruction_essa),
 149        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
 150        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
 151        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
 152        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
 153        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
 154        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
 155        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
 156        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
 157        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
 158        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
 159        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
 160        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
 161        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
 162        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
 163        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 164        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 165        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
 166        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
 167        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
 168        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
 169        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
 170        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
 171        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
 172        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 173        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 174        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 175        STATS_DESC_COUNTER(VCPU, pfault_sync)
 176};
 177static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
 178                sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 179
 180const struct kvm_stats_header kvm_vcpu_stats_header = {
 181        .name_size = KVM_STATS_NAME_SIZE,
 182        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
 183        .id_offset = sizeof(struct kvm_stats_header),
 184        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
 185        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
 186                       sizeof(kvm_vcpu_stats_desc),
 187};
 188
 189/* allow nested virtualization in KVM (if enabled by user space) */
 190static int nested;
 191module_param(nested, int, S_IRUGO);
 192MODULE_PARM_DESC(nested, "Nested virtualization support");
 193
 194/* allow 1m huge page guest backing, if !nested */
 195static int hpage;
 196module_param(hpage, int, 0444);
 197MODULE_PARM_DESC(hpage, "1m huge page backing support");
 198
 199/* maximum percentage of steal time for polling.  >100 is treated like 100 */
 200static u8 halt_poll_max_steal = 10;
 201module_param(halt_poll_max_steal, byte, 0644);
 202MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 203
 204/* if set to true, the GISA will be initialized and used if available */
 205static bool use_gisa  = true;
 206module_param(use_gisa, bool, 0644);
 207MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 208
 209/* maximum diag9c forwarding per second */
 210unsigned int diag9c_forwarding_hz;
 211module_param(diag9c_forwarding_hz, uint, 0644);
 212MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 213
 214/*
 215 * For now we handle at most 16 double words as this is what the s390 base
 216 * kernel handles and stores in the prefix page. If we ever need to go beyond
  217 * this, it will require code changes, but the external uapi can stay.
 218 */
 219#define SIZE_INTERNAL 16
 220
 221/*
  222 * Base feature mask that defines the default facility mask. Consists of the
 223 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 224 */
 225static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 226/*
 227 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 228 * and defines the facilities that can be enabled via a cpu model.
 229 */
 230static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 231
 232static unsigned long kvm_s390_fac_size(void)
 233{
 234        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 235        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 236        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 237                sizeof(stfle_fac_list));
 238
 239        return SIZE_INTERNAL;
 240}
 241
 242/* available cpu features supported by kvm */
 243static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 244/* available subfunctions indicated via query / "test bit" */
 245static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 246
 247static struct gmap_notifier gmap_notifier;
 248static struct gmap_notifier vsie_gmap_notifier;
 249debug_info_t *kvm_s390_dbf;
 250debug_info_t *kvm_s390_dbf_uv;
 251
 252/* Section: not file related */
 253int kvm_arch_hardware_enable(void)
 254{
 255        /* every s390 is virtualization enabled ;-) */
 256        return 0;
 257}
 258
 259int kvm_arch_check_processor_compat(void *opaque)
 260{
 261        return 0;
 262}
 263
 264/* forward declarations */
 265static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 266                              unsigned long end);
 267static int sca_switch_to_extended(struct kvm *kvm);
 268
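     /*
      * Apply a host TOD clock delta to one SIE control block: the epoch is
      * shifted by -delta so that the guest-visible TOD stays unchanged. When
      * the multiple-epoch facility is in use (ECD_MEF), the change is carried
      * into the epoch index (epdx) as well.
      */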
 269static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 270{
 271        u8 delta_idx = 0;
 272
 273        /*
  274         * The TOD jumps by delta; we have to compensate for this by adding
 275         * -delta to the epoch.
 276         */
 277        delta = -delta;
 278
 279        /* sign-extension - we're adding to signed values below */
 280        if ((s64)delta < 0)
 281                delta_idx = -1;
 282
 283        scb->epoch += delta;
 284        if (scb->ecd & ECD_MEF) {
 285                scb->epdx += delta_idx;
 286                if (scb->epoch < delta)
 287                        scb->epdx += 1;
 288        }
 289}
 290
 291/*
 292 * This callback is executed during stop_machine(). All CPUs are therefore
 293 * temporarily stopped. In order not to change guest behavior, we have to
 294 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 295 * so a CPU won't be stopped while calculating with the epoch.
 296 */
 297static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 298                          void *v)
 299{
 300        struct kvm *kvm;
 301        struct kvm_vcpu *vcpu;
 302        int i;
 303        unsigned long long *delta = v;
 304
 305        list_for_each_entry(kvm, &vm_list, vm_list) {
 306                kvm_for_each_vcpu(i, vcpu, kvm) {
 307                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 308                        if (i == 0) {
 309                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 310                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 311                        }
 312                        if (vcpu->arch.cputm_enabled)
 313                                vcpu->arch.cputm_start += *delta;
 314                        if (vcpu->arch.vsie_block)
 315                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
 316                                                   *delta);
 317                }
 318        }
 319        return NOTIFY_OK;
 320}
 321
 322static struct notifier_block kvm_clock_notifier = {
 323        .notifier_call = kvm_clock_sync,
 324};
 325
 326int kvm_arch_hardware_setup(void *opaque)
 327{
 328        gmap_notifier.notifier_call = kvm_gmap_notifier;
 329        gmap_register_pte_notifier(&gmap_notifier);
 330        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 331        gmap_register_pte_notifier(&vsie_gmap_notifier);
 332        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 333                                       &kvm_clock_notifier);
 334        return 0;
 335}
 336
 337void kvm_arch_hardware_unsetup(void)
 338{
 339        gmap_unregister_pte_notifier(&gmap_notifier);
 340        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 341        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 342                                         &kvm_clock_notifier);
 343}
 344
 345static void allow_cpu_feat(unsigned long nr)
 346{
 347        set_bit_inv(nr, kvm_s390_available_cpu_feat);
 348}
 349
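     /*
      * Test whether the PERFORM LOCKED OPERATION function @nr is available.
      * Setting bit 0x100 in the function code turns the operation into a
      * "test bit" query that only sets the condition code; cc 0 means the
      * function is installed.
      */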
 350static inline int plo_test_bit(unsigned char nr)
 351{
 352        unsigned long function = (unsigned long)nr | 0x100;
 353        int cc;
 354
 355        asm volatile(
 356                "       lgr     0,%[function]\n"
 357                /* Parameter registers are ignored for "test bit" */
 358                "       plo     0,0,0,0(0)\n"
 359                "       ipm     %0\n"
 360                "       srl     %0,28\n"
 361                : "=d" (cc)
 362                : [function] "d" (function)
 363                : "cc", "0");
 364        return cc == 0;
 365}
 366
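     /*
      * Execute the query function of the instruction identified by @opcode
      * (general register 0 is cleared, which selects the query for these
      * instructions) and store the resulting parameter block at @query.
      * Used below for SORTL and DFLTCC.
      */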
 367static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 368{
 369        asm volatile(
 370                "       lghi    0,0\n"
 371                "       lgr     1,%[query]\n"
 372                /* Parameter registers are ignored */
 373                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 374                :
 375                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
 376                : "cc", "memory", "0", "1");
 377}
 378
 379#define INSN_SORTL 0xb938
 380#define INSN_DFLTCC 0xb939
 381
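     /*
      * Probe the host for the subfunctions (PLO, PTFF and the CPACF, SORTL
      * and DFLTCC queries) and the CPU features that can be offered to
      * guests. The SIE/vSIE related features are only made available when
      * the prerequisites (SIEF2, ESOP, 64-bit SCAO, IDTE) are present and
      * the "nested" module parameter is set.
      */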
 382static void kvm_s390_cpu_feat_init(void)
 383{
 384        int i;
 385
 386        for (i = 0; i < 256; ++i) {
 387                if (plo_test_bit(i))
 388                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 389        }
 390
 391        if (test_facility(28)) /* TOD-clock steering */
 392                ptff(kvm_s390_available_subfunc.ptff,
 393                     sizeof(kvm_s390_available_subfunc.ptff),
 394                     PTFF_QAF);
 395
 396        if (test_facility(17)) { /* MSA */
 397                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 398                              kvm_s390_available_subfunc.kmac);
 399                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 400                              kvm_s390_available_subfunc.kmc);
 401                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 402                              kvm_s390_available_subfunc.km);
 403                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 404                              kvm_s390_available_subfunc.kimd);
 405                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 406                              kvm_s390_available_subfunc.klmd);
 407        }
 408        if (test_facility(76)) /* MSA3 */
 409                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 410                              kvm_s390_available_subfunc.pckmo);
 411        if (test_facility(77)) { /* MSA4 */
 412                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 413                              kvm_s390_available_subfunc.kmctr);
 414                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 415                              kvm_s390_available_subfunc.kmf);
 416                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 417                              kvm_s390_available_subfunc.kmo);
 418                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 419                              kvm_s390_available_subfunc.pcc);
 420        }
 421        if (test_facility(57)) /* MSA5 */
 422                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 423                              kvm_s390_available_subfunc.ppno);
 424
 425        if (test_facility(146)) /* MSA8 */
 426                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 427                              kvm_s390_available_subfunc.kma);
 428
 429        if (test_facility(155)) /* MSA9 */
 430                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 431                              kvm_s390_available_subfunc.kdsa);
 432
 433        if (test_facility(150)) /* SORTL */
 434                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 435
 436        if (test_facility(151)) /* DFLTCC */
 437                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 438
 439        if (MACHINE_HAS_ESOP)
 440                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 441        /*
 442         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 443         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 444         */
 445        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 446            !test_facility(3) || !nested)
 447                return;
 448        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 449        if (sclp.has_64bscao)
 450                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 451        if (sclp.has_siif)
 452                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 453        if (sclp.has_gpere)
 454                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 455        if (sclp.has_gsls)
 456                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 457        if (sclp.has_ib)
 458                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 459        if (sclp.has_cei)
 460                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 461        if (sclp.has_ibs)
 462                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 463        if (sclp.has_kss)
 464                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 465        /*
 466         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 467         * all skey handling functions read/set the skey from the PGSTE
 468         * instead of the real storage key.
 469         *
 470         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 471         * pages being detected as preserved although they are resident.
 472         *
 473         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 474         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 475         *
 476         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 477         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 478         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 479         *
 480         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 481         * cannot easily shadow the SCA because of the ipte lock.
 482         */
 483}
 484
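     /*
      * Module/arch initialization: set up the "kvm-trace" and "kvm-uv" s390
      * debug feature areas, probe CPU features, register the FLIC device ops
      * and initialize the GIB. On any failure the partial setup is rolled
      * back via kvm_arch_exit().
      */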
 485int kvm_arch_init(void *opaque)
 486{
 487        int rc = -ENOMEM;
 488
 489        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 490        if (!kvm_s390_dbf)
 491                return -ENOMEM;
 492
 493        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 494        if (!kvm_s390_dbf_uv)
 495                goto out;
 496
 497        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 498            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 499                goto out;
 500
 501        kvm_s390_cpu_feat_init();
 502
 503        /* Register floating interrupt controller interface. */
 504        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 505        if (rc) {
 506                pr_err("A FLIC registration call failed with rc=%d\n", rc);
 507                goto out;
 508        }
 509
 510        rc = kvm_s390_gib_init(GAL_ISC);
 511        if (rc)
 512                goto out;
 513
 514        return 0;
 515
 516out:
 517        kvm_arch_exit();
 518        return rc;
 519}
 520
 521void kvm_arch_exit(void)
 522{
 523        kvm_s390_gib_destroy();
 524        debug_unregister(kvm_s390_dbf);
 525        debug_unregister(kvm_s390_dbf_uv);
 526}
 527
 528/* Section: device related */
 529long kvm_arch_dev_ioctl(struct file *filp,
 530                        unsigned int ioctl, unsigned long arg)
 531{
 532        if (ioctl == KVM_S390_ENABLE_SIE)
 533                return s390_enable_sie();
 534        return -EINVAL;
 535}
 536
 537int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 538{
 539        int r;
 540
 541        switch (ext) {
 542        case KVM_CAP_S390_PSW:
 543        case KVM_CAP_S390_GMAP:
 544        case KVM_CAP_SYNC_MMU:
 545#ifdef CONFIG_KVM_S390_UCONTROL
 546        case KVM_CAP_S390_UCONTROL:
 547#endif
 548        case KVM_CAP_ASYNC_PF:
 549        case KVM_CAP_SYNC_REGS:
 550        case KVM_CAP_ONE_REG:
 551        case KVM_CAP_ENABLE_CAP:
 552        case KVM_CAP_S390_CSS_SUPPORT:
 553        case KVM_CAP_IOEVENTFD:
 554        case KVM_CAP_DEVICE_CTRL:
 555        case KVM_CAP_S390_IRQCHIP:
 556        case KVM_CAP_VM_ATTRIBUTES:
 557        case KVM_CAP_MP_STATE:
 558        case KVM_CAP_IMMEDIATE_EXIT:
 559        case KVM_CAP_S390_INJECT_IRQ:
 560        case KVM_CAP_S390_USER_SIGP:
 561        case KVM_CAP_S390_USER_STSI:
 562        case KVM_CAP_S390_SKEYS:
 563        case KVM_CAP_S390_IRQ_STATE:
 564        case KVM_CAP_S390_USER_INSTR0:
 565        case KVM_CAP_S390_CMMA_MIGRATION:
 566        case KVM_CAP_S390_AIS:
 567        case KVM_CAP_S390_AIS_MIGRATION:
 568        case KVM_CAP_S390_VCPU_RESETS:
 569        case KVM_CAP_SET_GUEST_DEBUG:
 570        case KVM_CAP_S390_DIAG318:
 571                r = 1;
 572                break;
 573        case KVM_CAP_SET_GUEST_DEBUG2:
 574                r = KVM_GUESTDBG_VALID_MASK;
 575                break;
 576        case KVM_CAP_S390_HPAGE_1M:
 577                r = 0;
 578                if (hpage && !kvm_is_ucontrol(kvm))
 579                        r = 1;
 580                break;
 581        case KVM_CAP_S390_MEM_OP:
 582                r = MEM_OP_MAX_SIZE;
 583                break;
 584        case KVM_CAP_NR_VCPUS:
 585        case KVM_CAP_MAX_VCPUS:
 586        case KVM_CAP_MAX_VCPU_ID:
 587                r = KVM_S390_BSCA_CPU_SLOTS;
 588                if (!kvm_s390_use_sca_entries())
 589                        r = KVM_MAX_VCPUS;
 590                else if (sclp.has_esca && sclp.has_64bscao)
 591                        r = KVM_S390_ESCA_CPU_SLOTS;
 592                break;
 593        case KVM_CAP_S390_COW:
 594                r = MACHINE_HAS_ESOP;
 595                break;
 596        case KVM_CAP_S390_VECTOR_REGISTERS:
 597                r = MACHINE_HAS_VX;
 598                break;
 599        case KVM_CAP_S390_RI:
 600                r = test_facility(64);
 601                break;
 602        case KVM_CAP_S390_GS:
 603                r = test_facility(133);
 604                break;
 605        case KVM_CAP_S390_BPB:
 606                r = test_facility(82);
 607                break;
 608        case KVM_CAP_S390_PROTECTED:
 609                r = is_prot_virt_host();
 610                break;
 611        default:
 612                r = 0;
 613        }
 614        return r;
 615}
 616
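     /*
      * Transfer the dirty state from the gmap into the KVM dirty bitmap of
      * @memslot. The slot is scanned in segment-sized (_PAGE_ENTRIES pages)
      * steps; gmap_sync_dirty_log_pmd() fills a temporary bitmap that is
      * then folded into the memslot via mark_page_dirty().
      */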
 617void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 618{
 619        int i;
 620        gfn_t cur_gfn, last_gfn;
 621        unsigned long gaddr, vmaddr;
 622        struct gmap *gmap = kvm->arch.gmap;
 623        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 624
 625        /* Loop over all guest segments */
 626        cur_gfn = memslot->base_gfn;
 627        last_gfn = memslot->base_gfn + memslot->npages;
 628        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 629                gaddr = gfn_to_gpa(cur_gfn);
 630                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 631                if (kvm_is_error_hva(vmaddr))
 632                        continue;
 633
 634                bitmap_zero(bitmap, _PAGE_ENTRIES);
 635                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 636                for (i = 0; i < _PAGE_ENTRIES; i++) {
 637                        if (test_bit(i, bitmap))
 638                                mark_page_dirty(kvm, cur_gfn + i);
 639                }
 640
 641                if (fatal_signal_pending(current))
 642                        return;
 643                cond_resched();
 644        }
 645}
 646
 647/* Section: vm related */
 648static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 649
 650/*
 651 * Get (and clear) the dirty memory log for a memory slot.
 652 */
 653int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 654                               struct kvm_dirty_log *log)
 655{
 656        int r;
 657        unsigned long n;
 658        struct kvm_memory_slot *memslot;
 659        int is_dirty;
 660
 661        if (kvm_is_ucontrol(kvm))
 662                return -EINVAL;
 663
 664        mutex_lock(&kvm->slots_lock);
 665
 666        r = -EINVAL;
 667        if (log->slot >= KVM_USER_MEM_SLOTS)
 668                goto out;
 669
 670        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 671        if (r)
 672                goto out;
 673
 674        /* Clear the dirty log */
 675        if (is_dirty) {
 676                n = kvm_dirty_bitmap_bytes(memslot);
 677                memset(memslot->dirty_bitmap, 0, n);
 678        }
 679        r = 0;
 680out:
 681        mutex_unlock(&kvm->slots_lock);
 682        return r;
 683}
 684
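     /*
      * Request that every vcpu enables interception of operation exceptions,
      * e.g. when KVM_CAP_S390_USER_INSTR0 hands the instruction with opcode 0
      * to user space.
      */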
 685static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 686{
 687        unsigned int i;
 688        struct kvm_vcpu *vcpu;
 689
 690        kvm_for_each_vcpu(i, vcpu, kvm) {
 691                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 692        }
 693}
 694
 695int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 696{
 697        int r;
 698
 699        if (cap->flags)
 700                return -EINVAL;
 701
 702        switch (cap->cap) {
 703        case KVM_CAP_S390_IRQCHIP:
 704                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 705                kvm->arch.use_irqchip = 1;
 706                r = 0;
 707                break;
 708        case KVM_CAP_S390_USER_SIGP:
 709                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 710                kvm->arch.user_sigp = 1;
 711                r = 0;
 712                break;
 713        case KVM_CAP_S390_VECTOR_REGISTERS:
 714                mutex_lock(&kvm->lock);
 715                if (kvm->created_vcpus) {
 716                        r = -EBUSY;
 717                } else if (MACHINE_HAS_VX) {
 718                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
 719                        set_kvm_facility(kvm->arch.model.fac_list, 129);
 720                        if (test_facility(134)) {
 721                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
 722                                set_kvm_facility(kvm->arch.model.fac_list, 134);
 723                        }
 724                        if (test_facility(135)) {
 725                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
 726                                set_kvm_facility(kvm->arch.model.fac_list, 135);
 727                        }
 728                        if (test_facility(148)) {
 729                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
 730                                set_kvm_facility(kvm->arch.model.fac_list, 148);
 731                        }
 732                        if (test_facility(152)) {
 733                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
 734                                set_kvm_facility(kvm->arch.model.fac_list, 152);
 735                        }
 736                        if (test_facility(192)) {
 737                                set_kvm_facility(kvm->arch.model.fac_mask, 192);
 738                                set_kvm_facility(kvm->arch.model.fac_list, 192);
 739                        }
 740                        r = 0;
 741                } else
 742                        r = -EINVAL;
 743                mutex_unlock(&kvm->lock);
 744                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 745                         r ? "(not available)" : "(success)");
 746                break;
 747        case KVM_CAP_S390_RI:
 748                r = -EINVAL;
 749                mutex_lock(&kvm->lock);
 750                if (kvm->created_vcpus) {
 751                        r = -EBUSY;
 752                } else if (test_facility(64)) {
 753                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
 754                        set_kvm_facility(kvm->arch.model.fac_list, 64);
 755                        r = 0;
 756                }
 757                mutex_unlock(&kvm->lock);
 758                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 759                         r ? "(not available)" : "(success)");
 760                break;
 761        case KVM_CAP_S390_AIS:
 762                mutex_lock(&kvm->lock);
 763                if (kvm->created_vcpus) {
 764                        r = -EBUSY;
 765                } else {
 766                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
 767                        set_kvm_facility(kvm->arch.model.fac_list, 72);
 768                        r = 0;
 769                }
 770                mutex_unlock(&kvm->lock);
 771                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 772                         r ? "(not available)" : "(success)");
 773                break;
 774        case KVM_CAP_S390_GS:
 775                r = -EINVAL;
 776                mutex_lock(&kvm->lock);
 777                if (kvm->created_vcpus) {
 778                        r = -EBUSY;
 779                } else if (test_facility(133)) {
 780                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
 781                        set_kvm_facility(kvm->arch.model.fac_list, 133);
 782                        r = 0;
 783                }
 784                mutex_unlock(&kvm->lock);
 785                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 786                         r ? "(not available)" : "(success)");
 787                break;
 788        case KVM_CAP_S390_HPAGE_1M:
 789                mutex_lock(&kvm->lock);
 790                if (kvm->created_vcpus)
 791                        r = -EBUSY;
 792                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 793                        r = -EINVAL;
 794                else {
 795                        r = 0;
 796                        mmap_write_lock(kvm->mm);
 797                        kvm->mm->context.allow_gmap_hpage_1m = 1;
 798                        mmap_write_unlock(kvm->mm);
 799                        /*
 800                         * We might have to create fake 4k page
  801                         * tables. To prevent the hardware from working on
 802                         * stale PGSTEs, we emulate these instructions.
 803                         */
 804                        kvm->arch.use_skf = 0;
 805                        kvm->arch.use_pfmfi = 0;
 806                }
 807                mutex_unlock(&kvm->lock);
 808                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 809                         r ? "(not available)" : "(success)");
 810                break;
 811        case KVM_CAP_S390_USER_STSI:
 812                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 813                kvm->arch.user_stsi = 1;
 814                r = 0;
 815                break;
 816        case KVM_CAP_S390_USER_INSTR0:
 817                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 818                kvm->arch.user_instr0 = 1;
 819                icpt_operexc_on_all_vcpus(kvm);
 820                r = 0;
 821                break;
 822        default:
 823                r = -EINVAL;
 824                break;
 825        }
 826        return r;
 827}
 828
 829static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 830{
 831        int ret;
 832
 833        switch (attr->attr) {
 834        case KVM_S390_VM_MEM_LIMIT_SIZE:
 835                ret = 0;
 836                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 837                         kvm->arch.mem_limit);
 838                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 839                        ret = -EFAULT;
 840                break;
 841        default:
 842                ret = -ENXIO;
 843                break;
 844        }
 845        return ret;
 846}
 847
 848static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 849{
 850        int ret;
 851        unsigned int idx;
 852        switch (attr->attr) {
 853        case KVM_S390_VM_MEM_ENABLE_CMMA:
 854                ret = -ENXIO;
 855                if (!sclp.has_cmma)
 856                        break;
 857
 858                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 859                mutex_lock(&kvm->lock);
 860                if (kvm->created_vcpus)
 861                        ret = -EBUSY;
 862                else if (kvm->mm->context.allow_gmap_hpage_1m)
 863                        ret = -EINVAL;
 864                else {
 865                        kvm->arch.use_cmma = 1;
 866                        /* Not compatible with cmma. */
 867                        kvm->arch.use_pfmfi = 0;
 868                        ret = 0;
 869                }
 870                mutex_unlock(&kvm->lock);
 871                break;
 872        case KVM_S390_VM_MEM_CLR_CMMA:
 873                ret = -ENXIO;
 874                if (!sclp.has_cmma)
 875                        break;
 876                ret = -EINVAL;
 877                if (!kvm->arch.use_cmma)
 878                        break;
 879
 880                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 881                mutex_lock(&kvm->lock);
 882                idx = srcu_read_lock(&kvm->srcu);
 883                s390_reset_cmma(kvm->arch.gmap->mm);
 884                srcu_read_unlock(&kvm->srcu, idx);
 885                mutex_unlock(&kvm->lock);
 886                ret = 0;
 887                break;
 888        case KVM_S390_VM_MEM_LIMIT_SIZE: {
 889                unsigned long new_limit;
 890
 891                if (kvm_is_ucontrol(kvm))
 892                        return -EINVAL;
 893
 894                if (get_user(new_limit, (u64 __user *)attr->addr))
 895                        return -EFAULT;
 896
 897                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 898                    new_limit > kvm->arch.mem_limit)
 899                        return -E2BIG;
 900
 901                if (!new_limit)
 902                        return -EINVAL;
 903
 904                /* gmap_create takes last usable address */
 905                if (new_limit != KVM_S390_NO_MEM_LIMIT)
 906                        new_limit -= 1;
 907
 908                ret = -EBUSY;
 909                mutex_lock(&kvm->lock);
 910                if (!kvm->created_vcpus) {
 911                        /* gmap_create will round the limit up */
 912                        struct gmap *new = gmap_create(current->mm, new_limit);
 913
 914                        if (!new) {
 915                                ret = -ENOMEM;
 916                        } else {
 917                                gmap_remove(kvm->arch.gmap);
 918                                new->private = kvm;
 919                                kvm->arch.gmap = new;
 920                                ret = 0;
 921                        }
 922                }
 923                mutex_unlock(&kvm->lock);
 924                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 925                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 926                         (void *) kvm->arch.gmap->asce);
 927                break;
 928        }
 929        default:
 930                ret = -ENXIO;
 931                break;
 932        }
 933        return ret;
 934}
 935
 936static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 937
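     /*
      * Rebuild the crypto configuration of all vcpus after a VM-wide crypto
      * attribute change: block every vcpu, redo its crypto setup, force the
      * vsie shadow crycb to be recreated, and unblock again.
      */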
 938void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 939{
 940        struct kvm_vcpu *vcpu;
 941        int i;
 942
 943        kvm_s390_vcpu_block_all(kvm);
 944
 945        kvm_for_each_vcpu(i, vcpu, kvm) {
 946                kvm_s390_vcpu_crypto_setup(vcpu);
 947                /* recreate the shadow crycb by leaving the VSIE handler */
 948                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 949        }
 950
 951        kvm_s390_vcpu_unblock_all(kvm);
 952}
 953
 954static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 955{
 956        mutex_lock(&kvm->lock);
 957        switch (attr->attr) {
 958        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 959                if (!test_kvm_facility(kvm, 76)) {
 960                        mutex_unlock(&kvm->lock);
 961                        return -EINVAL;
 962                }
 963                get_random_bytes(
 964                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 965                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 966                kvm->arch.crypto.aes_kw = 1;
 967                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 968                break;
 969        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 970                if (!test_kvm_facility(kvm, 76)) {
 971                        mutex_unlock(&kvm->lock);
 972                        return -EINVAL;
 973                }
 974                get_random_bytes(
 975                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 976                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 977                kvm->arch.crypto.dea_kw = 1;
 978                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 979                break;
 980        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 981                if (!test_kvm_facility(kvm, 76)) {
 982                        mutex_unlock(&kvm->lock);
 983                        return -EINVAL;
 984                }
 985                kvm->arch.crypto.aes_kw = 0;
 986                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 987                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 988                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 989                break;
 990        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 991                if (!test_kvm_facility(kvm, 76)) {
 992                        mutex_unlock(&kvm->lock);
 993                        return -EINVAL;
 994                }
 995                kvm->arch.crypto.dea_kw = 0;
 996                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 997                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 998                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 999                break;
1000        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1001                if (!ap_instructions_available()) {
1002                        mutex_unlock(&kvm->lock);
1003                        return -EOPNOTSUPP;
1004                }
1005                kvm->arch.crypto.apie = 1;
1006                break;
1007        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1008                if (!ap_instructions_available()) {
1009                        mutex_unlock(&kvm->lock);
1010                        return -EOPNOTSUPP;
1011                }
1012                kvm->arch.crypto.apie = 0;
1013                break;
1014        default:
1015                mutex_unlock(&kvm->lock);
1016                return -ENXIO;
1017        }
1018
1019        kvm_s390_vcpu_crypto_reset_all(kvm);
1020        mutex_unlock(&kvm->lock);
1021        return 0;
1022}
1023
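     /* Make the given synchronous request pending on every vcpu of the VM. */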
1024static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1025{
1026        int cx;
1027        struct kvm_vcpu *vcpu;
1028
1029        kvm_for_each_vcpu(cx, vcpu, kvm)
1030                kvm_s390_sync_request(req, vcpu);
1031}
1032
1033/*
1034 * Must be called with kvm->srcu held to avoid races on memslots, and with
1035 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1036 */
1037static int kvm_s390_vm_start_migration(struct kvm *kvm)
1038{
1039        struct kvm_memory_slot *ms;
1040        struct kvm_memslots *slots;
1041        unsigned long ram_pages = 0;
1042        int slotnr;
1043
1044        /* migration mode already enabled */
1045        if (kvm->arch.migration_mode)
1046                return 0;
1047        slots = kvm_memslots(kvm);
1048        if (!slots || !slots->used_slots)
1049                return -EINVAL;
1050
1051        if (!kvm->arch.use_cmma) {
1052                kvm->arch.migration_mode = 1;
1053                return 0;
1054        }
1055        /* mark all the pages in active slots as dirty */
1056        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1057                ms = slots->memslots + slotnr;
1058                if (!ms->dirty_bitmap)
1059                        return -EINVAL;
1060                /*
1061                 * The second half of the bitmap is only used on x86,
1062                 * and would be wasted otherwise, so we put it to good
1063                 * use here to keep track of the state of the storage
1064                 * attributes.
1065                 */
1066                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1067                ram_pages += ms->npages;
1068        }
1069        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1070        kvm->arch.migration_mode = 1;
1071        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1072        return 0;
1073}
1074
1075/*
1076 * Must be called with kvm->slots_lock to avoid races with ourselves and
1077 * kvm_s390_vm_start_migration.
1078 */
1079static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1080{
1081        /* migration mode already disabled */
1082        if (!kvm->arch.migration_mode)
1083                return 0;
1084        kvm->arch.migration_mode = 0;
1085        if (kvm->arch.use_cmma)
1086                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1087        return 0;
1088}
1089
1090static int kvm_s390_vm_set_migration(struct kvm *kvm,
1091                                     struct kvm_device_attr *attr)
1092{
1093        int res = -ENXIO;
1094
1095        mutex_lock(&kvm->slots_lock);
1096        switch (attr->attr) {
1097        case KVM_S390_VM_MIGRATION_START:
1098                res = kvm_s390_vm_start_migration(kvm);
1099                break;
1100        case KVM_S390_VM_MIGRATION_STOP:
1101                res = kvm_s390_vm_stop_migration(kvm);
1102                break;
1103        default:
1104                break;
1105        }
1106        mutex_unlock(&kvm->slots_lock);
1107
1108        return res;
1109}
1110
1111static int kvm_s390_vm_get_migration(struct kvm *kvm,
1112                                     struct kvm_device_attr *attr)
1113{
1114        u64 mig = kvm->arch.migration_mode;
1115
1116        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1117                return -ENXIO;
1118
1119        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1120                return -EFAULT;
1121        return 0;
1122}
1123
1124static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1125{
1126        struct kvm_s390_vm_tod_clock gtod;
1127
1128        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1129                return -EFAULT;
1130
1131        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1132                return -EINVAL;
1133        kvm_s390_set_tod_clock(kvm, &gtod);
1134
1135        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1136                gtod.epoch_idx, gtod.tod);
1137
1138        return 0;
1139}
1140
1141static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1142{
1143        u8 gtod_high;
1144
1145        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1146                                           sizeof(gtod_high)))
1147                return -EFAULT;
1148
1149        if (gtod_high != 0)
1150                return -EINVAL;
1151        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1152
1153        return 0;
1154}
1155
1156static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1157{
1158        struct kvm_s390_vm_tod_clock gtod = { 0 };
1159
1160        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1161                           sizeof(gtod.tod)))
1162                return -EFAULT;
1163
1164        kvm_s390_set_tod_clock(kvm, &gtod);
1165        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1166        return 0;
1167}
1168
1169static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1170{
1171        int ret;
1172
1173        if (attr->flags)
1174                return -EINVAL;
1175
1176        switch (attr->attr) {
1177        case KVM_S390_VM_TOD_EXT:
1178                ret = kvm_s390_set_tod_ext(kvm, attr);
1179                break;
1180        case KVM_S390_VM_TOD_HIGH:
1181                ret = kvm_s390_set_tod_high(kvm, attr);
1182                break;
1183        case KVM_S390_VM_TOD_LOW:
1184                ret = kvm_s390_set_tod_low(kvm, attr);
1185                break;
1186        default:
1187                ret = -ENXIO;
1188                break;
1189        }
1190        return ret;
1191}
1192
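     /*
      * Read the guest view of the TOD clock: host TOD plus the VM's epoch
      * (and epoch index when the multiple-epoch facility, stfle bit 139, is
      * available), with preemption disabled so that both values belong to
      * the same point in time.
      */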
1193static void kvm_s390_get_tod_clock(struct kvm *kvm,
1194                                   struct kvm_s390_vm_tod_clock *gtod)
1195{
1196        union tod_clock clk;
1197
1198        preempt_disable();
1199
1200        store_tod_clock_ext(&clk);
1201
1202        gtod->tod = clk.tod + kvm->arch.epoch;
1203        gtod->epoch_idx = 0;
1204        if (test_kvm_facility(kvm, 139)) {
1205                gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1206                if (gtod->tod < clk.tod)
1207                        gtod->epoch_idx += 1;
1208        }
1209
1210        preempt_enable();
1211}
1212
1213static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1214{
1215        struct kvm_s390_vm_tod_clock gtod;
1216
1217        memset(&gtod, 0, sizeof(gtod));
1218        kvm_s390_get_tod_clock(kvm, &gtod);
1219        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1220                return -EFAULT;
1221
1222        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1223                gtod.epoch_idx, gtod.tod);
1224        return 0;
1225}
1226
1227static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228{
1229        u8 gtod_high = 0;
1230
1231        if (copy_to_user((void __user *)attr->addr, &gtod_high,
1232                                         sizeof(gtod_high)))
1233                return -EFAULT;
1234        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1235
1236        return 0;
1237}
1238
1239static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1240{
1241        u64 gtod;
1242
1243        gtod = kvm_s390_get_tod_clock_fast(kvm);
1244        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1245                return -EFAULT;
1246        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1247
1248        return 0;
1249}
1250
1251static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1252{
1253        int ret;
1254
1255        if (attr->flags)
1256                return -EINVAL;
1257
1258        switch (attr->attr) {
1259        case KVM_S390_VM_TOD_EXT:
1260                ret = kvm_s390_get_tod_ext(kvm, attr);
1261                break;
1262        case KVM_S390_VM_TOD_HIGH:
1263                ret = kvm_s390_get_tod_high(kvm, attr);
1264                break;
1265        case KVM_S390_VM_TOD_LOW:
1266                ret = kvm_s390_get_tod_low(kvm, attr);
1267                break;
1268        default:
1269                ret = -ENXIO;
1270                break;
1271        }
1272        return ret;
1273}
1274
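     /*
      * KVM_S390_VM_CPU_PROCESSOR: copy the user-provided cpuid, ibc and
      * facility list into the VM's CPU model. The ibc is clamped to the
      * [lowest_ibc, unblocked_ibc] range reported by SCLP. Only allowed
      * before the first vcpu has been created.
      */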
1275static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1276{
1277        struct kvm_s390_vm_cpu_processor *proc;
1278        u16 lowest_ibc, unblocked_ibc;
1279        int ret = 0;
1280
1281        mutex_lock(&kvm->lock);
1282        if (kvm->created_vcpus) {
1283                ret = -EBUSY;
1284                goto out;
1285        }
1286        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1287        if (!proc) {
1288                ret = -ENOMEM;
1289                goto out;
1290        }
1291        if (!copy_from_user(proc, (void __user *)attr->addr,
1292                            sizeof(*proc))) {
1293                kvm->arch.model.cpuid = proc->cpuid;
1294                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295                unblocked_ibc = sclp.ibc & 0xfff;
1296                if (lowest_ibc && proc->ibc) {
1297                        if (proc->ibc > unblocked_ibc)
1298                                kvm->arch.model.ibc = unblocked_ibc;
1299                        else if (proc->ibc < lowest_ibc)
1300                                kvm->arch.model.ibc = lowest_ibc;
1301                        else
1302                                kvm->arch.model.ibc = proc->ibc;
1303                }
1304                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1306                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307                         kvm->arch.model.ibc,
1308                         kvm->arch.model.cpuid);
1309                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310                         kvm->arch.model.fac_list[0],
1311                         kvm->arch.model.fac_list[1],
1312                         kvm->arch.model.fac_list[2]);
1313        } else
1314                ret = -EFAULT;
1315        kfree(proc);
1316out:
1317        mutex_unlock(&kvm->lock);
1318        return ret;
1319}
1320
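     /*
      * KVM_S390_VM_CPU_PROCESSOR_FEAT: set the guest CPU feature bitmap; it
      * must be a subset of the host-supported features and can only be
      * changed before vcpus are created.
      */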
1321static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322                                       struct kvm_device_attr *attr)
1323{
1324        struct kvm_s390_vm_cpu_feat data;
1325
1326        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1327                return -EFAULT;
1328        if (!bitmap_subset((unsigned long *) data.feat,
1329                           kvm_s390_available_cpu_feat,
1330                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1331                return -EINVAL;
1332
1333        mutex_lock(&kvm->lock);
1334        if (kvm->created_vcpus) {
1335                mutex_unlock(&kvm->lock);
1336                return -EBUSY;
1337        }
1338        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1340        mutex_unlock(&kvm->lock);
1341        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1342                         data.feat[0],
1343                         data.feat[1],
1344                         data.feat[2]);
1345        return 0;
1346}
1347
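     /*
      * KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: copy the user-provided subfunction
      * (query) masks into the VM's CPU model, again only before the first
      * vcpu is created; the VM_EVENT calls below merely log the new masks.
      */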
1348static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349                                          struct kvm_device_attr *attr)
1350{
1351        mutex_lock(&kvm->lock);
1352        if (kvm->created_vcpus) {
1353                mutex_unlock(&kvm->lock);
1354                return -EBUSY;
1355        }
1356
1357        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359                mutex_unlock(&kvm->lock);
1360                return -EFAULT;
1361        }
1362        mutex_unlock(&kvm->lock);
1363
1364        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1370                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1373                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1376                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1379                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1382                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1385                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1388                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1391                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1394                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1397                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1400                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1403                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1406                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1409                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1421
1422        return 0;
1423}
1424
1425static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426{
1427        int ret = -ENXIO;
1428
1429        switch (attr->attr) {
1430        case KVM_S390_VM_CPU_PROCESSOR:
1431                ret = kvm_s390_set_processor(kvm, attr);
1432                break;
1433        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434                ret = kvm_s390_set_processor_feat(kvm, attr);
1435                break;
1436        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1438                break;
1439        }
1440        return ret;
1441}
1442
1443static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1444{
1445        struct kvm_s390_vm_cpu_processor *proc;
1446        int ret = 0;
1447
1448        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1449        if (!proc) {
1450                ret = -ENOMEM;
1451                goto out;
1452        }
1453        proc->cpuid = kvm->arch.model.cpuid;
1454        proc->ibc = kvm->arch.model.ibc;
1455        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456               S390_ARCH_FAC_LIST_SIZE_BYTE);
1457        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458                 kvm->arch.model.ibc,
1459                 kvm->arch.model.cpuid);
1460        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461                 kvm->arch.model.fac_list[0],
1462                 kvm->arch.model.fac_list[1],
1463                 kvm->arch.model.fac_list[2]);
1464        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1465                ret = -EFAULT;
1466        kfree(proc);
1467out:
1468        return ret;
1469}
1470
1471static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1472{
1473        struct kvm_s390_vm_cpu_machine *mach;
1474        int ret = 0;
1475
1476        mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1477        if (!mach) {
1478                ret = -ENOMEM;
1479                goto out;
1480        }
1481        get_cpu_id((struct cpuid *) &mach->cpuid);
1482        mach->ibc = sclp.ibc;
1483        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484               S390_ARCH_FAC_LIST_SIZE_BYTE);
1485        memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486               sizeof(stfle_fac_list));
1487        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1488                 kvm->arch.model.ibc,
1489                 kvm->arch.model.cpuid);
1490        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1491                 mach->fac_mask[0],
1492                 mach->fac_mask[1],
1493                 mach->fac_mask[2]);
1494        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1495                 mach->fac_list[0],
1496                 mach->fac_list[1],
1497                 mach->fac_list[2]);
1498        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1499                ret = -EFAULT;
1500        kfree(mach);
1501out:
1502        return ret;
1503}
1504
1505static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506                                       struct kvm_device_attr *attr)
1507{
1508        struct kvm_s390_vm_cpu_feat data;
1509
1510        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1512        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1513                return -EFAULT;
1514        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1515                         data.feat[0],
1516                         data.feat[1],
1517                         data.feat[2]);
1518        return 0;
1519}
1520
1521static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522                                     struct kvm_device_attr *attr)
1523{
1524        struct kvm_s390_vm_cpu_feat data;
1525
1526        bitmap_copy((unsigned long *) data.feat,
1527                    kvm_s390_available_cpu_feat,
1528                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1529        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1530                return -EFAULT;
1531        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1532                         data.feat[0],
1533                         data.feat[1],
1534                         data.feat[2]);
1535        return 0;
1536}
1537
1538static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539                                          struct kvm_device_attr *attr)
1540{
1541        if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1543                return -EFAULT;
1544
1545        VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550        VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1551                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553        VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1554                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556        VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1557                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559        VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1560                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562        VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1563                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565        VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1566                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568        VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1569                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571        VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1572                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574        VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1575                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577        VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1578                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580        VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1581                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583        VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1584                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586        VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1587                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589        VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1590                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592        VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597        VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1602
1603        return 0;
1604}
1605
1606static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607                                        struct kvm_device_attr *attr)
1608{
1609        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1611                return -EFAULT;
1612
1613        VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618        VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1619                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621        VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1622                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624        VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1625                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627        VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1628                 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629                 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630        VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1631                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633        VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1634                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636        VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1637                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639        VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1640                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642        VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1643                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645        VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1646                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648        VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1649                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651        VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1652                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654        VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1655                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657        VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1658                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660        VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665        VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1670
1671        return 0;
1672}
1673
1674static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675{
1676        int ret = -ENXIO;
1677
1678        switch (attr->attr) {
1679        case KVM_S390_VM_CPU_PROCESSOR:
1680                ret = kvm_s390_get_processor(kvm, attr);
1681                break;
1682        case KVM_S390_VM_CPU_MACHINE:
1683                ret = kvm_s390_get_machine(kvm, attr);
1684                break;
1685        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686                ret = kvm_s390_get_processor_feat(kvm, attr);
1687                break;
1688        case KVM_S390_VM_CPU_MACHINE_FEAT:
1689                ret = kvm_s390_get_machine_feat(kvm, attr);
1690                break;
1691        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1693                break;
1694        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1696                break;
1697        }
1698        return ret;
1699}
1700
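/*
 * The CPU model, TOD, crypto and migration attributes handled here are
 * reached from userspace through the KVM_SET_DEVICE_ATTR, KVM_GET_DEVICE_ATTR
 * and KVM_HAS_DEVICE_ATTR vm ioctls. A minimal userspace sketch (vm_fd and
 * mach are illustrative names only, not part of this file):
 *
 *      struct kvm_s390_vm_cpu_machine mach;
 *      struct kvm_device_attr attr = {
 *              .group = KVM_S390_VM_CPU_MODEL,
 *              .attr  = KVM_S390_VM_CPU_MACHINE,
 *              .addr  = (__u64)&mach,
 *      };
 *      ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *      // on success, mach holds the host cpuid, ibc and facility lists
 */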
1701static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702{
1703        int ret;
1704
1705        switch (attr->group) {
1706        case KVM_S390_VM_MEM_CTRL:
1707                ret = kvm_s390_set_mem_control(kvm, attr);
1708                break;
1709        case KVM_S390_VM_TOD:
1710                ret = kvm_s390_set_tod(kvm, attr);
1711                break;
1712        case KVM_S390_VM_CPU_MODEL:
1713                ret = kvm_s390_set_cpu_model(kvm, attr);
1714                break;
1715        case KVM_S390_VM_CRYPTO:
1716                ret = kvm_s390_vm_set_crypto(kvm, attr);
1717                break;
1718        case KVM_S390_VM_MIGRATION:
1719                ret = kvm_s390_vm_set_migration(kvm, attr);
1720                break;
1721        default:
1722                ret = -ENXIO;
1723                break;
1724        }
1725
1726        return ret;
1727}
1728
1729static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730{
1731        int ret;
1732
1733        switch (attr->group) {
1734        case KVM_S390_VM_MEM_CTRL:
1735                ret = kvm_s390_get_mem_control(kvm, attr);
1736                break;
1737        case KVM_S390_VM_TOD:
1738                ret = kvm_s390_get_tod(kvm, attr);
1739                break;
1740        case KVM_S390_VM_CPU_MODEL:
1741                ret = kvm_s390_get_cpu_model(kvm, attr);
1742                break;
1743        case KVM_S390_VM_MIGRATION:
1744                ret = kvm_s390_vm_get_migration(kvm, attr);
1745                break;
1746        default:
1747                ret = -ENXIO;
1748                break;
1749        }
1750
1751        return ret;
1752}
1753
1754static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755{
1756        int ret;
1757
1758        switch (attr->group) {
1759        case KVM_S390_VM_MEM_CTRL:
1760                switch (attr->attr) {
1761                case KVM_S390_VM_MEM_ENABLE_CMMA:
1762                case KVM_S390_VM_MEM_CLR_CMMA:
1763                        ret = sclp.has_cmma ? 0 : -ENXIO;
1764                        break;
1765                case KVM_S390_VM_MEM_LIMIT_SIZE:
1766                        ret = 0;
1767                        break;
1768                default:
1769                        ret = -ENXIO;
1770                        break;
1771                }
1772                break;
1773        case KVM_S390_VM_TOD:
1774                switch (attr->attr) {
1775                case KVM_S390_VM_TOD_LOW:
1776                case KVM_S390_VM_TOD_HIGH:
1777                        ret = 0;
1778                        break;
1779                default:
1780                        ret = -ENXIO;
1781                        break;
1782                }
1783                break;
1784        case KVM_S390_VM_CPU_MODEL:
1785                switch (attr->attr) {
1786                case KVM_S390_VM_CPU_PROCESSOR:
1787                case KVM_S390_VM_CPU_MACHINE:
1788                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789                case KVM_S390_VM_CPU_MACHINE_FEAT:
1790                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1792                        ret = 0;
1793                        break;
1794                default:
1795                        ret = -ENXIO;
1796                        break;
1797                }
1798                break;
1799        case KVM_S390_VM_CRYPTO:
1800                switch (attr->attr) {
1801                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1805                        ret = 0;
1806                        break;
1807                case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808                case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809                        ret = ap_instructions_available() ? 0 : -ENXIO;
1810                        break;
1811                default:
1812                        ret = -ENXIO;
1813                        break;
1814                }
1815                break;
1816        case KVM_S390_VM_MIGRATION:
1817                ret = 0;
1818                break;
1819        default:
1820                ret = -ENXIO;
1821                break;
1822        }
1823
1824        return ret;
1825}
1826
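/*
 * KVM_S390_GET_SKEYS and KVM_S390_SET_SKEYS (handled below) transfer guest
 * storage keys in bulk, e.g. for migration. A minimal userspace sketch
 * (vm_fd and keys are illustrative names only):
 *
 *      uint8_t keys[128];
 *      struct kvm_s390_skeys args = {
 *              .start_gfn = 0,
 *              .count = sizeof(keys),
 *              .skeydata_addr = (__u64)keys,
 *      };
 *      long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *      // r == KVM_S390_GET_SKEYS_NONE: the guest does not use storage keys
 */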
1827static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828{
1829        uint8_t *keys;
1830        uint64_t hva;
1831        int srcu_idx, i, r = 0;
1832
1833        if (args->flags != 0)
1834                return -EINVAL;
1835
1836        /* Is this guest using storage keys? */
1837        if (!mm_uses_skeys(current->mm))
1838                return KVM_S390_GET_SKEYS_NONE;
1839
1840        /* Enforce sane limit on memory allocation */
1841        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842                return -EINVAL;
1843
1844        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1845        if (!keys)
1846                return -ENOMEM;
1847
1848        mmap_read_lock(current->mm);
1849        srcu_idx = srcu_read_lock(&kvm->srcu);
1850        for (i = 0; i < args->count; i++) {
1851                hva = gfn_to_hva(kvm, args->start_gfn + i);
1852                if (kvm_is_error_hva(hva)) {
1853                        r = -EFAULT;
1854                        break;
1855                }
1856
1857                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858                if (r)
1859                        break;
1860        }
1861        srcu_read_unlock(&kvm->srcu, srcu_idx);
1862        mmap_read_unlock(current->mm);
1863
1864        if (!r) {
1865                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866                                 sizeof(uint8_t) * args->count);
1867                if (r)
1868                        r = -EFAULT;
1869        }
1870
1871        kvfree(keys);
1872        return r;
1873}
1874
1875static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876{
1877        uint8_t *keys;
1878        uint64_t hva;
1879        int srcu_idx, i, r = 0;
1880        bool unlocked;
1881
1882        if (args->flags != 0)
1883                return -EINVAL;
1884
1885        /* Enforce sane limit on memory allocation */
1886        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1887                return -EINVAL;
1888
1889        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890        if (!keys)
1891                return -ENOMEM;
1892
1893        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894                           sizeof(uint8_t) * args->count);
1895        if (r) {
1896                r = -EFAULT;
1897                goto out;
1898        }
1899
1900        /* Enable storage key handling for the guest */
1901        r = s390_enable_skey();
1902        if (r)
1903                goto out;
1904
1905        i = 0;
1906        mmap_read_lock(current->mm);
1907        srcu_idx = srcu_read_lock(&kvm->srcu);
1908        while (i < args->count) {
1909                unlocked = false;
1910                hva = gfn_to_hva(kvm, args->start_gfn + i);
1911                if (kvm_is_error_hva(hva)) {
1912                        r = -EFAULT;
1913                        break;
1914                }
1915
1916                /* Lowest order bit is reserved */
1917                if (keys[i] & 0x01) {
1918                        r = -EINVAL;
1919                        break;
1920                }
1921
1922                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1923                if (r) {
1924                        r = fixup_user_fault(current->mm, hva,
1925                                             FAULT_FLAG_WRITE, &unlocked);
1926                        if (r)
1927                                break;
1928                }
1929                if (!r)
1930                        i++;
1931        }
1932        srcu_read_unlock(&kvm->srcu, srcu_idx);
1933        mmap_read_unlock(current->mm);
1934out:
1935        kvfree(keys);
1936        return r;
1937}
1938
1939/*
1940 * Base address and length must be sent at the start of each block; it is
1941 * therefore cheaper to also send a short run of clean data than to start a
1942 * new block, as long as that run is smaller than the size of two longs.
1943 */
1944#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1945/* same allocation limit as for storage keys, for consistency */
1946#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
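/*
 * On 64-bit s390, KVM_S390_MAX_BIT_DISTANCE evaluates to 16: starting a new
 * block costs two 8-byte values (base address and length), so emitting up to
 * 16 clean one-byte values instead is never more expensive.
 */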
1947
1948/*
1949 * Similar to gfn_to_memslot, but also returns a memslot index when the address
1950 * falls in a hole between memslots. In that case the index of one of the
1951 * memslots bordering the hole is returned.
1952 */
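/*
 * The memslots array is sorted by base_gfn in descending order (slot 0 covers
 * the highest guest addresses), so the binary search below yields the lowest
 * index whose base_gfn is <= gfn.
 */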
1953static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1954{
1955        int start = 0, end = slots->used_slots;
1956        int slot = atomic_read(&slots->lru_slot);
1957        struct kvm_memory_slot *memslots = slots->memslots;
1958
1959        if (gfn >= memslots[slot].base_gfn &&
1960            gfn < memslots[slot].base_gfn + memslots[slot].npages)
1961                return slot;
1962
1963        while (start < end) {
1964                slot = start + (end - start) / 2;
1965
1966                if (gfn >= memslots[slot].base_gfn)
1967                        end = slot;
1968                else
1969                        start = slot + 1;
1970        }
1971
1972        if (start >= slots->used_slots)
1973                return slots->used_slots - 1;
1974
1975        if (gfn >= memslots[start].base_gfn &&
1976            gfn < memslots[start].base_gfn + memslots[start].npages) {
1977                atomic_set(&slots->lru_slot, start);
1978        }
1979
1980        return start;
1981}
1982
1983static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984                              u8 *res, unsigned long bufsize)
1985{
1986        unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1987
1988        args->count = 0;
1989        while (args->count < bufsize) {
1990                hva = gfn_to_hva(kvm, cur_gfn);
1991                /*
1992                 * We return an error if the first value was invalid, but we
1993                 * return successfully if at least one value was copied.
1994                 */
1995                if (kvm_is_error_hva(hva))
1996                        return args->count ? 0 : -EFAULT;
1997                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1998                        pgstev = 0;
1999                res[args->count++] = (pgstev >> 24) & 0x43;
2000                cur_gfn++;
2001        }
2002
2003        return 0;
2004}
2005
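/*
 * Starting at cur_gfn, find the guest frame number of the next page whose bit
 * is set in the memslots' CMMA (second) dirty bitmaps. If no further dirty
 * page exists, the returned value points past the end of the highest memslot,
 * which the caller treats as the end of memory.
 */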
2006static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007                                              unsigned long cur_gfn)
2008{
2009        int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010        struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011        unsigned long ofs = cur_gfn - ms->base_gfn;
2012
2013        if (ms->base_gfn + ms->npages <= cur_gfn) {
2014                slotidx--;
2015                /* If we are above the highest slot, wrap around */
2016                if (slotidx < 0)
2017                        slotidx = slots->used_slots - 1;
2018
2019                ms = slots->memslots + slotidx;
2020                ofs = 0;
2021        }
2022        ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023        while ((slotidx > 0) && (ofs >= ms->npages)) {
2024                slotidx--;
2025                ms = slots->memslots + slotidx;
2026                ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2027        }
2028        return ms->base_gfn + ofs;
2029}
2030
2031static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032                             u8 *res, unsigned long bufsize)
2033{
2034        unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035        struct kvm_memslots *slots = kvm_memslots(kvm);
2036        struct kvm_memory_slot *ms;
2037
2038        if (unlikely(!slots->used_slots))
2039                return 0;
2040
2041        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042        ms = gfn_to_memslot(kvm, cur_gfn);
2043        args->count = 0;
2044        args->start_gfn = cur_gfn;
2045        if (!ms)
2046                return 0;
2047        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2048        mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2049
2050        while (args->count < bufsize) {
2051                hva = gfn_to_hva(kvm, cur_gfn);
2052                if (kvm_is_error_hva(hva))
2053                        return 0;
2054                /* Decrement only if we actually flipped the bit to 0 */
2055                if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056                        atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2058                        pgstev = 0;
2059                /* Save the value */
2060                res[args->count++] = (pgstev >> 24) & 0x43;
2061                /* If the next bit is too far away, stop. */
2062                if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2063                        return 0;
2064                /* If we reached the previous "next", find the next one */
2065                if (cur_gfn == next_gfn)
2066                        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067                /* Reached the end of memory or of the buffer, stop */
2068                if ((next_gfn >= mem_end) ||
2069                    (next_gfn - args->start_gfn >= bufsize))
2070                        return 0;
2071                cur_gfn++;
2072                /* Reached the end of the current memslot, take the next one. */
2073                if (cur_gfn - ms->base_gfn >= ms->npages) {
2074                        ms = gfn_to_memslot(kvm, cur_gfn);
2075                        if (!ms)
2076                                return 0;
2077                }
2078        }
2079        return 0;
2080}
2081
2082/*
2083 * This function searches for the next page with dirty CMMA attributes, and
2084 * saves the attributes in the buffer up to either the end of the buffer or
2085 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086 * no trailing clean bytes are saved.
2087 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2088 * output buffer will indicate 0 as length.
2089 */
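/*
 * A minimal sketch of how a userspace migration loop might drain the CMMA log
 * through the KVM_S390_GET_CMMA_BITS vm ioctl that ends up here (vm_fd and
 * buf are illustrative names only):
 *
 *      __u8 buf[4096];
 *      struct kvm_s390_cmma_log log = {
 *              .start_gfn = 0,
 *              .count = sizeof(buf),
 *              .flags = 0,     // or KVM_S390_CMMA_PEEK to read a range as-is
 *              .values = (__u64)buf,
 *      };
 *      do {
 *              if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *                      break;
 *              // consume log.count values starting at the returned log.start_gfn
 *              log.start_gfn += log.count;
 *      } while (log.remaining);
 */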
2090static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091                                  struct kvm_s390_cmma_log *args)
2092{
2093        unsigned long bufsize;
2094        int srcu_idx, peek, ret;
2095        u8 *values;
2096
2097        if (!kvm->arch.use_cmma)
2098                return -ENXIO;
2099        /* Invalid/unsupported flags were specified */
2100        if (args->flags & ~KVM_S390_CMMA_PEEK)
2101                return -EINVAL;
2102        /* Migration mode query, and we are not doing a migration */
2103        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104        if (!peek && !kvm->arch.migration_mode)
2105                return -EINVAL;
2106        /* CMMA is disabled or was not used, or the buffer has length zero */
2107        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108        if (!bufsize || !kvm->mm->context.uses_cmm) {
2109                memset(args, 0, sizeof(*args));
2110                return 0;
2111        }
2112        /* We are not peeking, and there are no dirty pages */
2113        if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114                memset(args, 0, sizeof(*args));
2115                return 0;
2116        }
2117
2118        values = vmalloc(bufsize);
2119        if (!values)
2120                return -ENOMEM;
2121
2122        mmap_read_lock(kvm->mm);
2123        srcu_idx = srcu_read_lock(&kvm->srcu);
2124        if (peek)
2125                ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2126        else
2127                ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128        srcu_read_unlock(&kvm->srcu, srcu_idx);
2129        mmap_read_unlock(kvm->mm);
2130
2131        if (kvm->arch.migration_mode)
2132                args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2133        else
2134                args->remaining = 0;
2135
2136        if (copy_to_user((void __user *)args->values, values, args->count))
2137                ret = -EFAULT;
2138
2139        vfree(values);
2140        return ret;
2141}
2142
2143/*
2144 * This function sets the CMMA attributes for the given pages. If the input
2145 * buffer has zero length, no action is taken, otherwise the attributes are
2146 * set and the mm->context.uses_cmm flag is set.
2147 */
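/*
 * On the destination side the same value format is replayed through the
 * KVM_S390_SET_CMMA_BITS vm ioctl; a minimal sketch (illustrative names only):
 *
 *      struct kvm_s390_cmma_log log = {
 *              .start_gfn = gfn,
 *              .count = n_values,
 *              .mask = ~0ULL,          // which pgste bits to actually set
 *              .values = (__u64)buf,
 *      };
 *      ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */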
2148static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149                                  const struct kvm_s390_cmma_log *args)
2150{
2151        unsigned long hva, mask, pgstev, i;
2152        uint8_t *bits;
2153        int srcu_idx, r = 0;
2154
2155        mask = args->mask;
2156
2157        if (!kvm->arch.use_cmma)
2158                return -ENXIO;
2159        /* invalid/unsupported flags */
2160        if (args->flags != 0)
2161                return -EINVAL;
2162        /* Enforce sane limit on memory allocation */
2163        if (args->count > KVM_S390_CMMA_SIZE_MAX)
2164                return -EINVAL;
2165        /* Nothing to do */
2166        if (args->count == 0)
2167                return 0;
2168
2169        bits = vmalloc(array_size(sizeof(*bits), args->count));
2170        if (!bits)
2171                return -ENOMEM;
2172
2173        r = copy_from_user(bits, (void __user *)args->values, args->count);
2174        if (r) {
2175                r = -EFAULT;
2176                goto out;
2177        }
2178
2179        mmap_read_lock(kvm->mm);
2180        srcu_idx = srcu_read_lock(&kvm->srcu);
2181        for (i = 0; i < args->count; i++) {
2182                hva = gfn_to_hva(kvm, args->start_gfn + i);
2183                if (kvm_is_error_hva(hva)) {
2184                        r = -EFAULT;
2185                        break;
2186                }
2187
2188                pgstev = bits[i];
2189                pgstev = pgstev << 24;
2190                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191                set_pgste_bits(kvm->mm, hva, mask, pgstev);
2192        }
2193        srcu_read_unlock(&kvm->srcu, srcu_idx);
2194        mmap_read_unlock(kvm->mm);
2195
2196        if (!kvm->mm->context.uses_cmm) {
2197                mmap_write_lock(kvm->mm);
2198                kvm->mm->context.uses_cmm = 1;
2199                mmap_write_unlock(kvm->mm);
2200        }
2201out:
2202        vfree(bits);
2203        return r;
2204}
2205
2206static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2207{
2208        struct kvm_vcpu *vcpu;
2209        u16 rc, rrc;
2210        int ret = 0;
2211        int i;
2212
2213        /*
2214         * We ignore failures and try to destroy as many CPUs as possible.
2215         * At the same time we must not free the assigned resources when
2216         * this fails, as the ultravisor still has access to that memory.
2217         * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2218         * behind.
2219         * We want to return the first failure rc and rrc, though.
2220         */
2221        kvm_for_each_vcpu(i, vcpu, kvm) {
2222                mutex_lock(&vcpu->mutex);
2223                if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2224                        *rcp = rc;
2225                        *rrcp = rrc;
2226                        ret = -EIO;
2227                }
2228                mutex_unlock(&vcpu->mutex);
2229        }
2230        return ret;
2231}
2232
2233static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234{
2235        int i, r = 0;
2236        u16 dummy;
2237
2238        struct kvm_vcpu *vcpu;
2239
2240        kvm_for_each_vcpu(i, vcpu, kvm) {
2241                mutex_lock(&vcpu->mutex);
2242                r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243                mutex_unlock(&vcpu->mutex);
2244                if (r)
2245                        break;
2246        }
2247        if (r)
2248                kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2249        return r;
2250}
2251
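/*
 * Dispatcher for the KVM_S390_PV_COMMAND vm ioctl. For a protected
 * (Ultravisor) guest, userspace typically drives roughly this life cycle:
 * KVM_PV_ENABLE -> KVM_PV_SET_SEC_PARMS -> one or more KVM_PV_UNPACK calls ->
 * KVM_PV_VERIFY to check the image, and eventually KVM_PV_DISABLE to tear the
 * protected configuration down again.
 */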
2252static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2253{
2254        int r = 0;
2255        u16 dummy;
2256        void __user *argp = (void __user *)cmd->data;
2257
2258        switch (cmd->cmd) {
2259        case KVM_PV_ENABLE: {
2260                r = -EINVAL;
2261                if (kvm_s390_pv_is_protected(kvm))
2262                        break;
2263
2264                /*
2265                 * FMT 4 SIE needs esca. As we never switch back to bsca from
2266                 * esca, we need no cleanup in the error cases below.
2267                 */
2268                r = sca_switch_to_extended(kvm);
2269                if (r)
2270                        break;
2271
2272                mmap_write_lock(current->mm);
2273                r = gmap_mark_unmergeable();
2274                mmap_write_unlock(current->mm);
2275                if (r)
2276                        break;
2277
2278                r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2279                if (r)
2280                        break;
2281
2282                r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2283                if (r)
2284                        kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2285
2286                /* we need to block service interrupts from now on */
2287                set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2288                break;
2289        }
2290        case KVM_PV_DISABLE: {
2291                r = -EINVAL;
2292                if (!kvm_s390_pv_is_protected(kvm))
2293                        break;
2294
2295                r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2296                /*
2297                 * If a CPU could not be destroyed, destroy VM will also fail.
2298                 * There is no point in trying to destroy it. Instead return
2299                 * the rc and rrc from the first CPU that failed destroying.
2300                 */
2301                if (r)
2302                        break;
2303                r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2304
2305                /* no need to block service interrupts any more */
2306                clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2307                break;
2308        }
2309        case KVM_PV_SET_SEC_PARMS: {
2310                struct kvm_s390_pv_sec_parm parms = {};
2311                void *hdr;
2312
2313                r = -EINVAL;
2314                if (!kvm_s390_pv_is_protected(kvm))
2315                        break;
2316
2317                r = -EFAULT;
2318                if (copy_from_user(&parms, argp, sizeof(parms)))
2319                        break;
2320
2321                /* Currently restricted to 8KB */
2322                r = -EINVAL;
2323                if (parms.length > PAGE_SIZE * 2)
2324                        break;
2325
2326                r = -ENOMEM;
2327                hdr = vmalloc(parms.length);
2328                if (!hdr)
2329                        break;
2330
2331                r = -EFAULT;
2332                if (!copy_from_user(hdr, (void __user *)parms.origin,
2333                                    parms.length))
2334                        r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335                                                      &cmd->rc, &cmd->rrc);
2336
2337                vfree(hdr);
2338                break;
2339        }
2340        case KVM_PV_UNPACK: {
2341                struct kvm_s390_pv_unp unp = {};
2342
2343                r = -EINVAL;
2344                if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2345                        break;
2346
2347                r = -EFAULT;
2348                if (copy_from_user(&unp, argp, sizeof(unp)))
2349                        break;
2350
2351                r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352                                       &cmd->rc, &cmd->rrc);
2353                break;
2354        }
2355        case KVM_PV_VERIFY: {
2356                r = -EINVAL;
2357                if (!kvm_s390_pv_is_protected(kvm))
2358                        break;
2359
2360                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361                                  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362                KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2363                             cmd->rrc);
2364                break;
2365        }
2366        case KVM_PV_PREP_RESET: {
2367                r = -EINVAL;
2368                if (!kvm_s390_pv_is_protected(kvm))
2369                        break;
2370
2371                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372                                  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373                KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2374                             cmd->rc, cmd->rrc);
2375                break;
2376        }
2377        case KVM_PV_UNSHARE_ALL: {
2378                r = -EINVAL;
2379                if (!kvm_s390_pv_is_protected(kvm))
2380                        break;
2381
2382                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383                                  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384                KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2385                             cmd->rc, cmd->rrc);
2386                break;
2387        }
2388        default:
2389                r = -ENOTTY;
2390        }
2391        return r;
2392}
2393
2394long kvm_arch_vm_ioctl(struct file *filp,
2395                       unsigned int ioctl, unsigned long arg)
2396{
2397        struct kvm *kvm = filp->private_data;
2398        void __user *argp = (void __user *)arg;
2399        struct kvm_device_attr attr;
2400        int r;
2401
2402        switch (ioctl) {
2403        case KVM_S390_INTERRUPT: {
2404                struct kvm_s390_interrupt s390int;
2405
2406                r = -EFAULT;
2407                if (copy_from_user(&s390int, argp, sizeof(s390int)))
2408                        break;
2409                r = kvm_s390_inject_vm(kvm, &s390int);
2410                break;
2411        }
2412        case KVM_CREATE_IRQCHIP: {
2413                struct kvm_irq_routing_entry routing;
2414
2415                r = -EINVAL;
2416                if (kvm->arch.use_irqchip) {
2417                        /* Set up dummy routing. */
2418                        memset(&routing, 0, sizeof(routing));
2419                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2420                }
2421                break;
2422        }
2423        case KVM_SET_DEVICE_ATTR: {
2424                r = -EFAULT;
2425                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426                        break;
2427                r = kvm_s390_vm_set_attr(kvm, &attr);
2428                break;
2429        }
2430        case KVM_GET_DEVICE_ATTR: {
2431                r = -EFAULT;
2432                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2433                        break;
2434                r = kvm_s390_vm_get_attr(kvm, &attr);
2435                break;
2436        }
2437        case KVM_HAS_DEVICE_ATTR: {
2438                r = -EFAULT;
2439                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2440                        break;
2441                r = kvm_s390_vm_has_attr(kvm, &attr);
2442                break;
2443        }
2444        case KVM_S390_GET_SKEYS: {
2445                struct kvm_s390_skeys args;
2446
2447                r = -EFAULT;
2448                if (copy_from_user(&args, argp,
2449                                   sizeof(struct kvm_s390_skeys)))
2450                        break;
2451                r = kvm_s390_get_skeys(kvm, &args);
2452                break;
2453        }
2454        case KVM_S390_SET_SKEYS: {
2455                struct kvm_s390_skeys args;
2456
2457                r = -EFAULT;
2458                if (copy_from_user(&args, argp,
2459                                   sizeof(struct kvm_s390_skeys)))
2460                        break;
2461                r = kvm_s390_set_skeys(kvm, &args);
2462                break;
2463        }
2464        case KVM_S390_GET_CMMA_BITS: {
2465                struct kvm_s390_cmma_log args;
2466
2467                r = -EFAULT;
2468                if (copy_from_user(&args, argp, sizeof(args)))
2469                        break;
2470                mutex_lock(&kvm->slots_lock);
2471                r = kvm_s390_get_cmma_bits(kvm, &args);
2472                mutex_unlock(&kvm->slots_lock);
2473                if (!r) {
2474                        r = copy_to_user(argp, &args, sizeof(args));
2475                        if (r)
2476                                r = -EFAULT;
2477                }
2478                break;
2479        }
2480        case KVM_S390_SET_CMMA_BITS: {
2481                struct kvm_s390_cmma_log args;
2482
2483                r = -EFAULT;
2484                if (copy_from_user(&args, argp, sizeof(args)))
2485                        break;
2486                mutex_lock(&kvm->slots_lock);
2487                r = kvm_s390_set_cmma_bits(kvm, &args);
2488                mutex_unlock(&kvm->slots_lock);
2489                break;
2490        }
2491        case KVM_S390_PV_COMMAND: {
2492                struct kvm_pv_cmd args;
2493
2494                /* protvirt implies user-space handled sigp (cpu state control) */
2495                kvm->arch.user_cpu_state_ctrl = 1;
2496                r = 0;
2497                if (!is_prot_virt_host()) {
2498                        r = -EINVAL;
2499                        break;
2500                }
2501                if (copy_from_user(&args, argp, sizeof(args))) {
2502                        r = -EFAULT;
2503                        break;
2504                }
2505                if (args.flags) {
2506                        r = -EINVAL;
2507                        break;
2508                }
2509                mutex_lock(&kvm->lock);
2510                r = kvm_s390_handle_pv(kvm, &args);
2511                mutex_unlock(&kvm->lock);
2512                if (copy_to_user(argp, &args, sizeof(args))) {
2513                        r = -EFAULT;
2514                        break;
2515                }
2516                break;
2517        }
2518        default:
2519                r = -ENOTTY;
2520        }
2521
2522        return r;
2523}
2524
2525static int kvm_s390_apxa_installed(void)
2526{
2527        struct ap_config_info info;
2528
2529        if (ap_instructions_available()) {
2530                if (ap_qci(&info) == 0)
2531                        return info.apxa;
2532        }
2533
2534        return 0;
2535}
2536
2537/*
2538 * The format of the crypto control block (CRYCB) is specified in the 3 low
2539 * order bits of the CRYCB designation (CRYCBD) field as follows:
2540 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541 *           AP extended addressing (APXA) facility are installed.
2542 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2543 * Format 2: Both the APXA and MSAX3 facilities are installed.
2544 */
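/*
 * In practice, formats 0 and 1 use the small apcb0 masks (up to 64 adapters
 * and 16 domains), while format 2 uses the larger apcb1 masks (up to 256
 * adapters and domains); see kvm_arch_crypto_set_masks() below.
 */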
2545static void kvm_s390_set_crycb_format(struct kvm *kvm)
2546{
2547        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2548
2549        /* Clear the CRYCB format bits - i.e., set format 0 by default */
2550        kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2551
2552        /* Check whether MSAX3 is installed */
2553        if (!test_kvm_facility(kvm, 76))
2554                return;
2555
2556        if (kvm_s390_apxa_installed())
2557                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2558        else
2559                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2560}
2561
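/*
 * Set the AP adapter, usage domain and control domain masks in the guest's
 * CRYCB. Typically called by the vfio_ap device driver when AP queues are
 * made available to the guest; all vCPUs are blocked while the masks change
 * and the shadow CRYCBs are rebuilt afterwards.
 */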
2562void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2563                               unsigned long *aqm, unsigned long *adm)
2564{
2565        struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2566
2567        mutex_lock(&kvm->lock);
2568        kvm_s390_vcpu_block_all(kvm);
2569
2570        switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2571        case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2572                memcpy(crycb->apcb1.apm, apm, 32);
2573                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2574                         apm[0], apm[1], apm[2], apm[3]);
2575                memcpy(crycb->apcb1.aqm, aqm, 32);
2576                VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2577                         aqm[0], aqm[1], aqm[2], aqm[3]);
2578                memcpy(crycb->apcb1.adm, adm, 32);
2579                VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2580                         adm[0], adm[1], adm[2], adm[3]);
2581                break;
2582        case CRYCB_FORMAT1:
2583        case CRYCB_FORMAT0: /* Fall through, both use APCB0 */
2584                memcpy(crycb->apcb0.apm, apm, 8);
2585                memcpy(crycb->apcb0.aqm, aqm, 2);
2586                memcpy(crycb->apcb0.adm, adm, 2);
2587                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2588                         apm[0], *((unsigned short *)aqm),
2589                         *((unsigned short *)adm));
2590                break;
2591        default:        /* Cannot happen */
2592                break;
2593        }
2594
2595        /* recreate the shadow crycb for each vcpu */
2596        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2597        kvm_s390_vcpu_unblock_all(kvm);
2598        mutex_unlock(&kvm->lock);
2599}
2600EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2601
2602void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2603{
2604        mutex_lock(&kvm->lock);
2605        kvm_s390_vcpu_block_all(kvm);
2606
2607        memset(&kvm->arch.crypto.crycb->apcb0, 0,
2608               sizeof(kvm->arch.crypto.crycb->apcb0));
2609        memset(&kvm->arch.crypto.crycb->apcb1, 0,
2610               sizeof(kvm->arch.crypto.crycb->apcb1));
2611
2612        VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2613        /* recreate the shadow crycb for each vcpu */
2614        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2615        kvm_s390_vcpu_unblock_all(kvm);
2616        mutex_unlock(&kvm->lock);
2617}
2618EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2619
2620static u64 kvm_s390_get_initial_cpuid(void)
2621{
2622        struct cpuid cpuid;
2623
2624        get_cpu_id(&cpuid);
2625        cpuid.version = 0xff;
2626        return *((u64 *) &cpuid);
2627}
2628
2629static void kvm_s390_crypto_init(struct kvm *kvm)
2630{
2631        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2632        kvm_s390_set_crycb_format(kvm);
2633
2634        if (!test_kvm_facility(kvm, 76))
2635                return;
2636
2637        /* Enable AES/DEA protected key functions by default */
2638        kvm->arch.crypto.aes_kw = 1;
2639        kvm->arch.crypto.dea_kw = 1;
2640        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2641                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2642        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2643                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2644}
2645
2646static void sca_dispose(struct kvm *kvm)
2647{
2648        if (kvm->arch.use_esca)
2649                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2650        else
2651                free_page((unsigned long)(kvm->arch.sca));
2652        kvm->arch.sca = NULL;
2653}
2654
2655int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2656{
2657        gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2658        int i, rc;
2659        char debug_name[16];
2660        static unsigned long sca_offset;
2661
2662        rc = -EINVAL;
2663#ifdef CONFIG_KVM_S390_UCONTROL
2664        if (type & ~KVM_VM_S390_UCONTROL)
2665                goto out_err;
2666        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2667                goto out_err;
2668#else
2669        if (type)
2670                goto out_err;
2671#endif
2672
2673        rc = s390_enable_sie();
2674        if (rc)
2675                goto out_err;
2676
2677        rc = -ENOMEM;
2678
2679        if (!sclp.has_64bscao)
2680                alloc_flags |= GFP_DMA;
2681        rwlock_init(&kvm->arch.sca_lock);
2682        /* start with basic SCA */
2683        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2684        if (!kvm->arch.sca)
2685                goto out_err;
2686        mutex_lock(&kvm_lock);
2687        sca_offset += 16;
2688        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2689                sca_offset = 0;
2690        kvm->arch.sca = (struct bsca_block *)
2691                        ((char *) kvm->arch.sca + sca_offset);
2692        mutex_unlock(&kvm_lock);
2693
2694        sprintf(debug_name, "kvm-%u", current->pid);
2695
2696        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2697        if (!kvm->arch.dbf)
2698                goto out_err;
2699
2700        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2701        kvm->arch.sie_page2 =
2702             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2703        if (!kvm->arch.sie_page2)
2704                goto out_err;
2705
2706        kvm->arch.sie_page2->kvm = kvm;
2707        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2708
2709        for (i = 0; i < kvm_s390_fac_size(); i++) {
2710                kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2711                                              (kvm_s390_fac_base[i] |
2712                                               kvm_s390_fac_ext[i]);
2713                kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2714                                              kvm_s390_fac_base[i];
2715        }
2716        kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2717
2718        /* we are always in czam mode - even on pre-z14 machines */
2719        set_kvm_facility(kvm->arch.model.fac_mask, 138);
2720        set_kvm_facility(kvm->arch.model.fac_list, 138);
2721        /* we emulate STHYI in kvm */
2722        set_kvm_facility(kvm->arch.model.fac_mask, 74);
2723        set_kvm_facility(kvm->arch.model.fac_list, 74);
2724        if (MACHINE_HAS_TLB_GUEST) {
2725                set_kvm_facility(kvm->arch.model.fac_mask, 147);
2726                set_kvm_facility(kvm->arch.model.fac_list, 147);
2727        }
2728
2729        if (css_general_characteristics.aiv && test_facility(65))
2730                set_kvm_facility(kvm->arch.model.fac_mask, 65);
2731
2732        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2733        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2734
2735        kvm_s390_crypto_init(kvm);
2736
2737        mutex_init(&kvm->arch.float_int.ais_lock);
2738        spin_lock_init(&kvm->arch.float_int.lock);
2739        for (i = 0; i < FIRQ_LIST_COUNT; i++)
2740                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2741        init_waitqueue_head(&kvm->arch.ipte_wq);
2742        mutex_init(&kvm->arch.ipte_mutex);
2743
2744        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2745        VM_EVENT(kvm, 3, "vm created with type %lu", type);
2746
2747        if (type & KVM_VM_S390_UCONTROL) {
2748                kvm->arch.gmap = NULL;
2749                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2750        } else {
2751                if (sclp.hamax == U64_MAX)
2752                        kvm->arch.mem_limit = TASK_SIZE_MAX;
2753                else
2754                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2755                                                    sclp.hamax + 1);
2756                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2757                if (!kvm->arch.gmap)
2758                        goto out_err;
2759                kvm->arch.gmap->private = kvm;
2760                kvm->arch.gmap->pfault_enabled = 0;
2761        }
2762
2763        kvm->arch.use_pfmfi = sclp.has_pfmfi;
2764        kvm->arch.use_skf = sclp.has_skey;
2765        spin_lock_init(&kvm->arch.start_stop_lock);
2766        kvm_s390_vsie_init(kvm);
2767        if (use_gisa)
2768                kvm_s390_gisa_init(kvm);
2769        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2770
2771        return 0;
2772out_err:
2773        free_page((unsigned long)kvm->arch.sie_page2);
2774        debug_unregister(kvm->arch.dbf);
2775        sca_dispose(kvm);
2776        KVM_EVENT(3, "creation of vm failed: %d", rc);
2777        return rc;
2778}
2779
2780void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2781{
2782        u16 rc, rrc;
2783
2784        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2785        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2786        kvm_s390_clear_local_irqs(vcpu);
2787        kvm_clear_async_pf_completion_queue(vcpu);
2788        if (!kvm_is_ucontrol(vcpu->kvm))
2789                sca_del_vcpu(vcpu);
2790
2791        if (kvm_is_ucontrol(vcpu->kvm))
2792                gmap_remove(vcpu->arch.gmap);
2793
2794        if (vcpu->kvm->arch.use_cmma)
2795                kvm_s390_vcpu_unsetup_cmma(vcpu);
2796        /* We cannot hold the vcpu mutex here, we are already dying */
2797        if (kvm_s390_pv_cpu_get_handle(vcpu))
2798                kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2799        free_page((unsigned long)(vcpu->arch.sie_block));
2800}
2801
2802static void kvm_free_vcpus(struct kvm *kvm)
2803{
2804        unsigned int i;
2805        struct kvm_vcpu *vcpu;
2806
2807        kvm_for_each_vcpu(i, vcpu, kvm)
2808                kvm_vcpu_destroy(vcpu);
2809
2810        mutex_lock(&kvm->lock);
2811        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2812                kvm->vcpus[i] = NULL;
2813
2814        atomic_set(&kvm->online_vcpus, 0);
2815        mutex_unlock(&kvm->lock);
2816}
2817
2818void kvm_arch_destroy_vm(struct kvm *kvm)
2819{
2820        u16 rc, rrc;
2821
2822        kvm_free_vcpus(kvm);
2823        sca_dispose(kvm);
2824        kvm_s390_gisa_destroy(kvm);
2825        /*
2826         * We are already at the end of life and kvm->lock is not taken.
2827         * This is ok as the file descriptor is closed by now and nobody
2828         * can mess with the pv state. To avoid lockdep_assert_held from
2829         * complaining we do not use kvm_s390_pv_is_protected.
2830         */
2831        if (kvm_s390_pv_get_handle(kvm))
2832                kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2833        debug_unregister(kvm->arch.dbf);
2834        free_page((unsigned long)kvm->arch.sie_page2);
2835        if (!kvm_is_ucontrol(kvm))
2836                gmap_remove(kvm->arch.gmap);
2837        kvm_s390_destroy_adapters(kvm);
2838        kvm_s390_clear_float_irqs(kvm);
2839        kvm_s390_vsie_destroy(kvm);
2840        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2841}
2842
2843/* Section: vcpu related */
2844static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2845{
2846        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2847        if (!vcpu->arch.gmap)
2848                return -ENOMEM;
2849        vcpu->arch.gmap->private = vcpu->kvm;
2850
2851        return 0;
2852}
2853
2854static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2855{
2856        if (!kvm_s390_use_sca_entries())
2857                return;
2858        read_lock(&vcpu->kvm->arch.sca_lock);
2859        if (vcpu->kvm->arch.use_esca) {
2860                struct esca_block *sca = vcpu->kvm->arch.sca;
2861
2862                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863                sca->cpu[vcpu->vcpu_id].sda = 0;
2864        } else {
2865                struct bsca_block *sca = vcpu->kvm->arch.sca;
2866
2867                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868                sca->cpu[vcpu->vcpu_id].sda = 0;
2869        }
2870        read_unlock(&vcpu->kvm->arch.sca_lock);
2871}
2872
2873static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2874{
2875        if (!kvm_s390_use_sca_entries()) {
2876                struct bsca_block *sca = vcpu->kvm->arch.sca;
2877
2878                /* we still need the basic sca for the ipte control */
2879                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2880                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2881                return;
2882        }
2883        read_lock(&vcpu->kvm->arch.sca_lock);
2884        if (vcpu->kvm->arch.use_esca) {
2885                struct esca_block *sca = vcpu->kvm->arch.sca;
2886
2887                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2888                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2889                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2890                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2891                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2892        } else {
2893                struct bsca_block *sca = vcpu->kvm->arch.sca;
2894
2895                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2896                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2897                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2898                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2899        }
2900        read_unlock(&vcpu->kvm->arch.sca_lock);
2901}
2902
2903/* Basic SCA to Extended SCA data copy routines */
2904static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2905{
2906        d->sda = s->sda;
2907        d->sigp_ctrl.c = s->sigp_ctrl.c;
2908        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2909}
2910
2911static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2912{
2913        int i;
2914
2915        d->ipte_control = s->ipte_control;
2916        d->mcn[0] = s->mcn;
2917        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2918                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2919}
2920
2921static int sca_switch_to_extended(struct kvm *kvm)
2922{
2923        struct bsca_block *old_sca = kvm->arch.sca;
2924        struct esca_block *new_sca;
2925        struct kvm_vcpu *vcpu;
2926        unsigned int vcpu_idx;
2927        u32 scaol, scaoh;
2928
2929        if (kvm->arch.use_esca)
2930                return 0;
2931
2932        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2933        if (!new_sca)
2934                return -ENOMEM;
2935
2936        scaoh = (u32)((u64)(new_sca) >> 32);
2937        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2938
2939        kvm_s390_vcpu_block_all(kvm);
2940        write_lock(&kvm->arch.sca_lock);
2941
2942        sca_copy_b_to_e(new_sca, old_sca);
2943
2944        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2945                vcpu->arch.sie_block->scaoh = scaoh;
2946                vcpu->arch.sie_block->scaol = scaol;
2947                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2948        }
2949        kvm->arch.sca = new_sca;
2950        kvm->arch.use_esca = 1;
2951
2952        write_unlock(&kvm->arch.sca_lock);
2953        kvm_s390_vcpu_unblock_all(kvm);
2954
2955        free_page((unsigned long)old_sca);
2956
2957        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2958                 old_sca, kvm->arch.sca);
2959        return 0;
2960}
2961
2962static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2963{
2964        int rc;
2965
2966        if (!kvm_s390_use_sca_entries()) {
2967                if (id < KVM_MAX_VCPUS)
2968                        return true;
2969                return false;
2970        }
2971        if (id < KVM_S390_BSCA_CPU_SLOTS)
2972                return true;
2973        if (!sclp.has_esca || !sclp.has_64bscao)
2974                return false;
2975
2976        mutex_lock(&kvm->lock);
2977        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2978        mutex_unlock(&kvm->lock);
2979
2980        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2981}
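    /*
     * Rough summary of the thresholds above, assuming the usual
     * KVM_S390_BSCA_CPU_SLOTS (64) and KVM_S390_ESCA_CPU_SLOTS (248)
     * definitions: ids below the basic slot count always fit; larger ids
     * need the extended SCA, so the VM is switched over once (provided the
     * machine has ESCA and the 64-bit SCA origin), after which ids up to
     * the ESCA slot count are accepted.
     */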
2982
2983/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2984static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2985{
2986        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2987        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2988        vcpu->arch.cputm_start = get_tod_clock_fast();
2989        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990}
2991
2992/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994{
2995        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2996        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2997        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2998        vcpu->arch.cputm_start = 0;
2999        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3000}
3001
3002/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3003static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3004{
3005        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3006        vcpu->arch.cputm_enabled = true;
3007        __start_cpu_timer_accounting(vcpu);
3008}
3009
3010/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3011static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3012{
3013        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3014        __stop_cpu_timer_accounting(vcpu);
3015        vcpu->arch.cputm_enabled = false;
3016}
3017
3018static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3019{
3020        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021        __enable_cpu_timer_accounting(vcpu);
3022        preempt_enable();
3023}
3024
3025static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3026{
3027        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3028        __disable_cpu_timer_accounting(vcpu);
3029        preempt_enable();
3030}
3031
3032/* set the cpu timer - may only be called from the VCPU thread itself */
3033void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3034{
3035        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3036        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3037        if (vcpu->arch.cputm_enabled)
3038                vcpu->arch.cputm_start = get_tod_clock_fast();
3039        vcpu->arch.sie_block->cputm = cputm;
3040        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3041        preempt_enable();
3042}
3043
3044/* update and get the cpu timer - can also be called from other VCPU threads */
3045__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3046{
3047        unsigned int seq;
3048        __u64 value;
3049
3050        if (unlikely(!vcpu->arch.cputm_enabled))
3051                return vcpu->arch.sie_block->cputm;
3052
3053        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3054        do {
3055                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3056                /*
3057                 * If the writer would ever execute a read in the critical
3058                 * section, e.g. in irq context, we have a deadlock.
3059                 */
3060                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3061                value = vcpu->arch.sie_block->cputm;
3062                /* if cputm_start is 0, accounting is being started/stopped */
3063                if (likely(vcpu->arch.cputm_start))
3064                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3065        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3066        preempt_enable();
3067        return value;
3068}
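    /*
     * Reader side of the cputm seqcount in short: the CPU timer counts
     * down while the vcpu runs, so the current value is the stored cputm
     * minus the TOD time elapsed since accounting was last started. The
     * retry loop lets any thread read a consistent (cputm, cputm_start)
     * pair without locking against the writers above.
     */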
3069
3070void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3071{
3072
3073        gmap_enable(vcpu->arch.enabled_gmap);
3074        kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3075        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3076                __start_cpu_timer_accounting(vcpu);
3077        vcpu->cpu = cpu;
3078}
3079
3080void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3081{
3082        vcpu->cpu = -1;
3083        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3084                __stop_cpu_timer_accounting(vcpu);
3085        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3086        vcpu->arch.enabled_gmap = gmap_get_enabled();
3087        gmap_disable(vcpu->arch.enabled_gmap);
3088
3089}
3090
3091void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3092{
3093        mutex_lock(&vcpu->kvm->lock);
3094        preempt_disable();
3095        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3096        vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3097        preempt_enable();
3098        mutex_unlock(&vcpu->kvm->lock);
3099        if (!kvm_is_ucontrol(vcpu->kvm)) {
3100                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3101                sca_add_vcpu(vcpu);
3102        }
3103        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3104                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3105        /* make vcpu_load load the right gmap on the first trigger */
3106        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3107}
3108
3109static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3110{
3111        if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3112            test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3113                return true;
3114        return false;
3115}
3116
3117static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3118{
3119        /* At least one ECC subfunction must be present */
3120        return kvm_has_pckmo_subfunc(kvm, 32) ||
3121               kvm_has_pckmo_subfunc(kvm, 33) ||
3122               kvm_has_pckmo_subfunc(kvm, 34) ||
3123               kvm_has_pckmo_subfunc(kvm, 40) ||
3124               kvm_has_pckmo_subfunc(kvm, 41);
3125
3126}
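    /*
     * The subfunction numbers tested above are assumed to be the
     * encrypted-key ECC functions of PCKMO added with MSA 9 (P256/P384/
     * P521 and Ed25519/Ed448); the exact mapping aside, the point is that
     * any one of them being available implies ECC protected-key support.
     */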
3127
3128static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3129{
3130        /*
3131         * If the AP instructions are not being interpreted and the MSAX3
3132         * facility is not configured for the guest, there is nothing to set up.
3133         */
3134        if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3135                return;
3136
3137        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3138        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3139        vcpu->arch.sie_block->eca &= ~ECA_APIE;
3140        vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3141
3142        if (vcpu->kvm->arch.crypto.apie)
3143                vcpu->arch.sie_block->eca |= ECA_APIE;
3144
3145        /* Set up protected key support */
3146        if (vcpu->kvm->arch.crypto.aes_kw) {
3147                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3148                /* ECC keys are also wrapped with the AES wrapping key */
3149                if (kvm_has_pckmo_ecc(vcpu->kvm))
3150                        vcpu->arch.sie_block->ecd |= ECD_ECC;
3151        }
3152
3153        if (vcpu->kvm->arch.crypto.dea_kw)
3154                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3155}
3156
3157void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3158{
3159        free_page(vcpu->arch.sie_block->cbrlo);
3160        vcpu->arch.sie_block->cbrlo = 0;
3161}
3162
3163int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3164{
3165        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3166        if (!vcpu->arch.sie_block->cbrlo)
3167                return -ENOMEM;
3168        return 0;
3169}
3170
3171static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3172{
3173        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3174
3175        vcpu->arch.sie_block->ibc = model->ibc;
3176        if (test_kvm_facility(vcpu->kvm, 7))
3177                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3178}
3179
3180static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3181{
3182        int rc = 0;
3183        u16 uvrc, uvrrc;
3184
3185        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3186                                                    CPUSTAT_SM |
3187                                                    CPUSTAT_STOPPED);
3188
3189        if (test_kvm_facility(vcpu->kvm, 78))
3190                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3191        else if (test_kvm_facility(vcpu->kvm, 8))
3192                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3193
3194        kvm_s390_vcpu_setup_model(vcpu);
3195
3196        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3197        if (MACHINE_HAS_ESOP)
3198                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3199        if (test_kvm_facility(vcpu->kvm, 9))
3200                vcpu->arch.sie_block->ecb |= ECB_SRSI;
3201        if (test_kvm_facility(vcpu->kvm, 73))
3202                vcpu->arch.sie_block->ecb |= ECB_TE;
3203
3204        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3205                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3206        if (test_kvm_facility(vcpu->kvm, 130))
3207                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3208        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3209        if (sclp.has_cei)
3210                vcpu->arch.sie_block->eca |= ECA_CEI;
3211        if (sclp.has_ib)
3212                vcpu->arch.sie_block->eca |= ECA_IB;
3213        if (sclp.has_siif)
3214                vcpu->arch.sie_block->eca |= ECA_SII;
3215        if (sclp.has_sigpif)
3216                vcpu->arch.sie_block->eca |= ECA_SIGPI;
3217        if (test_kvm_facility(vcpu->kvm, 129)) {
3218                vcpu->arch.sie_block->eca |= ECA_VX;
3219                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3220        }
3221        if (test_kvm_facility(vcpu->kvm, 139))
3222                vcpu->arch.sie_block->ecd |= ECD_MEF;
3223        if (test_kvm_facility(vcpu->kvm, 156))
3224                vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3225        if (vcpu->arch.sie_block->gd) {
3226                vcpu->arch.sie_block->eca |= ECA_AIV;
3227                VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3228                           vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3229        }
3230        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3231                                        | SDNXC;
3232        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3233
3234        if (sclp.has_kss)
3235                kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3236        else
3237                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3238
3239        if (vcpu->kvm->arch.use_cmma) {
3240                rc = kvm_s390_vcpu_setup_cmma(vcpu);
3241                if (rc)
3242                        return rc;
3243        }
3244        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3245        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3246
3247        vcpu->arch.sie_block->hpid = HPID_KVM;
3248
3249        kvm_s390_vcpu_crypto_setup(vcpu);
3250
3251        mutex_lock(&vcpu->kvm->lock);
3252        if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3253                rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3254                if (rc)
3255                        kvm_s390_vcpu_unsetup_cmma(vcpu);
3256        }
3257        mutex_unlock(&vcpu->kvm->lock);
3258
3259        return rc;
3260}
3261
3262int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3263{
3264        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3265                return -EINVAL;
3266        return 0;
3267}
3268
3269int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3270{
3271        struct sie_page *sie_page;
3272        int rc;
3273
3274        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3275        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3276        if (!sie_page)
3277                return -ENOMEM;
3278
3279        vcpu->arch.sie_block = &sie_page->sie_block;
3280        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3281
3282        /* the real guest size will always be smaller than msl */
3283        vcpu->arch.sie_block->mso = 0;
3284        vcpu->arch.sie_block->msl = sclp.hamax;
3285
3286        vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3287        spin_lock_init(&vcpu->arch.local_int.lock);
3288        vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3289        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3290                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3291        seqcount_init(&vcpu->arch.cputm_seqcount);
3292
3293        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3294        kvm_clear_async_pf_completion_queue(vcpu);
3295        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3296                                    KVM_SYNC_GPRS |
3297                                    KVM_SYNC_ACRS |
3298                                    KVM_SYNC_CRS |
3299                                    KVM_SYNC_ARCH0 |
3300                                    KVM_SYNC_PFAULT |
3301                                    KVM_SYNC_DIAG318;
3302        kvm_s390_set_prefix(vcpu, 0);
3303        if (test_kvm_facility(vcpu->kvm, 64))
3304                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3305        if (test_kvm_facility(vcpu->kvm, 82))
3306                vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3307        if (test_kvm_facility(vcpu->kvm, 133))
3308                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3309        if (test_kvm_facility(vcpu->kvm, 156))
3310                vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3311        /* fprs can be synchronized via vrs, even if the guest has no vx. With
3312         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3313         */
3314        if (MACHINE_HAS_VX)
3315                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3316        else
3317                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3318
3319        if (kvm_is_ucontrol(vcpu->kvm)) {
3320                rc = __kvm_ucontrol_vcpu_init(vcpu);
3321                if (rc)
3322                        goto out_free_sie_block;
3323        }
3324
3325        VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3326                 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3327        trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3328
3329        rc = kvm_s390_vcpu_setup(vcpu);
3330        if (rc)
3331                goto out_ucontrol_uninit;
3332        return 0;
3333
3334out_ucontrol_uninit:
3335        if (kvm_is_ucontrol(vcpu->kvm))
3336                gmap_remove(vcpu->arch.gmap);
3337out_free_sie_block:
3338        free_page((unsigned long)(vcpu->arch.sie_block));
3339        return rc;
3340}
3341
3342int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3343{
3344        return kvm_s390_vcpu_has_irq(vcpu, 0);
3345}
3346
3347bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3348{
3349        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3350}
3351
3352void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3353{
3354        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3355        exit_sie(vcpu);
3356}
3357
3358void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3359{
3360        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3361}
3362
3363static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3364{
3365        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3366        exit_sie(vcpu);
3367}
3368
3369bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3370{
3371        return atomic_read(&vcpu->arch.sie_block->prog20) &
3372               (PROG_BLOCK_SIE | PROG_REQUEST);
3373}
3374
3375static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3376{
3377        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3378}
3379
3380/*
3381 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3382 * If the CPU is not running (e.g. waiting as idle) the function will
3383 * return immediately. */
3384void exit_sie(struct kvm_vcpu *vcpu)
3385{
3386        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3387        kvm_s390_vsie_kick(vcpu);
3388        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3389                cpu_relax();
3390}
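    /*
     * What the loop above relies on: CPUSTAT_STOP_INT makes a running vcpu
     * drop out of SIE, kvm_s390_vsie_kick() does the same for a nested
     * (vSIE) guest, and prog0c carries PROG_IN_SIE while SIE is executing,
     * so spinning on it guarantees the vcpu has really left (v)SIE before
     * the caller proceeds.
     */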
3391
3392/* Kick a guest cpu out of SIE to process a request synchronously */
3393void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3394{
3395        kvm_make_request(req, vcpu);
3396        kvm_s390_vcpu_request(vcpu);
3397}
3398
3399static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3400                              unsigned long end)
3401{
3402        struct kvm *kvm = gmap->private;
3403        struct kvm_vcpu *vcpu;
3404        unsigned long prefix;
3405        int i;
3406
3407        if (gmap_is_shadow(gmap))
3408                return;
3409        if (start >= 1UL << 31)
3410                /* We are only interested in prefix pages */
3411                return;
3412        kvm_for_each_vcpu(i, vcpu, kvm) {
3413                /* match against both prefix pages */
3414                prefix = kvm_s390_get_prefix(vcpu);
3415                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3416                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3417                                   start, end);
3418                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3419                }
3420        }
3421}
3422
3423bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3424{
3425        /* do not poll with more than halt_poll_max_steal percent of steal time */
3426        if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3427            halt_poll_max_steal) {
3428                vcpu->stat.halt_no_poll_steal++;
3429                return true;
3430        }
3431        return false;
3432}
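    /*
     * A rough reading of the check above, assuming avg_steal_timer is kept
     * in CPU-timer units (4096 units per microsecond) and TICK_USEC is the
     * tick length in microseconds: (TICK_USEC << 12) is one tick in those
     * units, so the expression compares recent steal time against
     * halt_poll_max_steal percent of a tick and skips halt polling when
     * too much CPU is being stolen by the hypervisor.
     */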
3433
3434int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3435{
3436        /* kvm common code refers to this, but never calls it */
3437        BUG();
3438        return 0;
3439}
3440
3441static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3442                                           struct kvm_one_reg *reg)
3443{
3444        int r = -EINVAL;
3445
3446        switch (reg->id) {
3447        case KVM_REG_S390_TODPR:
3448                r = put_user(vcpu->arch.sie_block->todpr,
3449                             (u32 __user *)reg->addr);
3450                break;
3451        case KVM_REG_S390_EPOCHDIFF:
3452                r = put_user(vcpu->arch.sie_block->epoch,
3453                             (u64 __user *)reg->addr);
3454                break;
3455        case KVM_REG_S390_CPU_TIMER:
3456                r = put_user(kvm_s390_get_cpu_timer(vcpu),
3457                             (u64 __user *)reg->addr);
3458                break;
3459        case KVM_REG_S390_CLOCK_COMP:
3460                r = put_user(vcpu->arch.sie_block->ckc,
3461                             (u64 __user *)reg->addr);
3462                break;
3463        case KVM_REG_S390_PFTOKEN:
3464                r = put_user(vcpu->arch.pfault_token,
3465                             (u64 __user *)reg->addr);
3466                break;
3467        case KVM_REG_S390_PFCOMPARE:
3468                r = put_user(vcpu->arch.pfault_compare,
3469                             (u64 __user *)reg->addr);
3470                break;
3471        case KVM_REG_S390_PFSELECT:
3472                r = put_user(vcpu->arch.pfault_select,
3473                             (u64 __user *)reg->addr);
3474                break;
3475        case KVM_REG_S390_PP:
3476                r = put_user(vcpu->arch.sie_block->pp,
3477                             (u64 __user *)reg->addr);
3478                break;
3479        case KVM_REG_S390_GBEA:
3480                r = put_user(vcpu->arch.sie_block->gbea,
3481                             (u64 __user *)reg->addr);
3482                break;
3483        default:
3484                break;
3485        }
3486
3487        return r;
3488}
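    /*
     * Illustrative userspace sketch (assumed usage, not taken from this
     * file): the registers above are reached through the generic
     * KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls on the vcpu fd, e.g.:
     *
     *        __u64 cputm;
     *        struct kvm_one_reg reg = {
     *                .id   = KVM_REG_S390_CPU_TIMER,
     *                .addr = (__u64)&cputm,
     *        };
     *        ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
     *
     * put_user()/get_user() in the handlers then copy to/from reg.addr.
     */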
3489
3490static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3491                                           struct kvm_one_reg *reg)
3492{
3493        int r = -EINVAL;
3494        __u64 val;
3495
3496        switch (reg->id) {
3497        case KVM_REG_S390_TODPR:
3498                r = get_user(vcpu->arch.sie_block->todpr,
3499                             (u32 __user *)reg->addr);
3500                break;
3501        case KVM_REG_S390_EPOCHDIFF:
3502                r = get_user(vcpu->arch.sie_block->epoch,
3503                             (u64 __user *)reg->addr);
3504                break;
3505        case KVM_REG_S390_CPU_TIMER:
3506                r = get_user(val, (u64 __user *)reg->addr);
3507                if (!r)
3508                        kvm_s390_set_cpu_timer(vcpu, val);
3509                break;
3510        case KVM_REG_S390_CLOCK_COMP:
3511                r = get_user(vcpu->arch.sie_block->ckc,
3512                             (u64 __user *)reg->addr);
3513                break;
3514        case KVM_REG_S390_PFTOKEN:
3515                r = get_user(vcpu->arch.pfault_token,
3516                             (u64 __user *)reg->addr);
3517                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3518                        kvm_clear_async_pf_completion_queue(vcpu);
3519                break;
3520        case KVM_REG_S390_PFCOMPARE:
3521                r = get_user(vcpu->arch.pfault_compare,
3522                             (u64 __user *)reg->addr);
3523                break;
3524        case KVM_REG_S390_PFSELECT:
3525                r = get_user(vcpu->arch.pfault_select,
3526                             (u64 __user *)reg->addr);
3527                break;
3528        case KVM_REG_S390_PP:
3529                r = get_user(vcpu->arch.sie_block->pp,
3530                             (u64 __user *)reg->addr);
3531                break;
3532        case KVM_REG_S390_GBEA:
3533                r = get_user(vcpu->arch.sie_block->gbea,
3534                             (u64 __user *)reg->addr);
3535                break;
3536        default:
3537                break;
3538        }
3539
3540        return r;
3541}
3542
3543static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3544{
3545        vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3546        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3547        memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3548
3549        kvm_clear_async_pf_completion_queue(vcpu);
3550        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3551                kvm_s390_vcpu_stop(vcpu);
3552        kvm_s390_clear_local_irqs(vcpu);
3553}
3554
3555static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3556{
3557        /* Initial reset is a superset of the normal reset */
3558        kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3559
3560        /*
3561         * This equals the initial CPU reset in the POP, but we don't switch to ESA.
3562         * We do not only reset the internal data, but also ...
3563         */
3564        vcpu->arch.sie_block->gpsw.mask = 0;
3565        vcpu->arch.sie_block->gpsw.addr = 0;
3566        kvm_s390_set_prefix(vcpu, 0);
3567        kvm_s390_set_cpu_timer(vcpu, 0);
3568        vcpu->arch.sie_block->ckc = 0;
3569        memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3570        vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3571        vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3572
3573        /* ... the data in sync regs */
3574        memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3575        vcpu->run->s.regs.ckc = 0;
3576        vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3577        vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3578        vcpu->run->psw_addr = 0;
3579        vcpu->run->psw_mask = 0;
3580        vcpu->run->s.regs.todpr = 0;
3581        vcpu->run->s.regs.cputm = 0;
3582        vcpu->run->s.regs.ckc = 0;
3583        vcpu->run->s.regs.pp = 0;
3584        vcpu->run->s.regs.gbea = 1;
3585        vcpu->run->s.regs.fpc = 0;
3586        /*
3587         * Do not reset these registers in the protected case, as some of
3588         * them are overlaid and they are not accessible in this case
3589         * anyway.
3590         */
3591        if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3592                vcpu->arch.sie_block->gbea = 1;
3593                vcpu->arch.sie_block->pp = 0;
3594                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3595                vcpu->arch.sie_block->todpr = 0;
3596        }
3597}
3598
3599static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3600{
3601        struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3602
3603        /* Clear reset is a superset of the initial reset */
3604        kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3605
3606        memset(&regs->gprs, 0, sizeof(regs->gprs));
3607        memset(&regs->vrs, 0, sizeof(regs->vrs));
3608        memset(&regs->acrs, 0, sizeof(regs->acrs));
3609        memset(&regs->gscb, 0, sizeof(regs->gscb));
3610
3611        regs->etoken = 0;
3612        regs->etoken_extension = 0;
3613}
3614
3615int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3616{
3617        vcpu_load(vcpu);
3618        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3619        vcpu_put(vcpu);
3620        return 0;
3621}
3622
3623int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3624{
3625        vcpu_load(vcpu);
3626        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3627        vcpu_put(vcpu);
3628        return 0;
3629}
3630
3631int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3632                                  struct kvm_sregs *sregs)
3633{
3634        vcpu_load(vcpu);
3635
3636        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3637        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3638
3639        vcpu_put(vcpu);
3640        return 0;
3641}
3642
3643int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3644                                  struct kvm_sregs *sregs)
3645{
3646        vcpu_load(vcpu);
3647
3648        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3649        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3650
3651        vcpu_put(vcpu);
3652        return 0;
3653}
3654
3655int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3656{
3657        int ret = 0;
3658
3659        vcpu_load(vcpu);
3660
3661        if (test_fp_ctl(fpu->fpc)) {
3662                ret = -EINVAL;
3663                goto out;
3664        }
3665        vcpu->run->s.regs.fpc = fpu->fpc;
3666        if (MACHINE_HAS_VX)
3667                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3668                                 (freg_t *) fpu->fprs);
3669        else
3670                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3671
3672out:
3673        vcpu_put(vcpu);
3674        return ret;
3675}
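    /*
     * Background for the conversion above: with the vector facility, the
     * 16 floating point registers are architecturally the leftmost 64 bits
     * of vector registers 0-15, so fprs handed in by userspace are folded
     * into the vrs save area; without vectors they are kept as plain fprs.
     */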
3676
3677int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3678{
3679        vcpu_load(vcpu);
3680
3681        /* make sure we have the latest values */
3682        save_fpu_regs();
3683        if (MACHINE_HAS_VX)
3684                convert_vx_to_fp((freg_t *) fpu->fprs,
3685                                 (__vector128 *) vcpu->run->s.regs.vrs);
3686        else
3687                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3688        fpu->fpc = vcpu->run->s.regs.fpc;
3689
3690        vcpu_put(vcpu);
3691        return 0;
3692}
3693
3694static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3695{
3696        int rc = 0;
3697
3698        if (!is_vcpu_stopped(vcpu))
3699                rc = -EBUSY;
3700        else {
3701                vcpu->run->psw_mask = psw.mask;
3702                vcpu->run->psw_addr = psw.addr;
3703        }
3704        return rc;
3705}
3706
3707int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3708                                  struct kvm_translation *tr)
3709{
3710        return -EINVAL; /* not implemented yet */
3711}
3712
3713#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3714                              KVM_GUESTDBG_USE_HW_BP | \
3715                              KVM_GUESTDBG_ENABLE)
3716
3717int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3718                                        struct kvm_guest_debug *dbg)
3719{
3720        int rc = 0;
3721
3722        vcpu_load(vcpu);
3723
3724        vcpu->guest_debug = 0;
3725        kvm_s390_clear_bp_data(vcpu);
3726
3727        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3728                rc = -EINVAL;
3729                goto out;
3730        }
3731        if (!sclp.has_gpere) {
3732                rc = -EINVAL;
3733                goto out;
3734        }
3735
3736        if (dbg->control & KVM_GUESTDBG_ENABLE) {
3737                vcpu->guest_debug = dbg->control;
3738                /* enforce guest PER */
3739                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3740
3741                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3742                        rc = kvm_s390_import_bp_data(vcpu, dbg);
3743        } else {
3744                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3745                vcpu->arch.guestdbg.last_bp = 0;
3746        }
3747
3748        if (rc) {
3749                vcpu->guest_debug = 0;
3750                kvm_s390_clear_bp_data(vcpu);
3751                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3752        }
3753
3754out:
3755        vcpu_put(vcpu);
3756        return rc;
3757}
3758
3759int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3760                                    struct kvm_mp_state *mp_state)
3761{
3762        int ret;
3763
3764        vcpu_load(vcpu);
3765
3766        /* CHECK_STOP and LOAD are not supported yet */
3767        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3768                                      KVM_MP_STATE_OPERATING;
3769
3770        vcpu_put(vcpu);
3771        return ret;
3772}
3773
3774int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3775                                    struct kvm_mp_state *mp_state)
3776{
3777        int rc = 0;
3778
3779        vcpu_load(vcpu);
3780
3781        /* user space knows about this interface - let it control the state */
3782        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3783
3784        switch (mp_state->mp_state) {
3785        case KVM_MP_STATE_STOPPED:
3786                rc = kvm_s390_vcpu_stop(vcpu);
3787                break;
3788        case KVM_MP_STATE_OPERATING:
3789                rc = kvm_s390_vcpu_start(vcpu);
3790                break;
3791        case KVM_MP_STATE_LOAD:
3792                if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3793                        rc = -ENXIO;
3794                        break;
3795                }
3796                rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3797                break;
3798        case KVM_MP_STATE_CHECK_STOP:
3799                fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3800        default:
3801                rc = -ENXIO;
3802        }
3803
3804        vcpu_put(vcpu);
3805        return rc;
3806}
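    /*
     * Illustrative userspace sketch (assumed usage, not taken from this
     * file): the cpu state is driven through the generic KVM_SET_MP_STATE
     * ioctl, which - as done above - also switches the VM to user
     * controlled cpu state handling:
     *
     *        struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
     *        ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
     */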
3807
3808static bool ibs_enabled(struct kvm_vcpu *vcpu)
3809{
3810        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3811}
3812
3813static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3814{
3815retry:
3816        kvm_s390_vcpu_request_handled(vcpu);
3817        if (!kvm_request_pending(vcpu))
3818                return 0;
3819        /*
3820         * We use MMU_RELOAD just to re-arm the ipte notifier for the
3821         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3822         * This ensures that the ipte instruction for this request has
3823         * already finished. We might race against a second unmapper that
3824         * wants to set the blocking bit. Let's just retry the request loop.
3825         */
3826        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3827                int rc;
3828                rc = gmap_mprotect_notify(vcpu->arch.gmap,
3829                                          kvm_s390_get_prefix(vcpu),
3830                                          PAGE_SIZE * 2, PROT_WRITE);
3831                if (rc) {
3832                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3833                        return rc;
3834                }
3835                goto retry;
3836        }
3837
3838        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3839                vcpu->arch.sie_block->ihcpu = 0xffff;
3840                goto retry;
3841        }
3842
3843        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3844                if (!ibs_enabled(vcpu)) {
3845                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3846                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3847                }
3848                goto retry;
3849        }
3850
3851        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3852                if (ibs_enabled(vcpu)) {
3853                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3854                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3855                }
3856                goto retry;
3857        }
3858
3859        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3860                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3861                goto retry;
3862        }
3863
3864        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3865                /*
3866                 * Disable CMM virtualization; we will emulate the ESSA
3867                 * instruction manually, in order to provide additional
3868                 * functionalities needed for live migration.
3869                 */
3870                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3871                goto retry;
3872        }
3873
3874        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3875                /*
3876                 * Re-enable CMM virtualization if CMMA is available and
3877                 * CMM has been used.
3878                 */
3879                if ((vcpu->kvm->arch.use_cmma) &&
3880                    (vcpu->kvm->mm->context.uses_cmm))
3881                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3882                goto retry;
3883        }
3884
3885        /* nothing to do, just clear the request */
3886        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3887        /* we left the vsie handler, nothing to do, just clear the request */
3888        kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3889
3890        return 0;
3891}
3892
3893void kvm_s390_set_tod_clock(struct kvm *kvm,
3894                            const struct kvm_s390_vm_tod_clock *gtod)
3895{
3896        struct kvm_vcpu *vcpu;
3897        union tod_clock clk;
3898        int i;
3899
3900        mutex_lock(&kvm->lock);
3901        preempt_disable();
3902
3903        store_tod_clock_ext(&clk);
3904
3905        kvm->arch.epoch = gtod->tod - clk.tod;
3906        kvm->arch.epdx = 0;
3907        if (test_kvm_facility(kvm, 139)) {
3908                kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3909                if (kvm->arch.epoch > gtod->tod)
3910                        kvm->arch.epdx -= 1;
3911        }
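            /*
             * The guest TOD is host TOD + epoch, with epdx extending the
             * epoch by 8 high bits when the multiple-epoch facility (139)
             * is available. epoch was computed modulo 2^64 above; if that
             * subtraction wrapped (detected by epoch > gtod->tod), a borrow
             * must be propagated into the extension, hence the decrement.
             */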
3912
3913        kvm_s390_vcpu_block_all(kvm);
3914        kvm_for_each_vcpu(i, vcpu, kvm) {
3915                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3916                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3917        }
3918
3919        kvm_s390_vcpu_unblock_all(kvm);
3920        preempt_enable();
3921        mutex_unlock(&kvm->lock);
3922}
3923
3924/**
3925 * kvm_arch_fault_in_page - fault-in guest page if necessary
3926 * @vcpu: The corresponding virtual cpu
3927 * @gpa: Guest physical address
3928 * @writable: Whether the page should be writable or not
3929 *
3930 * Make sure that a guest page has been faulted-in on the host.
3931 *
3932 * Return: Zero on success, negative error code otherwise.
3933 */
3934long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3935{
3936        return gmap_fault(vcpu->arch.gmap, gpa,
3937                          writable ? FAULT_FLAG_WRITE : 0);
3938}
3939
3940static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3941                                      unsigned long token)
3942{
3943        struct kvm_s390_interrupt inti;
3944        struct kvm_s390_irq irq;
3945
3946        if (start_token) {
3947                irq.u.ext.ext_params2 = token;
3948                irq.type = KVM_S390_INT_PFAULT_INIT;
3949                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3950        } else {
3951                inti.type = KVM_S390_INT_PFAULT_DONE;
3952                inti.parm64 = token;
3953                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3954        }
3955}
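    /*
     * The two halves of the pfault protocol as used here: PFAULT_INIT is
     * injected into the faulting vcpu to tell the guest that a page is
     * being made available, PFAULT_DONE is queued as a floating (VM-wide)
     * interrupt once the page is there; the token ties the two
     * notifications together for the guest.
     */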
3956
3957bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3958                                     struct kvm_async_pf *work)
3959{
3960        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3961        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3962
3963        return true;
3964}
3965
3966void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3967                                 struct kvm_async_pf *work)
3968{
3969        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3970        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3971}
3972
3973void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3974                               struct kvm_async_pf *work)
3975{
3976        /* s390 will always inject the page directly */
3977}
3978
3979bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3980{
3981        /*
3982         * s390 will always inject the page directly,
3983         * but we still want check_async_completion to clean up
3984         */
3985        return true;
3986}
3987
3988static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3989{
3990        hva_t hva;
3991        struct kvm_arch_async_pf arch;
3992
3993        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3994                return false;
3995        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3996            vcpu->arch.pfault_compare)
3997                return false;
3998        if (psw_extint_disabled(vcpu))
3999                return false;
4000        if (kvm_s390_vcpu_has_irq(vcpu, 0))
4001                return false;
4002        if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4003                return false;
4004        if (!vcpu->arch.gmap->pfault_enabled)
4005                return false;
4006
4007        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4008        hva += current->thread.gmap_addr & ~PAGE_MASK;
4009        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4010                return false;
4011
4012        return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4013}
4014
4015static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4016{
4017        int rc, cpuflags;
4018
4019        /*
4020         * On s390 notifications for arriving pages will be delivered directly
4021         * to the guest, but the housekeeping for completed pfaults is
4022         * handled outside the worker.
4023         */
4024        kvm_check_async_pf_completion(vcpu);
4025
4026        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4027        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4028
4029        if (need_resched())
4030                schedule();
4031
4032        if (!kvm_is_ucontrol(vcpu->kvm)) {
4033                rc = kvm_s390_deliver_pending_interrupts(vcpu);
4034                if (rc)
4035                        return rc;
4036        }
4037
4038        rc = kvm_s390_handle_requests(vcpu);
4039        if (rc)
4040                return rc;
4041
4042        if (guestdbg_enabled(vcpu)) {
4043                kvm_s390_backup_guest_per_regs(vcpu);
4044                kvm_s390_patch_guest_per_regs(vcpu);
4045        }
4046
4047        clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4048
4049        vcpu->arch.sie_block->icptcode = 0;
4050        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4051        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4052        trace_kvm_s390_sie_enter(vcpu, cpuflags);
4053
4054        return 0;
4055}
4056
4057static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4058{
4059        struct kvm_s390_pgm_info pgm_info = {
4060                .code = PGM_ADDRESSING,
4061        };
4062        u8 opcode, ilen;
4063        int rc;
4064
4065        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4066        trace_kvm_s390_sie_fault(vcpu);
4067
4068        /*
4069         * We want to inject an addressing exception, which is defined as a
4070         * suppressing or terminating exception. However, since we came here
4071         * by a DAT access exception, the PSW still points to the faulting
4072         * instruction since DAT exceptions are nullifying. So we've got
4073         * to look up the current opcode to get the length of the instruction
4074         * to be able to forward the PSW.
4075         */
4076        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4077        ilen = insn_length(opcode);
4078        if (rc < 0) {
4079                return rc;
4080        } else if (rc) {
4081                /* Instruction-Fetching Exceptions - we can't detect the ilen.
4082                 * Forward by arbitrary ilc, injection will take care of
4083                 * nullification if necessary.
4084                 */
4085                pgm_info = vcpu->arch.pgm;
4086                ilen = 4;
4087        }
4088        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4089        kvm_s390_forward_psw(vcpu, ilen);
4090        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4091}
4092
4093static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4094{
4095        struct mcck_volatile_info *mcck_info;
4096        struct sie_page *sie_page;
4097
4098        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4099                   vcpu->arch.sie_block->icptcode);
4100        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4101
4102        if (guestdbg_enabled(vcpu))
4103                kvm_s390_restore_guest_per_regs(vcpu);
4104
4105        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4106        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4107
4108        if (exit_reason == -EINTR) {
4109                VCPU_EVENT(vcpu, 3, "%s", "machine check");
4110                sie_page = container_of(vcpu->arch.sie_block,
4111                                        struct sie_page, sie_block);
4112                mcck_info = &sie_page->mcck_info;
4113                kvm_s390_reinject_machine_check(vcpu, mcck_info);
4114                return 0;
4115        }
4116
4117        if (vcpu->arch.sie_block->icptcode > 0) {
4118                int rc = kvm_handle_sie_intercept(vcpu);
4119
4120                if (rc != -EOPNOTSUPP)
4121                        return rc;
4122                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4123                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4124                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4125                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4126                return -EREMOTE;
4127        } else if (exit_reason != -EFAULT) {
4128                vcpu->stat.exit_null++;
4129                return 0;
4130        } else if (kvm_is_ucontrol(vcpu->kvm)) {
4131                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4132                vcpu->run->s390_ucontrol.trans_exc_code =
4133                                                current->thread.gmap_addr;
4134                vcpu->run->s390_ucontrol.pgm_code = 0x10;
4135                return -EREMOTE;
4136        } else if (current->thread.gmap_pfault) {
4137                trace_kvm_s390_major_guest_pfault(vcpu);
4138                current->thread.gmap_pfault = 0;
4139                if (kvm_arch_setup_async_pf(vcpu))
4140                        return 0;
4141                vcpu->stat.pfault_sync++;
4142                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4143        }
4144        return vcpu_post_run_fault_in_sie(vcpu);
4145}
4146
4147#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4148static int __vcpu_run(struct kvm_vcpu *vcpu)
4149{
4150        int rc, exit_reason;
4151        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4152
4153        /*
4154         * We try to hold kvm->srcu during most of vcpu_run (except when
4155         * running the guest), so that memslots (and other stuff) are protected
4156         */
4157        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4158
4159        do {
4160                rc = vcpu_pre_run(vcpu);
4161                if (rc)
4162                        break;
4163
4164                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4165                /*
4166                 * As PF_VCPU will be used in the fault handler, there should
4167                 * be no uaccess between guest_enter and guest_exit.
4168                 */
4169                local_irq_disable();
4170                guest_enter_irqoff();
4171                __disable_cpu_timer_accounting(vcpu);
4172                local_irq_enable();
4173                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4174                        memcpy(sie_page->pv_grregs,
4175                               vcpu->run->s.regs.gprs,
4176                               sizeof(sie_page->pv_grregs));
4177                }
4178                if (test_cpu_flag(CIF_FPU))
4179                        load_fpu_regs();
4180                exit_reason = sie64a(vcpu->arch.sie_block,
4181                                     vcpu->run->s.regs.gprs);
4182                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4183                        memcpy(vcpu->run->s.regs.gprs,
4184                               sie_page->pv_grregs,
4185                               sizeof(sie_page->pv_grregs));
4186                        /*
4187                         * We're not allowed to inject interrupts on intercepts
4188                         * that leave the guest state in an "in-between" state
4189                         * where the next SIE entry will do a continuation.
4190                         * Fence interrupts in our "internal" PSW.
4191                         */
4192                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4193                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4194                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4195                        }
4196                }
4197                local_irq_disable();
4198                __enable_cpu_timer_accounting(vcpu);
4199                guest_exit_irqoff();
4200                local_irq_enable();
4201                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4202
4203                rc = vcpu_post_run(vcpu, exit_reason);
4204        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4205
4206        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4207        return rc;
4208}
4209
4210static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4211{
4212        struct kvm_run *kvm_run = vcpu->run;
4213        struct runtime_instr_cb *riccb;
4214        struct gs_cb *gscb;
4215
4216        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4217        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4218        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4219        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4220        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4221                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4222                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4223                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4224        }
4225        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4226                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4227                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4228                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4229                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4230                        kvm_clear_async_pf_completion_queue(vcpu);
4231        }
4232        if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4233                vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4234                vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4235        }
4236        /*
4237         * If userspace sets the riccb (e.g. after migration) to a valid state,
4238         * we should enable RI here instead of doing the lazy enablement.
4239         */
4240        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4241            test_kvm_facility(vcpu->kvm, 64) &&
4242            riccb->v &&
4243            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4244                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4245                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4246        }
4247        /*
4248         * If userspace sets the gscb (e.g. after migration) to non-zero,
4249         * we should enable GS here instead of doing the lazy enablement.
4250         */
4251        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4252            test_kvm_facility(vcpu->kvm, 133) &&
4253            gscb->gssm &&
4254            !vcpu->arch.gs_enabled) {
4255                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4256                vcpu->arch.sie_block->ecb |= ECB_GS;
4257                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4258                vcpu->arch.gs_enabled = 1;
4259        }
4260        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4261            test_kvm_facility(vcpu->kvm, 82)) {
4262                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4263                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4264        }
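            /*
             * Guarded storage: set the guarded storage control, save the host's
             * GS control block if there is one, and load the guest's GS control
             * block from the sync area if the guest has GS enabled.
             */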
4265        if (MACHINE_HAS_GS) {
4266                preempt_disable();
4267                __ctl_set_bit(2, 4);
4268                if (current->thread.gs_cb) {
4269                        vcpu->arch.host_gscb = current->thread.gs_cb;
4270                        save_gs_cb(vcpu->arch.host_gscb);
4271                }
4272                if (vcpu->arch.gs_enabled) {
4273                        current->thread.gs_cb = (struct gs_cb *)
4274                                                &vcpu->run->s.regs.gscb;
4275                        restore_gs_cb(current->thread.gs_cb);
4276                }
4277                preempt_enable();
4278        }
4279        /* SIE will load the etoken directly from the SDNX and therefore from kvm_run */
4280}
4281
4282static void sync_regs(struct kvm_vcpu *vcpu)
4283{
4284        struct kvm_run *kvm_run = vcpu->run;
4285
4286        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4287                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4288        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4289                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4290                /* some control register changes require a tlb flush */
4291                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4292        }
4293        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4294                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4295                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4296        }
4297        save_access_regs(vcpu->arch.host_acrs);
4298        restore_access_regs(vcpu->run->s.regs.acrs);
4299        /* save host (userspace) fprs/vrs */
4300        save_fpu_regs();
4301        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4302        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4303        if (MACHINE_HAS_VX)
4304                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4305        else
4306                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4307        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4308        if (test_fp_ctl(current->thread.fpu.fpc))
4309                /* User space provided an invalid FPC, let's clear it */
4310                current->thread.fpu.fpc = 0;
4311
4312        /* Sync fmt2 only data */
4313        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4314                sync_regs_fmt2(vcpu);
4315        } else {
4316                /*
4317                 * In several places we have to modify our internal view to
4318                 * not do things that are disallowed by the ultravisor. For
4319                 * example we must not inject interrupts after specific exits
4320                 * (e.g. 112 prefix page not secure). We do this by turning
4321                 * off the machine check, external and I/O interrupt bits
4322                 * of our PSW copy. To avoid getting validity intercepts, we
4323                 * only accept the condition code from userspace.
4324                 */
4325                vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4326                vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4327                                                   PSW_MASK_CC;
4328        }
4329
4330        kvm_run->kvm_dirty_regs = 0;
4331}
4332
4333static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4334{
4335        struct kvm_run *kvm_run = vcpu->run;
4336
4337        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4338        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4339        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4340        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4341        kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4342        if (MACHINE_HAS_GS) {
4343                preempt_disable();
4344                __ctl_set_bit(2, 4);
4345                if (vcpu->arch.gs_enabled)
4346                        save_gs_cb(current->thread.gs_cb);
4347                current->thread.gs_cb = vcpu->arch.host_gscb;
4348                restore_gs_cb(vcpu->arch.host_gscb);
4349                if (!vcpu->arch.host_gscb)
4350                        __ctl_clear_bit(2, 4);
4351                vcpu->arch.host_gscb = NULL;
4352                preempt_enable();
4353        }
4354        /* SIE will save the etoken directly into the SDNX and therefore into kvm_run */
4355}
4356
4357static void store_regs(struct kvm_vcpu *vcpu)
4358{
4359        struct kvm_run *kvm_run = vcpu->run;
4360
4361        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4362        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4363        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4364        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4365        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4366        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4367        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4368        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4369        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4370        save_access_regs(vcpu->run->s.regs.acrs);
4371        restore_access_regs(vcpu->arch.host_acrs);
4372        /* Save guest register state */
4373        save_fpu_regs();
4374        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4375        /* Restore will be done lazily at return */
4376        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4377        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4378        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4379                store_regs_fmt2(vcpu);
4380}
4381
4382int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4383{
4384        struct kvm_run *kvm_run = vcpu->run;
4385        int rc;
4386
4387        if (kvm_run->immediate_exit)
4388                return -EINTR;
4389
4390        if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4391            kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4392                return -EINVAL;
4393
4394        vcpu_load(vcpu);
4395
4396        if (guestdbg_exit_pending(vcpu)) {
4397                kvm_s390_prepare_debug_exit(vcpu);
4398                rc = 0;
4399                goto out;
4400        }
4401
4402        kvm_sigset_activate(vcpu);
4403
4404        /*
4405         * No need to check the return value of vcpu_start: it can only fail
4406         * for protvirt, and protvirt implies user cpu state control.
4407         */
4408        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4409                kvm_s390_vcpu_start(vcpu);
4410        } else if (is_vcpu_stopped(vcpu)) {
4411                pr_err_ratelimited("can't run stopped vcpu %d\n",
4412                                   vcpu->vcpu_id);
4413                rc = -EINVAL;
4414                goto out;
4415        }
4416
4417        sync_regs(vcpu);
4418        enable_cpu_timer_accounting(vcpu);
4419
4420        might_fault();
4421        rc = __vcpu_run(vcpu);
4422
4423        if (signal_pending(current) && !rc) {
4424                kvm_run->exit_reason = KVM_EXIT_INTR;
4425                rc = -EINTR;
4426        }
4427
4428        if (guestdbg_exit_pending(vcpu) && !rc)  {
4429                kvm_s390_prepare_debug_exit(vcpu);
4430                rc = 0;
4431        }
4432
4433        if (rc == -EREMOTE) {
4434                /* userspace support is needed, kvm_run has been prepared */
4435                rc = 0;
4436        }
4437
4438        disable_cpu_timer_accounting(vcpu);
4439        store_regs(vcpu);
4440
4441        kvm_sigset_deactivate(vcpu);
4442
4443        vcpu->stat.exit_userspace++;
4444out:
4445        vcpu_put(vcpu);
4446        return rc;
4447}
4448
4449/*
4450 * Store status at address.
4451 * We have two special cases:
4452 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4453 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4454 */
4455int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4456{
4457        unsigned char archmode = 1;
4458        freg_t fprs[NUM_FPRS];
4459        unsigned int px;
4460        u64 clkcomp, cputm;
4461        int rc;
4462
4463        px = kvm_s390_get_prefix(vcpu);
4464        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4465                if (write_guest_abs(vcpu, 163, &archmode, 1))
4466                        return -EFAULT;
4467                gpa = 0;
4468        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4469                if (write_guest_real(vcpu, 163, &archmode, 1))
4470                        return -EFAULT;
4471                gpa = px;
4472        } else
4473                gpa -= __LC_FPREGS_SAVE_AREA;
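            /*
             * For an explicit address, the register contents are stored at the
             * same relative offsets as in the lowcore save areas, starting with
             * the floating point registers at the given address; adjusting gpa
             * here lets the code below reuse the absolute __LC_*_SAVE_AREA
             * offsets for all three cases.
             */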
4474
4475        /* manually convert vector registers if necessary */
4476        if (MACHINE_HAS_VX) {
4477                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4478                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4479                                     fprs, 128);
4480        } else {
4481                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4482                                     vcpu->run->s.regs.fprs, 128);
4483        }
4484        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4485                              vcpu->run->s.regs.gprs, 128);
4486        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4487                              &vcpu->arch.sie_block->gpsw, 16);
4488        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4489                              &px, 4);
4490        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4491                              &vcpu->run->s.regs.fpc, 4);
4492        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4493                              &vcpu->arch.sie_block->todpr, 4);
4494        cputm = kvm_s390_get_cpu_timer(vcpu);
4495        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4496                              &cputm, 8);
4497        clkcomp = vcpu->arch.sie_block->ckc >> 8;
4498        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4499                              &clkcomp, 8);
4500        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4501                              &vcpu->run->s.regs.acrs, 64);
4502        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4503                              &vcpu->arch.sie_block->gcr, 128);
4504        return rc ? -EFAULT : 0;
4505}
4506
4507int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4508{
4509        /*
4510         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4511         * switch in the run ioctl. Let's update our copies before we save
4512         * them into the save area.
4513         */
4514        save_fpu_regs();
4515        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4516        save_access_regs(vcpu->run->s.regs.acrs);
4517
4518        return kvm_s390_store_status_unloaded(vcpu, addr);
4519}
4520
4521static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4522{
4523        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4524        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4525}
4526
4527static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4528{
4529        unsigned int i;
4530        struct kvm_vcpu *vcpu;
4531
4532        kvm_for_each_vcpu(i, vcpu, kvm) {
4533                __disable_ibs_on_vcpu(vcpu);
4534        }
4535}
4536
4537static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4538{
4539        if (!sclp.has_ibs)
4540                return;
4541        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4542        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4543}
4544
4545int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4546{
4547        int i, online_vcpus, r = 0, started_vcpus = 0;
4548
4549        if (!is_vcpu_stopped(vcpu))
4550                return 0;
4551
4552        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4553        /* Only one cpu at a time may enter/leave the STOPPED state. */
4554        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4555        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4556
4557        /* Let's tell the UV that we want to change into the operating state */
4558        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4559                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4560                if (r) {
4561                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4562                        return r;
4563                }
4564        }
4565
4566        for (i = 0; i < online_vcpus; i++) {
4567                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4568                        started_vcpus++;
4569        }
4570
4571        if (started_vcpus == 0) {
4572                /* we're the only active VCPU -> speed it up */
4573                __enable_ibs_on_vcpu(vcpu);
4574        } else if (started_vcpus == 1) {
4575                /*
4576                 * As we are starting a second VCPU, we have to disable
4577                 * the IBS facility on all VCPUs to remove potentially
4578                 * outstanding ENABLE requests.
4579                 */
4580                __disable_ibs_on_all_vcpus(vcpu->kvm);
4581        }
4582
4583        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4584        /*
4585         * The real PSW might have changed due to a RESTART interpreted by the
4586         * ultravisor. We block all interrupts and let the next sie exit
4587         * refresh our view.
4588         */
4589        if (kvm_s390_pv_cpu_is_protected(vcpu))
4590                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4591        /*
4592         * Another VCPU might have used IBS while we were offline.
4593         * Let's play safe and flush the VCPU at startup.
4594         */
4595        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4596        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4597        return 0;
4598}
4599
4600int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4601{
4602        int i, online_vcpus, r = 0, started_vcpus = 0;
4603        struct kvm_vcpu *started_vcpu = NULL;
4604
4605        if (is_vcpu_stopped(vcpu))
4606                return 0;
4607
4608        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4609        /* Only one cpu at a time may enter/leave the STOPPED state. */
4610        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4611        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4612
4613        /* Let's tell the UV that we want to change into the stopped state */
4614        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4615                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4616                if (r) {
4617                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4618                        return r;
4619                }
4620        }
4621
4622        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4623        kvm_s390_clear_stop_irq(vcpu);
4624
4625        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4626        __disable_ibs_on_vcpu(vcpu);
4627
4628        for (i = 0; i < online_vcpus; i++) {
4629                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4630                        started_vcpus++;
4631                        started_vcpu = vcpu->kvm->vcpus[i];
4632                }
4633        }
4634
4635        if (started_vcpus == 1) {
4636                /*
4637                 * As we only have one VCPU left, we want to enable the
4638                 * IBS facility for that VCPU to speed it up.
4639                 */
4640                __enable_ibs_on_vcpu(started_vcpu);
4641        }
4642
4643        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4644        return 0;
4645}
4646
4647static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4648                                     struct kvm_enable_cap *cap)
4649{
4650        int r;
4651
4652        if (cap->flags)
4653                return -EINVAL;
4654
4655        switch (cap->cap) {
4656        case KVM_CAP_S390_CSS_SUPPORT:
4657                if (!vcpu->kvm->arch.css_support) {
4658                        vcpu->kvm->arch.css_support = 1;
4659                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4660                        trace_kvm_s390_enable_css(vcpu->kvm);
4661                }
4662                r = 0;
4663                break;
4664        default:
4665                r = -EINVAL;
4666                break;
4667        }
4668        return r;
4669}
4670
4671static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4672                                   struct kvm_s390_mem_op *mop)
4673{
4674        void __user *uaddr = (void __user *)mop->buf;
4675        int r = 0;
4676
4677        if (mop->flags || !mop->size)
4678                return -EINVAL;
4679        if (mop->size + mop->sida_offset < mop->size)
4680                return -EINVAL;
4681        if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4682                return -E2BIG;
4683
4684        switch (mop->op) {
4685        case KVM_S390_MEMOP_SIDA_READ:
4686                if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4687                                 mop->sida_offset), mop->size))
4688                        r = -EFAULT;
4689
4690                break;
4691        case KVM_S390_MEMOP_SIDA_WRITE:
4692                if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4693                                   mop->sida_offset), uaddr, mop->size))
4694                        r = -EFAULT;
4695                break;
4696        }
4697        return r;
4698}
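
    /*
     * Read or write guest memory at a logical address through a temporary
     * kernel buffer. With KVM_S390_MEMOP_F_CHECK_ONLY only the access is
     * checked, and with KVM_S390_MEMOP_F_INJECT_EXCEPTION a failing guest
     * access has its program exception injected into the guest.
     */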
4699static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4700                                  struct kvm_s390_mem_op *mop)
4701{
4702        void __user *uaddr = (void __user *)mop->buf;
4703        void *tmpbuf = NULL;
4704        int r = 0;
4705        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4706                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
4707
4708        if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4709                return -EINVAL;
4710
4711        if (mop->size > MEM_OP_MAX_SIZE)
4712                return -E2BIG;
4713
4714        if (kvm_s390_pv_cpu_is_protected(vcpu))
4715                return -EINVAL;
4716
4717        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4718                tmpbuf = vmalloc(mop->size);
4719                if (!tmpbuf)
4720                        return -ENOMEM;
4721        }
4722
4723        switch (mop->op) {
4724        case KVM_S390_MEMOP_LOGICAL_READ:
4725                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4726                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4727                                            mop->size, GACC_FETCH);
4728                        break;
4729                }
4730                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4731                if (r == 0) {
4732                        if (copy_to_user(uaddr, tmpbuf, mop->size))
4733                                r = -EFAULT;
4734                }
4735                break;
4736        case KVM_S390_MEMOP_LOGICAL_WRITE:
4737                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4738                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4739                                            mop->size, GACC_STORE);
4740                        break;
4741                }
4742                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4743                        r = -EFAULT;
4744                        break;
4745                }
4746                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4747                break;
4748        }
4749
4750        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4751                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4752
4753        vfree(tmpbuf);
4754        return r;
4755}
4756
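    /*
     * Dispatch a KVM_S390_MEM_OP either to the guest memory handler or to the
     * SIDA handler for protected guests, with kvm->srcu held so that the
     * memslots stay stable.
     */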
4757static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4758                                      struct kvm_s390_mem_op *mop)
4759{
4760        int r, srcu_idx;
4761
4762        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4763
4764        switch (mop->op) {
4765        case KVM_S390_MEMOP_LOGICAL_READ:
4766        case KVM_S390_MEMOP_LOGICAL_WRITE:
4767                r = kvm_s390_guest_mem_op(vcpu, mop);
4768                break;
4769        case KVM_S390_MEMOP_SIDA_READ:
4770        case KVM_S390_MEMOP_SIDA_WRITE:
4771                /* we are locked against sida going away by the vcpu->mutex */
4772                r = kvm_s390_guest_sida_op(vcpu, mop);
4773                break;
4774        default:
4775                r = -EINVAL;
4776        }
4777
4778        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4779        return r;
4780}
4781
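    /*
     * Interrupt injection ioctls are handled here without taking the vcpu
     * mutex first (hence "async"), so userspace can inject interrupts while
     * the VCPU is running.
     */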
4782long kvm_arch_vcpu_async_ioctl(struct file *filp,
4783                               unsigned int ioctl, unsigned long arg)
4784{
4785        struct kvm_vcpu *vcpu = filp->private_data;
4786        void __user *argp = (void __user *)arg;
4787
4788        switch (ioctl) {
4789        case KVM_S390_IRQ: {
4790                struct kvm_s390_irq s390irq;
4791
4792                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4793                        return -EFAULT;
4794                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4795        }
4796        case KVM_S390_INTERRUPT: {
4797                struct kvm_s390_interrupt s390int;
4798                struct kvm_s390_irq s390irq = {};
4799
4800                if (copy_from_user(&s390int, argp, sizeof(s390int)))
4801                        return -EFAULT;
4802                if (s390int_to_s390irq(&s390int, &s390irq))
4803                        return -EINVAL;
4804                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4805        }
4806        }
4807        return -ENOIOCTLCMD;
4808}
4809
4810long kvm_arch_vcpu_ioctl(struct file *filp,
4811                         unsigned int ioctl, unsigned long arg)
4812{
4813        struct kvm_vcpu *vcpu = filp->private_data;
4814        void __user *argp = (void __user *)arg;
4815        int idx;
4816        long r;
4817        u16 rc, rrc;
4818
4819        vcpu_load(vcpu);
4820
4821        switch (ioctl) {
4822        case KVM_S390_STORE_STATUS:
4823                idx = srcu_read_lock(&vcpu->kvm->srcu);
4824                r = kvm_s390_store_status_unloaded(vcpu, arg);
4825                srcu_read_unlock(&vcpu->kvm->srcu, idx);
4826                break;
4827        case KVM_S390_SET_INITIAL_PSW: {
4828                psw_t psw;
4829
4830                r = -EFAULT;
4831                if (copy_from_user(&psw, argp, sizeof(psw)))
4832                        break;
4833                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4834                break;
4835        }
4836        case KVM_S390_CLEAR_RESET:
4837                r = 0;
4838                kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4839                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4840                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4841                                          UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4842                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4843                                   rc, rrc);
4844                }
4845                break;
4846        case KVM_S390_INITIAL_RESET:
4847                r = 0;
4848                kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4849                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4850                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4851                                          UVC_CMD_CPU_RESET_INITIAL,
4852                                          &rc, &rrc);
4853                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4854                                   rc, rrc);
4855                }
4856                break;
4857        case KVM_S390_NORMAL_RESET:
4858                r = 0;
4859                kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4860                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4861                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4862                                          UVC_CMD_CPU_RESET, &rc, &rrc);
4863                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4864                                   rc, rrc);
4865                }
4866                break;
4867        case KVM_SET_ONE_REG:
4868        case KVM_GET_ONE_REG: {
4869                struct kvm_one_reg reg;
4870                r = -EINVAL;
4871                if (kvm_s390_pv_cpu_is_protected(vcpu))
4872                        break;
4873                r = -EFAULT;
4874                if (copy_from_user(&reg, argp, sizeof(reg)))
4875                        break;
4876                if (ioctl == KVM_SET_ONE_REG)
4877                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4878                else
4879                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4880                break;
4881        }
4882#ifdef CONFIG_KVM_S390_UCONTROL
4883        case KVM_S390_UCAS_MAP: {
4884                struct kvm_s390_ucas_mapping ucasmap;
4885
4886                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4887                        r = -EFAULT;
4888                        break;
4889                }
4890
4891                if (!kvm_is_ucontrol(vcpu->kvm)) {
4892                        r = -EINVAL;
4893                        break;
4894                }
4895
4896                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4897                                     ucasmap.vcpu_addr, ucasmap.length);
4898                break;
4899        }
4900        case KVM_S390_UCAS_UNMAP: {
4901                struct kvm_s390_ucas_mapping ucasmap;
4902
4903                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4904                        r = -EFAULT;
4905                        break;
4906                }
4907
4908                if (!kvm_is_ucontrol(vcpu->kvm)) {
4909                        r = -EINVAL;
4910                        break;
4911                }
4912
4913                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4914                        ucasmap.length);
4915                break;
4916        }
4917#endif
4918        case KVM_S390_VCPU_FAULT: {
4919                r = gmap_fault(vcpu->arch.gmap, arg, 0);
4920                break;
4921        }
4922        case KVM_ENABLE_CAP:
4923        {
4924                struct kvm_enable_cap cap;
4925                r = -EFAULT;
4926                if (copy_from_user(&cap, argp, sizeof(cap)))
4927                        break;
4928                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4929                break;
4930        }
4931        case KVM_S390_MEM_OP: {
4932                struct kvm_s390_mem_op mem_op;
4933
4934                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4935                        r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4936                else
4937                        r = -EFAULT;
4938                break;
4939        }
4940        case KVM_S390_SET_IRQ_STATE: {
4941                struct kvm_s390_irq_state irq_state;
4942
4943                r = -EFAULT;
4944                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4945                        break;
4946                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4947                    irq_state.len == 0 ||
4948                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4949                        r = -EINVAL;
4950                        break;
4951                }
4952                /* do not use irq_state.flags, it will break old QEMUs */
4953                r = kvm_s390_set_irq_state(vcpu,
4954                                           (void __user *) irq_state.buf,
4955                                           irq_state.len);
4956                break;
4957        }
4958        case KVM_S390_GET_IRQ_STATE: {
4959                struct kvm_s390_irq_state irq_state;
4960
4961                r = -EFAULT;
4962                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4963                        break;
4964                if (irq_state.len == 0) {
4965                        r = -EINVAL;
4966                        break;
4967                }
4968                /* do not use irq_state.flags, it will break old QEMUs */
4969                r = kvm_s390_get_irq_state(vcpu,
4970                                           (__u8 __user *)  irq_state.buf,
4971                                           irq_state.len);
4972                break;
4973        }
4974        default:
4975                r = -ENOTTY;
4976        }
4977
4978        vcpu_put(vcpu);
4979        return r;
4980}
4981
4982vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4983{
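            /*
             * For user controlled virtual machines, userspace can mmap() the
             * vcpu fd at KVM_S390_SIE_PAGE_OFFSET to get at the SIE control
             * block; all other faults result in SIGBUS.
             */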
4984#ifdef CONFIG_KVM_S390_UCONTROL
4985        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4986                 && (kvm_is_ucontrol(vcpu->kvm))) {
4987                vmf->page = virt_to_page(vcpu->arch.sie_block);
4988                get_page(vmf->page);
4989                return 0;
4990        }
4991#endif
4992        return VM_FAULT_SIGBUS;
4993}
4994
4995/* Section: memory related */
4996int kvm_arch_prepare_memory_region(struct kvm *kvm,
4997                                   struct kvm_memory_slot *memslot,
4998                                   const struct kvm_userspace_memory_region *mem,
4999                                   enum kvm_mr_change change)
5000{
5001        /* A few sanity checks. Memory slots have to start and end at a
5002           segment boundary (1 MB). The memory in userland may be fragmented
5003           into various different vmas. It is okay to mmap() and munmap()
5004           memory in this slot at any time after this call. */
5005
5006        if (mem->userspace_addr & 0xffffful)
5007                return -EINVAL;
5008
5009        if (mem->memory_size & 0xffffful)
5010                return -EINVAL;
5011
5012        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5013                return -EINVAL;
5014
5015        /* When we are protected, we should not change the memory slots */
5016        if (kvm_s390_pv_get_handle(kvm))
5017                return -EINVAL;
5018        return 0;
5019}
5020
5021void kvm_arch_commit_memory_region(struct kvm *kvm,
5022                                const struct kvm_userspace_memory_region *mem,
5023                                struct kvm_memory_slot *old,
5024                                const struct kvm_memory_slot *new,
5025                                enum kvm_mr_change change)
5026{
5027        int rc = 0;
5028
5029        switch (change) {
5030        case KVM_MR_DELETE:
5031                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5032                                        old->npages * PAGE_SIZE);
5033                break;
5034        case KVM_MR_MOVE:
5035                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5036                                        old->npages * PAGE_SIZE);
5037                if (rc)
5038                        break;
5039                fallthrough;
5040        case KVM_MR_CREATE:
5041                rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5042                                      mem->guest_phys_addr, mem->memory_size);
5043                break;
5044        case KVM_MR_FLAGS_ONLY:
5045                break;
5046        default:
5047                WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5048        }
5049        if (rc)
5050                pr_warn("failed to commit memory region\n");
5051        return;
5052}
5053
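    /*
     * Derive, from the two hmfai bits that the SCLP reports for facility
     * doubleword i, a mask of the facility bits in that doubleword that may be
     * offered to guests as part of the base facility list.
     */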
5054static inline unsigned long nonhyp_mask(int i)
5055{
5056        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5057
5058        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5059}
5060
5061void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5062{
5063        vcpu->valid_wakeup = false;
5064}
5065
5066static int __init kvm_s390_init(void)
5067{
5068        int i;
5069
5070        if (!sclp.has_sief2) {
5071                pr_info("SIE is not available\n");
5072                return -ENODEV;
5073        }
5074
5075        if (nested && hpage) {
5076                pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5077                return -EINVAL;
5078        }
5079
5080        for (i = 0; i < 16; i++)
5081                kvm_s390_fac_base[i] |=
5082                        stfle_fac_list[i] & nonhyp_mask(i);
5083
5084        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5085}
5086
5087static void __exit kvm_s390_exit(void)
5088{
5089        kvm_exit();
5090}
5091
5092module_init(kvm_s390_init);
5093module_exit(kvm_s390_exit);
5094
5095/*
5096 * Enable autoloading of the kvm module.
5097 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5098 * since x86 takes a different approach.
5099 */
5100#include <linux/miscdevice.h>
5101MODULE_ALIAS_MISCDEV(KVM_MINOR);
5102MODULE_ALIAS("devname:kvm");
5103