linux/arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, inject_io),
        STATS_DESC_COUNTER(VM, inject_float_mchk),
        STATS_DESC_COUNTER(VM, inject_pfault_done),
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_COUNTER(VCPU, exit_userspace),
        STATS_DESC_COUNTER(VCPU, exit_null),
        STATS_DESC_COUNTER(VCPU, exit_external_request),
        STATS_DESC_COUNTER(VCPU, exit_io_request),
        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
        STATS_DESC_COUNTER(VCPU, exit_stop_request),
        STATS_DESC_COUNTER(VCPU, exit_validity),
        STATS_DESC_COUNTER(VCPU, exit_instruction),
        STATS_DESC_COUNTER(VCPU, exit_pei),
        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
        STATS_DESC_COUNTER(VCPU, instruction_lctl),
        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
        STATS_DESC_COUNTER(VCPU, instruction_stctl),
        STATS_DESC_COUNTER(VCPU, instruction_stctg),
        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
        STATS_DESC_COUNTER(VCPU, deliver_ckc),
        STATS_DESC_COUNTER(VCPU, deliver_cputm),
        STATS_DESC_COUNTER(VCPU, deliver_external_call),
        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
        STATS_DESC_COUNTER(VCPU, deliver_virtio),
        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
        STATS_DESC_COUNTER(VCPU, deliver_program),
        STATS_DESC_COUNTER(VCPU, deliver_io),
        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
        STATS_DESC_COUNTER(VCPU, exit_wait_state),
        STATS_DESC_COUNTER(VCPU, inject_ckc),
        STATS_DESC_COUNTER(VCPU, inject_cputm),
        STATS_DESC_COUNTER(VCPU, inject_external_call),
        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
        STATS_DESC_COUNTER(VCPU, inject_mchk),
        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
        STATS_DESC_COUNTER(VCPU, inject_program),
        STATS_DESC_COUNTER(VCPU, inject_restart),
        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
        STATS_DESC_COUNTER(VCPU, instruction_epsw),
        STATS_DESC_COUNTER(VCPU, instruction_gs),
        STATS_DESC_COUNTER(VCPU, instruction_io_other),
        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
        STATS_DESC_COUNTER(VCPU, instruction_ptff),
        STATS_DESC_COUNTER(VCPU, instruction_sck),
        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
        STATS_DESC_COUNTER(VCPU, instruction_stidp),
        STATS_DESC_COUNTER(VCPU, instruction_spx),
        STATS_DESC_COUNTER(VCPU, instruction_stpx),
        STATS_DESC_COUNTER(VCPU, instruction_stap),
        STATS_DESC_COUNTER(VCPU, instruction_iske),
        STATS_DESC_COUNTER(VCPU, instruction_ri),
        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
        STATS_DESC_COUNTER(VCPU, instruction_sske),
        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
        STATS_DESC_COUNTER(VCPU, instruction_stsi),
        STATS_DESC_COUNTER(VCPU, instruction_stfl),
        STATS_DESC_COUNTER(VCPU, instruction_tb),
        STATS_DESC_COUNTER(VCPU, instruction_tpi),
        STATS_DESC_COUNTER(VCPU, instruction_tprot),
        STATS_DESC_COUNTER(VCPU, instruction_tsch),
        STATS_DESC_COUNTER(VCPU, instruction_sie),
        STATS_DESC_COUNTER(VCPU, instruction_essa),
        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
        return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

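/*
 * Adjust one SIE control block for a host TOD clock jump of @delta: the
 * guest-visible TOD must not change, so -delta is added to the epoch.
 * With the multiple-epoch facility (ECD_MEF) the epoch is extended by
 * the epoch index (epdx); the sign-extended delta and the carry out of
 * the 64-bit addition are propagated into it, so (epdx:epoch) behaves
 * like one wide signed value. Illustrative example (not from the
 * original source): a TOD jump of +4 with epoch = 6 gives epoch = 2,
 * and the carry out of the addition cancels the sign extension, leaving
 * epdx unchanged.
 */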
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta; we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

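/*
 * PERFORM LOCKED OPERATION with bit 0x100 set in the function code acts
 * as a "test bit" query rather than performing an operation: condition
 * code 0 indicates that function @nr is installed on this machine.
 */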
static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                "       lgr     0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : [function] "d" (function)
                : "cc", "0");
        return cc == 0;
}

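/*
 * Execute the query subfunction (function code 0 in GR0) of a 32-bit
 * instruction such as SORTL or DFLTCC, letting it store its availability
 * bitmap into the block addressed by GR1 (@query). The instruction is
 * emitted via .insn so that this also assembles on toolchains that do
 * not know the mnemonic.
 */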
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        asm volatile(
                "       lghi    0,0\n"
                "       lgr     1,%[query]\n"
                /* Parameter registers are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
                : "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

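/*
 * Probe everything the host offers that the CPU model code may pass on
 * to guests: PLO "test bit" results, PTFF and CPACF query bitmaps, the
 * SORTL/DFLTCC query results, and the SIE features reported via SCLP.
 * The SIE feature bits are only exposed when nested virtualization is
 * enabled on the command line.
 */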
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
                r = 1;
                break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                r = KVM_GUESTDBG_VALID_MASK;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        default:
                r = 0;
        }
        return r;
}

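/*
 * Transfer collected dirty bits from the gmap to KVM's dirty bitmap for
 * one memslot. The slot is walked in segments of _PAGE_ENTRIES (256)
 * pages so that gmap_sync_dirty_log_pmd() can harvest one page table at
 * a time, with a reschedule point after every segment.
 */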
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

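/*
 * Request that every vCPU intercepts operation exceptions, e.g. after
 * userspace enabled KVM_CAP_S390_USER_INSTR0 to handle the instruction
 * with opcode 0 itself.
 */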
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        if (test_facility(192)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 192);
                                set_kvm_facility(kvm->arch.model.fac_list, 192);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        mmap_write_lock(kvm->mm);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        mmap_write_unlock(kvm->mm);
                        /*
                         * We might have to create fake 4k page
                         * tables. To avoid that the hardware works on
                         * stale PGSTEs, we emulate these instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

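/*
 * Re-apply the crypto configuration to all vCPUs: block them while the
 * control blocks are updated and kick them out of VSIE so that the
 * shadow CRYCBs get rebuilt with the new settings.
 */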
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

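/*
 * Toggle key wrapping and APIE for the VM. Enabling AES/DEA key
 * wrapping generates a fresh random wrapping key mask in the CRYCB;
 * disabling clears it. All vCPUs are reset afterwards so the new
 * crypto configuration takes effect immediately.
 */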
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

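/*
 * Compute the guest TOD as host TOD + epoch. With the multiple-epoch
 * facility (139) the epoch index is included as well; a wraparound of
 * the 64-bit addition (gtod->tod < clk.tod) carries into the index.
 * Preemption is disabled so TOD and epoch are read consistently.
 */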
static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        union tod_clock clk;

        preempt_disable();

        store_tod_clock_ext(&clk);

        gtod->tod = clk.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = clk.ei + kvm->arch.epdx;
                if (gtod->tod < clk.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

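/*
 * Set the guest CPU model (cpuid, IBC, facility list) from userspace.
 * The requested IBC is clamped to the [lowest_ibc, unblocked_ibc] range
 * reported by SCLP; if either the SCLP-reported lowest IBC or the
 * requested IBC is 0, the current IBC is left unchanged.
 */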
1271static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1272{
1273        struct kvm_s390_vm_cpu_processor *proc;
1274        u16 lowest_ibc, unblocked_ibc;
1275        int ret = 0;
1276
1277        mutex_lock(&kvm->lock);
1278        if (kvm->created_vcpus) {
1279                ret = -EBUSY;
1280                goto out;
1281        }
1282        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1283        if (!proc) {
1284                ret = -ENOMEM;
1285                goto out;
1286        }
1287        if (!copy_from_user(proc, (void __user *)attr->addr,
1288                            sizeof(*proc))) {
1289                kvm->arch.model.cpuid = proc->cpuid;
1290                lowest_ibc = sclp.ibc >> 16 & 0xfff;
1291                unblocked_ibc = sclp.ibc & 0xfff;
1292                if (lowest_ibc && proc->ibc) {
1293                        if (proc->ibc > unblocked_ibc)
1294                                kvm->arch.model.ibc = unblocked_ibc;
1295                        else if (proc->ibc < lowest_ibc)
1296                                kvm->arch.model.ibc = lowest_ibc;
1297                        else
1298                                kvm->arch.model.ibc = proc->ibc;
1299                }
1300                memcpy(kvm->arch.model.fac_list, proc->fac_list,
1301                       S390_ARCH_FAC_LIST_SIZE_BYTE);
1302                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1303                         kvm->arch.model.ibc,
1304                         kvm->arch.model.cpuid);
1305                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1306                         kvm->arch.model.fac_list[0],
1307                         kvm->arch.model.fac_list[1],
1308                         kvm->arch.model.fac_list[2]);
1309        } else
1310                ret = -EFAULT;
1311        kfree(proc);
1312out:
1313        mutex_unlock(&kvm->lock);
1314        return ret;
1315}
1316
1317static int kvm_s390_set_processor_feat(struct kvm *kvm,
1318                                       struct kvm_device_attr *attr)
1319{
1320        struct kvm_s390_vm_cpu_feat data;
1321
1322        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1323                return -EFAULT;
1324        if (!bitmap_subset((unsigned long *) data.feat,
1325                           kvm_s390_available_cpu_feat,
1326                           KVM_S390_VM_CPU_FEAT_NR_BITS))
1327                return -EINVAL;
1328
1329        mutex_lock(&kvm->lock);
1330        if (kvm->created_vcpus) {
1331                mutex_unlock(&kvm->lock);
1332                return -EBUSY;
1333        }
1334        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1335                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1336        mutex_unlock(&kvm->lock);
1337        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1338                         data.feat[0],
1339                         data.feat[1],
1340                         data.feat[2]);
1341        return 0;
1342}
1343
1344static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1345                                          struct kvm_device_attr *attr)
1346{
1347        mutex_lock(&kvm->lock);
1348        if (kvm->created_vcpus) {
1349                mutex_unlock(&kvm->lock);
1350                return -EBUSY;
1351        }
1352
1353        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1354                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1355                mutex_unlock(&kvm->lock);
1356                return -EFAULT;
1357        }
1358        mutex_unlock(&kvm->lock);
1359
1360        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1361                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1362                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1363                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1364                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1365        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1366                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1367                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1368        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1369                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1370                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1371        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1372                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1373                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1374        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1375                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1376                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1377        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1378                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1379                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1380        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1381                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1382                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1383        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1384                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1385                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1386        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1387                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1388                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1389        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1390                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1391                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1392        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1393                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1394                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1395        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1396                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1397                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1398        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1399                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1400                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1401        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1402                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1403                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1404        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1405                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1406                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1407        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1408                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1409                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1410                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1411                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1412        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1413                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1414                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1415                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1416                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1417
1418        return 0;
1419}
1420
1421static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1422{
1423        int ret = -ENXIO;
1424
1425        switch (attr->attr) {
1426        case KVM_S390_VM_CPU_PROCESSOR:
1427                ret = kvm_s390_set_processor(kvm, attr);
1428                break;
1429        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1430                ret = kvm_s390_set_processor_feat(kvm, attr);
1431                break;
1432        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1433                ret = kvm_s390_set_processor_subfunc(kvm, attr);
1434                break;
1435        }
1436        return ret;
1437}
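
/*
 * Illustrative userspace sketch (not part of this file; vm_fd is a
 * placeholder for the VM file descriptor from KVM_CREATE_VM): the
 * setters above are reached through KVM_SET_DEVICE_ATTR and must run
 * before the first vcpu is created, otherwise they return -EBUSY:
 *
 *	struct kvm_s390_vm_cpu_feat feat = {};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_PROCESSOR_FEAT,
 *		.addr  = (__u64)(unsigned long)&feat,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		err(1, "KVM_SET_DEVICE_ATTR");	// -EBUSY once vcpus exist
 */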
1438
1439static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1440{
1441        struct kvm_s390_vm_cpu_processor *proc;
1442        int ret = 0;
1443
1444        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1445        if (!proc) {
1446                ret = -ENOMEM;
1447                goto out;
1448        }
1449        proc->cpuid = kvm->arch.model.cpuid;
1450        proc->ibc = kvm->arch.model.ibc;
1451        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1452               S390_ARCH_FAC_LIST_SIZE_BYTE);
1453        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454                 kvm->arch.model.ibc,
1455                 kvm->arch.model.cpuid);
1456        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457                 kvm->arch.model.fac_list[0],
1458                 kvm->arch.model.fac_list[1],
1459                 kvm->arch.model.fac_list[2]);
1460        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1461                ret = -EFAULT;
1462        kfree(proc);
1463out:
1464        return ret;
1465}
1466
1467static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1468{
1469        struct kvm_s390_vm_cpu_machine *mach;
1470        int ret = 0;
1471
1472        mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1473        if (!mach) {
1474                ret = -ENOMEM;
1475                goto out;
1476        }
1477        get_cpu_id((struct cpuid *) &mach->cpuid);
1478        mach->ibc = sclp.ibc;
1479        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1480               S390_ARCH_FAC_LIST_SIZE_BYTE);
1481        memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1482               sizeof(stfle_fac_list));
1483        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1484                 kvm->arch.model.ibc,
1485                 kvm->arch.model.cpuid);
1486        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1487                 mach->fac_mask[0],
1488                 mach->fac_mask[1],
1489                 mach->fac_mask[2]);
1490        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1491                 mach->fac_list[0],
1492                 mach->fac_list[1],
1493                 mach->fac_list[2]);
1494        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1495                ret = -EFAULT;
1496        kfree(mach);
1497out:
1498        return ret;
1499}
1500
1501static int kvm_s390_get_processor_feat(struct kvm *kvm,
1502                                       struct kvm_device_attr *attr)
1503{
1504        struct kvm_s390_vm_cpu_feat data;
1505
1506        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1507                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1508        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1509                return -EFAULT;
1510        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1511                         data.feat[0],
1512                         data.feat[1],
1513                         data.feat[2]);
1514        return 0;
1515}
1516
1517static int kvm_s390_get_machine_feat(struct kvm *kvm,
1518                                     struct kvm_device_attr *attr)
1519{
1520        struct kvm_s390_vm_cpu_feat data;
1521
1522        bitmap_copy((unsigned long *) data.feat,
1523                    kvm_s390_available_cpu_feat,
1524                    KVM_S390_VM_CPU_FEAT_NR_BITS);
1525        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1526                return -EFAULT;
1527        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1528                         data.feat[0],
1529                         data.feat[1],
1530                         data.feat[2]);
1531        return 0;
1532}
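
/*
 * Typical CPU-model negotiation (sketch, assuming a QEMU-like VMM):
 * query what the host can provide via the MACHINE view, intersect with
 * the VMM's own policy, then install the result as the PROCESSOR view:
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,	// host capabilities
 *		.addr  = (__u64)(unsigned long)&feat,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	// ...clear any unwanted bits in feat.feat[]...
 *	attr.attr = KVM_S390_VM_CPU_PROCESSOR_FEAT;	// guest view
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */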
1533
1534static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1535                                          struct kvm_device_attr *attr)
1536{
1537        if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1538            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1539                return -EFAULT;
1540
1541        VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1542                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1543                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1544                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1545                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1546        VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1547                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1548                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1549        VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1550                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1551                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1552        VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1553                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1554                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1555        VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1556                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1557                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1558        VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1559                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1560                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1561        VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1562                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1563                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1564        VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1565                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1566                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1567        VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1568                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1569                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1570        VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1571                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1572                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1573        VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1574                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1575                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1576        VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1577                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1578                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1579        VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1580                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1581                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1582        VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1583                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1584                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1585        VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1586                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1587                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1588        VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1589                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1590                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1591                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1592                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1593        VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1594                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1595                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1596                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1597                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1598
1599        return 0;
1600}
1601
1602static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1603                                        struct kvm_device_attr *attr)
1604{
1605        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1606            sizeof(struct kvm_s390_vm_cpu_subfunc)))
1607                return -EFAULT;
1608
1609        VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1610                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1611                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1612                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1613                 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1614        VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1615                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1616                 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1617        VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1618                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1619                 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1620        VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1621                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1622                 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1623        VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1624                 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1625                 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1626        VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1627                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1628                 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1629        VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1630                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1631                 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1632        VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1633                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1634                 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1635        VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1636                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1637                 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1638        VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1639                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1640                 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1641        VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1642                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1643                 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1644        VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1645                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1646                 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1647        VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1648                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1649                 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1650        VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1651                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1652                 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1653        VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1654                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1655                 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1656        VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1657                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1658                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1659                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1660                 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1661        VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1662                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1663                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1664                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1665                 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1666
1667        return 0;
1668}
1669
1670static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1671{
1672        int ret = -ENXIO;
1673
1674        switch (attr->attr) {
1675        case KVM_S390_VM_CPU_PROCESSOR:
1676                ret = kvm_s390_get_processor(kvm, attr);
1677                break;
1678        case KVM_S390_VM_CPU_MACHINE:
1679                ret = kvm_s390_get_machine(kvm, attr);
1680                break;
1681        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1682                ret = kvm_s390_get_processor_feat(kvm, attr);
1683                break;
1684        case KVM_S390_VM_CPU_MACHINE_FEAT:
1685                ret = kvm_s390_get_machine_feat(kvm, attr);
1686                break;
1687        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1688                ret = kvm_s390_get_processor_subfunc(kvm, attr);
1689                break;
1690        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1691                ret = kvm_s390_get_machine_subfunc(kvm, attr);
1692                break;
1693        }
1694        return ret;
1695}
1696
1697static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1698{
1699        int ret;
1700
1701        switch (attr->group) {
1702        case KVM_S390_VM_MEM_CTRL:
1703                ret = kvm_s390_set_mem_control(kvm, attr);
1704                break;
1705        case KVM_S390_VM_TOD:
1706                ret = kvm_s390_set_tod(kvm, attr);
1707                break;
1708        case KVM_S390_VM_CPU_MODEL:
1709                ret = kvm_s390_set_cpu_model(kvm, attr);
1710                break;
1711        case KVM_S390_VM_CRYPTO:
1712                ret = kvm_s390_vm_set_crypto(kvm, attr);
1713                break;
1714        case KVM_S390_VM_MIGRATION:
1715                ret = kvm_s390_vm_set_migration(kvm, attr);
1716                break;
1717        default:
1718                ret = -ENXIO;
1719                break;
1720        }
1721
1722        return ret;
1723}
1724
1725static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726{
1727        int ret;
1728
1729        switch (attr->group) {
1730        case KVM_S390_VM_MEM_CTRL:
1731                ret = kvm_s390_get_mem_control(kvm, attr);
1732                break;
1733        case KVM_S390_VM_TOD:
1734                ret = kvm_s390_get_tod(kvm, attr);
1735                break;
1736        case KVM_S390_VM_CPU_MODEL:
1737                ret = kvm_s390_get_cpu_model(kvm, attr);
1738                break;
1739        case KVM_S390_VM_MIGRATION:
1740                ret = kvm_s390_vm_get_migration(kvm, attr);
1741                break;
1742        default:
1743                ret = -ENXIO;
1744                break;
1745        }
1746
1747        return ret;
1748}
1749
1750static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1751{
1752        int ret;
1753
1754        switch (attr->group) {
1755        case KVM_S390_VM_MEM_CTRL:
1756                switch (attr->attr) {
1757                case KVM_S390_VM_MEM_ENABLE_CMMA:
1758                case KVM_S390_VM_MEM_CLR_CMMA:
1759                        ret = sclp.has_cmma ? 0 : -ENXIO;
1760                        break;
1761                case KVM_S390_VM_MEM_LIMIT_SIZE:
1762                        ret = 0;
1763                        break;
1764                default:
1765                        ret = -ENXIO;
1766                        break;
1767                }
1768                break;
1769        case KVM_S390_VM_TOD:
1770                switch (attr->attr) {
1771                case KVM_S390_VM_TOD_LOW:
1772                case KVM_S390_VM_TOD_HIGH:
1773                        ret = 0;
1774                        break;
1775                default:
1776                        ret = -ENXIO;
1777                        break;
1778                }
1779                break;
1780        case KVM_S390_VM_CPU_MODEL:
1781                switch (attr->attr) {
1782                case KVM_S390_VM_CPU_PROCESSOR:
1783                case KVM_S390_VM_CPU_MACHINE:
1784                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1785                case KVM_S390_VM_CPU_MACHINE_FEAT:
1786                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1787                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1788                        ret = 0;
1789                        break;
1790                default:
1791                        ret = -ENXIO;
1792                        break;
1793                }
1794                break;
1795        case KVM_S390_VM_CRYPTO:
1796                switch (attr->attr) {
1797                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1798                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1799                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1800                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1801                        ret = 0;
1802                        break;
1803                case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1804                case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1805                        ret = ap_instructions_available() ? 0 : -ENXIO;
1806                        break;
1807                default:
1808                        ret = -ENXIO;
1809                        break;
1810                }
1811                break;
1812        case KVM_S390_VM_MIGRATION:
1813                ret = 0;
1814                break;
1815        default:
1816                ret = -ENXIO;
1817                break;
1818        }
1819
1820        return ret;
1821}
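
/*
 * Probing sketch (illustrative): a zero return from KVM_HAS_DEVICE_ATTR
 * means the group/attr pair above is supported; -ENXIO means it is not,
 * e.g. for APIE on machines without AP instructions:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_APIE,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
 *		; // APIE can be enabled on this host
 */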
1822
1823static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1824{
1825        uint8_t *keys;
1826        uint64_t hva;
1827        int srcu_idx, i, r = 0;
1828
1829        if (args->flags != 0)
1830                return -EINVAL;
1831
1832        /* Is this guest using storage keys? */
1833        if (!mm_uses_skeys(current->mm))
1834                return KVM_S390_GET_SKEYS_NONE;
1835
1836        /* Enforce sane limit on memory allocation */
1837        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1838                return -EINVAL;
1839
1840        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1841        if (!keys)
1842                return -ENOMEM;
1843
1844        mmap_read_lock(current->mm);
1845        srcu_idx = srcu_read_lock(&kvm->srcu);
1846        for (i = 0; i < args->count; i++) {
1847                hva = gfn_to_hva(kvm, args->start_gfn + i);
1848                if (kvm_is_error_hva(hva)) {
1849                        r = -EFAULT;
1850                        break;
1851                }
1852
1853                r = get_guest_storage_key(current->mm, hva, &keys[i]);
1854                if (r)
1855                        break;
1856        }
1857        srcu_read_unlock(&kvm->srcu, srcu_idx);
1858        mmap_read_unlock(current->mm);
1859
1860        if (!r) {
1861                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1862                                 sizeof(uint8_t) * args->count);
1863                if (r)
1864                        r = -EFAULT;
1865        }
1866
1867        kvfree(keys);
1868        return r;
1869}
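
/*
 * Caller sketch (illustrative; vm_fd, keybuf and n are placeholders):
 * read the storage keys of the first n guest pages. A return value of
 * KVM_S390_GET_SKEYS_NONE signals that the guest never enabled storage
 * keys, so there is nothing to save:
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = n,	// 1..KVM_S390_SKEYS_MAX
 *		.skeydata_addr = (__u64)(unsigned long)keybuf,
 *	};
 *
 *	r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */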
1870
1871static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1872{
1873        uint8_t *keys;
1874        uint64_t hva;
1875        int srcu_idx, i, r = 0;
1876        bool unlocked;
1877
1878        if (args->flags != 0)
1879                return -EINVAL;
1880
1881        /* Enforce sane limit on memory allocation */
1882        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1883                return -EINVAL;
1884
1885        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1886        if (!keys)
1887                return -ENOMEM;
1888
1889        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1890                           sizeof(uint8_t) * args->count);
1891        if (r) {
1892                r = -EFAULT;
1893                goto out;
1894        }
1895
1896        /* Enable storage key handling for the guest */
1897        r = s390_enable_skey();
1898        if (r)
1899                goto out;
1900
1901        i = 0;
1902        mmap_read_lock(current->mm);
1903        srcu_idx = srcu_read_lock(&kvm->srcu);
1904        while (i < args->count) {
1905                unlocked = false;
1906                hva = gfn_to_hva(kvm, args->start_gfn + i);
1907                if (kvm_is_error_hva(hva)) {
1908                        r = -EFAULT;
1909                        break;
1910                }
1911
1912                /* Lowest order bit is reserved */
1913                if (keys[i] & 0x01) {
1914                        r = -EINVAL;
1915                        break;
1916                }
1917
1918                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1919                if (r) {
1920                        r = fixup_user_fault(current->mm, hva,
1921                                             FAULT_FLAG_WRITE, &unlocked);
1922                        if (r)
1923                                break;
1924                }
1925                if (!r)
1926                        i++;
1927        }
1928        srcu_read_unlock(&kvm->srcu, srcu_idx);
1929        mmap_read_unlock(current->mm);
1930out:
1931        kvfree(keys);
1932        return r;
1933}
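
/*
 * Note on the key byte checked above (s390 storage key layout): the
 * access-control bits occupy the high nibble, followed by the
 * fetch-protection, reference and change bits; the lowest-order bit is
 * reserved, which is why values with 0x01 set are rejected with -EINVAL.
 */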
1934
1935/*
1936 * Base address and length must be sent at the start of each block; therefore
1937 * it's cheaper to send some clean data, as long as it's less than the size of
1938 * two longs.
1939 */
1940#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1941/* for consistency */
1942#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
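
/*
 * Worked example: with 8-byte longs KVM_S390_MAX_BIT_DISTANCE is 16, so
 * a run of up to 16 clean values is still streamed inline; starting a
 * new block would cost the same 16 bytes (base address plus length)
 * anyway.
 */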
1943
1944/*
1945 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1946 * address falls in a hole. In that case the index of one of the memslots
1947 * bordering the hole is returned.
1948 */
1949static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1950{
1951        int start = 0, end = slots->used_slots;
1952        int slot = atomic_read(&slots->last_used_slot);
1953        struct kvm_memory_slot *memslots = slots->memslots;
1954
1955        if (gfn >= memslots[slot].base_gfn &&
1956            gfn < memslots[slot].base_gfn + memslots[slot].npages)
1957                return slot;
1958
1959        while (start < end) {
1960                slot = start + (end - start) / 2;
1961
1962                if (gfn >= memslots[slot].base_gfn)
1963                        end = slot;
1964                else
1965                        start = slot + 1;
1966        }
1967
1968        if (start >= slots->used_slots)
1969                return slots->used_slots - 1;
1970
1971        if (gfn >= memslots[start].base_gfn &&
1972            gfn < memslots[start].base_gfn + memslots[start].npages) {
1973                atomic_set(&slots->last_used_slot, start);
1974        }
1975
1976        return start;
1977}
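
/*
 * Example of the "approx" behavior: with two slots covering gfns
 * [0, 100) and [200, 300), looking up gfn 150 falls into the hole and
 * yields the index of one of the two bordering slots instead of failing,
 * which is all the CMMA scan below needs in order to resume.
 */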
1978
1979static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1980                              u8 *res, unsigned long bufsize)
1981{
1982        unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1983
1984        args->count = 0;
1985        while (args->count < bufsize) {
1986                hva = gfn_to_hva(kvm, cur_gfn);
1987                /*
1988                 * We return an error if the first value was invalid, but we
1989                 * return successfully if at least one value was copied.
1990                 */
1991                if (kvm_is_error_hva(hva))
1992                        return args->count ? 0 : -EFAULT;
1993                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1994                        pgstev = 0;
1995                res[args->count++] = (pgstev >> 24) & 0x43;
1996                cur_gfn++;
1997        }
1998
1999        return 0;
2000}
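
/*
 * The byte stored per page is the CMMA state extracted from the PGSTE:
 * (pgstev >> 24) & 0x43 keeps the page usage-state bits and the NODAT
 * bit, matching the PGSTE mask (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT)
 * that kvm_s390_set_cmma_bits applies on the restore side.
 */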
2001
2002static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2003                                              unsigned long cur_gfn)
2004{
2005        int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2006        struct kvm_memory_slot *ms = slots->memslots + slotidx;
2007        unsigned long ofs = cur_gfn - ms->base_gfn;
2008
2009        if (ms->base_gfn + ms->npages <= cur_gfn) {
2010                slotidx--;
2011                /* If we are above the highest slot, wrap around */
2012                if (slotidx < 0)
2013                        slotidx = slots->used_slots - 1;
2014
2015                ms = slots->memslots + slotidx;
2016                ofs = 0;
2017        }
2018        ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2019        while ((slotidx > 0) && (ofs >= ms->npages)) {
2020                slotidx--;
2021                ms = slots->memslots + slotidx;
2022                ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2023        }
2024        return ms->base_gfn + ofs;
2025}
2026
2027static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2028                             u8 *res, unsigned long bufsize)
2029{
2030        unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2031        struct kvm_memslots *slots = kvm_memslots(kvm);
2032        struct kvm_memory_slot *ms;
2033
2034        if (unlikely(!slots->used_slots))
2035                return 0;
2036
2037        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2038        ms = gfn_to_memslot(kvm, cur_gfn);
2039        args->count = 0;
2040        args->start_gfn = cur_gfn;
2041        if (!ms)
2042                return 0;
2043        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2044        mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2045
2046        while (args->count < bufsize) {
2047                hva = gfn_to_hva(kvm, cur_gfn);
2048                if (kvm_is_error_hva(hva))
2049                        return 0;
2050                /* Decrement only if we actually flipped the bit to 0 */
2051                if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2052                        atomic64_dec(&kvm->arch.cmma_dirty_pages);
2053                if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2054                        pgstev = 0;
2055                /* Save the value */
2056                res[args->count++] = (pgstev >> 24) & 0x43;
2057                /* If the next bit is too far away, stop. */
2058                if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2059                        return 0;
2060                /* If we reached the previous "next", find the next one */
2061                if (cur_gfn == next_gfn)
2062                        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063                /* Reached the end of memory or of the buffer, stop */
2064                if ((next_gfn >= mem_end) ||
2065                    (next_gfn - args->start_gfn >= bufsize))
2066                        return 0;
2067                cur_gfn++;
2068                /* Reached the end of the current memslot, take the next one. */
2069                if (cur_gfn - ms->base_gfn >= ms->npages) {
2070                        ms = gfn_to_memslot(kvm, cur_gfn);
2071                        if (!ms)
2072                                return 0;
2073                }
2074        }
2075        return 0;
2076}
2077
2078/*
2079 * This function searches for the next page with dirty CMMA attributes, and
2080 * saves the attributes in the buffer up to either the end of the buffer or
2081 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2082 * no trailing clean bytes are saved.
2083 * If no dirty bits were found, or if CMMA was not enabled or used, the
2084 * output buffer will indicate a length of 0.
2085 */
2086static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2087                                  struct kvm_s390_cmma_log *args)
2088{
2089        unsigned long bufsize;
2090        int srcu_idx, peek, ret;
2091        u8 *values;
2092
2093        if (!kvm->arch.use_cmma)
2094                return -ENXIO;
2095        /* Invalid/unsupported flags were specified */
2096        if (args->flags & ~KVM_S390_CMMA_PEEK)
2097                return -EINVAL;
2098        /* Migration mode query, and we are not doing a migration */
2099        peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2100        if (!peek && !kvm->arch.migration_mode)
2101                return -EINVAL;
2102        /* CMMA is disabled or was not used, or the buffer has length zero */
2103        bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2104        if (!bufsize || !kvm->mm->context.uses_cmm) {
2105                memset(args, 0, sizeof(*args));
2106                return 0;
2107        }
2108        /* We are not peeking, and there are no dirty pages */
2109        if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2110                memset(args, 0, sizeof(*args));
2111                return 0;
2112        }
2113
2114        values = vmalloc(bufsize);
2115        if (!values)
2116                return -ENOMEM;
2117
2118        mmap_read_lock(kvm->mm);
2119        srcu_idx = srcu_read_lock(&kvm->srcu);
2120        if (peek)
2121                ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2122        else
2123                ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2124        srcu_read_unlock(&kvm->srcu, srcu_idx);
2125        mmap_read_unlock(kvm->mm);
2126
2127        if (kvm->arch.migration_mode)
2128                args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2129        else
2130                args->remaining = 0;
2131
2132        if (copy_to_user((void __user *)args->values, values, args->count))
2133                ret = -EFAULT;
2134
2135        vfree(values);
2136        return ret;
2137}
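
/*
 * Migration-loop sketch (illustrative; vm_fd and buf are placeholders):
 * userspace repeatedly pulls dirty CMMA values until 'remaining' drops
 * to zero:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = bufsize,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *
 *	do {
 *		r = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		// consume log.count values starting at log.start_gfn
 *		log.start_gfn += log.count;
 *	} while (!r && log.remaining);
 */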
2138
2139/*
2140 * This function sets the CMMA attributes for the given pages. If the input
2141 * buffer has zero length, no action is taken; otherwise the attributes are
2142 * set and the mm->context.uses_cmm flag is set.
2143 */
2144static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2145                                  const struct kvm_s390_cmma_log *args)
2146{
2147        unsigned long hva, mask, pgstev, i;
2148        uint8_t *bits;
2149        int srcu_idx, r = 0;
2150
2151        mask = args->mask;
2152
2153        if (!kvm->arch.use_cmma)
2154                return -ENXIO;
2155        /* invalid/unsupported flags */
2156        if (args->flags != 0)
2157                return -EINVAL;
2158        /* Enforce sane limit on memory allocation */
2159        if (args->count > KVM_S390_CMMA_SIZE_MAX)
2160                return -EINVAL;
2161        /* Nothing to do */
2162        if (args->count == 0)
2163                return 0;
2164
2165        bits = vmalloc(array_size(sizeof(*bits), args->count));
2166        if (!bits)
2167                return -ENOMEM;
2168
2169        r = copy_from_user(bits, (void __user *)args->values, args->count);
2170        if (r) {
2171                r = -EFAULT;
2172                goto out;
2173        }
2174
2175        mmap_read_lock(kvm->mm);
2176        srcu_idx = srcu_read_lock(&kvm->srcu);
2177        for (i = 0; i < args->count; i++) {
2178                hva = gfn_to_hva(kvm, args->start_gfn + i);
2179                if (kvm_is_error_hva(hva)) {
2180                        r = -EFAULT;
2181                        break;
2182                }
2183
2184                pgstev = bits[i];
2185                pgstev = pgstev << 24;
2186                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2187                set_pgste_bits(kvm->mm, hva, mask, pgstev);
2188        }
2189        srcu_read_unlock(&kvm->srcu, srcu_idx);
2190        mmap_read_unlock(kvm->mm);
2191
2192        if (!kvm->mm->context.uses_cmm) {
2193                mmap_write_lock(kvm->mm);
2194                kvm->mm->context.uses_cmm = 1;
2195                mmap_write_unlock(kvm->mm);
2196        }
2197out:
2198        vfree(bits);
2199        return r;
2200}
2201
2202static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2203{
2204        struct kvm_vcpu *vcpu;
2205        u16 rc, rrc;
2206        int ret = 0;
2207        int i;
2208
2209        /*
2210         * We ignore failures and try to destroy as many CPUs as possible.
2211         * At the same time we must not free the assigned resources when
2212         * this fails, as the ultravisor still has access to that memory.
2213         * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2214         * behind.
2215         * We want to return the first failure rc and rrc, though.
2216         */
2217        kvm_for_each_vcpu(i, vcpu, kvm) {
2218                mutex_lock(&vcpu->mutex);
2219                if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2220                        *rcp = rc;
2221                        *rrcp = rrc;
2222                        ret = -EIO;
2223                }
2224                mutex_unlock(&vcpu->mutex);
2225        }
2226        return ret;
2227}
2228
2229static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2230{
2231        int i, r = 0;
2232        u16 dummy;
2233
2234        struct kvm_vcpu *vcpu;
2235
2236        kvm_for_each_vcpu(i, vcpu, kvm) {
2237                mutex_lock(&vcpu->mutex);
2238                r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2239                mutex_unlock(&vcpu->mutex);
2240                if (r)
2241                        break;
2242        }
2243        if (r)
2244                kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2245        return r;
2246}
2247
2248static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2249{
2250        int r = 0;
2251        u16 dummy;
2252        void __user *argp = (void __user *)cmd->data;
2253
2254        switch (cmd->cmd) {
2255        case KVM_PV_ENABLE: {
2256                r = -EINVAL;
2257                if (kvm_s390_pv_is_protected(kvm))
2258                        break;
2259
2260                /*
2261                 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2262                 *  esca, we need no cleanup in the error cases below.
2263                 */
2264                r = sca_switch_to_extended(kvm);
2265                if (r)
2266                        break;
2267
2268                mmap_write_lock(current->mm);
2269                r = gmap_mark_unmergeable();
2270                mmap_write_unlock(current->mm);
2271                if (r)
2272                        break;
2273
2274                r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2275                if (r)
2276                        break;
2277
2278                r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2279                if (r)
2280                        kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2281
2282                /* we need to block service interrupts from now on */
2283                set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2284                break;
2285        }
2286        case KVM_PV_DISABLE: {
2287                r = -EINVAL;
2288                if (!kvm_s390_pv_is_protected(kvm))
2289                        break;
2290
2291                r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2292                /*
2293                 * If a CPU could not be destroyed, destroying the VM will also
2294                 * fail, so there is no point in attempting it. Instead return
2295                 * the rc and rrc from the first CPU whose destruction failed.
2296                 */
2297                if (r)
2298                        break;
2299                r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2300
2301                /* no need to block service interrupts any more */
2302                clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2303                break;
2304        }
2305        case KVM_PV_SET_SEC_PARMS: {
2306                struct kvm_s390_pv_sec_parm parms = {};
2307                void *hdr;
2308
2309                r = -EINVAL;
2310                if (!kvm_s390_pv_is_protected(kvm))
2311                        break;
2312
2313                r = -EFAULT;
2314                if (copy_from_user(&parms, argp, sizeof(parms)))
2315                        break;
2316
2317                /* Currently restricted to 8KB */
2318                r = -EINVAL;
2319                if (parms.length > PAGE_SIZE * 2)
2320                        break;
2321
2322                r = -ENOMEM;
2323                hdr = vmalloc(parms.length);
2324                if (!hdr)
2325                        break;
2326
2327                r = -EFAULT;
2328                if (!copy_from_user(hdr, (void __user *)parms.origin,
2329                                    parms.length))
2330                        r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2331                                                      &cmd->rc, &cmd->rrc);
2332
2333                vfree(hdr);
2334                break;
2335        }
2336        case KVM_PV_UNPACK: {
2337                struct kvm_s390_pv_unp unp = {};
2338
2339                r = -EINVAL;
2340                if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2341                        break;
2342
2343                r = -EFAULT;
2344                if (copy_from_user(&unp, argp, sizeof(unp)))
2345                        break;
2346
2347                r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2348                                       &cmd->rc, &cmd->rrc);
2349                break;
2350        }
2351        case KVM_PV_VERIFY: {
2352                r = -EINVAL;
2353                if (!kvm_s390_pv_is_protected(kvm))
2354                        break;
2355
2356                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2357                                  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2358                KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2359                             cmd->rrc);
2360                break;
2361        }
2362        case KVM_PV_PREP_RESET: {
2363                r = -EINVAL;
2364                if (!kvm_s390_pv_is_protected(kvm))
2365                        break;
2366
2367                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2368                                  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2369                KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2370                             cmd->rc, cmd->rrc);
2371                break;
2372        }
2373        case KVM_PV_UNSHARE_ALL: {
2374                r = -EINVAL;
2375                if (!kvm_s390_pv_is_protected(kvm))
2376                        break;
2377
2378                r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2379                                  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2380                KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2381                             cmd->rc, cmd->rrc);
2382                break;
2383        }
2384        default:
2385                r = -ENOTTY;
2386        }
2387        return r;
2388}
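
/*
 * Minimal sketch of the transition into protected mode as driven from
 * userspace (illustrative only; vm_fd is a placeholder):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);	// convert the VM and vcpus
 *	// then KVM_PV_SET_SEC_PARMS with the SE header,
 *	// KVM_PV_UNPACK for each image chunk, and finally:
 *	cmd.cmd = KVM_PV_VERIFY;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * On failure, cmd.rc and cmd.rrc carry the ultravisor return codes.
 */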
2389
2390long kvm_arch_vm_ioctl(struct file *filp,
2391                       unsigned int ioctl, unsigned long arg)
2392{
2393        struct kvm *kvm = filp->private_data;
2394        void __user *argp = (void __user *)arg;
2395        struct kvm_device_attr attr;
2396        int r;
2397
2398        switch (ioctl) {
2399        case KVM_S390_INTERRUPT: {
2400                struct kvm_s390_interrupt s390int;
2401
2402                r = -EFAULT;
2403                if (copy_from_user(&s390int, argp, sizeof(s390int)))
2404                        break;
2405                r = kvm_s390_inject_vm(kvm, &s390int);
2406                break;
2407        }
2408        case KVM_CREATE_IRQCHIP: {
2409                struct kvm_irq_routing_entry routing;
2410
2411                r = -EINVAL;
2412                if (kvm->arch.use_irqchip) {
2413                        /* Set up dummy routing. */
2414                        memset(&routing, 0, sizeof(routing));
2415                        r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2416                }
2417                break;
2418        }
2419        case KVM_SET_DEVICE_ATTR: {
2420                r = -EFAULT;
2421                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2422                        break;
2423                r = kvm_s390_vm_set_attr(kvm, &attr);
2424                break;
2425        }
2426        case KVM_GET_DEVICE_ATTR: {
2427                r = -EFAULT;
2428                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2429                        break;
2430                r = kvm_s390_vm_get_attr(kvm, &attr);
2431                break;
2432        }
2433        case KVM_HAS_DEVICE_ATTR: {
2434                r = -EFAULT;
2435                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2436                        break;
2437                r = kvm_s390_vm_has_attr(kvm, &attr);
2438                break;
2439        }
2440        case KVM_S390_GET_SKEYS: {
2441                struct kvm_s390_skeys args;
2442
2443                r = -EFAULT;
2444                if (copy_from_user(&args, argp,
2445                                   sizeof(struct kvm_s390_skeys)))
2446                        break;
2447                r = kvm_s390_get_skeys(kvm, &args);
2448                break;
2449        }
2450        case KVM_S390_SET_SKEYS: {
2451                struct kvm_s390_skeys args;
2452
2453                r = -EFAULT;
2454                if (copy_from_user(&args, argp,
2455                                   sizeof(struct kvm_s390_skeys)))
2456                        break;
2457                r = kvm_s390_set_skeys(kvm, &args);
2458                break;
2459        }
2460        case KVM_S390_GET_CMMA_BITS: {
2461                struct kvm_s390_cmma_log args;
2462
2463                r = -EFAULT;
2464                if (copy_from_user(&args, argp, sizeof(args)))
2465                        break;
2466                mutex_lock(&kvm->slots_lock);
2467                r = kvm_s390_get_cmma_bits(kvm, &args);
2468                mutex_unlock(&kvm->slots_lock);
2469                if (!r) {
2470                        r = copy_to_user(argp, &args, sizeof(args));
2471                        if (r)
2472                                r = -EFAULT;
2473                }
2474                break;
2475        }
2476        case KVM_S390_SET_CMMA_BITS: {
2477                struct kvm_s390_cmma_log args;
2478
2479                r = -EFAULT;
2480                if (copy_from_user(&args, argp, sizeof(args)))
2481                        break;
2482                mutex_lock(&kvm->slots_lock);
2483                r = kvm_s390_set_cmma_bits(kvm, &args);
2484                mutex_unlock(&kvm->slots_lock);
2485                break;
2486        }
2487        case KVM_S390_PV_COMMAND: {
2488                struct kvm_pv_cmd args;
2489
2490                /* protvirt means user sigp */
2491                kvm->arch.user_cpu_state_ctrl = 1;
2492                r = 0;
2493                if (!is_prot_virt_host()) {
2494                        r = -EINVAL;
2495                        break;
2496                }
2497                if (copy_from_user(&args, argp, sizeof(args))) {
2498                        r = -EFAULT;
2499                        break;
2500                }
2501                if (args.flags) {
2502                        r = -EINVAL;
2503                        break;
2504                }
2505                mutex_lock(&kvm->lock);
2506                r = kvm_s390_handle_pv(kvm, &args);
2507                mutex_unlock(&kvm->lock);
2508                if (copy_to_user(argp, &args, sizeof(args))) {
2509                        r = -EFAULT;
2510                        break;
2511                }
2512                break;
2513        }
2514        default:
2515                r = -ENOTTY;
2516        }
2517
2518        return r;
2519}
2520
2521static int kvm_s390_apxa_installed(void)
2522{
2523        struct ap_config_info info;
2524
2525        if (ap_instructions_available()) {
2526                if (ap_qci(&info) == 0)
2527                        return info.apxa;
2528        }
2529
2530        return 0;
2531}
2532
2533/*
2534 * The format of the crypto control block (CRYCB) is specified in the 3 low
2535 * order bits of the CRYCB designation (CRYCBD) field as follows:
2536 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2537 *           AP extended addressing (APXA) facility is installed.
2538 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2539 * Format 2: Both the APXA and MSAX3 facilities are installed.
2540 */
2541static void kvm_s390_set_crycb_format(struct kvm *kvm)
2542{
2543        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2544
2545        /* Clear the CRYCB format bits - i.e., set format 0 by default */
2546        kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2547
2548        /* Check whether MSAX3 is installed */
2549        if (!test_kvm_facility(kvm, 76))
2550                return;
2551
2552        if (kvm_s390_apxa_installed())
2553                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2554        else
2555                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2556}
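
/*
 * The resulting designation, in tabular form:
 *
 *	MSAX3 (facility 76)	APXA	CRYCB format
 *	no			-	FORMAT0
 *	yes			no	FORMAT1
 *	yes			yes	FORMAT2
 */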
2557
2558/*
2559 * kvm_arch_crypto_set_masks
2560 *
2561 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2562 *       to be set.
2563 * @apm: the mask identifying the accessible AP adapters
2564 * @aqm: the mask identifying the accessible AP domains
2565 * @adm: the mask identifying the accessible AP control domains
2566 *
2567 * Set the masks that identify the adapters, domains and control domains to
2568 * which the KVM guest is granted access.
2569 *
2570 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2571 *       function.
2572 */
2573void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2574                               unsigned long *aqm, unsigned long *adm)
2575{
2576        struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2577
2578        kvm_s390_vcpu_block_all(kvm);
2579
2580        switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2581        case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2582                memcpy(crycb->apcb1.apm, apm, 32);
2583                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2584                         apm[0], apm[1], apm[2], apm[3]);
2585                memcpy(crycb->apcb1.aqm, aqm, 32);
2586                VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2587                         aqm[0], aqm[1], aqm[2], aqm[3]);
2588                memcpy(crycb->apcb1.adm, adm, 32);
2589                VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2590                         adm[0], adm[1], adm[2], adm[3]);
2591                break;
2592        case CRYCB_FORMAT1:
2593        case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2594                memcpy(crycb->apcb0.apm, apm, 8);
2595                memcpy(crycb->apcb0.aqm, aqm, 2);
2596                memcpy(crycb->apcb0.adm, adm, 2);
2597                VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2598                         apm[0], *((unsigned short *)aqm),
2599                         *((unsigned short *)adm));
2600                break;
2601        default:        /* Cannot happen */
2602                break;
2603        }
2604
2605        /* recreate the shadow crycb for each vcpu */
2606        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607        kvm_s390_vcpu_unblock_all(kvm);
2608}
2609EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
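
/*
 * Caller sketch (illustrative): an AP pass-through driver such as
 * vfio_ap assigns a matrix of adapters and domains to the guest; with
 * the required lock held it would do roughly:
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, matrix->apm, matrix->aqm, matrix->adm);
 *	mutex_unlock(&kvm->lock);
 *
 * where 'matrix' stands in for the driver's own mask bookkeeping.
 */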
2610
2611/*
2612 * kvm_arch_crypto_clear_masks
2613 *
2614 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2615 *       to be cleared.
2616 *
2617 * Clear the masks that identify the adapters, domains and control domains to
2618 * which the KVM guest is granted access.
2619 *
2620 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2621 *       function.
2622 */
2623void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2624{
2625        kvm_s390_vcpu_block_all(kvm);
2626
2627        memset(&kvm->arch.crypto.crycb->apcb0, 0,
2628               sizeof(kvm->arch.crypto.crycb->apcb0));
2629        memset(&kvm->arch.crypto.crycb->apcb1, 0,
2630               sizeof(kvm->arch.crypto.crycb->apcb1));
2631
2632        VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2633        /* recreate the shadow crycb for each vcpu */
2634        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2635        kvm_s390_vcpu_unblock_all(kvm);
2636}
2637EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2638
2639static u64 kvm_s390_get_initial_cpuid(void)
2640{
2641        struct cpuid cpuid;
2642
2643        get_cpu_id(&cpuid);
2644        cpuid.version = 0xff;
2645        return *((u64 *) &cpuid);
2646}
2647
2648static void kvm_s390_crypto_init(struct kvm *kvm)
2649{
2650        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2651        kvm_s390_set_crycb_format(kvm);
2652        init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2653
2654        if (!test_kvm_facility(kvm, 76))
2655                return;
2656
2657        /* Enable AES/DEA protected key functions by default */
2658        kvm->arch.crypto.aes_kw = 1;
2659        kvm->arch.crypto.dea_kw = 1;
2660        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2661                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2662        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2663                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2664}
2665
2666static void sca_dispose(struct kvm *kvm)
2667{
2668        if (kvm->arch.use_esca)
2669                free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2670        else
2671                free_page((unsigned long)(kvm->arch.sca));
2672        kvm->arch.sca = NULL;
2673}
2674
2675int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2676{
2677        gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2678        int i, rc;
2679        char debug_name[16];
2680        static unsigned long sca_offset;
2681
2682        rc = -EINVAL;
2683#ifdef CONFIG_KVM_S390_UCONTROL
2684        if (type & ~KVM_VM_S390_UCONTROL)
2685                goto out_err;
2686        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2687                goto out_err;
2688#else
2689        if (type)
2690                goto out_err;
2691#endif
2692
2693        rc = s390_enable_sie();
2694        if (rc)
2695                goto out_err;
2696
2697        rc = -ENOMEM;
2698
2699        if (!sclp.has_64bscao)
2700                alloc_flags |= GFP_DMA;
2701        rwlock_init(&kvm->arch.sca_lock);
2702        /* start with basic SCA */
2703        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2704        if (!kvm->arch.sca)
2705                goto out_err;
2706        mutex_lock(&kvm_lock);
2707        sca_offset += 16;
2708        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2709                sca_offset = 0;
2710        kvm->arch.sca = (struct bsca_block *)
2711                        ((char *) kvm->arch.sca + sca_offset);
2712        mutex_unlock(&kvm_lock);
2713
2714        sprintf(debug_name, "kvm-%u", current->pid);
2715
2716        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2717        if (!kvm->arch.dbf)
2718                goto out_err;
2719
2720        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2721        kvm->arch.sie_page2 =
2722             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2723        if (!kvm->arch.sie_page2)
2724                goto out_err;
2725
2726        kvm->arch.sie_page2->kvm = kvm;
2727        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2728
2729        for (i = 0; i < kvm_s390_fac_size(); i++) {
2730                kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2731                                              (kvm_s390_fac_base[i] |
2732                                               kvm_s390_fac_ext[i]);
2733                kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2734                                              kvm_s390_fac_base[i];
2735        }
2736        kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2737
2738        /* we are always in czam mode - even on pre-z14 machines */
2739        set_kvm_facility(kvm->arch.model.fac_mask, 138);
2740        set_kvm_facility(kvm->arch.model.fac_list, 138);
2741        /* we emulate STHYI in kvm */
2742        set_kvm_facility(kvm->arch.model.fac_mask, 74);
2743        set_kvm_facility(kvm->arch.model.fac_list, 74);
2744        if (MACHINE_HAS_TLB_GUEST) {
2745                set_kvm_facility(kvm->arch.model.fac_mask, 147);
2746                set_kvm_facility(kvm->arch.model.fac_list, 147);
2747        }
2748
2749        if (css_general_characteristics.aiv && test_facility(65))
2750                set_kvm_facility(kvm->arch.model.fac_mask, 65);
2751
2752        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2753        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2754
2755        kvm_s390_crypto_init(kvm);
2756
2757        mutex_init(&kvm->arch.float_int.ais_lock);
2758        spin_lock_init(&kvm->arch.float_int.lock);
2759        for (i = 0; i < FIRQ_LIST_COUNT; i++)
2760                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2761        init_waitqueue_head(&kvm->arch.ipte_wq);
2762        mutex_init(&kvm->arch.ipte_mutex);
2763
2764        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2765        VM_EVENT(kvm, 3, "vm created with type %lu", type);
2766
2767        if (type & KVM_VM_S390_UCONTROL) {
2768                kvm->arch.gmap = NULL;
2769                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2770        } else {
2771                if (sclp.hamax == U64_MAX)
2772                        kvm->arch.mem_limit = TASK_SIZE_MAX;
2773                else
2774                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2775                                                    sclp.hamax + 1);
2776                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2777                if (!kvm->arch.gmap)
2778                        goto out_err;
2779                kvm->arch.gmap->private = kvm;
2780                kvm->arch.gmap->pfault_enabled = 0;
2781        }
2782
2783        kvm->arch.use_pfmfi = sclp.has_pfmfi;
2784        kvm->arch.use_skf = sclp.has_skey;
2785        spin_lock_init(&kvm->arch.start_stop_lock);
2786        kvm_s390_vsie_init(kvm);
2787        if (use_gisa)
2788                kvm_s390_gisa_init(kvm);
2789        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2790
2791        return 0;
2792out_err:
2793        free_page((unsigned long)kvm->arch.sie_page2);
2794        debug_unregister(kvm->arch.dbf);
2795        sca_dispose(kvm);
2796        KVM_EVENT(3, "creation of vm failed: %d", rc);
2797        return rc;
2798}
2799
2800void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2801{
2802        u16 rc, rrc;
2803
2804        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2805        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2806        kvm_s390_clear_local_irqs(vcpu);
2807        kvm_clear_async_pf_completion_queue(vcpu);
2808        if (!kvm_is_ucontrol(vcpu->kvm))
2809                sca_del_vcpu(vcpu);
2810
2811        if (kvm_is_ucontrol(vcpu->kvm))
2812                gmap_remove(vcpu->arch.gmap);
2813
2814        if (vcpu->kvm->arch.use_cmma)
2815                kvm_s390_vcpu_unsetup_cmma(vcpu);
2816        /* We cannot hold the vcpu mutex here; we are already dying */
2817        if (kvm_s390_pv_cpu_get_handle(vcpu))
2818                kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2819        free_page((unsigned long)(vcpu->arch.sie_block));
2820}
2821
2822static void kvm_free_vcpus(struct kvm *kvm)
2823{
2824        unsigned int i;
2825        struct kvm_vcpu *vcpu;
2826
2827        kvm_for_each_vcpu(i, vcpu, kvm)
2828                kvm_vcpu_destroy(vcpu);
2829
2830        mutex_lock(&kvm->lock);
2831        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2832                kvm->vcpus[i] = NULL;
2833
2834        atomic_set(&kvm->online_vcpus, 0);
2835        mutex_unlock(&kvm->lock);
2836}
2837
2838void kvm_arch_destroy_vm(struct kvm *kvm)
2839{
2840        u16 rc, rrc;
2841
2842        kvm_free_vcpus(kvm);
2843        sca_dispose(kvm);
2844        kvm_s390_gisa_destroy(kvm);
2845        /*
2846         * We are already at the end of life and kvm->lock is not taken.
2847         * This is ok as the file descriptor is closed by now and nobody
2848         * can mess with the pv state. To avoid lockdep_assert_held from
2849         * complaining we do not use kvm_s390_pv_is_protected.
2850         */
2851        if (kvm_s390_pv_get_handle(kvm))
2852                kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2853        debug_unregister(kvm->arch.dbf);
2854        free_page((unsigned long)kvm->arch.sie_page2);
2855        if (!kvm_is_ucontrol(kvm))
2856                gmap_remove(kvm->arch.gmap);
2857        kvm_s390_destroy_adapters(kvm);
2858        kvm_s390_clear_float_irqs(kvm);
2859        kvm_s390_vsie_destroy(kvm);
2860        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2861}
2862
2863/* Section: vcpu related */
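/* ucontrol VMs give each vcpu a private gmap instead of a VM-wide one. */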
2864static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2865{
2866        vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2867        if (!vcpu->arch.gmap)
2868                return -ENOMEM;
2869        vcpu->arch.gmap->private = vcpu->kvm;
2870
2871        return 0;
2872}
2873
2874static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2875{
2876        if (!kvm_s390_use_sca_entries())
2877                return;
2878        read_lock(&vcpu->kvm->arch.sca_lock);
2879        if (vcpu->kvm->arch.use_esca) {
2880                struct esca_block *sca = vcpu->kvm->arch.sca;
2881
2882                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2883                sca->cpu[vcpu->vcpu_id].sda = 0;
2884        } else {
2885                struct bsca_block *sca = vcpu->kvm->arch.sca;
2886
2887                clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2888                sca->cpu[vcpu->vcpu_id].sda = 0;
2889        }
2890        read_unlock(&vcpu->kvm->arch.sca_lock);
2891}
2892
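/*
 * Register the vcpu in the (E)SCA: point the SIE block at the SCA
 * origin and, if SCA entries are in use, publish the SIE block address
 * in the vcpu's slot and mark the vcpu in the MCN bitmap.
 */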
2893static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2894{
2895        if (!kvm_s390_use_sca_entries()) {
2896                struct bsca_block *sca = vcpu->kvm->arch.sca;
2897
2898                /* we still need the basic sca for the ipte control */
2899                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2900                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2901                return;
2902        }
2903        read_lock(&vcpu->kvm->arch.sca_lock);
2904        if (vcpu->kvm->arch.use_esca) {
2905                struct esca_block *sca = vcpu->kvm->arch.sca;
2906
2907                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2908                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2909                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2910                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2911                set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2912        } else {
2913                struct bsca_block *sca = vcpu->kvm->arch.sca;
2914
2915                sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2916                vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2917                vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2918                set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2919        }
2920        read_unlock(&vcpu->kvm->arch.sca_lock);
2921}
2922
2923/* Basic SCA to Extended SCA data copy routines */
2924static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2925{
2926        d->sda = s->sda;
2927        d->sigp_ctrl.c = s->sigp_ctrl.c;
2928        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2929}
2930
2931static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2932{
2933        int i;
2934
2935        d->ipte_control = s->ipte_control;
2936        d->mcn[0] = s->mcn;
2937        for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2938                sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2939}
2940
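/*
 * Replace the basic SCA with an extended SCA: allocate the new block,
 * block all vcpus out of SIE, copy the entries over under the sca_lock
 * write lock, repoint every SIE block at the new origin with ECB2_ESCA
 * set, then release the old basic SCA.
 */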
2941static int sca_switch_to_extended(struct kvm *kvm)
2942{
2943        struct bsca_block *old_sca = kvm->arch.sca;
2944        struct esca_block *new_sca;
2945        struct kvm_vcpu *vcpu;
2946        unsigned int vcpu_idx;
2947        u32 scaol, scaoh;
2948
2949        if (kvm->arch.use_esca)
2950                return 0;
2951
2952        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2953        if (!new_sca)
2954                return -ENOMEM;
2955
2956        scaoh = (u32)((u64)(new_sca) >> 32);
2957        scaol = (u32)(u64)(new_sca) & ~0x3fU;
2958
2959        kvm_s390_vcpu_block_all(kvm);
2960        write_lock(&kvm->arch.sca_lock);
2961
2962        sca_copy_b_to_e(new_sca, old_sca);
2963
2964        kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2965                vcpu->arch.sie_block->scaoh = scaoh;
2966                vcpu->arch.sie_block->scaol = scaol;
2967                vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2968        }
2969        kvm->arch.sca = new_sca;
2970        kvm->arch.use_esca = 1;
2971
2972        write_unlock(&kvm->arch.sca_lock);
2973        kvm_s390_vcpu_unblock_all(kvm);
2974
2975        free_page((unsigned long)old_sca);
2976
2977        VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2978                 old_sca, kvm->arch.sca);
2979        return 0;
2980}
2981
2982static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2983{
2984        int rc;
2985
2986        if (!kvm_s390_use_sca_entries()) {
2987                if (id < KVM_MAX_VCPUS)
2988                        return true;
2989                return false;
2990        }
2991        if (id < KVM_S390_BSCA_CPU_SLOTS)
2992                return true;
2993        if (!sclp.has_esca || !sclp.has_64bscao)
2994                return false;
2995
2996        mutex_lock(&kvm->lock);
2997        rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2998        mutex_unlock(&kvm->lock);
2999
3000        return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3001}
3002
3003/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3004static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005{
3006        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3007        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008        vcpu->arch.cputm_start = get_tod_clock_fast();
3009        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3010}
3011
3012/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3013static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3014{
3015        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3016        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3017        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3018        vcpu->arch.cputm_start = 0;
3019        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3020}
3021
3022/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3023static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3024{
3025        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3026        vcpu->arch.cputm_enabled = true;
3027        __start_cpu_timer_accounting(vcpu);
3028}
3029
3030/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3031static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3032{
3033        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3034        __stop_cpu_timer_accounting(vcpu);
3035        vcpu->arch.cputm_enabled = false;
3036}
3037
3038static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3039{
3040        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3041        __enable_cpu_timer_accounting(vcpu);
3042        preempt_enable();
3043}
3044
3045static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3046{
3047        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3048        __disable_cpu_timer_accounting(vcpu);
3049        preempt_enable();
3050}
3051
3052/* set the cpu timer - may only be called from the VCPU thread itself */
3053void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3054{
3055        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3056        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3057        if (vcpu->arch.cputm_enabled)
3058                vcpu->arch.cputm_start = get_tod_clock_fast();
3059        vcpu->arch.sie_block->cputm = cputm;
3060        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3061        preempt_enable();
3062}
3063
3064/* update and get the cpu timer - can also be called from other VCPU threads */
3065__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3066{
3067        unsigned int seq;
3068        __u64 value;
3069
3070        if (unlikely(!vcpu->arch.cputm_enabled))
3071                return vcpu->arch.sie_block->cputm;
3072
3073        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3074        do {
3075                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3076                /*
3077                 * If the writer would ever execute a read in the critical
3078                 * section, e.g. in irq context, we have a deadlock.
3079                 */
3080                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3081                value = vcpu->arch.sie_block->cputm;
3082                /* if cputm_start is 0, accounting is being started/stopped */
3083                if (likely(vcpu->arch.cputm_start))
3084                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3085        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3086        preempt_enable();
3087        return value;
3088}
3089
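/*
 * Called on sched-in: enable the vcpu's gmap, flag the cpu as running
 * and resume CPU timer accounting unless the vcpu is idle.
 */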
3090void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3091{
3092
3093        gmap_enable(vcpu->arch.enabled_gmap);
3094        kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3095        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3096                __start_cpu_timer_accounting(vcpu);
3097        vcpu->cpu = cpu;
3098}
3099
3100void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3101{
3102        vcpu->cpu = -1;
3103        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3104                __stop_cpu_timer_accounting(vcpu);
3105        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3106        vcpu->arch.enabled_gmap = gmap_get_enabled();
3107        gmap_disable(vcpu->arch.enabled_gmap);
3108
3109}
3110
3111void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3112{
3113        mutex_lock(&vcpu->kvm->lock);
3114        preempt_disable();
3115        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3116        vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3117        preempt_enable();
3118        mutex_unlock(&vcpu->kvm->lock);
3119        if (!kvm_is_ucontrol(vcpu->kvm)) {
3120                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3121                sca_add_vcpu(vcpu);
3122        }
3123        if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3124                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3125        /* make vcpu_load load the right gmap on the first trigger */
3126        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3127}
3128
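/*
 * A PCKMO subfunction is usable only if it is offered both by the
 * configured CPU model and by the host.
 */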
3129static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3130{
3131        if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3132            test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3133                return true;
3134        return false;
3135}
3136
3137static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3138{
3139        /* At least one ECC subfunction must be present */
3140        return kvm_has_pckmo_subfunc(kvm, 32) ||
3141               kvm_has_pckmo_subfunc(kvm, 33) ||
3142               kvm_has_pckmo_subfunc(kvm, 34) ||
3143               kvm_has_pckmo_subfunc(kvm, 40) ||
3144               kvm_has_pckmo_subfunc(kvm, 41);
3145
3146}
3147
3148static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3149{
3150        /*
3151         * If the AP instructions are not being interpreted and the MSAX3
3152         * facility is not configured for the guest, there is nothing to set up.
3153         */
3154        if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3155                return;
3156
3157        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3158        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3159        vcpu->arch.sie_block->eca &= ~ECA_APIE;
3160        vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3161
3162        if (vcpu->kvm->arch.crypto.apie)
3163                vcpu->arch.sie_block->eca |= ECA_APIE;
3164
3165        /* Set up protected key support */
3166        if (vcpu->kvm->arch.crypto.aes_kw) {
3167                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3168                /* ecc is also wrapped with AES key */
3169                if (kvm_has_pckmo_ecc(vcpu->kvm))
3170                        vcpu->arch.sie_block->ecd |= ECD_ECC;
3171        }
3172
3173        if (vcpu->kvm->arch.crypto.dea_kw)
3174                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3175}
3176
3177void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3178{
3179        free_page(vcpu->arch.sie_block->cbrlo);
3180        vcpu->arch.sie_block->cbrlo = 0;
3181}
3182
3183int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3184{
3185        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3186        if (!vcpu->arch.sie_block->cbrlo)
3187                return -ENOMEM;
3188        return 0;
3189}
3190
3191static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3192{
3193        struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3194
3195        vcpu->arch.sie_block->ibc = model->ibc;
3196        if (test_kvm_facility(vcpu->kvm, 7))
3197                vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3198}
3199
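/*
 * Apply the CPU model to a freshly created vcpu: seed the cpuflags,
 * translate the tested facilities into ECB/ECA/ECD control bits, set
 * up CMMA and crypto, arm the clock comparator timer and, for
 * protected VMs, create the matching ultravisor CPU.
 */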
3200static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3201{
3202        int rc = 0;
3203        u16 uvrc, uvrrc;
3204
3205        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3206                                                    CPUSTAT_SM |
3207                                                    CPUSTAT_STOPPED);
3208
3209        if (test_kvm_facility(vcpu->kvm, 78))
3210                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3211        else if (test_kvm_facility(vcpu->kvm, 8))
3212                kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3213
3214        kvm_s390_vcpu_setup_model(vcpu);
3215
3216        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3217        if (MACHINE_HAS_ESOP)
3218                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3219        if (test_kvm_facility(vcpu->kvm, 9))
3220                vcpu->arch.sie_block->ecb |= ECB_SRSI;
3221        if (test_kvm_facility(vcpu->kvm, 73))
3222                vcpu->arch.sie_block->ecb |= ECB_TE;
3223        if (!kvm_is_ucontrol(vcpu->kvm))
3224                vcpu->arch.sie_block->ecb |= ECB_SPECI;
3225
3226        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3227                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3228        if (test_kvm_facility(vcpu->kvm, 130))
3229                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3230        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3231        if (sclp.has_cei)
3232                vcpu->arch.sie_block->eca |= ECA_CEI;
3233        if (sclp.has_ib)
3234                vcpu->arch.sie_block->eca |= ECA_IB;
3235        if (sclp.has_siif)
3236                vcpu->arch.sie_block->eca |= ECA_SII;
3237        if (sclp.has_sigpif)
3238                vcpu->arch.sie_block->eca |= ECA_SIGPI;
3239        if (test_kvm_facility(vcpu->kvm, 129)) {
3240                vcpu->arch.sie_block->eca |= ECA_VX;
3241                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3242        }
3243        if (test_kvm_facility(vcpu->kvm, 139))
3244                vcpu->arch.sie_block->ecd |= ECD_MEF;
3245        if (test_kvm_facility(vcpu->kvm, 156))
3246                vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3247        if (vcpu->arch.sie_block->gd) {
3248                vcpu->arch.sie_block->eca |= ECA_AIV;
3249                VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3250                           vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3251        }
3252        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3253                                        | SDNXC;
3254        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3255
3256        if (sclp.has_kss)
3257                kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3258        else
3259                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3260
3261        if (vcpu->kvm->arch.use_cmma) {
3262                rc = kvm_s390_vcpu_setup_cmma(vcpu);
3263                if (rc)
3264                        return rc;
3265        }
3266        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3267        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3268
3269        vcpu->arch.sie_block->hpid = HPID_KVM;
3270
3271        kvm_s390_vcpu_crypto_setup(vcpu);
3272
3273        mutex_lock(&vcpu->kvm->lock);
3274        if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3275                rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3276                if (rc)
3277                        kvm_s390_vcpu_unsetup_cmma(vcpu);
3278        }
3279        mutex_unlock(&vcpu->kvm->lock);
3280
3281        return rc;
3282}
3283
3284int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3285{
3286        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3287                return -EINVAL;
3288        return 0;
3289}
3290
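/*
 * Allocate and initialize the vcpu's SIE control block and declare
 * which register sets the kvm_run synchronization area carries, based
 * on the facilities of the VM; ucontrol vcpus additionally get their
 * private gmap here.
 */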
3291int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3292{
3293        struct sie_page *sie_page;
3294        int rc;
3295
3296        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3297        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3298        if (!sie_page)
3299                return -ENOMEM;
3300
3301        vcpu->arch.sie_block = &sie_page->sie_block;
3302        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3303
3304        /* the real guest size will always be smaller than msl */
3305        vcpu->arch.sie_block->mso = 0;
3306        vcpu->arch.sie_block->msl = sclp.hamax;
3307
3308        vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3309        spin_lock_init(&vcpu->arch.local_int.lock);
3310        vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3311        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3312                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3313        seqcount_init(&vcpu->arch.cputm_seqcount);
3314
3315        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3316        kvm_clear_async_pf_completion_queue(vcpu);
3317        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3318                                    KVM_SYNC_GPRS |
3319                                    KVM_SYNC_ACRS |
3320                                    KVM_SYNC_CRS |
3321                                    KVM_SYNC_ARCH0 |
3322                                    KVM_SYNC_PFAULT |
3323                                    KVM_SYNC_DIAG318;
3324        kvm_s390_set_prefix(vcpu, 0);
3325        if (test_kvm_facility(vcpu->kvm, 64))
3326                vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3327        if (test_kvm_facility(vcpu->kvm, 82))
3328                vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3329        if (test_kvm_facility(vcpu->kvm, 133))
3330                vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3331        if (test_kvm_facility(vcpu->kvm, 156))
3332                vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3333        /* fprs can be synchronized via vrs, even if the guest has no vx. With
3334         * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3335         */
3336        if (MACHINE_HAS_VX)
3337                vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3338        else
3339                vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3340
3341        if (kvm_is_ucontrol(vcpu->kvm)) {
3342                rc = __kvm_ucontrol_vcpu_init(vcpu);
3343                if (rc)
3344                        goto out_free_sie_block;
3345        }
3346
3347        VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3348                 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3349        trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3350
3351        rc = kvm_s390_vcpu_setup(vcpu);
3352        if (rc)
3353                goto out_ucontrol_uninit;
3354        return 0;
3355
3356out_ucontrol_uninit:
3357        if (kvm_is_ucontrol(vcpu->kvm))
3358                gmap_remove(vcpu->arch.gmap);
3359out_free_sie_block:
3360        free_page((unsigned long)(vcpu->arch.sie_block));
3361        return rc;
3362}
3363
3364int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3365{
3366        clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3367        return kvm_s390_vcpu_has_irq(vcpu, 0);
3368}
3369
3370bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3371{
3372        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3373}
3374
3375void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3376{
3377        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3378        exit_sie(vcpu);
3379}
3380
3381void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3382{
3383        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3384}
3385
3386static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3387{
3388        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3389        exit_sie(vcpu);
3390}
3391
3392bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3393{
3394        return atomic_read(&vcpu->arch.sie_block->prog20) &
3395               (PROG_BLOCK_SIE | PROG_REQUEST);
3396}
3397
3398static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3399{
3400        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3401}
3402
3403/*
3404 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3405 * If the CPU is not running (e.g. waiting as idle) the function will
3406 * return immediately.
 */
3407void exit_sie(struct kvm_vcpu *vcpu)
3408{
3409        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3410        kvm_s390_vsie_kick(vcpu);
3411        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3412                cpu_relax();
3413}
3414
3415/* Kick a guest cpu out of SIE to process a request synchronously */
3416void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3417{
3418        kvm_make_request(req, vcpu);
3419        kvm_s390_vcpu_request(vcpu);
3420}
3421
3422static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3423                              unsigned long end)
3424{
3425        struct kvm *kvm = gmap->private;
3426        struct kvm_vcpu *vcpu;
3427        unsigned long prefix;
3428        int i;
3429
3430        if (gmap_is_shadow(gmap))
3431                return;
3432        if (start >= 1UL << 31)
3433                /* We are only interested in prefix pages */
3434                return;
3435        kvm_for_each_vcpu(i, vcpu, kvm) {
3436                /* match against both prefix pages */
3437                prefix = kvm_s390_get_prefix(vcpu);
3438                if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3439                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3440                                   start, end);
3441                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3442                }
3443        }
3444}
3445
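/*
 * Note: avg_steal_timer is kept in CPU-timer units (1 us corresponds
 * to 4096 such units), and TICK_USEC << 12 is one tick converted to
 * the same units, so the expression below yields the steal time of
 * the last tick interval as a percentage.
 */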
3446bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3447{
3448        /* do not poll with more than halt_poll_max_steal percent of steal time */
3449        if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3450            halt_poll_max_steal) {
3451                vcpu->stat.halt_no_poll_steal++;
3452                return true;
3453        }
3454        return false;
3455}
3456
3457int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3458{
3459        /* kvm common code refers to this, but never calls it */
3460        BUG();
3461        return 0;
3462}
3463
3464static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3465                                           struct kvm_one_reg *reg)
3466{
3467        int r = -EINVAL;
3468
3469        switch (reg->id) {
3470        case KVM_REG_S390_TODPR:
3471                r = put_user(vcpu->arch.sie_block->todpr,
3472                             (u32 __user *)reg->addr);
3473                break;
3474        case KVM_REG_S390_EPOCHDIFF:
3475                r = put_user(vcpu->arch.sie_block->epoch,
3476                             (u64 __user *)reg->addr);
3477                break;
3478        case KVM_REG_S390_CPU_TIMER:
3479                r = put_user(kvm_s390_get_cpu_timer(vcpu),
3480                             (u64 __user *)reg->addr);
3481                break;
3482        case KVM_REG_S390_CLOCK_COMP:
3483                r = put_user(vcpu->arch.sie_block->ckc,
3484                             (u64 __user *)reg->addr);
3485                break;
3486        case KVM_REG_S390_PFTOKEN:
3487                r = put_user(vcpu->arch.pfault_token,
3488                             (u64 __user *)reg->addr);
3489                break;
3490        case KVM_REG_S390_PFCOMPARE:
3491                r = put_user(vcpu->arch.pfault_compare,
3492                             (u64 __user *)reg->addr);
3493                break;
3494        case KVM_REG_S390_PFSELECT:
3495                r = put_user(vcpu->arch.pfault_select,
3496                             (u64 __user *)reg->addr);
3497                break;
3498        case KVM_REG_S390_PP:
3499                r = put_user(vcpu->arch.sie_block->pp,
3500                             (u64 __user *)reg->addr);
3501                break;
3502        case KVM_REG_S390_GBEA:
3503                r = put_user(vcpu->arch.sie_block->gbea,
3504                             (u64 __user *)reg->addr);
3505                break;
3506        default:
3507                break;
3508        }
3509
3510        return r;
3511}
3512
3513static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3514                                           struct kvm_one_reg *reg)
3515{
3516        int r = -EINVAL;
3517        __u64 val;
3518
3519        switch (reg->id) {
3520        case KVM_REG_S390_TODPR:
3521                r = get_user(vcpu->arch.sie_block->todpr,
3522                             (u32 __user *)reg->addr);
3523                break;
3524        case KVM_REG_S390_EPOCHDIFF:
3525                r = get_user(vcpu->arch.sie_block->epoch,
3526                             (u64 __user *)reg->addr);
3527                break;
3528        case KVM_REG_S390_CPU_TIMER:
3529                r = get_user(val, (u64 __user *)reg->addr);
3530                if (!r)
3531                        kvm_s390_set_cpu_timer(vcpu, val);
3532                break;
3533        case KVM_REG_S390_CLOCK_COMP:
3534                r = get_user(vcpu->arch.sie_block->ckc,
3535                             (u64 __user *)reg->addr);
3536                break;
3537        case KVM_REG_S390_PFTOKEN:
3538                r = get_user(vcpu->arch.pfault_token,
3539                             (u64 __user *)reg->addr);
3540                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3541                        kvm_clear_async_pf_completion_queue(vcpu);
3542                break;
3543        case KVM_REG_S390_PFCOMPARE:
3544                r = get_user(vcpu->arch.pfault_compare,
3545                             (u64 __user *)reg->addr);
3546                break;
3547        case KVM_REG_S390_PFSELECT:
3548                r = get_user(vcpu->arch.pfault_select,
3549                             (u64 __user *)reg->addr);
3550                break;
3551        case KVM_REG_S390_PP:
3552                r = get_user(vcpu->arch.sie_block->pp,
3553                             (u64 __user *)reg->addr);
3554                break;
3555        case KVM_REG_S390_GBEA:
3556                r = get_user(vcpu->arch.sie_block->gbea,
3557                             (u64 __user *)reg->addr);
3558                break;
3559        default:
3560                break;
3561        }
3562
3563        return r;
3564}
3565
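/*
 * The subset of the architected CPU resets shared by normal, initial
 * and clear reset: drop runtime instrumentation state, forget any
 * outstanding pfault token and clear pending local interrupts; the
 * vcpu is stopped unless userspace controls the cpu state itself.
 */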
3566static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3567{
3568        vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3569        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3570        memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3571
3572        kvm_clear_async_pf_completion_queue(vcpu);
3573        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3574                kvm_s390_vcpu_stop(vcpu);
3575        kvm_s390_clear_local_irqs(vcpu);
3576}
3577
3578static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3579{
3580        /* Initial reset is a superset of the normal reset */
3581        kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3582
3583        /*
3584         * This equals the initial cpu reset in the PoP, but we don't switch to ESA.
3585         * We do not only reset the internal data, but also ...
3586         */
3587        vcpu->arch.sie_block->gpsw.mask = 0;
3588        vcpu->arch.sie_block->gpsw.addr = 0;
3589        kvm_s390_set_prefix(vcpu, 0);
3590        kvm_s390_set_cpu_timer(vcpu, 0);
3591        vcpu->arch.sie_block->ckc = 0;
3592        memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3593        vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3594        vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3595
3596        /* ... the data in sync regs */
3597        memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3598        vcpu->run->s.regs.ckc = 0;
3599        vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3600        vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3601        vcpu->run->psw_addr = 0;
3602        vcpu->run->psw_mask = 0;
3603        vcpu->run->s.regs.todpr = 0;
3604        vcpu->run->s.regs.cputm = 0;
3605        vcpu->run->s.regs.ckc = 0;
3606        vcpu->run->s.regs.pp = 0;
3607        vcpu->run->s.regs.gbea = 1;
3608        vcpu->run->s.regs.fpc = 0;
3609        /*
3610         * Do not reset these registers in the protected case, as some of
3611         * them are overlaid and they are not accessible in this case
3612         * anyway.
3613         */
3614        if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3615                vcpu->arch.sie_block->gbea = 1;
3616                vcpu->arch.sie_block->pp = 0;
3617                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3618                vcpu->arch.sie_block->todpr = 0;
3619        }
3620}
3621
3622static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3623{
3624        struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3625
3626        /* Clear reset is a superset of the initial reset */
3627        kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3628
3629        memset(&regs->gprs, 0, sizeof(regs->gprs));
3630        memset(&regs->vrs, 0, sizeof(regs->vrs));
3631        memset(&regs->acrs, 0, sizeof(regs->acrs));
3632        memset(&regs->gscb, 0, sizeof(regs->gscb));
3633
3634        regs->etoken = 0;
3635        regs->etoken_extension = 0;
3636}
3637
3638int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3639{
3640        vcpu_load(vcpu);
3641        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3642        vcpu_put(vcpu);
3643        return 0;
3644}
3645
3646int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3647{
3648        vcpu_load(vcpu);
3649        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3650        vcpu_put(vcpu);
3651        return 0;
3652}
3653
3654int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3655                                  struct kvm_sregs *sregs)
3656{
3657        vcpu_load(vcpu);
3658
3659        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3660        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3661
3662        vcpu_put(vcpu);
3663        return 0;
3664}
3665
3666int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3667                                  struct kvm_sregs *sregs)
3668{
3669        vcpu_load(vcpu);
3670
3671        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3672        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3673
3674        vcpu_put(vcpu);
3675        return 0;
3676}
3677
3678int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3679{
3680        int ret = 0;
3681
3682        vcpu_load(vcpu);
3683
3684        if (test_fp_ctl(fpu->fpc)) {
3685                ret = -EINVAL;
3686                goto out;
3687        }
3688        vcpu->run->s.regs.fpc = fpu->fpc;
3689        if (MACHINE_HAS_VX)
3690                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3691                                 (freg_t *) fpu->fprs);
3692        else
3693                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3694
3695out:
3696        vcpu_put(vcpu);
3697        return ret;
3698}
3699
3700int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3701{
3702        vcpu_load(vcpu);
3703
3704        /* make sure we have the latest values */
3705        save_fpu_regs();
3706        if (MACHINE_HAS_VX)
3707                convert_vx_to_fp((freg_t *) fpu->fprs,
3708                                 (__vector128 *) vcpu->run->s.regs.vrs);
3709        else
3710                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3711        fpu->fpc = vcpu->run->s.regs.fpc;
3712
3713        vcpu_put(vcpu);
3714        return 0;
3715}
3716
3717static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3718{
3719        int rc = 0;
3720
3721        if (!is_vcpu_stopped(vcpu))
3722                rc = -EBUSY;
3723        else {
3724                vcpu->run->psw_mask = psw.mask;
3725                vcpu->run->psw_addr = psw.addr;
3726        }
3727        return rc;
3728}
3729
3730int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3731                                  struct kvm_translation *tr)
3732{
3733        return -EINVAL; /* not implemented yet */
3734}
3735
3736#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3737                              KVM_GUESTDBG_USE_HW_BP | \
3738                              KVM_GUESTDBG_ENABLE)
3739
3740int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3741                                        struct kvm_guest_debug *dbg)
3742{
3743        int rc = 0;
3744
3745        vcpu_load(vcpu);
3746
3747        vcpu->guest_debug = 0;
3748        kvm_s390_clear_bp_data(vcpu);
3749
3750        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3751                rc = -EINVAL;
3752                goto out;
3753        }
3754        if (!sclp.has_gpere) {
3755                rc = -EINVAL;
3756                goto out;
3757        }
3758
3759        if (dbg->control & KVM_GUESTDBG_ENABLE) {
3760                vcpu->guest_debug = dbg->control;
3761                /* enforce guest PER */
3762                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3763
3764                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3765                        rc = kvm_s390_import_bp_data(vcpu, dbg);
3766        } else {
3767                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3768                vcpu->arch.guestdbg.last_bp = 0;
3769        }
3770
3771        if (rc) {
3772                vcpu->guest_debug = 0;
3773                kvm_s390_clear_bp_data(vcpu);
3774                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3775        }
3776
3777out:
3778        vcpu_put(vcpu);
3779        return rc;
3780}
3781
3782int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3783                                    struct kvm_mp_state *mp_state)
3784{
3785        int ret;
3786
3787        vcpu_load(vcpu);
3788
3789        /* CHECK_STOP and LOAD are not supported yet */
3790        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3791                                      KVM_MP_STATE_OPERATING;
3792
3793        vcpu_put(vcpu);
3794        return ret;
3795}
3796
3797int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3798                                    struct kvm_mp_state *mp_state)
3799{
3800        int rc = 0;
3801
3802        vcpu_load(vcpu);
3803
3804        /* user space knows about this interface - let it control the state */
3805        vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3806
3807        switch (mp_state->mp_state) {
3808        case KVM_MP_STATE_STOPPED:
3809                rc = kvm_s390_vcpu_stop(vcpu);
3810                break;
3811        case KVM_MP_STATE_OPERATING:
3812                rc = kvm_s390_vcpu_start(vcpu);
3813                break;
3814        case KVM_MP_STATE_LOAD:
3815                if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3816                        rc = -ENXIO;
3817                        break;
3818                }
3819                rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3820                break;
3821        case KVM_MP_STATE_CHECK_STOP:
3822                fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3823        default:
3824                rc = -ENXIO;
3825        }
3826
3827        vcpu_put(vcpu);
3828        return rc;
3829}
3830
3831static bool ibs_enabled(struct kvm_vcpu *vcpu)
3832{
3833        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3834}
3835
3836static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3837{
3838retry:
3839        kvm_s390_vcpu_request_handled(vcpu);
3840        if (!kvm_request_pending(vcpu))
3841                return 0;
3842        /*
3843         * We use MMU_RELOAD just to re-arm the ipte notifier for the
3844         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3845         * This ensures that the ipte instruction for this request has
3846         * already finished. We might race against a second unmapper that
3847         * wants to set the blocking bit. Let's just retry the request loop.
3848         */
3849        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3850                int rc;
3851                rc = gmap_mprotect_notify(vcpu->arch.gmap,
3852                                          kvm_s390_get_prefix(vcpu),
3853                                          PAGE_SIZE * 2, PROT_WRITE);
3854                if (rc) {
3855                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3856                        return rc;
3857                }
3858                goto retry;
3859        }
3860
3861        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3862                vcpu->arch.sie_block->ihcpu = 0xffff;
3863                goto retry;
3864        }
3865
3866        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3867                if (!ibs_enabled(vcpu)) {
3868                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3869                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3870                }
3871                goto retry;
3872        }
3873
3874        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3875                if (ibs_enabled(vcpu)) {
3876                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3877                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3878                }
3879                goto retry;
3880        }
3881
3882        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3883                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3884                goto retry;
3885        }
3886
3887        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3888                /*
3889                 * Disable CMM virtualization; we will emulate the ESSA
3890                 * instruction manually, in order to provide additional
3891                 * functionalities needed for live migration.
3892                 */
3893                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3894                goto retry;
3895        }
3896
3897        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3898                /*
3899                 * Re-enable CMM virtualization if CMMA is available and
3900                 * CMM has been used.
3901                 */
3902                if ((vcpu->kvm->arch.use_cmma) &&
3903                    (vcpu->kvm->mm->context.uses_cmm))
3904                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3905                goto retry;
3906        }
3907
3908        /* nothing to do, just clear the request */
3909        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3910        /* we left the vsie handler, nothing to do, just clear the request */
3911        kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3912
3913        return 0;
3914}
3915
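/*
 * Store the guest TOD as an epoch offset relative to the host TOD
 * (including the epoch index when the multiple-epoch facility 139 is
 * present) and propagate it to all vcpus while they are blocked out
 * of SIE.
 */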
3916void kvm_s390_set_tod_clock(struct kvm *kvm,
3917                            const struct kvm_s390_vm_tod_clock *gtod)
3918{
3919        struct kvm_vcpu *vcpu;
3920        union tod_clock clk;
3921        int i;
3922
3923        mutex_lock(&kvm->lock);
3924        preempt_disable();
3925
3926        store_tod_clock_ext(&clk);
3927
3928        kvm->arch.epoch = gtod->tod - clk.tod;
3929        kvm->arch.epdx = 0;
3930        if (test_kvm_facility(kvm, 139)) {
3931                kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3932                if (kvm->arch.epoch > gtod->tod)
3933                        kvm->arch.epdx -= 1;
3934        }
3935
3936        kvm_s390_vcpu_block_all(kvm);
3937        kvm_for_each_vcpu(i, vcpu, kvm) {
3938                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3939                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3940        }
3941
3942        kvm_s390_vcpu_unblock_all(kvm);
3943        preempt_enable();
3944        mutex_unlock(&kvm->lock);
3945}
3946
3947/**
3948 * kvm_arch_fault_in_page - fault-in guest page if necessary
3949 * @vcpu: The corresponding virtual cpu
3950 * @gpa: Guest physical address
3951 * @writable: Whether the page should be writable or not
3952 *
3953 * Make sure that a guest page has been faulted-in on the host.
3954 *
3955 * Return: Zero on success, negative error code otherwise.
3956 */
3957long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3958{
3959        return gmap_fault(vcpu->arch.gmap, gpa,
3960                          writable ? FAULT_FLAG_WRITE : 0);
3961}
3962
3963static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3964                                      unsigned long token)
3965{
3966        struct kvm_s390_interrupt inti;
3967        struct kvm_s390_irq irq;
3968
3969        if (start_token) {
3970                irq.u.ext.ext_params2 = token;
3971                irq.type = KVM_S390_INT_PFAULT_INIT;
3972                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3973        } else {
3974                inti.type = KVM_S390_INT_PFAULT_DONE;
3975                inti.parm64 = token;
3976                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3977        }
3978}
3979
3980bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3981                                     struct kvm_async_pf *work)
3982{
3983        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3984        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3985
3986        return true;
3987}
3988
3989void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3990                                 struct kvm_async_pf *work)
3991{
3992        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3993        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3994}
3995
3996void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3997                               struct kvm_async_pf *work)
3998{
3999        /* s390 will always inject the page directly */
4000}
4001
4002bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4003{
4004        /*
4005         * s390 will always inject the page directly,
4006         * but we still want kvm_check_async_pf_completion() to clean up
4007         */
4008        return true;
4009}
4010
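/*
 * Check whether the current host fault may be handled asynchronously:
 * the guest must have armed a pfault token, the PSW must match the
 * pfault compare/select criteria, external interrupts and the service
 * signal submask must be open, no other interrupt may be pending, and
 * pfaults must be enabled on the gmap. If so, queue the work with the
 * common async_pf code.
 */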
4011static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4012{
4013        hva_t hva;
4014        struct kvm_arch_async_pf arch;
4015
4016        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4017                return false;
4018        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4019            vcpu->arch.pfault_compare)
4020                return false;
4021        if (psw_extint_disabled(vcpu))
4022                return false;
4023        if (kvm_s390_vcpu_has_irq(vcpu, 0))
4024                return false;
4025        if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4026                return false;
4027        if (!vcpu->arch.gmap->pfault_enabled)
4028                return false;
4029
4030        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4031        hva += current->thread.gmap_addr & ~PAGE_MASK;
4032        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4033                return false;
4034
4035        return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4036}
4037
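/*
 * Per-iteration work before (re)entering SIE: complete async pfaults,
 * mirror gprs 14/15 into the SIE block, deliver pending interrupts,
 * process pending vcpu requests and arm guest debugging.
 */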
4038static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4039{
4040        int rc, cpuflags;
4041
4042        /*
4043         * On s390 notifications for arriving pages will be delivered directly
4044         * to the guest, but the housekeeping for completed pfaults is
4045         * handled outside the worker.
4046         */
4047        kvm_check_async_pf_completion(vcpu);
4048
4049        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4050        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4051
4052        if (need_resched())
4053                schedule();
4054
4055        if (!kvm_is_ucontrol(vcpu->kvm)) {
4056                rc = kvm_s390_deliver_pending_interrupts(vcpu);
4057                if (rc)
4058                        return rc;
4059        }
4060
4061        rc = kvm_s390_handle_requests(vcpu);
4062        if (rc)
4063                return rc;
4064
4065        if (guestdbg_enabled(vcpu)) {
4066                kvm_s390_backup_guest_per_regs(vcpu);
4067                kvm_s390_patch_guest_per_regs(vcpu);
4068        }
4069
4070        clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4071
4072        vcpu->arch.sie_block->icptcode = 0;
4073        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4074        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4075        trace_kvm_s390_sie_enter(vcpu, cpuflags);
4076
4077        return 0;
4078}
4079
4080static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4081{
4082        struct kvm_s390_pgm_info pgm_info = {
4083                .code = PGM_ADDRESSING,
4084        };
4085        u8 opcode, ilen;
4086        int rc;
4087
4088        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4089        trace_kvm_s390_sie_fault(vcpu);
4090
4091        /*
4092         * We want to inject an addressing exception, which is defined as a
4093         * suppressing or terminating exception. However, since we came here
4094         * by a DAT access exception, the PSW still points to the faulting
4095         * instruction since DAT exceptions are nullifying. So we've got
4096         * to look up the current opcode to get the length of the instruction
4097         * to be able to forward the PSW.
4098         */
4099        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4100        ilen = insn_length(opcode);
4101        if (rc < 0) {
4102                return rc;
4103        } else if (rc) {
4104                /* Instruction-Fetching Exceptions - we can't detect the ilen.
4105                 * Forward by arbitrary ilc, injection will take care of
4106                 * nullification if necessary.
4107                 */
4108                pgm_info = vcpu->arch.pgm;
4109                ilen = 4;
4110        }
4111        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4112        kvm_s390_forward_psw(vcpu, ilen);
4113        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4114}
4115
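/*
 * Triage a SIE exit: -EINTR indicates a host machine check to be
 * reinjected, a non-zero icptcode is handed to the intercept handlers,
 * exits without a fault are counted as exit_null, and -EFAULT is
 * resolved as a ucontrol translation exit, an async/sync gmap pfault,
 * or a fault taken inside SIE.
 */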
4116static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4117{
4118        struct mcck_volatile_info *mcck_info;
4119        struct sie_page *sie_page;
4120
4121        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4122                   vcpu->arch.sie_block->icptcode);
4123        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4124
4125        if (guestdbg_enabled(vcpu))
4126                kvm_s390_restore_guest_per_regs(vcpu);
4127
4128        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4129        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4130
4131        if (exit_reason == -EINTR) {
4132                VCPU_EVENT(vcpu, 3, "%s", "machine check");
4133                sie_page = container_of(vcpu->arch.sie_block,
4134                                        struct sie_page, sie_block);
4135                mcck_info = &sie_page->mcck_info;
4136                kvm_s390_reinject_machine_check(vcpu, mcck_info);
4137                return 0;
4138        }
4139
4140        if (vcpu->arch.sie_block->icptcode > 0) {
4141                int rc = kvm_handle_sie_intercept(vcpu);
4142
4143                if (rc != -EOPNOTSUPP)
4144                        return rc;
4145                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4146                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4147                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4148                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4149                return -EREMOTE;
4150        } else if (exit_reason != -EFAULT) {
4151                vcpu->stat.exit_null++;
4152                return 0;
4153        } else if (kvm_is_ucontrol(vcpu->kvm)) {
4154                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4155                vcpu->run->s390_ucontrol.trans_exc_code =
4156                                                current->thread.gmap_addr;
4157                vcpu->run->s390_ucontrol.pgm_code = 0x10;
4158                return -EREMOTE;
4159        } else if (current->thread.gmap_pfault) {
4160                trace_kvm_s390_major_guest_pfault(vcpu);
4161                current->thread.gmap_pfault = 0;
4162                if (kvm_arch_setup_async_pf(vcpu))
4163                        return 0;
4164                vcpu->stat.pfault_sync++;
4165                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4166        }
4167        return vcpu_post_run_fault_in_sie(vcpu);
4168}
4169
4170#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
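/*
 * The outer vcpu execution loop: alternate vcpu_pre_run(), the SIE
 * entry and vcpu_post_run() until a signal, a pending guestdbg exit
 * or an error forces a return to userspace. For protected vcpus the
 * general purpose registers are shuttled through the sie_page
 * pv_grregs area around each SIE entry.
 */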
4171static int __vcpu_run(struct kvm_vcpu *vcpu)
4172{
4173        int rc, exit_reason;
4174        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4175
4176        /*
4177         * We try to hold kvm->srcu during most of vcpu_run (except when
4178         * running the guest), so that memslots (and other stuff) are protected
4179         */
4180        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4181
4182        do {
4183                rc = vcpu_pre_run(vcpu);
4184                if (rc)
4185                        break;
4186
4187                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4188                /*
4189                 * As PF_VCPU will be used in the fault handler, there
4190                 * should be no uaccess between guest_enter and guest_exit.
4191                 */
4192                local_irq_disable();
4193                guest_enter_irqoff();
4194                __disable_cpu_timer_accounting(vcpu);
4195                local_irq_enable();
4196                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4197                        memcpy(sie_page->pv_grregs,
4198                               vcpu->run->s.regs.gprs,
4199                               sizeof(sie_page->pv_grregs));
4200                }
4201                if (test_cpu_flag(CIF_FPU))
4202                        load_fpu_regs();
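                    /*
                     * sie64a() returns 0 for a regular interception exit
                     * (icptcode is then set in the SIE block), -EFAULT when
                     * the host faulted on guest memory, or -EINTR when a
                     * machine check hit during SIE; vcpu_post_run() above
                     * sorts these cases out.
                     */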
4203                exit_reason = sie64a(vcpu->arch.sie_block,
4204                                     vcpu->run->s.regs.gprs);
4205                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4206                        memcpy(vcpu->run->s.regs.gprs,
4207                               sie_page->pv_grregs,
4208                               sizeof(sie_page->pv_grregs));
4209                        /*
4210                         * We're not allowed to inject interrupts on intercepts
4211                         * that leave the guest state in an "in-between" state
4212                         * where the next SIE entry will do a continuation.
4213                         * Fence interrupts in our "internal" PSW.
4214                         */
4215                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4216                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4217                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4218                        }
4219                }
4220                local_irq_disable();
4221                __enable_cpu_timer_accounting(vcpu);
4222                guest_exit_irqoff();
4223                local_irq_enable();
4224                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4225
4226                rc = vcpu_post_run(vcpu, exit_reason);
4227        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4228
4229        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4230        return rc;
4231}
4232
4233static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4234{
4235        struct kvm_run *kvm_run = vcpu->run;
4236        struct runtime_instr_cb *riccb;
4237        struct gs_cb *gscb;
4238
4239        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4240        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4241        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4242        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4243        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4244                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4245                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4246                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4247        }
4248        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4249                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4250                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4251                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4252                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4253                        kvm_clear_async_pf_completion_queue(vcpu);
4254        }
4255        if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4256                vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4257                vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4258        }
4259        /*
4260         * If userspace sets the riccb (e.g. after migration) to a valid state,
4261         * we should enable RI here instead of doing the lazy enablement.
4262         */
4263        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4264            test_kvm_facility(vcpu->kvm, 64) &&
4265            riccb->v &&
4266            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4267                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4268                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4269        }
4270        /*
4271         * If userspace sets the gscb (e.g. after migration) to non-zero,
4272         * we should enable GS here instead of doing the lazy enablement.
4273         */
4274        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4275            test_kvm_facility(vcpu->kvm, 133) &&
4276            gscb->gssm &&
4277            !vcpu->arch.gs_enabled) {
4278                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4279                vcpu->arch.sie_block->ecb |= ECB_GS;
4280                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4281                vcpu->arch.gs_enabled = 1;
4282        }
4283        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4284            test_kvm_facility(vcpu->kvm, 82)) {
4285                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4286                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4287        }
4288        if (MACHINE_HAS_GS) {
4289                preempt_disable();
4290                __ctl_set_bit(2, 4);
4291                if (current->thread.gs_cb) {
4292                        vcpu->arch.host_gscb = current->thread.gs_cb;
4293                        save_gs_cb(vcpu->arch.host_gscb);
4294                }
4295                if (vcpu->arch.gs_enabled) {
4296                        current->thread.gs_cb = (struct gs_cb *)
4297                                                &vcpu->run->s.regs.gscb;
4298                        restore_gs_cb(current->thread.gs_cb);
4299                }
4300                preempt_enable();
4301        }
4302        /* SIE will load etoken directly from SDNX and therefore kvm_run */
4303}
4304
4305static void sync_regs(struct kvm_vcpu *vcpu)
4306{
4307        struct kvm_run *kvm_run = vcpu->run;
4308
4309        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4310                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4311        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4312                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4313                /* some control register changes require a tlb flush */
4314                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4315        }
4316        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4317                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4318                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4319        }
4320        save_access_regs(vcpu->arch.host_acrs);
4321        restore_access_regs(vcpu->run->s.regs.acrs);
4322        /* save host (userspace) fprs/vrs */
4323        save_fpu_regs();
4324        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4325        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4326        if (MACHINE_HAS_VX)
4327                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4328        else
4329                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4330        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4331        if (test_fp_ctl(current->thread.fpu.fpc))
4332                /* User space provided an invalid FPC, let's clear it */
4333                current->thread.fpu.fpc = 0;
4334
4335        /* Sync fmt2 only data */
4336        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4337                sync_regs_fmt2(vcpu);
4338        } else {
4339                /*
4340                 * In several places we have to modify our internal view to
4341                 * not do things that are disallowed by the ultravisor. For
4342                 * example we must not inject interrupts after specific exits
4343                 * (e.g. 112 prefix page not secure). We do this by turning
4344                 * off the machine check, external and I/O interrupt bits
4345                 * of our PSW copy. To avoid getting validity intercepts, we
4346                 * only accept the condition code from userspace.
4347                 */
4348                vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4349                vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4350                                                   PSW_MASK_CC;
4351        }
4352
4353        kvm_run->kvm_dirty_regs = 0;
4354}
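
    /*
     * A minimal userspace sketch of feeding sync_regs() through the next
     * KVM_RUN; vcpu_fd, the mmap'ed run structure and new_prefix are
     * assumptions of the example:
     *
     *        run->s.regs.prefix = new_prefix;
     *        run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
     *        ioctl(vcpu_fd, KVM_RUN, 0);
     */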
4355
4356static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4357{
4358        struct kvm_run *kvm_run = vcpu->run;
4359
4360        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4361        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4362        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4363        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4364        kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4365        if (MACHINE_HAS_GS) {
4366                preempt_disable();
4367                __ctl_set_bit(2, 4);
4368                if (vcpu->arch.gs_enabled)
4369                        save_gs_cb(current->thread.gs_cb);
4370                current->thread.gs_cb = vcpu->arch.host_gscb;
4371                restore_gs_cb(vcpu->arch.host_gscb);
4372                if (!vcpu->arch.host_gscb)
4373                        __ctl_clear_bit(2, 4);
4374                vcpu->arch.host_gscb = NULL;
4375                preempt_enable();
4376        }
4377        /* SIE will save etoken directly into SDNX and therefore kvm_run */
4378}
4379
4380static void store_regs(struct kvm_vcpu *vcpu)
4381{
4382        struct kvm_run *kvm_run = vcpu->run;
4383
4384        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4385        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4386        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4387        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4388        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4389        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4390        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4391        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4392        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4393        save_access_regs(vcpu->run->s.regs.acrs);
4394        restore_access_regs(vcpu->arch.host_acrs);
4395        /* Save guest register state */
4396        save_fpu_regs();
4397        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4398        /* Restore will be done lazily at return */
4399        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4400        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4401        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4402                store_regs_fmt2(vcpu);
4403}
4404
4405int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4406{
4407        struct kvm_run *kvm_run = vcpu->run;
4408        int rc;
4409
4410        if (kvm_run->immediate_exit)
4411                return -EINTR;
4412
4413        if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4414            kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4415                return -EINVAL;
4416
4417        vcpu_load(vcpu);
4418
4419        if (guestdbg_exit_pending(vcpu)) {
4420                kvm_s390_prepare_debug_exit(vcpu);
4421                rc = 0;
4422                goto out;
4423        }
4424
4425        kvm_sigset_activate(vcpu);
4426
4427        /*
4428         * no need to check the return value of vcpu_start as it can only
4429         * fail for protvirt, and protvirt implies user controlled cpu state
4430         */
4431        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4432                kvm_s390_vcpu_start(vcpu);
4433        } else if (is_vcpu_stopped(vcpu)) {
4434                pr_err_ratelimited("can't run stopped vcpu %d\n",
4435                                   vcpu->vcpu_id);
4436                rc = -EINVAL;
4437                goto out;
4438        }
4439
4440        sync_regs(vcpu);
4441        enable_cpu_timer_accounting(vcpu);
4442
4443        might_fault();
4444        rc = __vcpu_run(vcpu);
4445
4446        if (signal_pending(current) && !rc) {
4447                kvm_run->exit_reason = KVM_EXIT_INTR;
4448                rc = -EINTR;
4449        }
4450
4451        if (guestdbg_exit_pending(vcpu) && !rc) {
4452                kvm_s390_prepare_debug_exit(vcpu);
4453                rc = 0;
4454        }
4455
4456        if (rc == -EREMOTE) {
4457                /* userspace support is needed, kvm_run has been prepared */
4458                rc = 0;
4459        }
4460
4461        disable_cpu_timer_accounting(vcpu);
4462        store_regs(vcpu);
4463
4464        kvm_sigset_deactivate(vcpu);
4465
4466        vcpu->stat.exit_userspace++;
4467out:
4468        vcpu_put(vcpu);
4469        return rc;
4470}
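
    /*
     * A minimal userspace run loop sketch matching the -EREMOTE
     * convention above; vcpu_fd, run and handle_sieic() are assumptions
     * of the example:
     *
     *        while (!ioctl(vcpu_fd, KVM_RUN, 0)) {
     *                if (run->exit_reason == KVM_EXIT_S390_SIEIC)
     *                        handle_sieic(run);
     *        }
     */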
4471
4472/*
4473 * store status at address
4474 * we have two special cases:
4475 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4476 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4477 */
4478int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4479{
4480        unsigned char archmode = 1;
4481        freg_t fprs[NUM_FPRS];
4482        unsigned int px;
4483        u64 clkcomp, cputm;
4484        int rc;
4485
4486        px = kvm_s390_get_prefix(vcpu);
4487        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4488                if (write_guest_abs(vcpu, 163, &archmode, 1))
4489                        return -EFAULT;
4490                gpa = 0;
4491        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4492                if (write_guest_real(vcpu, 163, &archmode, 1))
4493                        return -EFAULT;
4494                gpa = px;
4495        } else
4496                gpa -= __LC_FPREGS_SAVE_AREA;
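            /*
             * gpa was rebased by __LC_FPREGS_SAVE_AREA so that adding the
             * absolute lowcore __LC_*_SAVE_AREA offsets below lands at the
             * caller supplied address.
             */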
4497
4498        /* manually convert vector registers if necessary */
4499        if (MACHINE_HAS_VX) {
4500                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4501                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4502                                     fprs, 128);
4503        } else {
4504                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4505                                     vcpu->run->s.regs.fprs, 128);
4506        }
4507        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4508                              vcpu->run->s.regs.gprs, 128);
4509        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4510                              &vcpu->arch.sie_block->gpsw, 16);
4511        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4512                              &px, 4);
4513        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4514                              &vcpu->run->s.regs.fpc, 4);
4515        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4516                              &vcpu->arch.sie_block->todpr, 4);
4517        cputm = kvm_s390_get_cpu_timer(vcpu);
4518        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4519                              &cputm, 8);
4520        clkcomp = vcpu->arch.sie_block->ckc >> 8;
4521        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4522                              &clkcomp, 8);
4523        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4524                              &vcpu->run->s.regs.acrs, 64);
4525        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4526                              &vcpu->arch.sie_block->gcr, 128);
4527        return rc ? -EFAULT : 0;
4528}
4529
4530int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4531{
4532        /*
4533         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4534         * switch in the run ioctl. Let's update our copies before we save
4535         * them into the save area.
4536         */
4537        save_fpu_regs();
4538        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4539        save_access_regs(vcpu->run->s.regs.acrs);
4540
4541        return kvm_s390_store_status_unloaded(vcpu, addr);
4542}
4543
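    /*
     * kvm_check_request() clears a still-pending request of the opposite
     * kind before the new one is queued, so a vcpu never sees ENABLE_IBS
     * and DISABLE_IBS at the same time.
     */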
4544static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4545{
4546        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4547        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4548}
4549
4550static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4551{
4552        unsigned int i;
4553        struct kvm_vcpu *vcpu;
4554
4555        kvm_for_each_vcpu(i, vcpu, kvm) {
4556                __disable_ibs_on_vcpu(vcpu);
4557        }
4558}
4559
4560static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4561{
4562        if (!sclp.has_ibs)
4563                return;
4564        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4565        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4566}
4567
4568int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4569{
4570        int i, online_vcpus, r = 0, started_vcpus = 0;
4571
4572        if (!is_vcpu_stopped(vcpu))
4573                return 0;
4574
4575        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4576        /* Only one cpu at a time may enter/leave the STOPPED state. */
4577        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4578        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4579
4580        /* Let's tell the UV that we want to change into the operating state */
4581        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4582                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4583                if (r) {
4584                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4585                        return r;
4586                }
4587        }
4588
4589        for (i = 0; i < online_vcpus; i++) {
4590                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4591                        started_vcpus++;
4592        }
4593
4594        if (started_vcpus == 0) {
4595                /* we're the only active VCPU -> speed it up */
4596                __enable_ibs_on_vcpu(vcpu);
4597        } else if (started_vcpus == 1) {
4598                /*
4599                 * As we are starting a second VCPU, we have to disable
4600                 * the IBS facility on all VCPUs to remove potentially
4601                 * outstanding ENABLE requests.
4602                 */
4603                __disable_ibs_on_all_vcpus(vcpu->kvm);
4604        }
4605
4606        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4607        /*
4608         * The real PSW might have changed due to a RESTART interpreted by the
4609         * ultravisor. We block all interrupts and let the next sie exit
4610         * refresh our view.
4611         */
4612        if (kvm_s390_pv_cpu_is_protected(vcpu))
4613                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4614        /*
4615         * Another VCPU might have used IBS while we were offline.
4616         * Let's play safe and flush the VCPU at startup.
4617         */
4618        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4619        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4620        return 0;
4621}
4622
4623int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4624{
4625        int i, online_vcpus, r = 0, started_vcpus = 0;
4626        struct kvm_vcpu *started_vcpu = NULL;
4627
4628        if (is_vcpu_stopped(vcpu))
4629                return 0;
4630
4631        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4632        /* Only one cpu at a time may enter/leave the STOPPED state. */
4633        spin_lock(&vcpu->kvm->arch.start_stop_lock);
4634        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4635
4636        /* Let's tell the UV that we want to change into the stopped state */
4637        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4638                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4639                if (r) {
4640                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4641                        return r;
4642                }
4643        }
4644
4645        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4646        kvm_s390_clear_stop_irq(vcpu);
4647
4648        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4649        __disable_ibs_on_vcpu(vcpu);
4650
4651        for (i = 0; i < online_vcpus; i++) {
4652                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4653                        started_vcpus++;
4654                        started_vcpu = vcpu->kvm->vcpus[i];
4655                }
4656        }
4657
4658        if (started_vcpus == 1) {
4659                /*
4660                 * As we only have one VCPU left, we want to enable the
4661                 * IBS facility for that VCPU to speed it up.
4662                 */
4663                __enable_ibs_on_vcpu(started_vcpu);
4664        }
4665
4666        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4667        return 0;
4668}
4669
4670static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4671                                     struct kvm_enable_cap *cap)
4672{
4673        int r;
4674
4675        if (cap->flags)
4676                return -EINVAL;
4677
4678        switch (cap->cap) {
4679        case KVM_CAP_S390_CSS_SUPPORT:
4680                if (!vcpu->kvm->arch.css_support) {
4681                        vcpu->kvm->arch.css_support = 1;
4682                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4683                        trace_kvm_s390_enable_css(vcpu->kvm);
4684                }
4685                r = 0;
4686                break;
4687        default:
4688                r = -EINVAL;
4689                break;
4690        }
4691        return r;
4692}
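
    /*
     * Hypothetical userspace sketch for the only capability handled
     * above (vcpu_fd is an assumption of the example):
     *
     *        struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
     *
     *        ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
     */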
4693
4694static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4695                                   struct kvm_s390_mem_op *mop)
4696{
4697        void __user *uaddr = (void __user *)mop->buf;
4698        int r = 0;
4699
4700        if (mop->flags || !mop->size)
4701                return -EINVAL;
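            /* reject a size + sida_offset sum that wrapped around */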
4702        if (mop->size + mop->sida_offset < mop->size)
4703                return -EINVAL;
4704        if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4705                return -E2BIG;
4706
4707        switch (mop->op) {
4708        case KVM_S390_MEMOP_SIDA_READ:
4709                if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4710                                 mop->sida_offset), mop->size))
4711                        r = -EFAULT;
4712
4713                break;
4714        case KVM_S390_MEMOP_SIDA_WRITE:
4715                if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4716                                   mop->sida_offset), uaddr, mop->size))
4717                        r = -EFAULT;
4718                break;
4719        }
4720        return r;
4721}

4722static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4723                                  struct kvm_s390_mem_op *mop)
4724{
4725        void __user *uaddr = (void __user *)mop->buf;
4726        void *tmpbuf = NULL;
4727        int r = 0;
4728        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4729                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
4730
4731        if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4732                return -EINVAL;
4733
4734        if (mop->size > MEM_OP_MAX_SIZE)
4735                return -E2BIG;
4736
4737        if (kvm_s390_pv_cpu_is_protected(vcpu))
4738                return -EINVAL;
4739
4740        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4741                tmpbuf = vmalloc(mop->size);
4742                if (!tmpbuf)
4743                        return -ENOMEM;
4744        }
4745
4746        switch (mop->op) {
4747        case KVM_S390_MEMOP_LOGICAL_READ:
4748                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4749                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4750                                            mop->size, GACC_FETCH);
4751                        break;
4752                }
4753                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4754                if (r == 0) {
4755                        if (copy_to_user(uaddr, tmpbuf, mop->size))
4756                                r = -EFAULT;
4757                }
4758                break;
4759        case KVM_S390_MEMOP_LOGICAL_WRITE:
4760                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4761                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4762                                            mop->size, GACC_STORE);
4763                        break;
4764                }
4765                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4766                        r = -EFAULT;
4767                        break;
4768                }
4769                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4770                break;
4771        }
4772
4773        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4774                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4775
4776        vfree(tmpbuf);
4777        return r;
4778}
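
    /*
     * Hypothetical userspace sketch of a logical read through the
     * KVM_S390_MEM_OP vcpu ioctl handled above; vcpu_fd, guest_addr,
     * len and buffer are assumptions of the example:
     *
     *        struct kvm_s390_mem_op op = {
     *                .gaddr = guest_addr,
     *                .size  = len,
     *                .op    = KVM_S390_MEMOP_LOGICAL_READ,
     *                .buf   = (__u64)(unsigned long)buffer,
     *        };
     *
     *        ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
     */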
4779
4780static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4781                                      struct kvm_s390_mem_op *mop)
4782{
4783        int r, srcu_idx;
4784
4785        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4786
4787        switch (mop->op) {
4788        case KVM_S390_MEMOP_LOGICAL_READ:
4789        case KVM_S390_MEMOP_LOGICAL_WRITE:
4790                r = kvm_s390_guest_mem_op(vcpu, mop);
4791                break;
4792        case KVM_S390_MEMOP_SIDA_READ:
4793        case KVM_S390_MEMOP_SIDA_WRITE:
4794                /* the vcpu->mutex we hold protects the sida from going away */
4795                r = kvm_s390_guest_sida_op(vcpu, mop);
4796                break;
4797        default:
4798                r = -EINVAL;
4799        }
4800
4801        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4802        return r;
4803}
4804
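    /*
     * Common KVM code dispatches these injection ioctls without taking
     * the vcpu mutex, so userspace may issue them while the vcpu is
     * inside KVM_RUN.
     */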
4805long kvm_arch_vcpu_async_ioctl(struct file *filp,
4806                               unsigned int ioctl, unsigned long arg)
4807{
4808        struct kvm_vcpu *vcpu = filp->private_data;
4809        void __user *argp = (void __user *)arg;
4810
4811        switch (ioctl) {
4812        case KVM_S390_IRQ: {
4813                struct kvm_s390_irq s390irq;
4814
4815                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4816                        return -EFAULT;
4817                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4818        }
4819        case KVM_S390_INTERRUPT: {
4820                struct kvm_s390_interrupt s390int;
4821                struct kvm_s390_irq s390irq = {};
4822
4823                if (copy_from_user(&s390int, argp, sizeof(s390int)))
4824                        return -EFAULT;
4825                if (s390int_to_s390irq(&s390int, &s390irq))
4826                        return -EINVAL;
4827                return kvm_s390_inject_vcpu(vcpu, &s390irq);
4828        }
4829        }
4830        return -ENOIOCTLCMD;
4831}
4832
4833long kvm_arch_vcpu_ioctl(struct file *filp,
4834                         unsigned int ioctl, unsigned long arg)
4835{
4836        struct kvm_vcpu *vcpu = filp->private_data;
4837        void __user *argp = (void __user *)arg;
4838        int idx;
4839        long r;
4840        u16 rc, rrc;
4841
4842        vcpu_load(vcpu);
4843
4844        switch (ioctl) {
4845        case KVM_S390_STORE_STATUS:
4846                idx = srcu_read_lock(&vcpu->kvm->srcu);
4847                r = kvm_s390_store_status_unloaded(vcpu, arg);
4848                srcu_read_unlock(&vcpu->kvm->srcu, idx);
4849                break;
4850        case KVM_S390_SET_INITIAL_PSW: {
4851                psw_t psw;
4852
4853                r = -EFAULT;
4854                if (copy_from_user(&psw, argp, sizeof(psw)))
4855                        break;
4856                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4857                break;
4858        }
4859        case KVM_S390_CLEAR_RESET:
4860                r = 0;
4861                kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4862                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4863                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4864                                          UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4865                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4866                                   rc, rrc);
4867                }
4868                break;
4869        case KVM_S390_INITIAL_RESET:
4870                r = 0;
4871                kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4872                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4873                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4874                                          UVC_CMD_CPU_RESET_INITIAL,
4875                                          &rc, &rrc);
4876                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4877                                   rc, rrc);
4878                }
4879                break;
4880        case KVM_S390_NORMAL_RESET:
4881                r = 0;
4882                kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4883                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4884                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4885                                          UVC_CMD_CPU_RESET, &rc, &rrc);
4886                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4887                                   rc, rrc);
4888                }
4889                break;
4890        case KVM_SET_ONE_REG:
4891        case KVM_GET_ONE_REG: {
4892                struct kvm_one_reg reg;
4893                r = -EINVAL;
4894                if (kvm_s390_pv_cpu_is_protected(vcpu))
4895                        break;
4896                r = -EFAULT;
4897                if (copy_from_user(&reg, argp, sizeof(reg)))
4898                        break;
4899                if (ioctl == KVM_SET_ONE_REG)
4900                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4901                else
4902                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4903                break;
4904        }
4905#ifdef CONFIG_KVM_S390_UCONTROL
4906        case KVM_S390_UCAS_MAP: {
4907                struct kvm_s390_ucas_mapping ucasmap;
4908
4909                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4910                        r = -EFAULT;
4911                        break;
4912                }
4913
4914                if (!kvm_is_ucontrol(vcpu->kvm)) {
4915                        r = -EINVAL;
4916                        break;
4917                }
4918
4919                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4920                                     ucasmap.vcpu_addr, ucasmap.length);
4921                break;
4922        }
4923        case KVM_S390_UCAS_UNMAP: {
4924                struct kvm_s390_ucas_mapping ucasmap;
4925
4926                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4927                        r = -EFAULT;
4928                        break;
4929                }
4930
4931                if (!kvm_is_ucontrol(vcpu->kvm)) {
4932                        r = -EINVAL;
4933                        break;
4934                }
4935
4936                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4937                        ucasmap.length);
4938                break;
4939        }
4940#endif
4941        case KVM_S390_VCPU_FAULT: {
4942                r = gmap_fault(vcpu->arch.gmap, arg, 0);
4943                break;
4944        }
4945        case KVM_ENABLE_CAP:
4946        {
4947                struct kvm_enable_cap cap;
4948                r = -EFAULT;
4949                if (copy_from_user(&cap, argp, sizeof(cap)))
4950                        break;
4951                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4952                break;
4953        }
4954        case KVM_S390_MEM_OP: {
4955                struct kvm_s390_mem_op mem_op;
4956
4957                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4958                        r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4959                else
4960                        r = -EFAULT;
4961                break;
4962        }
4963        case KVM_S390_SET_IRQ_STATE: {
4964                struct kvm_s390_irq_state irq_state;
4965
4966                r = -EFAULT;
4967                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4968                        break;
4969                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4970                    irq_state.len == 0 ||
4971                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4972                        r = -EINVAL;
4973                        break;
4974                }
4975                /* do not use irq_state.flags, it will break old QEMUs */
4976                r = kvm_s390_set_irq_state(vcpu,
4977                                           (void __user *) irq_state.buf,
4978                                           irq_state.len);
4979                break;
4980        }
4981        case KVM_S390_GET_IRQ_STATE: {
4982                struct kvm_s390_irq_state irq_state;
4983
4984                r = -EFAULT;
4985                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4986                        break;
4987                if (irq_state.len == 0) {
4988                        r = -EINVAL;
4989                        break;
4990                }
4991                /* do not use irq_state.flags, it will break old QEMUs */
4992                r = kvm_s390_get_irq_state(vcpu,
4993                                           (__u8 __user *)  irq_state.buf,
4994                                           irq_state.len);
4995                break;
4996        }
4997        default:
4998                r = -ENOTTY;
4999        }
5000
5001        vcpu_put(vcpu);
5002        return r;
5003}
5004
5005vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5006{
5007#ifdef CONFIG_KVM_S390_UCONTROL
5008        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5009                 && (kvm_is_ucontrol(vcpu->kvm))) {
5010                vmf->page = virt_to_page(vcpu->arch.sie_block);
5011                get_page(vmf->page);
5012                return 0;
5013        }
5014#endif
5015        return VM_FAULT_SIGBUS;
5016}
5017
5018/* Section: memory related */
5019int kvm_arch_prepare_memory_region(struct kvm *kvm,
5020                                   struct kvm_memory_slot *memslot,
5021                                   const struct kvm_userspace_memory_region *mem,
5022                                   enum kvm_mr_change change)
5023{
5024        /* A few sanity checks. Memory slots have to start and end at a
5025           segment boundary (1 MB). The memory in userland may be fragmented
5026           into various different vmas. It is okay to mmap() and munmap()
5027           stuff in this slot at any time after doing this call */
5028
5029        if (mem->userspace_addr & 0xffffful)
5030                return -EINVAL;
5031
5032        if (mem->memory_size & 0xffffful)
5033                return -EINVAL;
5034
5035        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5036                return -EINVAL;
5037
5038        /* When we are protected, we should not change the memory slots */
5039        if (kvm_s390_pv_get_handle(kvm))
5040                return -EINVAL;
5041        return 0;
5042}
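
    /*
     * Hypothetical userspace sketch of a region that passes the checks
     * above (userspace address and size are 1 MB aligned); vm_fd and
     * backing are assumptions of the example:
     *
     *        struct kvm_userspace_memory_region mem = {
     *                .slot            = 0,
     *                .guest_phys_addr = 0,
     *                .memory_size     = 256UL << 20,
     *                .userspace_addr  = (__u64)backing,
     *        };
     *
     *        ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
     */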
5043
5044void kvm_arch_commit_memory_region(struct kvm *kvm,
5045                                const struct kvm_userspace_memory_region *mem,
5046                                struct kvm_memory_slot *old,
5047                                const struct kvm_memory_slot *new,
5048                                enum kvm_mr_change change)
5049{
5050        int rc = 0;
5051
5052        switch (change) {
5053        case KVM_MR_DELETE:
5054                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5055                                        old->npages * PAGE_SIZE);
5056                break;
5057        case KVM_MR_MOVE:
5058                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5059                                        old->npages * PAGE_SIZE);
5060                if (rc)
5061                        break;
5062                fallthrough;
5063        case KVM_MR_CREATE:
5064                rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5065                                      mem->guest_phys_addr, mem->memory_size);
5066                break;
5067        case KVM_MR_FLAGS_ONLY:
5068                break;
5069        default:
5070                WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5071        }
5072        if (rc)
5073                pr_warn("failed to commit memory region\n");
5075}
5076
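    /*
     * sclp.hmfai packs sixteen 2-bit codes, one per facility doubleword:
     * (hmfai << i * 2) >> 30 extracts code i, and every code step shifts
     * the 48-bit base mask right by 16 bits. E.g. code 0 leaves all 48
     * facility bits to the guest, code 3 leaves none.
     */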
5077static inline unsigned long nonhyp_mask(int i)
5078{
5079        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5080
5081        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5082}
5083
5084void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5085{
5086        vcpu->valid_wakeup = false;
5087}
5088
5089static int __init kvm_s390_init(void)
5090{
5091        int i;
5092
5093        if (!sclp.has_sief2) {
5094                pr_info("SIE is not available\n");
5095                return -ENODEV;
5096        }
5097
5098        if (nested && hpage) {
5099                pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5100                return -EINVAL;
5101        }
5102
5103        for (i = 0; i < 16; i++)
5104                kvm_s390_fac_base[i] |=
5105                        stfle_fac_list[i] & nonhyp_mask(i);
5106
5107        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5108}
5109
5110static void __exit kvm_s390_exit(void)
5111{
5112        kvm_exit();
5113}
5114
5115module_init(kvm_s390_init);
5116module_exit(kvm_s390_exit);
5117
5118/*
5119 * Enable autoloading of the kvm module.
5120 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5121 * since x86 takes a different approach.
5122 */
5123#include <linux/miscdevice.h>
5124MODULE_ALIAS_MISCDEV(KVM_MINOR);
5125MODULE_ALIAS("devname:kvm");
5126