linux/arch/x86/kvm/vmx/sgx.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*  Copyright(c) 2021 Intel Corporation. */
   3
   4#include <asm/sgx.h>
   5
   6#include "cpuid.h"
   7#include "kvm_cache_regs.h"
   8#include "nested.h"
   9#include "sgx.h"
  10#include "vmx.h"
  11#include "x86.h"
  12
/*
 * Module-wide switch for SGX virtualization; enabled by default.  Exposed as
 * the "sgx" module parameter with 0444 permissions.
 */
bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/*
 * Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs.  Populated by
 * setup_default_sgx_lepubkeyhash() and copied into a vCPU's MSR shadow by
 * vcpu_setup_sgx_lepubkeyhash().
 */
static u64 sgx_pubkey_hash[4] __ro_after_init;
  18
  19/*
  20 * ENCLS's memory operands use a fixed segment (DS) and a fixed
  21 * address size based on the mode.  Related prefixes are ignored.
  22 */
  23static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
  24                             int size, int alignment, gva_t *gva)
  25{
  26        struct kvm_segment s;
  27        bool fault;
  28
  29        /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
  30        *gva = offset;
  31        if (!is_long_mode(vcpu)) {
  32                vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
  33                *gva += s.base;
  34        }
  35
  36        if (!IS_ALIGNED(*gva, alignment)) {
  37                fault = true;
  38        } else if (likely(is_long_mode(vcpu))) {
  39                fault = is_noncanonical_address(*gva, vcpu);
  40        } else {
  41                *gva &= 0xffffffff;
  42                fault = (s.unusable) ||
  43                        (s.type != 2 && s.type != 3) ||
  44                        (*gva > s.limit) ||
  45                        ((s.base != 0 || s.limit != 0xffffffff) &&
  46                        (((u64)*gva + size - 1) > s.limit + 1));
  47        }
  48        if (fault)
  49                kvm_inject_gp(vcpu, 0);
  50        return fault ? -EINVAL : 0;
  51}
  52
  53static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
  54                                         unsigned int size)
  55{
  56        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
  57        vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
  58        vcpu->run->internal.ndata = 2;
  59        vcpu->run->internal.data[0] = addr;
  60        vcpu->run->internal.data[1] = size;
  61}
  62
  63static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
  64                        unsigned int size)
  65{
  66        if (__copy_from_user(data, (void __user *)hva, size)) {
  67                sgx_handle_emulation_failure(vcpu, hva, size);
  68                return -EFAULT;
  69        }
  70
  71        return 0;
  72}
  73
  74static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
  75                          gpa_t *gpa)
  76{
  77        struct x86_exception ex;
  78
  79        if (write)
  80                *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
  81        else
  82                *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
  83
  84        if (*gpa == UNMAPPED_GVA) {
  85                kvm_inject_emulated_page_fault(vcpu, &ex);
  86                return -EFAULT;
  87        }
  88
  89        return 0;
  90}
  91
  92static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
  93{
  94        *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
  95        if (kvm_is_error_hva(*hva)) {
  96                sgx_handle_emulation_failure(vcpu, gpa, 1);
  97                return -EFAULT;
  98        }
  99
 100        *hva |= gpa & ~PAGE_MASK;
 101
 102        return 0;
 103}
 104
 105static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
 106{
 107        struct x86_exception ex;
 108
 109        /*
 110         * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
 111         * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
 112         * but the error code isn't (yet) plumbed through the ENCLS helpers.
 113         */
 114        if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
 115                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 116                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 117                vcpu->run->internal.ndata = 0;
 118                return 0;
 119        }
 120
 121        /*
 122         * If the guest thinks it's running on SGX2 hardware, inject an SGX
 123         * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
 124         * #PF on SGX2).  The assumption is that EPCM faults are much more
 125         * likely than a bad userspace address.
 126         */
 127        if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
 128            guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
 129                memset(&ex, 0, sizeof(ex));
 130                ex.vector = PF_VECTOR;
 131                ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
 132                                PFERR_SGX_MASK;
 133                ex.address = gva;
 134                ex.error_code_valid = true;
 135                ex.nested_page_fault = false;
 136                kvm_inject_page_fault(vcpu, &ex);
 137        } else {
 138                kvm_inject_gp(vcpu, 0);
 139        }
 140        return 1;
 141}
 142
 143static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
 144                                  struct sgx_pageinfo *pageinfo,
 145                                  unsigned long secs_hva,
 146                                  gva_t secs_gva)
 147{
 148        struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
 149        struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
 150        u64 attributes, xfrm, size;
 151        u32 miscselect;
 152        u8 max_size_log2;
 153        int trapnr, ret;
 154
 155        sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
 156        sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
 157        if (!sgx_12_0 || !sgx_12_1) {
 158                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 159                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 160                vcpu->run->internal.ndata = 0;
 161                return 0;
 162        }
 163
 164        miscselect = contents->miscselect;
 165        attributes = contents->attributes;
 166        xfrm = contents->xfrm;
 167        size = contents->size;
 168
 169        /* Enforce restriction of access to the PROVISIONKEY. */
 170        if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
 171            (attributes & SGX_ATTR_PROVISIONKEY)) {
 172                if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
 173                        pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
 174                kvm_inject_gp(vcpu, 0);
 175                return 1;
 176        }
 177
 178        /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
 179        if ((u32)miscselect & ~sgx_12_0->ebx ||
 180            (u32)attributes & ~sgx_12_1->eax ||
 181            (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
 182            (u32)xfrm & ~sgx_12_1->ecx ||
 183            (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
 184                kvm_inject_gp(vcpu, 0);
 185                return 1;
 186        }
 187
 188        /* Enforce CPUID restriction on max enclave size. */
 189        max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
 190                                                            sgx_12_0->edx;
 191        if (size >= BIT_ULL(max_size_log2))
 192                kvm_inject_gp(vcpu, 0);
 193
 194        /*
 195         * sgx_virt_ecreate() returns:
 196         *  1) 0:       ECREATE was successful
 197         *  2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
 198         *              exception number.
 199         *  3) -EINVAL: access_ok() on @secs_hva failed. This should never
 200         *              happen as KVM checks host addresses at memslot creation.
 201         *              sgx_virt_ecreate() has already warned in this case.
 202         */
 203        ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
 204        if (!ret)
 205                return kvm_skip_emulated_instruction(vcpu);
 206        if (ret == -EFAULT)
 207                return sgx_inject_fault(vcpu, secs_gva, trapnr);
 208
 209        return ret;
 210}
 211
 212static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
 213{
 214        gva_t pageinfo_gva, secs_gva;
 215        gva_t metadata_gva, contents_gva;
 216        gpa_t metadata_gpa, contents_gpa, secs_gpa;
 217        unsigned long metadata_hva, contents_hva, secs_hva;
 218        struct sgx_pageinfo pageinfo;
 219        struct sgx_secs *contents;
 220        struct x86_exception ex;
 221        int r;
 222
 223        if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
 224            sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
 225                return 1;
 226
 227        /*
 228         * Copy the PAGEINFO to local memory, its pointers need to be
 229         * translated, i.e. we need to do a deep copy/translate.
 230         */
 231        r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
 232                                sizeof(pageinfo), &ex);
 233        if (r == X86EMUL_PROPAGATE_FAULT) {
 234                kvm_inject_emulated_page_fault(vcpu, &ex);
 235                return 1;
 236        } else if (r != X86EMUL_CONTINUE) {
 237                sgx_handle_emulation_failure(vcpu, pageinfo_gva,
 238                                             sizeof(pageinfo));
 239                return 0;
 240        }
 241
 242        if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
 243            sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
 244                              &contents_gva))
 245                return 1;
 246
 247        /*
 248         * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
 249         * Resume the guest on failure to inject a #PF.
 250         */
 251        if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
 252            sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
 253            sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
 254                return 1;
 255
 256        /*
 257         * ...and then to HVA.  The order of accesses isn't architectural, i.e.
 258         * KVM doesn't have to fully process one address at a time.  Exit to
 259         * userspace if a GPA is invalid.
 260         */
 261        if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
 262            sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
 263            sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
 264                return 0;
 265
 266        /*
 267         * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
 268         * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
 269         * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
 270         * enforce restriction of access to the PROVISIONKEY.
 271         */
 272        contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
 273        if (!contents)
 274                return -ENOMEM;
 275
 276        /* Exit to userspace if copying from a host userspace address fails. */
 277        if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
 278                free_page((unsigned long)contents);
 279                return 0;
 280        }
 281
 282        pageinfo.metadata = metadata_hva;
 283        pageinfo.contents = (u64)contents;
 284
 285        r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
 286
 287        free_page((unsigned long)contents);
 288
 289        return r;
 290}
 291
 292static int handle_encls_einit(struct kvm_vcpu *vcpu)
 293{
 294        unsigned long sig_hva, secs_hva, token_hva, rflags;
 295        struct vcpu_vmx *vmx = to_vmx(vcpu);
 296        gva_t sig_gva, secs_gva, token_gva;
 297        gpa_t sig_gpa, secs_gpa, token_gpa;
 298        int ret, trapnr;
 299
 300        if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
 301            sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
 302            sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
 303                return 1;
 304
 305        /*
 306         * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
 307         * Resume the guest on failure to inject a #PF.
 308         */
 309        if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
 310            sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
 311            sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
 312                return 1;
 313
 314        /*
 315         * ...and then to HVA.  The order of accesses isn't architectural, i.e.
 316         * KVM doesn't have to fully process one address at a time.  Exit to
 317         * userspace if a GPA is invalid.  Note, all structures are aligned and
 318         * cannot split pages.
 319         */
 320        if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
 321            sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
 322            sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
 323                return 0;
 324
 325        ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
 326                             (void __user *)secs_hva,
 327                             vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
 328
 329        if (ret == -EFAULT)
 330                return sgx_inject_fault(vcpu, secs_gva, trapnr);
 331
 332        /*
 333         * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
 334         * @token_hva or @secs_hva. This should never happen as KVM checks host
 335         * addresses at memslot creation. sgx_virt_einit() has already warned
 336         * in this case, so just return.
 337         */
 338        if (ret < 0)
 339                return ret;
 340
 341        rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
 342                                          X86_EFLAGS_AF | X86_EFLAGS_SF |
 343                                          X86_EFLAGS_OF);
 344        if (ret)
 345                rflags |= X86_EFLAGS_ZF;
 346        else
 347                rflags &= ~X86_EFLAGS_ZF;
 348        vmx_set_rflags(vcpu, rflags);
 349
 350        kvm_rax_write(vcpu, ret);
 351        return kvm_skip_emulated_instruction(vcpu);
 352}
 353
 354static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
 355{
 356        if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
 357                return false;
 358
 359        if (leaf >= ECREATE && leaf <= ETRACK)
 360                return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
 361
 362        if (leaf >= EAUG && leaf <= EMODT)
 363                return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
 364
 365        return false;
 366}
 367
 368static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
 369{
 370        const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
 371
 372        return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
 373}
 374
 375int handle_encls(struct kvm_vcpu *vcpu)
 376{
 377        u32 leaf = (u32)kvm_rax_read(vcpu);
 378
 379        if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
 380                kvm_queue_exception(vcpu, UD_VECTOR);
 381        } else if (!sgx_enabled_in_guest_bios(vcpu)) {
 382                kvm_inject_gp(vcpu, 0);
 383        } else {
 384                if (leaf == ECREATE)
 385                        return handle_encls_ecreate(vcpu);
 386                if (leaf == EINIT)
 387                        return handle_encls_einit(vcpu);
 388                WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
 389                vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 390                vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
 391                return 0;
 392        }
 393        return 1;
 394}
 395
 396void setup_default_sgx_lepubkeyhash(void)
 397{
 398        /*
 399         * Use Intel's default value for Skylake hardware if Launch Control is
 400         * not supported, i.e. Intel's hash is hardcoded into silicon, or if
 401         * Launch Control is supported and enabled, i.e. mimic the reset value
 402         * and let the guest write the MSRs at will.  If Launch Control is
 403         * supported but disabled, then use the current MSR values as the hash
 404         * MSRs exist but are read-only (locked and not writable).
 405         */
 406        if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
 407            rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
 408                sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
 409                sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
 410                sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
 411                sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
 412        } else {
 413                /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
 414                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
 415                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
 416                rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
 417        }
 418}
 419
 420void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
 421{
 422        struct vcpu_vmx *vmx = to_vmx(vcpu);
 423
 424        memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
 425               sizeof(sgx_pubkey_hash));
 426}
 427
 428/*
 429 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 430 * restrictions if the guest's allowed-1 settings diverge from hardware.
 431 */
 432static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
 433{
 434        struct kvm_cpuid_entry2 *guest_cpuid;
 435        u32 eax, ebx, ecx, edx;
 436
 437        if (!vcpu->kvm->arch.sgx_provisioning_allowed)
 438                return true;
 439
 440        guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
 441        if (!guest_cpuid)
 442                return true;
 443
 444        cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
 445        if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
 446                return true;
 447
 448        guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
 449        if (!guest_cpuid)
 450                return true;
 451
 452        cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
 453        if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
 454            guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
 455                return true;
 456
 457        return false;
 458}
 459
 460void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 461{
 462        /*
 463         * There is no software enable bit for SGX that is virtualized by
 464         * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
 465         * guest (either by the host or by the guest's BIOS) but enabled in the
 466         * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
 467         * the expected system behavior for ENCLS.
 468         */
 469        u64 bitmap = -1ull;
 470
 471        /* Nothing to do if hardware doesn't support SGX */
 472        if (!cpu_has_vmx_encls_vmexit())
 473                return;
 474
 475        if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
 476            sgx_enabled_in_guest_bios(vcpu)) {
 477                if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
 478                        bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
 479                        if (sgx_intercept_encls_ecreate(vcpu))
 480                                bitmap |= (1 << ECREATE);
 481                }
 482
 483                if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
 484                        bitmap &= ~GENMASK_ULL(EMODT, EAUG);
 485
 486                /*
 487                 * Trap and execute EINIT if launch control is enabled in the
 488                 * host using the guest's values for launch control MSRs, even
 489                 * if the guest's values are fixed to hardware default values.
 490                 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
 491                 * the MSRs is extraordinarily expensive.
 492                 */
 493                if (boot_cpu_has(X86_FEATURE_SGX_LC))
 494                        bitmap |= (1 << EINIT);
 495
 496                if (!vmcs12 && is_guest_mode(vcpu))
 497                        vmcs12 = get_vmcs12(vcpu);
 498                if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
 499                        bitmap |= vmcs12->encls_exiting_bitmap;
 500        }
 501        vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
 502}
 503