linux/include/xen/interface/xen-mca.h
<<
>>
Prefs
/******************************************************************************
 * arch-x86/mca.h
 * Guest OS machine check interface to x86 Xen.
 *
 * Contributed by Advanced Micro Devices, Inc.
 * Author: Christoph Egger <Christoph.Egger@amd.com>
 *
 * Updated by Intel Corporation
 * Author: Liu, Jinsong <jinsong.liu@intel.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
  29
  30#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
  31#define __XEN_PUBLIC_ARCH_X86_MCA_H__
  32
  33/* Hypercall */
  34#define __HYPERVISOR_mca __HYPERVISOR_arch_0
  35
  36#define XEN_MCA_INTERFACE_VERSION       0x01ecc003
  37
  38/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
  39#define XEN_MC_NONURGENT        0x1
  40/* IN: Dom0 calls hypercall to retrieve urgent error log entry */
  41#define XEN_MC_URGENT           0x2
  42/* IN: Dom0 acknowledges previosly-fetched error log entry */
  43#define XEN_MC_ACK              0x4
  44
  45/* OUT: All is ok */
  46#define XEN_MC_OK               0x0
  47/* OUT: Domain could not fetch data. */
  48#define XEN_MC_FETCHFAILED      0x1
  49/* OUT: There was no machine check data to fetch. */
  50#define XEN_MC_NODATA           0x2
  51
  52#ifndef __ASSEMBLY__
  53/* vIRQ injected to Dom0 */
  54#define VIRQ_MCA VIRQ_ARCH_0
  55
  56/*
  57 * mc_info entry types
  58 * mca machine check info are recorded in mc_info entries.
  59 * when fetch mca info, it can use MC_TYPE_... to distinguish
  60 * different mca info.
  61 */
  62#define MC_TYPE_GLOBAL          0
  63#define MC_TYPE_BANK            1
  64#define MC_TYPE_EXTENDED        2
  65#define MC_TYPE_RECOVERY        3
  66
  67struct mcinfo_common {
  68        uint16_t type; /* structure type */
  69        uint16_t size; /* size of this struct in bytes */
  70};
  71
  72#define MC_FLAG_CORRECTABLE     (1 << 0)
  73#define MC_FLAG_UNCORRECTABLE   (1 << 1)
  74#define MC_FLAG_RECOVERABLE     (1 << 2)
  75#define MC_FLAG_POLLED          (1 << 3)
  76#define MC_FLAG_RESET           (1 << 4)
  77#define MC_FLAG_CMCI            (1 << 5)
  78#define MC_FLAG_MCE             (1 << 6)
  79
  80/* contains x86 global mc information */
  81struct mcinfo_global {
  82        struct mcinfo_common common;
  83
  84        uint16_t mc_domid; /* running domain at the time in error */
  85        uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
  86        uint32_t mc_socketid; /* physical socket of the physical core */
  87        uint16_t mc_coreid; /* physical impacted core */
  88        uint16_t mc_core_threadid; /* core thread of physical core */
  89        uint32_t mc_apicid;
  90        uint32_t mc_flags;
  91        uint64_t mc_gstatus; /* global status */
  92};
  93
  94/* contains x86 bank mc information */
  95struct mcinfo_bank {
  96        struct mcinfo_common common;
  97
  98        uint16_t mc_bank; /* bank nr */
  99        uint16_t mc_domid; /* domain referenced by mc_addr if valid */
 100        uint64_t mc_status; /* bank status */
 101        uint64_t mc_addr; /* bank address */
 102        uint64_t mc_misc;
 103        uint64_t mc_ctrl2;
 104        uint64_t mc_tsc;
 105};
 106
 107struct mcinfo_msr {
 108        uint64_t reg; /* MSR */
 109        uint64_t value; /* MSR value */
 110};
 111
 112/* contains mc information from other or additional mc MSRs */
 113struct mcinfo_extended {
 114        struct mcinfo_common common;
 115        uint32_t mc_msrs; /* Number of msr with valid values. */
 116        /*
 117         * Currently Intel extended MSR (32/64) include all gp registers
 118         * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
 119         * useful at present. So expand this array to 16/32 to leave room.
 120         */
 121        struct mcinfo_msr mc_msr[sizeof(void *) * 4];
 122};
 123
 124/* Recovery Action flags. Giving recovery result information to DOM0 */
 125
 126/* Xen takes successful recovery action, the error is recovered */
 127#define REC_ACTION_RECOVERED (0x1 << 0)
 128/* No action is performed by XEN */
 129#define REC_ACTION_NONE (0x1 << 1)
 130/* It's possible DOM0 might take action ownership in some case */
 131#define REC_ACTION_NEED_RESET (0x1 << 2)
 132
 133/*
 134 * Different Recovery Action types, if the action is performed successfully,
 135 * REC_ACTION_RECOVERED flag will be returned.
 136 */
 137
 138/* Page Offline Action */
 139#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
 140/* CPU offline Action */
 141#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
 142/* L3 cache disable Action */
 143#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
 144
 145/*
 146 * Below interface used between XEN/DOM0 for passing XEN's recovery action
 147 * information to DOM0.
 148 */
 149struct page_offline_action {
 150        /* Params for passing the offlined page number to DOM0 */
 151        uint64_t mfn;
 152        uint64_t status;
 153};
 154
 155struct cpu_offline_action {
 156        /* Params for passing the identity of the offlined CPU to DOM0 */
 157        uint32_t mc_socketid;
 158        uint16_t mc_coreid;
 159        uint16_t mc_core_threadid;
 160};
 161
 162#define MAX_UNION_SIZE 16
 163struct mcinfo_recovery {
 164        struct mcinfo_common common;
 165        uint16_t mc_bank; /* bank nr */
 166        uint8_t action_flags;
 167        uint8_t action_types;
 168        union {
 169                struct page_offline_action page_retire;
 170                struct cpu_offline_action cpu_offline;
 171                uint8_t pad[MAX_UNION_SIZE];
 172        } action_info;
 173};
 174
 175
 176#define MCINFO_MAXSIZE 768
 177struct mc_info {
 178        /* Number of mcinfo_* entries in mi_data */
 179        uint32_t mi_nentries;
 180        uint32_t flags;
 181        uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
 182};
 183DEFINE_GUEST_HANDLE_STRUCT(mc_info);
 184
 185#define __MC_MSR_ARRAYSIZE 8
 186#define __MC_MSR_MCGCAP 0
 187#define __MC_NMSRS 1
 188#define MC_NCAPS 7
 189struct mcinfo_logical_cpu {
 190        uint32_t mc_cpunr;
 191        uint32_t mc_chipid;
 192        uint16_t mc_coreid;
 193        uint16_t mc_threadid;
 194        uint32_t mc_apicid;
 195        uint32_t mc_clusterid;
 196        uint32_t mc_ncores;
 197        uint32_t mc_ncores_active;
 198        uint32_t mc_nthreads;
 199        uint32_t mc_cpuid_level;
 200        uint32_t mc_family;
 201        uint32_t mc_vendor;
 202        uint32_t mc_model;
 203        uint32_t mc_step;
 204        char mc_vendorid[16];
 205        char mc_brandid[64];
 206        uint32_t mc_cpu_caps[MC_NCAPS];
 207        uint32_t mc_cache_size;
 208        uint32_t mc_cache_alignment;
 209        uint32_t mc_nmsrvals;
 210        struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
 211};
 212DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
 213
 214/*
 215 * Prototype:
 216 *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
 217 */
 218#define x86_mcinfo_nentries(_mi)    \
 219        ((_mi)->mi_nentries)
 220/*
 221 * Prototype:
 222 *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
 223 */
 224#define x86_mcinfo_first(_mi)       \
 225        ((struct mcinfo_common *)(_mi)->mi_data)
 226/*
 227 * Prototype:
 228 *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
 229 */
 230#define x86_mcinfo_next(_mic)       \
 231        ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
 232
 233/*
 234 * Prototype:
 235 *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
 236 */
 237static inline void x86_mcinfo_lookup(struct mcinfo_common **ret,
 238                                     struct mc_info *mi, uint16_t type)
 239{
 240        uint32_t i;
 241        struct mcinfo_common *mic;
 242        bool found = 0;
 243
 244        if (!ret || !mi)
 245                return;
 246
 247        mic = x86_mcinfo_first(mi);
 248        for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
 249                if (mic->type == type) {
 250                        found = 1;
 251                        break;
 252                }
 253                mic = x86_mcinfo_next(mic);
 254        }
 255
 256        *ret = found ? mic : NULL;
 257}
 258
 259/*
 260 * Fetch machine check data from hypervisor.
 261 */
 262#define XEN_MC_fetch            1
 263struct xen_mc_fetch {
 264        /*
 265         * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
 266         * XEN_MC_ACK if ack'king an earlier fetch
 267         * OUT: XEN_MC_OK, XEN_MC_FETCHAILED, XEN_MC_NODATA
 268         */
 269        uint32_t flags;
 270        uint32_t _pad0;
 271        /* OUT: id for ack, IN: id we are ack'ing */
 272        uint64_t fetch_id;
 273
 274        /* OUT variables. */
 275        GUEST_HANDLE(mc_info) data;
 276};
 277DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
 278
 279
 280/*
 281 * This tells the hypervisor to notify a DomU about the machine check error
 282 */
 283#define XEN_MC_notifydomain     2
 284struct xen_mc_notifydomain {
 285        /* IN variables */
 286        uint16_t mc_domid; /* The unprivileged domain to notify */
 287        uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */
 288
 289        /* IN/OUT variables */
 290        uint32_t flags;
 291};
 292DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
 293
 294#define XEN_MC_physcpuinfo      3
 295struct xen_mc_physcpuinfo {
 296        /* IN/OUT */
 297        uint32_t ncpus;
 298        uint32_t _pad0;
 299        /* OUT */
 300        GUEST_HANDLE(mcinfo_logical_cpu) info;
 301};
 302
 303#define XEN_MC_msrinject        4
 304#define MC_MSRINJ_MAXMSRS       8
 305struct xen_mc_msrinject {
 306        /* IN */
 307        uint32_t mcinj_cpunr; /* target processor id */
 308        uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
 309        uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
 310        uint32_t _pad0;
 311        struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
 312};
 313
 314/* Flags for mcinj_flags above; bits 16-31 are reserved */
 315#define MC_MSRINJ_F_INTERPOSE   0x1
 316
 317#define XEN_MC_mceinject        5
 318struct xen_mc_mceinject {
 319        unsigned int mceinj_cpunr; /* target processor id */
 320};
 321
 322struct xen_mc {
 323        uint32_t cmd;
 324        uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
 325        union {
 326                struct xen_mc_fetch        mc_fetch;
 327                struct xen_mc_notifydomain mc_notifydomain;
 328                struct xen_mc_physcpuinfo  mc_physcpuinfo;
 329                struct xen_mc_msrinject    mc_msrinject;
 330                struct xen_mc_mceinject    mc_mceinject;
 331        } u;
 332};
 333DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
 334
 335/* Fields are zero when not available */
 336struct xen_mce {
 337        __u64 status;
 338        __u64 misc;
 339        __u64 addr;
 340        __u64 mcgstatus;
 341        __u64 ip;
 342        __u64 tsc;      /* cpu time stamp counter */
 343        __u64 time;     /* wall time_t when error was detected */
 344        __u8  cpuvendor;        /* cpu vendor as encoded in system.h */
 345        __u8  inject_flags;     /* software inject flags */
 346        __u16  pad;
 347        __u32 cpuid;    /* CPUID 1 EAX */
 348        __u8  cs;               /* code segment */
 349        __u8  bank;     /* machine check bank */
 350        __u8  cpu;      /* cpu number; obsolete; use extcpu now */
 351        __u8  finished;   /* entry is valid */
 352        __u32 extcpu;   /* linux cpu number that detected the error */
 353        __u32 socketid; /* CPU socket ID */
 354        __u32 apicid;   /* CPU initial apic ID */
 355        __u64 mcgcap;   /* MCGCAP MSR: machine check capabilities of CPU */
 356};
 357
 358/*
 359 * This structure contains all data related to the MCE log.  Also
 360 * carries a signature to make it easier to find from external
 361 * debugging tools.  Each entry is only valid when its finished flag
 362 * is set.
 363 */
 364
 365#define XEN_MCE_LOG_LEN 32
 366
 367struct xen_mce_log {
 368        char signature[12]; /* "MACHINECHECK" */
 369        unsigned len;       /* = XEN_MCE_LOG_LEN */
 370        unsigned next;
 371        unsigned flags;
 372        unsigned recordlen;     /* length of struct xen_mce */
 373        struct xen_mce entry[XEN_MCE_LOG_LEN];
 374};
 375
 376#define XEN_MCE_OVERFLOW 0              /* bit 0 in flags means overflow */
 377
 378#define XEN_MCE_LOG_SIGNATURE   "MACHINECHECK"
 379
 380#define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
 381#define MCE_GET_LOG_LEN      _IOR('M', 2, int)
 382#define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int)
 383
 384#endif /* __ASSEMBLY__ */
 385#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
 386