linux/drivers/gpu/drm/i915/i915_gpu_error.h
<<
>>
Prefs
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2008-2018 Intel Corporation
   5 */
   6
   7#ifndef _I915_GPU_ERROR_H_
   8#define _I915_GPU_ERROR_H_
   9
  10#include <linux/atomic.h>
  11#include <linux/kref.h>
  12#include <linux/ktime.h>
  13#include <linux/sched.h>
  14
  15#include <drm/drm_mm.h>
  16
  17#include "gt/intel_engine.h"
  18#include "gt/intel_gt_types.h"
  19#include "gt/uc/intel_uc_fw.h"
  20
  21#include "intel_device_info.h"
  22
  23#include "i915_gem.h"
  24#include "i915_gem_gtt.h"
  25#include "i915_params.h"
  26#include "i915_scheduler.h"
  27
  28struct drm_i915_private;
  29struct i915_vma_compress;
  30struct intel_engine_capture_vma;
  31struct intel_overlay_error_state;
  32struct intel_display_error_state;
  33
  34struct i915_vma_coredump {
            /*
             * Node of a singly linked list (chained through @next) that ends
             * in a flexible array of page pointers. Both intel_engine_coredump
             * (vma) and intel_uc_coredump (guc_log) point at nodes of this type.
             */
  35        struct i915_vma_coredump *next;
  36
  37        char name[20];
  38
            /* NOTE(review): presumably the GTT placement of the captured object -- confirm */
  39        u64 gtt_offset;
  40        u64 gtt_size;
  41        u32 gtt_page_sizes;
  42
            /*
             * NOTE(review): how num_pages and page_count differ is not visible
             * in this header; check the users before relying on either.
             */
  43        int num_pages;
  44        int page_count;
  45        int unused;
            /* Flexible array member: must stay the last field of the struct. */
  46        u32 *pages[];
  47};
  48
  49struct i915_request_coredump {
            /*
             * Plain-data record describing one request. intel_engine_coredump
             * embeds an array of these as execlist[].
             */
  50        unsigned long flags;
  51        pid_t pid;
  52        u32 context;
  53        u32 seqno;
            /* NOTE(review): presumably ring offsets of this request -- confirm */
  54        u32 head;
  55        u32 tail;
  56        struct i915_sched_attr sched_attr;
  57};
  58
  59struct intel_engine_coredump {
            /*
             * Per-engine part of a coredump. Instances form a singly linked
             * list through @next; intel_gt_coredump.engine points at a node of
             * this type.
             */
  60        const struct intel_engine_cs *engine;
  61
  62        bool hung;
  63        bool simulated;
  64        u32 reset_count;
  65
  66        /* position of active request inside the ring */
  67        u32 rq_head, rq_post, rq_tail;
  68
  69        /* Register state */
  70        u32 ccid;
  71        u32 start;
  72        u32 tail;
  73        u32 head;
  74        u32 ctl;
  75        u32 mode;
  76        u32 hws;
  77        u32 ipeir;
  78        u32 ipehr;
  79        u32 esr;
  80        u32 bbstate;
  81        u32 instpm;
  82        u32 instps;
  83        u64 bbaddr;
  84        u64 acthd;
  85        u32 fault_reg;
  86        u64 faddr;
  87        u32 rc_psmi; /* sleep state */
  88        struct intel_instdone instdone;
  89
            /*
             * Context details, stored by value. The struct tag is declared
             * inline here but, as with any C struct tag, is visible at file
             * scope.
             */
  90        struct i915_gem_context_coredump {
  91                char comm[TASK_COMM_LEN];
  92
  93                u64 total_runtime;
  94                u32 avg_runtime;
  95
  96                pid_t pid;
  97                int active;
  98                int guilty;
  99                struct i915_sched_attr sched_attr;
 100        } context;
 101
            /* Head of a list of i915_vma_coredump nodes (linked via their next). */
 102        struct i915_vma_coredump *vma;
 103
            /* NOTE(review): num_ports presumably counts the valid execlist[] entries -- confirm */
 104        struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
 105        unsigned int num_ports;
 106
 107        struct {
 108                u32 gfx_mode;
                    /*
                     * pdp[] and pp_dir_base share storage; this header does
                     * not show what selects the valid member.
                     */
 109                union {
 110                        u64 pdp[4];
 111                        u32 pp_dir_base;
 112                };
 113        } vm_info;
 114
 115        struct intel_engine_coredump *next;
 116};
 117
 118struct intel_gt_coredump {
            /*
             * Per-GT part of a coredump. Instances form a singly linked list
             * through @next; i915_gpu_coredump.gt points at a node of this
             * type, and @engine points at the per-engine list.
             */
 119        const struct intel_gt *_gt;
 120        bool awake;
 121        bool simulated;
 122
 123        struct intel_gt_info info;
 124
 125        /* Generic register state */
 126        u32 eir;
 127        u32 pgtbl_er;
 128        u32 ier;
            /* NOTE(review): ngtier presumably counts the valid gtier[] entries -- confirm */
 129        u32 gtier[6], ngtier;
 130        u32 derrmr;
 131        u32 forcewake;
 132        u32 error; /* gen6+ */
 133        u32 err_int; /* gen7 */
 134        u32 fault_data0; /* gen8, gen9 */
 135        u32 fault_data1; /* gen8, gen9 */
 136        u32 done_reg;
 137        u32 gac_eco;
 138        u32 gam_ecochk;
 139        u32 gab_ctl;
 140        u32 gfx_mode;
 141        u32 gtt_cache;
 142        u32 aux_err; /* gen12 */
 143        u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
 144        u32 gam_done; /* gen12 */
 145
            /* NOTE(review): nfence presumably counts the valid fence[] entries -- confirm */
 146        u32 nfence;
 147        u64 fence[I915_MAX_NUM_FENCES];
 148
 149        struct intel_engine_coredump *engine;
 150
            /*
             * Microcontroller section, held through a pointer: two intel_uc_fw
             * descriptors stored by value plus a pointer to an
             * i915_vma_coredump for the GuC log.
             */
 151        struct intel_uc_coredump {
 152                struct intel_uc_fw guc_fw;
 153                struct intel_uc_fw huc_fw;
 154                struct i915_vma_coredump *guc_log;
 155        } *uc;
 156
 157        struct intel_gt_coredump *next;
 158};
 159
 160struct i915_gpu_coredump {
            /*
             * Top-level coredump object. Reference counted through @ref:
             * i915_gpu_coredump_get() and i915_gpu_coredump_put() in this
             * header call kref_get()/kref_put() on it, with
             * __i915_gpu_coredump_free() as the release callback.
             */
 161        struct kref ref;
 162        ktime_t time;
 163        ktime_t boottime;
 164        ktime_t uptime;
 165        unsigned long capture;
 166
 167        struct drm_i915_private *i915;
 168
            /* Head of the list of per-GT dumps (linked via intel_gt_coredump.next). */
 169        struct intel_gt_coredump *gt;
 170
 171        char error_msg[128];
 172        bool simulated;
 173        bool wakelock;
 174        bool suspended;
 175        int iommu;
 176        u32 reset_count;
 177        u32 suspend_count;
 178
            /* Device description and module parameters, stored by value. */
 179        struct intel_device_info device_info;
 180        struct intel_runtime_info runtime_info;
 181        struct intel_driver_caps driver_caps;
 182        struct i915_params params;
 183
 184        struct intel_overlay_error_state *overlay;
 185        struct intel_display_error_state *display;
 186
            /* NOTE(review): scatterlist use is not visible here; presumably backs the formatted dump text -- confirm */
 187        struct scatterlist *sgl, *fit;
 188};
 189
 190struct i915_gpu_error {
            /*
             * Error and reset bookkeeping: a spinlock guarding the
             * @first_error pointer, plus atomic counters.
             */
 191        /* For reset and error_state handling. */
 192        spinlock_t lock;
 193        /* Protected by the above dev->gpu_error.lock. */
 194        struct i915_gpu_coredump *first_error;
 195
 196        atomic_t pending_fb_pin;
 197
 198        /** Number of times the device has been reset (global) */
 199        atomic_t reset_count;
 200
 201        /** Number of times an engine has been reset */
            /* One counter per engine slot; the array has I915_NUM_ENGINES entries. */
 202        atomic_t reset_engine_count[I915_NUM_ENGINES];
 203};
 204
 205struct drm_i915_error_state_buf {
            /*
             * Output state handed to i915_error_printf() as its first
             * argument.
             */
 206        struct drm_i915_private *i915;
 207        struct scatterlist *sgl, *cur, *end;
 208
            /* NOTE(review): presumably the current text buffer, its fill level and its capacity -- confirm */
 209        char *buf;
 210        size_t bytes;
 211        size_t size;
 212        loff_t iter;
 213
            /* NOTE(review): presumably a sticky error code recorded during formatting -- confirm */
 214        int err;
 215};
 216
 217#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 218
 219__printf(2, 3)
 220void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 221
 222struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
 223                                            intel_engine_mask_t engine_mask);
 224void i915_capture_error_state(struct intel_gt *gt,
 225                              intel_engine_mask_t engine_mask);
 226
 227struct i915_gpu_coredump *
 228i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
 229
 230struct intel_gt_coredump *
 231intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
 232
 233struct intel_engine_coredump *
 234intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 235
 236struct intel_engine_capture_vma *
 237intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 238                                  struct i915_request *rq,
 239                                  gfp_t gfp);
 240
 241void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 242                                   struct intel_engine_capture_vma *capture,
 243                                   struct i915_vma_compress *compress);
 244
 245struct i915_vma_compress *
 246i915_vma_capture_prepare(struct intel_gt_coredump *gt);
 247
 248void i915_vma_capture_finish(struct intel_gt_coredump *gt,
 249                             struct i915_vma_compress *compress);
 250
 251void i915_error_state_store(struct i915_gpu_coredump *error);
 252
 253static inline struct i915_gpu_coredump *
 254i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
 255{
 256        kref_get(&gpu->ref);
 257        return gpu;
 258}
 259
 260ssize_t
 261i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
 262                                 char *buf, loff_t offset, size_t count);
 263
 264void __i915_gpu_coredump_free(struct kref *kref);
 265static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 266{
 267        if (gpu)
 268                kref_put(&gpu->ref, __i915_gpu_coredump_free);
 269}
 270
 271struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 272void i915_reset_error_state(struct drm_i915_private *i915);
 273void i915_disable_error_state(struct drm_i915_private *i915, int err);
 274
 275#else
 276
 277static inline void
 278i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 279{
 280}
 281
 282static inline struct i915_gpu_coredump *
 283i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 284{
 285        return NULL;
 286}
 287
 288static inline struct intel_gt_coredump *
 289intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
 290{
 291        return NULL;
 292}
 293
 294static inline struct intel_engine_coredump *
 295intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 296{
 297        return NULL;
 298}
 299
 300static inline struct intel_engine_capture_vma *
 301intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 302                                  struct i915_request *rq,
 303                                  gfp_t gfp)
 304{
 305        return NULL;
 306}
 307
 308static inline void
 309intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 310                              struct intel_engine_capture_vma *capture,
 311                              struct i915_vma_compress *compress)
 312{
 313}
 314
 315static inline struct i915_vma_compress *
 316i915_vma_capture_prepare(struct intel_gt_coredump *gt)
 317{
 318        return NULL;
 319}
 320
 321static inline void
 322i915_vma_capture_finish(struct intel_gt_coredump *gt,
 323                        struct i915_vma_compress *compress)
 324{
 325}
 326
 327static inline void
 328i915_error_state_store(struct i915_gpu_coredump *error)
 329{
 330}
 331
 332static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 333{
 334}
 335
 336static inline struct i915_gpu_coredump *
 337i915_first_error_state(struct drm_i915_private *i915)
 338{
 339        return ERR_PTR(-ENODEV);
 340}
 341
 342static inline void i915_reset_error_state(struct drm_i915_private *i915)
 343{
 344}
 345
 346static inline void i915_disable_error_state(struct drm_i915_private *i915,
 347                                            int err)
 348{
 349}
 350
 351#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 352
 353#endif /* _I915_GPU_ERROR_H_ */
 354