linux/drivers/gpu/drm/i915/i915_gpu_error.h
<<
>>
Prefs
   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2008-2018 Intel Corporation
   5 */
   6
   7#ifndef _I915_GPU_ERROR_H_
   8#define _I915_GPU_ERROR_H_
   9
  10#include <linux/atomic.h>
  11#include <linux/kref.h>
  12#include <linux/ktime.h>
  13#include <linux/sched.h>
  14
  15#include <drm/drm_mm.h>
  16
  17#include "gt/intel_engine.h"
  18#include "gt/intel_gt_types.h"
  19#include "gt/uc/intel_uc_fw.h"
  20
  21#include "intel_device_info.h"
  22
  23#include "i915_gem.h"
  24#include "i915_gem_gtt.h"
  25#include "i915_params.h"
  26#include "i915_scheduler.h"
  27
  28struct drm_i915_private;
  29struct i915_vma_compress;
  30struct intel_engine_capture_vma;
  31struct intel_overlay_error_state;
  32
  33struct i915_vma_coredump {
  34        struct i915_vma_coredump *next;
  35
  36        char name[20];
  37
  38        u64 gtt_offset;
  39        u64 gtt_size;
  40        u32 gtt_page_sizes;
  41
  42        int num_pages;
  43        int page_count;
  44        int unused;
  45        u32 *pages[];
  46};
  47
  48struct i915_request_coredump {
  49        unsigned long flags;
  50        pid_t pid;
  51        u32 context;
  52        u32 seqno;
  53        u32 head;
  54        u32 tail;
  55        struct i915_sched_attr sched_attr;
  56};
  57
  58struct intel_engine_coredump {
  59        const struct intel_engine_cs *engine;
  60
  61        bool hung;
  62        bool simulated;
  63        u32 reset_count;
  64
  65        /* position of active request inside the ring */
  66        u32 rq_head, rq_post, rq_tail;
  67
  68        /* Register state */
  69        u32 ccid;
  70        u32 start;
  71        u32 tail;
  72        u32 head;
  73        u32 ctl;
  74        u32 mode;
  75        u32 hws;
  76        u32 ipeir;
  77        u32 ipehr;
  78        u32 esr;
  79        u32 bbstate;
  80        u32 instpm;
  81        u32 instps;
  82        u64 bbaddr;
  83        u64 acthd;
  84        u32 fault_reg;
  85        u64 faddr;
  86        u32 rc_psmi; /* sleep state */
  87        struct intel_instdone instdone;
  88
  89        struct i915_gem_context_coredump {
  90                char comm[TASK_COMM_LEN];
  91
  92                u64 total_runtime;
  93                u32 avg_runtime;
  94
  95                pid_t pid;
  96                int active;
  97                int guilty;
  98                struct i915_sched_attr sched_attr;
  99        } context;
 100
 101        struct i915_vma_coredump *vma;
 102
 103        struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
 104        unsigned int num_ports;
 105
 106        struct {
 107                u32 gfx_mode;
 108                union {
 109                        u64 pdp[4];
 110                        u32 pp_dir_base;
 111                };
 112        } vm_info;
 113
 114        struct intel_engine_coredump *next;
 115};
 116
 117struct intel_gt_coredump {
 118        const struct intel_gt *_gt;
 119        bool awake;
 120        bool simulated;
 121
 122        struct intel_gt_info info;
 123
 124        /* Generic register state */
 125        u32 eir;
 126        u32 pgtbl_er;
 127        u32 ier;
 128        u32 gtier[6], ngtier;
 129        u32 derrmr;
 130        u32 forcewake;
 131        u32 error; /* gen6+ */
 132        u32 err_int; /* gen7 */
 133        u32 fault_data0; /* gen8, gen9 */
 134        u32 fault_data1; /* gen8, gen9 */
 135        u32 done_reg;
 136        u32 gac_eco;
 137        u32 gam_ecochk;
 138        u32 gab_ctl;
 139        u32 gfx_mode;
 140        u32 gtt_cache;
 141        u32 aux_err; /* gen12 */
 142        u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
 143        u32 gam_done; /* gen12 */
 144
 145        u32 nfence;
 146        u64 fence[I915_MAX_NUM_FENCES];
 147
 148        struct intel_engine_coredump *engine;
 149
 150        struct intel_uc_coredump {
 151                struct intel_uc_fw guc_fw;
 152                struct intel_uc_fw huc_fw;
 153                struct i915_vma_coredump *guc_log;
 154        } *uc;
 155
 156        struct intel_gt_coredump *next;
 157};
 158
 159struct i915_gpu_coredump {
 160        struct kref ref;
 161        ktime_t time;
 162        ktime_t boottime;
 163        ktime_t uptime;
 164        unsigned long capture;
 165
 166        struct drm_i915_private *i915;
 167
 168        struct intel_gt_coredump *gt;
 169
 170        char error_msg[128];
 171        bool simulated;
 172        bool wakelock;
 173        bool suspended;
 174        int iommu;
 175        u32 reset_count;
 176        u32 suspend_count;
 177
 178        struct intel_device_info device_info;
 179        struct intel_runtime_info runtime_info;
 180        struct intel_driver_caps driver_caps;
 181        struct i915_params params;
 182
 183        struct intel_overlay_error_state *overlay;
 184
 185        struct scatterlist *sgl, *fit;
 186};
 187
 188struct i915_gpu_error {
 189        /* For reset and error_state handling. */
 190        spinlock_t lock;
 191        /* Protected by the above dev->gpu_error.lock. */
 192        struct i915_gpu_coredump *first_error;
 193
 194        atomic_t pending_fb_pin;
 195
 196        /** Number of times the device has been reset (global) */
 197        atomic_t reset_count;
 198
 199        /** Number of times an engine has been reset */
 200        atomic_t reset_engine_count[I915_NUM_ENGINES];
 201};
 202
 203struct drm_i915_error_state_buf {
 204        struct drm_i915_private *i915;
 205        struct scatterlist *sgl, *cur, *end;
 206
 207        char *buf;
 208        size_t bytes;
 209        size_t size;
 210        loff_t iter;
 211
 212        int err;
 213};
 214
 215#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 216
 217__printf(2, 3)
 218void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 219
 220struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
 221                                            intel_engine_mask_t engine_mask);
 222void i915_capture_error_state(struct intel_gt *gt,
 223                              intel_engine_mask_t engine_mask);
 224
 225struct i915_gpu_coredump *
 226i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
 227
 228struct intel_gt_coredump *
 229intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
 230
 231struct intel_engine_coredump *
 232intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 233
 234struct intel_engine_capture_vma *
 235intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 236                                  struct i915_request *rq,
 237                                  gfp_t gfp);
 238
 239void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 240                                   struct intel_engine_capture_vma *capture,
 241                                   struct i915_vma_compress *compress);
 242
 243struct i915_vma_compress *
 244i915_vma_capture_prepare(struct intel_gt_coredump *gt);
 245
 246void i915_vma_capture_finish(struct intel_gt_coredump *gt,
 247                             struct i915_vma_compress *compress);
 248
 249void i915_error_state_store(struct i915_gpu_coredump *error);
 250
 251static inline struct i915_gpu_coredump *
 252i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
 253{
 254        kref_get(&gpu->ref);
 255        return gpu;
 256}
 257
 258ssize_t
 259i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
 260                                 char *buf, loff_t offset, size_t count);
 261
 262void __i915_gpu_coredump_free(struct kref *kref);
 263static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 264{
 265        if (gpu)
 266                kref_put(&gpu->ref, __i915_gpu_coredump_free);
 267}
 268
 269struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
 270void i915_reset_error_state(struct drm_i915_private *i915);
 271void i915_disable_error_state(struct drm_i915_private *i915, int err);
 272
 273#else
 274
 275static inline void
 276i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 277{
 278}
 279
 280static inline struct i915_gpu_coredump *
 281i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
 282{
 283        return NULL;
 284}
 285
 286static inline struct intel_gt_coredump *
 287intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
 288{
 289        return NULL;
 290}
 291
 292static inline struct intel_engine_coredump *
 293intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 294{
 295        return NULL;
 296}
 297
 298static inline struct intel_engine_capture_vma *
 299intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 300                                  struct i915_request *rq,
 301                                  gfp_t gfp)
 302{
 303        return NULL;
 304}
 305
 306static inline void
 307intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 308                              struct intel_engine_capture_vma *capture,
 309                              struct i915_vma_compress *compress)
 310{
 311}
 312
 313static inline struct i915_vma_compress *
 314i915_vma_capture_prepare(struct intel_gt_coredump *gt)
 315{
 316        return NULL;
 317}
 318
 319static inline void
 320i915_vma_capture_finish(struct intel_gt_coredump *gt,
 321                        struct i915_vma_compress *compress)
 322{
 323}
 324
 325static inline void
 326i915_error_state_store(struct i915_gpu_coredump *error)
 327{
 328}
 329
 330static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
 331{
 332}
 333
 334static inline struct i915_gpu_coredump *
 335i915_first_error_state(struct drm_i915_private *i915)
 336{
 337        return ERR_PTR(-ENODEV);
 338}
 339
 340static inline void i915_reset_error_state(struct drm_i915_private *i915)
 341{
 342}
 343
 344static inline void i915_disable_error_state(struct drm_i915_private *i915,
 345                                            int err)
 346{
 347}
 348
 349#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 350
 351#endif /* _I915_GPU_ERROR_H_ */
 352