1
2
3
4
5
6
7#ifndef _I915_GPU_ERROR_H_
8#define _I915_GPU_ERROR_H_
9
10#include <linux/atomic.h>
11#include <linux/kref.h>
12#include <linux/ktime.h>
13#include <linux/sched.h>
14
15#include <drm/drm_mm.h>
16
17#include "gt/intel_engine.h"
18#include "gt/intel_gt_types.h"
19#include "gt/uc/intel_uc_fw.h"
20
21#include "intel_device_info.h"
22
23#include "i915_gem.h"
24#include "i915_gem_gtt.h"
25#include "i915_params.h"
26#include "i915_scheduler.h"
27
28struct drm_i915_private;
29struct i915_vma_compress;
30struct intel_engine_capture_vma;
31struct intel_overlay_error_state;
32struct intel_display_error_state;
33
34struct i915_vma_coredump {
35 struct i915_vma_coredump *next;
36
37 char name[20];
38
39 u64 gtt_offset;
40 u64 gtt_size;
41 u32 gtt_page_sizes;
42
43 int num_pages;
44 int page_count;
45 int unused;
46 u32 *pages[];
47};
48
49struct i915_request_coredump {
50 unsigned long flags;
51 pid_t pid;
52 u32 context;
53 u32 seqno;
54 u32 head;
55 u32 tail;
56 struct i915_sched_attr sched_attr;
57};
58
59struct intel_engine_coredump {
60 const struct intel_engine_cs *engine;
61
62 bool hung;
63 bool simulated;
64 u32 reset_count;
65
66
67 u32 rq_head, rq_post, rq_tail;
68
69
70 u32 ccid;
71 u32 start;
72 u32 tail;
73 u32 head;
74 u32 ctl;
75 u32 mode;
76 u32 hws;
77 u32 ipeir;
78 u32 ipehr;
79 u32 esr;
80 u32 bbstate;
81 u32 instpm;
82 u32 instps;
83 u64 bbaddr;
84 u64 acthd;
85 u32 fault_reg;
86 u64 faddr;
87 u32 rc_psmi;
88 struct intel_instdone instdone;
89
90 struct i915_gem_context_coredump {
91 char comm[TASK_COMM_LEN];
92
93 u64 total_runtime;
94 u32 avg_runtime;
95
96 pid_t pid;
97 int active;
98 int guilty;
99 struct i915_sched_attr sched_attr;
100 } context;
101
102 struct i915_vma_coredump *vma;
103
104 struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
105 unsigned int num_ports;
106
107 struct {
108 u32 gfx_mode;
109 union {
110 u64 pdp[4];
111 u32 pp_dir_base;
112 };
113 } vm_info;
114
115 struct intel_engine_coredump *next;
116};
117
118struct intel_gt_coredump {
119 const struct intel_gt *_gt;
120 bool awake;
121 bool simulated;
122
123 struct intel_gt_info info;
124
125
126 u32 eir;
127 u32 pgtbl_er;
128 u32 ier;
129 u32 gtier[6], ngtier;
130 u32 derrmr;
131 u32 forcewake;
132 u32 error;
133 u32 err_int;
134 u32 fault_data0;
135 u32 fault_data1;
136 u32 done_reg;
137 u32 gac_eco;
138 u32 gam_ecochk;
139 u32 gab_ctl;
140 u32 gfx_mode;
141 u32 gtt_cache;
142 u32 aux_err;
143 u32 sfc_done[GEN12_SFC_DONE_MAX];
144 u32 gam_done;
145
146 u32 nfence;
147 u64 fence[I915_MAX_NUM_FENCES];
148
149 struct intel_engine_coredump *engine;
150
151 struct intel_uc_coredump {
152 struct intel_uc_fw guc_fw;
153 struct intel_uc_fw huc_fw;
154 struct i915_vma_coredump *guc_log;
155 } *uc;
156
157 struct intel_gt_coredump *next;
158};
159
160struct i915_gpu_coredump {
161 struct kref ref;
162 ktime_t time;
163 ktime_t boottime;
164 ktime_t uptime;
165 unsigned long capture;
166
167 struct drm_i915_private *i915;
168
169 struct intel_gt_coredump *gt;
170
171 char error_msg[128];
172 bool simulated;
173 bool wakelock;
174 bool suspended;
175 int iommu;
176 u32 reset_count;
177 u32 suspend_count;
178
179 struct intel_device_info device_info;
180 struct intel_runtime_info runtime_info;
181 struct intel_driver_caps driver_caps;
182 struct i915_params params;
183
184 struct intel_overlay_error_state *overlay;
185 struct intel_display_error_state *display;
186
187 struct scatterlist *sgl, *fit;
188};
189
190struct i915_gpu_error {
191
192 spinlock_t lock;
193
194 struct i915_gpu_coredump *first_error;
195
196 atomic_t pending_fb_pin;
197
198
199 atomic_t reset_count;
200
201
202 atomic_t reset_engine_count[I915_NUM_ENGINES];
203};
204
205struct drm_i915_error_state_buf {
206 struct drm_i915_private *i915;
207 struct scatterlist *sgl, *cur, *end;
208
209 char *buf;
210 size_t bytes;
211 size_t size;
212 loff_t iter;
213
214 int err;
215};
216
217#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
218
219__printf(2, 3)
220void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
221
222struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
223 intel_engine_mask_t engine_mask);
224void i915_capture_error_state(struct intel_gt *gt,
225 intel_engine_mask_t engine_mask);
226
227struct i915_gpu_coredump *
228i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
229
230struct intel_gt_coredump *
231intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
232
233struct intel_engine_coredump *
234intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
235
236struct intel_engine_capture_vma *
237intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
238 struct i915_request *rq,
239 gfp_t gfp);
240
241void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
242 struct intel_engine_capture_vma *capture,
243 struct i915_vma_compress *compress);
244
245struct i915_vma_compress *
246i915_vma_capture_prepare(struct intel_gt_coredump *gt);
247
248void i915_vma_capture_finish(struct intel_gt_coredump *gt,
249 struct i915_vma_compress *compress);
250
251void i915_error_state_store(struct i915_gpu_coredump *error);
252
253static inline struct i915_gpu_coredump *
254i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
255{
256 kref_get(&gpu->ref);
257 return gpu;
258}
259
260ssize_t
261i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
262 char *buf, loff_t offset, size_t count);
263
264void __i915_gpu_coredump_free(struct kref *kref);
265static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
266{
267 if (gpu)
268 kref_put(&gpu->ref, __i915_gpu_coredump_free);
269}
270
/*
 * Accessors for the error state kept per device.
 *
 * i915_first_error_state() can hand back an ERR_PTR() value: the stub used
 * when CONFIG_DRM_I915_CAPTURE_ERROR is disabled returns ERR_PTR(-ENODEV),
 * so callers should test the result with IS_ERR() as well as for NULL.
 *
 * NOTE(review): whether the returned dump carries a reference the caller
 * must drop with i915_gpu_coredump_put() is not visible here - confirm.
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void
i915_reset_error_state(struct drm_i915_private *i915);

void
i915_disable_error_state(struct drm_i915_private *i915, int err);
274
275#else
276
277static inline void
278i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
279{
280}
281
282static inline struct i915_gpu_coredump *
283i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
284{
285 return NULL;
286}
287
288static inline struct intel_gt_coredump *
289intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
290{
291 return NULL;
292}
293
294static inline struct intel_engine_coredump *
295intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
296{
297 return NULL;
298}
299
300static inline struct intel_engine_capture_vma *
301intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
302 struct i915_request *rq,
303 gfp_t gfp)
304{
305 return NULL;
306}
307
308static inline void
309intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
310 struct intel_engine_capture_vma *capture,
311 struct i915_vma_compress *compress)
312{
313}
314
315static inline struct i915_vma_compress *
316i915_vma_capture_prepare(struct intel_gt_coredump *gt)
317{
318 return NULL;
319}
320
321static inline void
322i915_vma_capture_finish(struct intel_gt_coredump *gt,
323 struct i915_vma_compress *compress)
324{
325}
326
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @error is ignored. */
static inline void i915_error_state_store(struct i915_gpu_coredump *error)
{
}
331
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: no reference
 * counting takes place, @gpu is ignored.
 */
static inline void
i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
335
336static inline struct i915_gpu_coredump *
337i915_first_error_state(struct drm_i915_private *i915)
338{
339 return ERR_PTR(-ENODEV);
340}
341
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @err is ignored. */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
}
350
351#endif
352
353#endif
354