1
2
3
4
5#include <linux/mm.h>
6#include <linux/gfp.h>
7#include <linux/ras.h>
8#include <linux/kernel.h>
9#include <linux/workqueue.h>
10
11#include <asm/mce.h>
12
13#include "debugfs.h"
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/* Prefix every pr_*() message emitted from this file. */
#undef pr_fmt
#define pr_fmt(fmt) "RAS: " fmt

/*
 * Layout of one array element (a u64):
 *
 *   [ PFN | DECAY | COUNT ]
 *
 * The low PAGE_SHIFT bits are split into DECAY_BITS of decay ("generation")
 * information on top of COUNT_BITS of error count; everything above
 * PAGE_SHIFT holds the page frame number.
 */
#define DECAY_BITS		2
#define DECAY_MASK		((1ULL << DECAY_BITS) - 1)

/* Number of u64 elements that fit into the single array page. */
#define MAX_ELEMS		(PAGE_SIZE / sizeof(u64))

/* Number of insertions after which a decay pass over the array is forced. */
#define CLEAN_ELEMS		(MAX_ELEMS >> DECAY_BITS)

/* Bits of the element used for the error count proper. */
#define COUNT_BITS		(PAGE_SHIFT - DECAY_BITS)
#define COUNT_MASK		((1ULL << COUNT_BITS) - 1)

/* Mask covering decay and count bits together. */
#define FULL_COUNT_MASK		(PAGE_SIZE - 1)

/* Accessors for the individual fields of an element @e. */
#define PFN(e)			((e) >> PAGE_SHIFT)
#define DECAY(e)		(((e) >> COUNT_BITS) & DECAY_MASK)
#define COUNT(e)		((unsigned int)(e) & COUNT_MASK)
#define FULL_COUNT(e)		((e) & (PAGE_SIZE - 1))
93
94static struct ce_array {
95 u64 *array;
96 unsigned int n;
97
98 unsigned int decay_count;
99
100
101
102
103 u64 pfns_poisoned;
104
105
106
107 u64 ces_entered;
108
109
110
111
112 u64 decays_done;
113
114
115
116 union {
117 struct {
118 __u32 disabled : 1,
119 __resv : 31;
120 };
121 __u32 flags;
122 };
123} ce_arr;
124
125static DEFINE_MUTEX(ce_mutex);
126static u64 dfs_pfn;
127
128
129static u64 action_threshold = COUNT_MASK;
130
131
/*
 * Decay interval limits, in seconds. The expansions are parenthesized so the
 * macros behave as single values inside larger expressions (e.g. as the
 * right-hand operand of '/' or '%').
 */
#define CEC_DECAY_DEFAULT_INTERVAL	(24 * 60 * 60)		/* 24 hours */
#define CEC_DECAY_MIN_INTERVAL		(1 * 60 * 60)		/* 1 hour */
#define CEC_DECAY_MAX_INTERVAL		(30 * 24 * 60 * 60)	/* 30 days */
135static struct delayed_work cec_work;
136static u64 decay_interval = CEC_DECAY_DEFAULT_INTERVAL;
137
138
139
140
141
142static void do_spring_cleaning(struct ce_array *ca)
143{
144 int i;
145
146 for (i = 0; i < ca->n; i++) {
147 u8 decay = DECAY(ca->array[i]);
148
149 if (!decay)
150 continue;
151
152 decay--;
153
154 ca->array[i] &= ~(DECAY_MASK << COUNT_BITS);
155 ca->array[i] |= (decay << COUNT_BITS);
156 }
157 ca->decay_count = 0;
158 ca->decays_done++;
159}
160
161
162
163
164static void cec_mod_work(unsigned long interval)
165{
166 unsigned long iv;
167
168 iv = interval * HZ;
169 mod_delayed_work(system_wq, &cec_work, round_jiffies(iv));
170}
171
172static void cec_work_fn(struct work_struct *work)
173{
174 mutex_lock(&ce_mutex);
175 do_spring_cleaning(&ce_arr);
176 mutex_unlock(&ce_mutex);
177
178 cec_mod_work(decay_interval);
179}
180
181
182
183
184
185
186static int __find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
187{
188 int min = 0, max = ca->n - 1;
189 u64 this_pfn;
190
191 while (min <= max) {
192 int i = (min + max) >> 1;
193
194 this_pfn = PFN(ca->array[i]);
195
196 if (this_pfn < pfn)
197 min = i + 1;
198 else if (this_pfn > pfn)
199 max = i - 1;
200 else if (this_pfn == pfn) {
201 if (to)
202 *to = i;
203
204 return i;
205 }
206 }
207
208
209
210
211
212
213
214
215
216
217 if (to)
218 *to = min;
219
220 return -ENOKEY;
221}
222
223static int find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
224{
225 WARN_ON(!to);
226
227 if (!ca->n) {
228 *to = 0;
229 return -ENOKEY;
230 }
231 return __find_elem(ca, pfn, to);
232}
233
234static void del_elem(struct ce_array *ca, int idx)
235{
236
237 if (ca->n - (idx + 1))
238 memmove((void *)&ca->array[idx],
239 (void *)&ca->array[idx + 1],
240 (ca->n - (idx + 1)) * sizeof(u64));
241
242 ca->n--;
243}
244
245static u64 del_lru_elem_unlocked(struct ce_array *ca)
246{
247 unsigned int min = FULL_COUNT_MASK;
248 int i, min_idx = 0;
249
250 for (i = 0; i < ca->n; i++) {
251 unsigned int this = FULL_COUNT(ca->array[i]);
252
253 if (min > this) {
254 min = this;
255 min_idx = i;
256 }
257 }
258
259 del_elem(ca, min_idx);
260
261 return PFN(ca->array[min_idx]);
262}
263
264
265
266
267
268static u64 __maybe_unused del_lru_elem(void)
269{
270 struct ce_array *ca = &ce_arr;
271 u64 pfn;
272
273 if (!ca->n)
274 return 0;
275
276 mutex_lock(&ce_mutex);
277 pfn = del_lru_elem_unlocked(ca);
278 mutex_unlock(&ce_mutex);
279
280 return pfn;
281}
282
283static bool sanity_check(struct ce_array *ca)
284{
285 bool ret = false;
286 u64 prev = 0;
287 int i;
288
289 for (i = 0; i < ca->n; i++) {
290 u64 this = PFN(ca->array[i]);
291
292 if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this))
293 ret = true;
294
295 prev = this;
296 }
297
298 if (!ret)
299 return ret;
300
301 pr_info("Sanity check dump:\n{ n: %d\n", ca->n);
302 for (i = 0; i < ca->n; i++) {
303 u64 this = PFN(ca->array[i]);
304
305 pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
306 }
307 pr_info("}\n");
308
309 return ret;
310}
311
312int cec_add_elem(u64 pfn)
313{
314 struct ce_array *ca = &ce_arr;
315 unsigned int to = 0;
316 int count, ret = 0;
317
318
319
320
321
322 if (!ce_arr.array || ce_arr.disabled)
323 return -ENODEV;
324
325 mutex_lock(&ce_mutex);
326
327 ca->ces_entered++;
328
329
330 if (ca->n == MAX_ELEMS)
331 WARN_ON(!del_lru_elem_unlocked(ca));
332
333 ret = find_elem(ca, pfn, &to);
334 if (ret < 0) {
335
336
337
338 memmove((void *)&ca->array[to + 1],
339 (void *)&ca->array[to],
340 (ca->n - to) * sizeof(u64));
341
342 ca->array[to] = pfn << PAGE_SHIFT;
343 ca->n++;
344 }
345
346
347 ca->array[to] |= DECAY_MASK << COUNT_BITS;
348 ca->array[to]++;
349
350
351 count = COUNT(ca->array[to]);
352 if (count >= action_threshold) {
353 u64 pfn = ca->array[to] >> PAGE_SHIFT;
354
355 if (!pfn_valid(pfn)) {
356 pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
357 } else {
358
359 pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
360 memory_failure_queue(pfn, MF_SOFT_OFFLINE);
361 ca->pfns_poisoned++;
362 }
363
364 del_elem(ca, to);
365
366
367
368
369
370 ret = 1;
371
372 goto unlock;
373 }
374
375 ca->decay_count++;
376
377 if (ca->decay_count >= CLEAN_ELEMS)
378 do_spring_cleaning(ca);
379
380 WARN_ON_ONCE(sanity_check(ca));
381
382unlock:
383 mutex_unlock(&ce_mutex);
384
385 return ret;
386}
387
388static int u64_get(void *data, u64 *val)
389{
390 *val = *(u64 *)data;
391
392 return 0;
393}
394
395static int pfn_set(void *data, u64 val)
396{
397 *(u64 *)data = val;
398
399 cec_add_elem(val);
400
401 return 0;
402}
403
404DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");
405
406static int decay_interval_set(void *data, u64 val)
407{
408 if (val < CEC_DECAY_MIN_INTERVAL)
409 return -EINVAL;
410
411 if (val > CEC_DECAY_MAX_INTERVAL)
412 return -EINVAL;
413
414 *(u64 *)data = val;
415 decay_interval = val;
416
417 cec_mod_work(decay_interval);
418
419 return 0;
420}
421DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
422
423static int action_threshold_set(void *data, u64 val)
424{
425 *(u64 *)data = val;
426
427 if (val > COUNT_MASK)
428 val = COUNT_MASK;
429
430 action_threshold = val;
431
432 return 0;
433}
434DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n");
435
436static const char * const bins[] = { "00", "01", "10", "11" };
437
438static int array_dump(struct seq_file *m, void *v)
439{
440 struct ce_array *ca = &ce_arr;
441 int i;
442
443 mutex_lock(&ce_mutex);
444
445 seq_printf(m, "{ n: %d\n", ca->n);
446 for (i = 0; i < ca->n; i++) {
447 u64 this = PFN(ca->array[i]);
448
449 seq_printf(m, " %3d: [%016llx|%s|%03llx]\n",
450 i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i]));
451 }
452
453 seq_printf(m, "}\n");
454
455 seq_printf(m, "Stats:\nCEs: %llu\nofflined pages: %llu\n",
456 ca->ces_entered, ca->pfns_poisoned);
457
458 seq_printf(m, "Flags: 0x%x\n", ca->flags);
459
460 seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
461 seq_printf(m, "Decays: %lld\n", ca->decays_done);
462
463 seq_printf(m, "Action threshold: %lld\n", action_threshold);
464
465 mutex_unlock(&ce_mutex);
466
467 return 0;
468}
469
470static int array_open(struct inode *inode, struct file *filp)
471{
472 return single_open(filp, array_dump, NULL);
473}
474
475static const struct file_operations array_ops = {
476 .owner = THIS_MODULE,
477 .open = array_open,
478 .read = seq_read,
479 .llseek = seq_lseek,
480 .release = single_release,
481};
482
483static int __init create_debugfs_nodes(void)
484{
485 struct dentry *d, *pfn, *decay, *count, *array;
486
487 d = debugfs_create_dir("cec", ras_debugfs_dir);
488 if (!d) {
489 pr_warn("Error creating cec debugfs node!\n");
490 return -1;
491 }
492
493 decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
494 &decay_interval, &decay_interval_ops);
495 if (!decay) {
496 pr_warn("Error creating decay_interval debugfs node!\n");
497 goto err;
498 }
499
500 count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d,
501 &action_threshold, &action_threshold_ops);
502 if (!count) {
503 pr_warn("Error creating action_threshold debugfs node!\n");
504 goto err;
505 }
506
507 if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG))
508 return 0;
509
510 pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
511 if (!pfn) {
512 pr_warn("Error creating pfn debugfs node!\n");
513 goto err;
514 }
515
516 array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
517 if (!array) {
518 pr_warn("Error creating array debugfs node!\n");
519 goto err;
520 }
521
522 return 0;
523
524err:
525 debugfs_remove_recursive(d);
526
527 return 1;
528}
529
530void __init cec_init(void)
531{
532 if (ce_arr.disabled)
533 return;
534
535 ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
536 if (!ce_arr.array) {
537 pr_err("Error allocating CE array page!\n");
538 return;
539 }
540
541 if (create_debugfs_nodes()) {
542 free_page((unsigned long)ce_arr.array);
543 return;
544 }
545
546 INIT_DELAYED_WORK(&cec_work, cec_work_fn);
547 schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
548
549 pr_info("Correctable Errors collector initialized.\n");
550}
551
552int __init parse_cec_param(char *str)
553{
554 if (!str)
555 return 0;
556
557 if (*str == '=')
558 str++;
559
560 if (!strcmp(str, "cec_disable"))
561 ce_arr.disabled = 1;
562 else
563 return 0;
564
565 return 1;
566}
567