linux/arch/powerpc/kernel/eeh_event.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *
   4 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
   5 */
   6
   7#include <linux/delay.h>
   8#include <linux/list.h>
   9#include <linux/sched.h>
  10#include <linux/semaphore.h>
  11#include <linux/pci.h>
  12#include <linux/slab.h>
  13#include <linux/kthread.h>
  14#include <asm/eeh_event.h>
  15#include <asm/ppc-pci.h>
  16
  17/** Overview:
  18 *  EEH error states may be detected within exception handlers;
  19 *  however, the recovery processing needs to occur asynchronously
  20 *  in a normal kernel context and not an interrupt context.
  21 *  This pair of routines creates an event and queues it onto a
  22 *  work-queue, where a worker thread can drive recovery.
  23 */
  24
   25static DEFINE_SPINLOCK(eeh_eventlist_lock);	/* held (irqsave) around every access to eeh_eventlist in this file */
   26static DECLARE_COMPLETION(eeh_eventlist_event);	/* completed once per queued event; the handler thread waits on it */
   27static LIST_HEAD(eeh_eventlist);	/* pending struct eeh_event entries, linked through ->list */
  28
  29/**
  30 * eeh_event_handler - Dispatch EEH events.
  31 * @dummy - unused
  32 *
  33 * The detection of a frozen slot can occur inside an interrupt,
  34 * where it can be hard to do anything about it.  The goal of this
  35 * routine is to pull these detection events out of the context
  36 * of the interrupt handler, and re-dispatch them for processing
  37 * at a later time in a normal context.
  38 */
  39static int eeh_event_handler(void * dummy)
  40{
  41        unsigned long flags;
  42        struct eeh_event *event;
  43
  44        while (!kthread_should_stop()) {
  45                if (wait_for_completion_interruptible(&eeh_eventlist_event))
  46                        break;
  47
  48                /* Fetch EEH event from the queue */
  49                spin_lock_irqsave(&eeh_eventlist_lock, flags);
  50                event = NULL;
  51                if (!list_empty(&eeh_eventlist)) {
  52                        event = list_entry(eeh_eventlist.next,
  53                                           struct eeh_event, list);
  54                        list_del(&event->list);
  55                }
  56                spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
  57                if (!event)
  58                        continue;
  59
  60                /* We might have event without binding PE */
  61                if (event->pe)
  62                        eeh_handle_normal_event(event->pe);
  63                else
  64                        eeh_handle_special_event();
  65
  66                kfree(event);
  67        }
  68
  69        return 0;
  70}
  71
  72/**
  73 * eeh_event_init - Start kernel thread to handle EEH events
  74 *
  75 * This routine is called to start the kernel thread for processing
  76 * EEH event.
  77 */
  78int eeh_event_init(void)
  79{
  80        struct task_struct *t;
  81        int ret = 0;
  82
  83        t = kthread_run(eeh_event_handler, NULL, "eehd");
  84        if (IS_ERR(t)) {
  85                ret = PTR_ERR(t);
  86                pr_err("%s: Failed to start EEH daemon (%d)\n",
  87                        __func__, ret);
  88                return ret;
  89        }
  90
  91        return 0;
  92}
  93
  94/**
  95 * eeh_send_failure_event - Generate a PCI error event
  96 * @pe: EEH PE
  97 *
  98 * This routine can be called within an interrupt context;
  99 * the actual event will be delivered in a normal context
 100 * (from a workqueue).
 101 */
 102int __eeh_send_failure_event(struct eeh_pe *pe)
 103{
 104        unsigned long flags;
 105        struct eeh_event *event;
 106
 107        event = kzalloc(sizeof(*event), GFP_ATOMIC);
 108        if (!event) {
 109                pr_err("EEH: out of memory, event not handled\n");
 110                return -ENOMEM;
 111        }
 112        event->pe = pe;
 113
 114        /*
 115         * Mark the PE as recovering before inserting it in the queue.
 116         * This prevents the PE from being free()ed by a hotplug driver
 117         * while the PE is sitting in the event queue.
 118         */
 119        if (pe) {
 120#ifdef CONFIG_STACKTRACE
 121                /*
 122                 * Save the current stack trace so we can dump it from the
 123                 * event handler thread.
 124                 */
 125                pe->trace_entries = stack_trace_save(pe->stack_trace,
 126                                         ARRAY_SIZE(pe->stack_trace), 0);
 127#endif /* CONFIG_STACKTRACE */
 128
 129                eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 130        }
 131
 132        /* We may or may not be called in an interrupt context */
 133        spin_lock_irqsave(&eeh_eventlist_lock, flags);
 134        list_add(&event->list, &eeh_eventlist);
 135        spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 136
 137        /* For EEH deamon to knick in */
 138        complete(&eeh_eventlist_event);
 139
 140        return 0;
 141}
 142
 143int eeh_send_failure_event(struct eeh_pe *pe)
 144{
 145        /*
 146         * If we've manually supressed recovery events via debugfs
 147         * then just drop it on the floor.
 148         */
 149        if (eeh_debugfs_no_recover) {
 150                pr_err("EEH: Event dropped due to no_recover setting\n");
 151                return 0;
 152        }
 153
 154        return __eeh_send_failure_event(pe);
 155}
 156
 157/**
 158 * eeh_remove_event - Remove EEH event from the queue
 159 * @pe: Event binding to the PE
 160 * @force: Event will be removed unconditionally
 161 *
 162 * On PowerNV platform, we might have subsequent coming events
 163 * is part of the former one. For that case, those subsequent
 164 * coming events are totally duplicated and unnecessary, thus
 165 * they should be removed.
 166 */
 167void eeh_remove_event(struct eeh_pe *pe, bool force)
 168{
 169        unsigned long flags;
 170        struct eeh_event *event, *tmp;
 171
 172        /*
 173         * If we have NULL PE passed in, we have dead IOC
 174         * or we're sure we can report all existing errors
 175         * by the caller.
 176         *
 177         * With "force", the event with associated PE that
 178         * have been isolated, the event won't be removed
 179         * to avoid event lost.
 180         */
 181        spin_lock_irqsave(&eeh_eventlist_lock, flags);
 182        list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
 183                if (!force && event->pe &&
 184                    (event->pe->state & EEH_PE_ISOLATED))
 185                        continue;
 186
 187                if (!pe) {
 188                        list_del(&event->list);
 189                        kfree(event);
 190                } else if (pe->type & EEH_PE_PHB) {
 191                        if (event->pe && event->pe->phb == pe->phb) {
 192                                list_del(&event->list);
 193                                kfree(event);
 194                        }
 195                } else if (event->pe == pe) {
 196                        list_del(&event->list);
 197                        kfree(event);
 198                }
 199        }
 200        spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 201}
 202