linux/drivers/net/ethernet/mellanox/mlx4/catas.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
   3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/workqueue.h>
  35#include <linux/module.h>
  36
  37#include "mlx4.h"
  38
  39enum {
  40        MLX4_CATAS_POLL_INTERVAL        = 5 * HZ,
  41};
  42
  43static DEFINE_SPINLOCK(catas_lock);
  44
  45static LIST_HEAD(catas_list);
  46static struct work_struct catas_work;
  47
  48static int internal_err_reset = 1;
  49module_param(internal_err_reset, int, 0644);
  50MODULE_PARM_DESC(internal_err_reset,
  51                 "Reset device on internal errors if non-zero"
  52                 " (default 1, in SRIOV mode default is 0)");
  53
  54static void dump_err_buf(struct mlx4_dev *dev)
  55{
  56        struct mlx4_priv *priv = mlx4_priv(dev);
  57
  58        int i;
  59
  60        mlx4_err(dev, "Internal error detected:\n");
  61        for (i = 0; i < priv->fw.catas_size; ++i)
  62                mlx4_err(dev, "  buf[%02x]: %08x\n",
  63                         i, swab32(readl(priv->catas_err.map + i)));
  64}
  65
  66static void poll_catas(unsigned long dev_ptr)
  67{
  68        struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
  69        struct mlx4_priv *priv = mlx4_priv(dev);
  70
  71        if (readl(priv->catas_err.map)) {
  72                /* If the device is off-line, we cannot try to recover it */
  73                if (pci_channel_offline(dev->pdev))
  74                        mod_timer(&priv->catas_err.timer,
  75                                  round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
  76                else {
  77                        dump_err_buf(dev);
  78                        mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
  79
  80                        if (internal_err_reset) {
  81                                spin_lock(&catas_lock);
  82                                list_add(&priv->catas_err.list, &catas_list);
  83                                spin_unlock(&catas_lock);
  84
  85                                queue_work(mlx4_wq, &catas_work);
  86                        }
  87                }
  88        } else
  89                mod_timer(&priv->catas_err.timer,
  90                          round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
  91}
  92
  93static void catas_reset(struct work_struct *work)
  94{
  95        struct mlx4_priv *priv, *tmppriv;
  96        struct mlx4_dev *dev;
  97
  98        LIST_HEAD(tlist);
  99        int ret;
 100
 101        spin_lock_irq(&catas_lock);
 102        list_splice_init(&catas_list, &tlist);
 103        spin_unlock_irq(&catas_lock);
 104
 105        list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
 106                struct pci_dev *pdev = priv->dev.pdev;
 107
 108                /* If the device is off-line, we cannot reset it */
 109                if (pci_channel_offline(pdev))
 110                        continue;
 111
 112                ret = mlx4_restart_one(priv->dev.pdev);
 113                /* 'priv' now is not valid */
 114                if (ret)
 115                        pr_err("mlx4 %s: Reset failed (%d)\n",
 116                               pci_name(pdev), ret);
 117                else {
 118                        dev  = pci_get_drvdata(pdev);
 119                        mlx4_dbg(dev, "Reset succeeded\n");
 120                }
 121        }
 122}
 123
 124void mlx4_start_catas_poll(struct mlx4_dev *dev)
 125{
 126        struct mlx4_priv *priv = mlx4_priv(dev);
 127        phys_addr_t addr;
 128
 129        /*If we are in SRIOV the default of the module param must be 0*/
 130        if (mlx4_is_mfunc(dev))
 131                internal_err_reset = 0;
 132
 133        INIT_LIST_HEAD(&priv->catas_err.list);
 134        init_timer(&priv->catas_err.timer);
 135        priv->catas_err.map = NULL;
 136
 137        addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
 138                priv->fw.catas_offset;
 139
 140        priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
 141        if (!priv->catas_err.map) {
 142                mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
 143                          (unsigned long long) addr);
 144                return;
 145        }
 146
 147        priv->catas_err.timer.data     = (unsigned long) dev;
 148        priv->catas_err.timer.function = poll_catas;
 149        priv->catas_err.timer.expires  =
 150                round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
 151        add_timer(&priv->catas_err.timer);
 152}
 153
 154void mlx4_stop_catas_poll(struct mlx4_dev *dev)
 155{
 156        struct mlx4_priv *priv = mlx4_priv(dev);
 157
 158        del_timer_sync(&priv->catas_err.timer);
 159
 160        if (priv->catas_err.map)
 161                iounmap(priv->catas_err.map);
 162
 163        spin_lock_irq(&catas_lock);
 164        list_del(&priv->catas_err.list);
 165        spin_unlock_irq(&catas_lock);
 166}
 167
 168void  __init mlx4_catas_init(void)
 169{
 170        INIT_WORK(&catas_work, catas_reset);
 171}
 172