linux/drivers/infiniband/hw/hfi1/fault.c
<<
>>
Prefs
   1/*
   2 * Copyright(c) 2018 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47#include <linux/debugfs.h>
  48#include <linux/seq_file.h>
  49#include <linux/kernel.h>
  50#include <linux/module.h>
  51#include <linux/types.h>
  52#include <linux/bitmap.h>
  53
  54#include "debugfs.h"
  55#include "fault.h"
  56#include "trace.h"
  57
  58#define HFI1_FAULT_DIR_TX   BIT(0) /* direction flag passed by hfi1_dbg_should_fault_tx() */
  59#define HFI1_FAULT_DIR_RX   BIT(1) /* direction flag passed by hfi1_dbg_should_fault_rx() */
  60#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX) /* both flags; initial value of fault->direction */
  61
  62static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
  63{
  64        struct hfi1_opcode_stats_perctx *opstats;
  65
  66        if (*pos >= ARRAY_SIZE(opstats->stats))
  67                return NULL;
  68        return pos;
  69}
  70
  71static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
  72{
  73        struct hfi1_opcode_stats_perctx *opstats;
  74
  75        ++*pos;
  76        if (*pos >= ARRAY_SIZE(opstats->stats))
  77                return NULL;
  78        return pos;
  79}
  80
  81static void _fault_stats_seq_stop(struct seq_file *s, void *v)
  82{
  83}
  84
  85static int _fault_stats_seq_show(struct seq_file *s, void *v)
  86{
  87        loff_t *spos = v;
  88        loff_t i = *spos, j;
  89        u64 n_packets = 0, n_bytes = 0;
  90        struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
  91        struct hfi1_devdata *dd = dd_from_dev(ibd);
  92        struct hfi1_ctxtdata *rcd;
  93
  94        for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
  95                rcd = hfi1_rcd_get_by_index(dd, j);
  96                if (rcd) {
  97                        n_packets += rcd->opstats->stats[i].n_packets;
  98                        n_bytes += rcd->opstats->stats[i].n_bytes;
  99                }
 100                hfi1_rcd_put(rcd);
 101        }
 102        for_each_possible_cpu(j) {
 103                struct hfi1_opcode_stats_perctx *sp =
 104                        per_cpu_ptr(dd->tx_opstats, j);
 105
 106                n_packets += sp->stats[i].n_packets;
 107                n_bytes += sp->stats[i].n_bytes;
 108        }
 109        if (!n_packets && !n_bytes)
 110                return SEQ_SKIP;
 111        if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
 112                return SEQ_SKIP;
 113        seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
 114                   (unsigned long long)n_packets,
 115                   (unsigned long long)n_bytes,
 116                   (unsigned long long)ibd->fault->n_rxfaults[i],
 117                   (unsigned long long)ibd->fault->n_txfaults[i]);
 118        return 0;
 119}
 120
 121DEBUGFS_SEQ_FILE_OPS(fault_stats); /* NOTE(review): macro defined outside this file; presumably wires up the _fault_stats_seq_* callbacks - confirm */
 122DEBUGFS_SEQ_FILE_OPEN(fault_stats); /* NOTE(review): presumably generates the matching open handler - confirm */
 123DEBUGFS_FILE_OPS(fault_stats); /* NOTE(review): presumably defines _fault_stats_file_ops, used by hfi1_fault_init_debugfs() - confirm */
 124
 125static int fault_opcodes_open(struct inode *inode, struct file *file)
 126{
 127        file->private_data = inode->i_private;
 128        return nonseekable_open(inode, file);
 129}
 130
 131static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
 132                                   size_t len, loff_t *pos)
 133{
 134        ssize_t ret = 0;
 135        /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
 136        size_t copy, datalen = 1280;
 137        char *data, *token, *ptr, *end;
 138        struct fault *fault = file->private_data;
 139
 140        data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
 141        if (!data)
 142                return -ENOMEM;
 143        copy = min(len, datalen - 1);
 144        if (copy_from_user(data, buf, copy)) {
 145                ret = -EFAULT;
 146                goto free_data;
 147        }
 148
 149        ret = debugfs_file_get(file->f_path.dentry);
 150        if (unlikely(ret))
 151                goto free_data;
 152        ptr = data;
 153        token = ptr;
 154        for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
 155                char *dash;
 156                unsigned long range_start, range_end, i;
 157                bool remove = false;
 158                unsigned long bound = 1U << BITS_PER_BYTE;
 159
 160                end = strchr(ptr, ',');
 161                if (end)
 162                        *end = '\0';
 163                if (token[0] == '-') {
 164                        remove = true;
 165                        token++;
 166                }
 167                dash = strchr(token, '-');
 168                if (dash)
 169                        *dash = '\0';
 170                if (kstrtoul(token, 0, &range_start))
 171                        break;
 172                if (dash) {
 173                        token = dash + 1;
 174                        if (kstrtoul(token, 0, &range_end))
 175                                break;
 176                } else {
 177                        range_end = range_start;
 178                }
 179                if (range_start == range_end && range_start == -1UL) {
 180                        bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
 181                                    BITS_PER_BYTE);
 182                        break;
 183                }
 184                /* Check the inputs */
 185                if (range_start >= bound || range_end >= bound)
 186                        break;
 187
 188                for (i = range_start; i <= range_end; i++) {
 189                        if (remove)
 190                                clear_bit(i, fault->opcodes);
 191                        else
 192                                set_bit(i, fault->opcodes);
 193                }
 194                if (!end)
 195                        break;
 196        }
 197        ret = len;
 198
 199        debugfs_file_put(file->f_path.dentry);
 200free_data:
 201        kfree(data);
 202        return ret;
 203}
 204
 205static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
 206                                  size_t len, loff_t *pos)
 207{
 208        ssize_t ret = 0;
 209        char *data;
 210        size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
 211        unsigned long bit = 0, zero = 0;
 212        struct fault *fault = file->private_data;
 213        size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
 214
 215        data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
 216        if (!data)
 217                return -ENOMEM;
 218        ret = debugfs_file_get(file->f_path.dentry);
 219        if (unlikely(ret))
 220                goto free_data;
 221        bit = find_first_bit(fault->opcodes, bitsize);
 222        while (bit < bitsize) {
 223                zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
 224                if (zero - 1 != bit)
 225                        size += scnprintf(data + size,
 226                                         datalen - size - 1,
 227                                         "0x%lx-0x%lx,", bit, zero - 1);
 228                else
 229                        size += scnprintf(data + size,
 230                                         datalen - size - 1, "0x%lx,",
 231                                         bit);
 232                bit = find_next_bit(fault->opcodes, bitsize, zero);
 233        }
 234        debugfs_file_put(file->f_path.dentry);
 235        data[size - 1] = '\n';
 236        data[size] = '\0';
 237        ret = simple_read_from_buffer(buf, len, pos, data, size);
 238free_data:
 239        kfree(data);
 240        return ret;
 241}
 242
 243static const struct file_operations __fault_opcodes_fops = {
 244        .owner = THIS_MODULE,
 245        .open = fault_opcodes_open,
 246        .read = fault_opcodes_read,
 247        .write = fault_opcodes_write,
 248        .llseek = no_llseek
 249};
 250
 251void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
 252{
 253        if (ibd->fault)
 254                debugfs_remove_recursive(ibd->fault->dir);
 255        kfree(ibd->fault);
 256        ibd->fault = NULL;
 257}
 258
 259int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
 260{
 261        struct dentry *parent = ibd->hfi1_ibdev_dbg;
 262        struct dentry *fault_dir;
 263
 264        ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
 265        if (!ibd->fault)
 266                return -ENOMEM;
 267
 268        ibd->fault->attr.interval = 1;
 269        ibd->fault->attr.require_end = ULONG_MAX;
 270        ibd->fault->attr.stacktrace_depth = 32;
 271        ibd->fault->attr.dname = NULL;
 272        ibd->fault->attr.verbose = 0;
 273        ibd->fault->enable = false;
 274        ibd->fault->opcode = false;
 275        ibd->fault->fault_skip = 0;
 276        ibd->fault->skip = 0;
 277        ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
 278        ibd->fault->suppress_err = false;
 279        bitmap_zero(ibd->fault->opcodes,
 280                    sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
 281
 282        fault_dir =
 283                fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
 284        if (IS_ERR(fault_dir)) {
 285                kfree(ibd->fault);
 286                ibd->fault = NULL;
 287                return -ENOENT;
 288        }
 289        ibd->fault->dir = fault_dir;
 290
 291        debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
 292                            &_fault_stats_file_ops);
 293        debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
 294        debugfs_create_bool("suppress_err", 0600, fault_dir,
 295                            &ibd->fault->suppress_err);
 296        debugfs_create_bool("opcode_mode", 0600, fault_dir,
 297                            &ibd->fault->opcode);
 298        debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
 299                            &__fault_opcodes_fops);
 300        debugfs_create_u64("skip_pkts", 0600, fault_dir,
 301                           &ibd->fault->fault_skip);
 302        debugfs_create_u64("skip_usec", 0600, fault_dir,
 303                           &ibd->fault->fault_skip_usec);
 304        debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
 305
 306        return 0;
 307}
 308
 309bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
 310{
 311        if (ibd->fault)
 312                return ibd->fault->suppress_err;
 313        return false;
 314}
 315
 316static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
 317                                u8 direction)
 318{
 319        bool ret = false;
 320
 321        if (!ibd->fault || !ibd->fault->enable)
 322                return false;
 323        if (!(ibd->fault->direction & direction))
 324                return false;
 325        if (ibd->fault->opcode) {
 326                if (bitmap_empty(ibd->fault->opcodes,
 327                                 (sizeof(ibd->fault->opcodes) *
 328                                  BITS_PER_BYTE)))
 329                        return false;
 330                if (!(test_bit(opcode, ibd->fault->opcodes)))
 331                        return false;
 332        }
 333        if (ibd->fault->fault_skip_usec &&
 334            time_before(jiffies, ibd->fault->skip_usec))
 335                return false;
 336        if (ibd->fault->fault_skip && ibd->fault->skip) {
 337                ibd->fault->skip--;
 338                return false;
 339        }
 340        ret = should_fail(&ibd->fault->attr, 1);
 341        if (ret) {
 342                ibd->fault->skip = ibd->fault->fault_skip;
 343                ibd->fault->skip_usec = jiffies +
 344                        usecs_to_jiffies(ibd->fault->fault_skip_usec);
 345        }
 346        return ret;
 347}
 348
 349bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
 350{
 351        struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
 352
 353        if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
 354                trace_hfi1_fault_opcode(qp, opcode);
 355                ibd->fault->n_txfaults[opcode]++;
 356                return true;
 357        }
 358        return false;
 359}
 360
 361bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
 362{
 363        struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
 364
 365        if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
 366                trace_hfi1_fault_packet(packet);
 367                ibd->fault->n_rxfaults[packet->opcode]++;
 368                return true;
 369        }
 370        return false;
 371}
 372