linux/drivers/staging/lustre/lnet/libcfs/tracefile.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/tracefile.c
 *
 * Author: Zach Brown <zab@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LNET
#define LUSTRE_TRACEFILE_PRIVATE
#define pr_fmt(fmt) "Lustre: " fmt
#include "tracefile.h"

#include <linux/libcfs/libcfs.h>

/* XXX move things up to the top, comment */
union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;

char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DEFINE_MUTEX(cfs_trace_thread_mutex);
static int thread_running;

static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);

struct page_collection {
        struct list_head        pc_pages;
        /*
         * if this flag is set, collect_pages() will spill both
         * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
         * only ->tcd_pages are spilled.
         */
        int                     pc_want_daemon_pages;
};

struct tracefiled_ctl {
        struct completion       tctl_start;
        struct completion       tctl_stop;
        wait_queue_head_t       tctl_waitq;
        pid_t                   tctl_pid;
        atomic_t                tctl_shutdown;
};

/*
 * small data-structure for each page owned by tracefiled.
 */
struct cfs_trace_page {
        /*
         * page itself
         */
        struct page             *page;
        /*
         * linkage into one of the lists in trace_data_union or
         * page_collection
         */
        struct list_head        linkage;
        /*
         * number of bytes used within this page
         */
        unsigned int            used;
        /*
         * cpu that owns this page
         */
        unsigned short          cpu;
        /*
         * type(context) of this page
         */
        unsigned short          type;
};

static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
                                         struct cfs_trace_cpu_data *tcd);

static inline struct cfs_trace_page *
cfs_tage_from_list(struct list_head *list)
{
        return list_entry(list, struct cfs_trace_page, linkage);
}

static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
{
        struct page *page;
        struct cfs_trace_page *tage;

        /* My caller is trying to free memory */
        if (!in_interrupt() && memory_pressure_get())
                return NULL;

        /*
         * Don't spam console with allocation failures: they will be reported
         * by upper layer anyway.
         */
        gfp |= __GFP_NOWARN;
        page = alloc_page(gfp);
        if (!page)
                return NULL;

        tage = kmalloc(sizeof(*tage), gfp);
        if (!tage) {
                __free_page(page);
                return NULL;
        }

        tage->page = page;
        atomic_inc(&cfs_tage_allocated);
        return tage;
}

static void cfs_tage_free(struct cfs_trace_page *tage)
{
        __free_page(tage->page);
        kfree(tage);
        atomic_dec(&cfs_tage_allocated);
}

static void cfs_tage_to_tail(struct cfs_trace_page *tage,
                             struct list_head *queue)
{
        list_move_tail(&tage->linkage, queue);
}

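/*
 * Allocate pages onto @stock until, together with the pages already
 * stocked on @tcd, TCD_STOCK_PAGES would be reached.  Returns the
 * number of pages added; the stock lets cfs_trace_get_tage_try() hand
 * out pages later from contexts where allocation is unsafe.
 */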
int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
                           struct list_head *stock)
{
        int i;

        /*
         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
         * from here: this will lead to infinite recursion.
         */

        for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES; ++i) {
                struct cfs_trace_page *tage;

                tage = cfs_tage_alloc(gfp);
                if (!tage)
                        break;
                list_add_tail(&tage->linkage, stock);
        }
        return i;
}

/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
        struct cfs_trace_page *tage;

        if (tcd->tcd_cur_pages > 0) {
                __LASSERT(!list_empty(&tcd->tcd_pages));
                tage = cfs_tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= PAGE_SIZE)
                        return tage;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                if (tcd->tcd_cur_stock_pages > 0) {
                        tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
                        --tcd->tcd_cur_stock_pages;
                        list_del_init(&tage->linkage);
                } else {
                        tage = cfs_tage_alloc(GFP_ATOMIC);
                        if (unlikely(!tage)) {
                                if (!memory_pressure_get() || in_interrupt())
                                        pr_warn_ratelimited("cannot allocate a tage (%ld)\n",
                                                            tcd->tcd_cur_pages);
                                return NULL;
                        }
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                tage->type = tcd->tcd_type;
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        /*
                         * wake up tracefiled to process some pages.
                         */
                        wake_up(&tctl->tctl_waitq);
                }
                return tage;
        }
        return NULL;
}

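/*
 * Discard the oldest ~10% of this CPU's trace pages once the buffer is
 * full; the victims are pushed onto the daemon list so that an LBUG
 * dump can still reach them.
 */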
static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
{
        int pgcount = tcd->tcd_cur_pages / 10;
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        /*
         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
         * from here: this will lead to infinite recursion.
         */

        pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
                            pgcount + 1, tcd->tcd_cur_pages);

        INIT_LIST_HEAD(&pc.pc_pages);

        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                if (!pgcount--)
                        break;

                list_move_tail(&tage->linkage, &pc.pc_pages);
                tcd->tcd_cur_pages--;
        }
        put_pages_on_tcd_daemon_list(&pc, tcd);
}

/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
                                                 unsigned long len)
{
        struct cfs_trace_page *tage;

        /*
         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
         * from here: this will lead to infinite recursion.
         */

        if (len > PAGE_SIZE) {
                pr_err("cowardly refusing to write %lu bytes in a page\n", len);
                return NULL;
        }

        tage = cfs_trace_get_tage_try(tcd, len);
        if (tage)
                return tage;
        if (thread_running)
                cfs_tcd_shrink(tcd);
        if (tcd->tcd_cur_pages > 0) {
                tage = cfs_tage_from_list(tcd->tcd_pages.next);
                tage->used = 0;
                cfs_tage_to_tail(tage, &tcd->tcd_pages);
        }
        return tage;
}

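/*
 * Typical usage (sketch): callers rarely invoke libcfs_debug_msg()
 * directly; they go through the CDEBUG()/CERROR() macros from
 * libcfs_debug.h, which fill in a struct libcfs_debug_msg_data with
 * the current file, function, line and debug mask, roughly:
 *
 *      LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_NET, NULL);
 *      libcfs_debug_msg(&msgdata, "sending %d bytes\n", nob);
 */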
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
                     const char *format, ...)
{
        va_list args;
        int rc;

        va_start(args, format);
        rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
        va_end(args);

        return rc;
}
EXPORT_SYMBOL(libcfs_debug_msg);

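/*
 * Format a message into the current CPU's trace buffer and, when the
 * mask and the rate limiter in @msgdata->msg_cdls allow it, echo the
 * message to the console.  @format1 consumes @args; @format2, if
 * non-NULL, is formatted with this function's own varargs and appended
 * to the same record.  Returns 0 when the message was also sent to the
 * console, 1 when console output was suppressed or not requested.
 */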
int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
                       const char *format1, va_list args,
                       const char *format2, ...)
{
        struct cfs_trace_cpu_data *tcd = NULL;
        struct ptldebug_header header = { 0 };
        struct cfs_trace_page *tage;
        /* string_buf is used only if tcd != NULL, and is always set then */
        char *string_buf = NULL;
        char *debug_buf;
        int known_size;
        int needed = 85; /* average message length */
        int max_nob;
        va_list ap;
        int depth;
        int i;
        int remain;
        int mask = msgdata->msg_mask;
        const char *file = kbasename(msgdata->msg_file);
        struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;

        tcd = cfs_trace_get_tcd();

        /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
         * pins us to a particular CPU.  This avoids an smp_processor_id()
         * warning on Linux when debugging is enabled.
         */
        cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());

        if (!tcd)               /* arch may not log in IRQ context */
                goto console;

        if (!tcd->tcd_cur_pages)
                header.ph_flags |= PH_FLAG_FIRST_RECORD;

        if (tcd->tcd_shutting_down) {
                cfs_trace_put_tcd(tcd);
                tcd = NULL;
                goto console;
        }

        depth = __current_nesting_level();
        known_size = strlen(file) + 1 + depth;
        if (msgdata->msg_fn)
                known_size += strlen(msgdata->msg_fn) + 1;

        if (libcfs_debug_binary)
                known_size += sizeof(header);

        /*
         * Two passes are made because vsnprintf() returns the size the
         * output would need _without_ the terminating NUL, so a second
         * attempt is required if 'needed' turns out to be too small for
         * this format.
         */
        for (i = 0; i < 2; i++) {
                tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
                if (!tage) {
                        if (needed + known_size > PAGE_SIZE)
                                mask |= D_ERROR;

                        cfs_trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                string_buf = (char *)page_address(tage->page) +
                                        tage->used + known_size;

                max_nob = PAGE_SIZE - tage->used - known_size;
                if (max_nob <= 0) {
                        pr_emerg("negative max_nob: %d\n", max_nob);
                        mask |= D_ERROR;
                        cfs_trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                needed = 0;
                if (format1) {
                        va_copy(ap, args);
                        needed = vsnprintf(string_buf, max_nob, format1, ap);
                        va_end(ap);
                }

                if (format2) {
                        remain = max_nob - needed;
                        if (remain < 0)
                                remain = 0;

                        va_start(ap, format2);
                        needed += vsnprintf(string_buf + needed, remain,
                                            format2, ap);
                        va_end(ap);
                }

                if (needed < max_nob) /* well. printing ok.. */
                        break;
        }

        if (needed > 0 && *(string_buf + needed - 1) != '\n')
                pr_info("format at %s:%d:%s doesn't end in newline\n", file,
                        msgdata->msg_line, msgdata->msg_fn);
        header.ph_len = known_size + needed;
        debug_buf = (char *)page_address(tage->page) + tage->used;

        if (libcfs_debug_binary) {
                memcpy(debug_buf, &header, sizeof(header));
                tage->used += sizeof(header);
                debug_buf += sizeof(header);
        }

        /* indent message according to the nesting level */
        while (depth-- > 0) {
                *(debug_buf++) = '.';
                ++tage->used;
        }

        strcpy(debug_buf, file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        if (msgdata->msg_fn) {
                strcpy(debug_buf, msgdata->msg_fn);
                tage->used += strlen(msgdata->msg_fn) + 1;
                debug_buf += strlen(msgdata->msg_fn) + 1;
        }

        __LASSERT(debug_buf == string_buf);

        tage->used += needed;
        __LASSERT(tage->used <= PAGE_SIZE);

console:
        if (!(mask & libcfs_printk)) {
                /* no console output requested */
                if (tcd)
                        cfs_trace_put_tcd(tcd);
                return 1;
        }

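        /*
         * Console rate limiting: while messages keep arriving, the delay
         * grows by a factor of libcfs_console_backoff; once more than
         * libcfs_console_max_delay plus ten seconds pass quietly, it
         * shrinks by a factor of 4 * libcfs_console_backoff.  The result
         * is clamped to [libcfs_console_min_delay, libcfs_console_max_delay].
         */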
        if (cdls) {
                if (libcfs_console_ratelimit &&
                    cdls->cdls_next &&          /* not first time ever */
                    !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
                        /* skipping a console message */
                        cdls->cdls_count++;
                        if (tcd)
                                cfs_trace_put_tcd(tcd);
                        return 1;
                }

                if (cfs_time_after(cfs_time_current(),
                                   cdls->cdls_next + libcfs_console_max_delay +
                                   cfs_time_seconds(10))) {
                        /* last timeout was a long time ago */
                        cdls->cdls_delay /= libcfs_console_backoff * 4;
                } else {
                        cdls->cdls_delay *= libcfs_console_backoff;
                }

                if (cdls->cdls_delay < libcfs_console_min_delay)
                        cdls->cdls_delay = libcfs_console_min_delay;
                else if (cdls->cdls_delay > libcfs_console_max_delay)
                        cdls->cdls_delay = libcfs_console_max_delay;

                /* ensure cdls_next is never zero after it's been seen */
                cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
        }

        if (tcd) {
                cfs_print_to_console(&header, mask, string_buf, needed, file,
                                     msgdata->msg_fn);
                cfs_trace_put_tcd(tcd);
        } else {
                string_buf = cfs_trace_get_console_buffer();

                needed = 0;
                if (format1) {
                        va_copy(ap, args);
                        needed = vsnprintf(string_buf,
                                           CFS_TRACE_CONSOLE_BUFFER_SIZE,
                                           format1, ap);
                        va_end(ap);
                }
                if (format2) {
                        remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
                        if (remain > 0) {
                                va_start(ap, format2);
                                needed += vsnprintf(string_buf + needed, remain,
                                                    format2, ap);
                                va_end(ap);
                        }
                }
                cfs_print_to_console(&header, mask,
                                     string_buf, needed, file, msgdata->msg_fn);

                put_cpu();
        }

        if (cdls && cdls->cdls_count) {
                string_buf = cfs_trace_get_console_buffer();

                needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
                                  "Skipped %d previous similar message%s\n",
                                  cdls->cdls_count,
                                  (cdls->cdls_count > 1) ? "s" : "");

                cfs_print_to_console(&header, mask,
                                     string_buf, needed, file, msgdata->msg_fn);

                put_cpu();
                cdls->cdls_count = 0;
        }

        return 0;
}
EXPORT_SYMBOL(libcfs_debug_vmsg2);
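
/*
 * Each record written above is laid out in its trace page as a struct
 * ptldebug_header, one '.' per nesting level, the NUL-terminated file
 * name and (if present) function name, then the formatted message
 * text; header.ph_len covers the whole record.  cfs_trace_debug_print()
 * below parses records with this layout (the indentation dots, when
 * present, end up folded into the file-name string).
 */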

void
cfs_trace_assertion_failed(const char *str,
                           struct libcfs_debug_msg_data *msgdata)
{
        struct ptldebug_header hdr;

        libcfs_panic_in_progress = 1;
        libcfs_catastrophe = 1;
        mb();

        cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());

        cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
                             msgdata->msg_file, msgdata->msg_fn);

        panic("Lustre debug assertion failure\n");

        /* not reached */
}

static void
panic_collect_pages(struct page_collection *pc)
{
        /* Do the collect_pages job on a single CPU: assumes that all other
         * CPUs have been stopped during a panic.  If this isn't true for some
         * arch, this will have to be implemented separately in each arch.
         */
        struct cfs_trace_cpu_data *tcd;
        int i;
        int j;

        INIT_LIST_HEAD(&pc->pc_pages);

        cfs_tcd_for_each(tcd, i, j) {
                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                tcd->tcd_cur_pages = 0;

                if (pc->pc_want_daemon_pages) {
                        list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
                        tcd->tcd_cur_daemon_pages = 0;
                }
        }
}

static void collect_pages_on_all_cpus(struct page_collection *pc)
{
        struct cfs_trace_cpu_data *tcd;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                        tcd->tcd_cur_pages = 0;
                        if (pc->pc_want_daemon_pages) {
                                list_splice_init(&tcd->tcd_daemon_pages,
                                                 &pc->pc_pages);
                                tcd->tcd_cur_daemon_pages = 0;
                        }
                }
        }
}

static void collect_pages(struct page_collection *pc)
{
        INIT_LIST_HEAD(&pc->pc_pages);

        if (libcfs_panic_in_progress)
                panic_collect_pages(pc);
        else
                collect_pages_on_all_cpus(pc);
}

static void put_pages_back_on_all_cpus(struct page_collection *pc)
{
        struct cfs_trace_cpu_data *tcd;
        struct list_head *cur_head;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        cur_head = tcd->tcd_pages.next;

                        list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
                                                 linkage) {
                                __LASSERT_TAGE_INVARIANT(tage);

                                if (tage->cpu != cpu || tage->type != i)
                                        continue;

                                cfs_tage_to_tail(tage, cur_head);
                                tcd->tcd_cur_pages++;
                        }
                }
        }
}

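/*
 * Return unwritten pages collected by collect_pages() to their owning
 * per-CPU lists so no trace data is lost after a short write.
 */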
static void put_pages_back(struct page_collection *pc)
{
        if (!libcfs_panic_in_progress)
                put_pages_back_on_all_cpus(pc);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon.
 */
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
                                         struct cfs_trace_cpu_data *tcd)
{
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
                __LASSERT_TAGE_INVARIANT(tage);

                if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
                        continue;

                cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct cfs_trace_page *victim;

                        __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);

                        __LASSERT_TAGE_INVARIANT(victim);

                        list_del(&victim->linkage);
                        cfs_tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
        struct cfs_trace_cpu_data *tcd;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu)
                        put_pages_on_tcd_daemon_list(pc, tcd);
        }
}

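/*
 * Dump every collected trace page straight to the console; used on
 * panic/LBUG paths where writing the log to a file is not possible.
 */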
void cfs_trace_debug_print(void)
{
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                struct page *page;

                __LASSERT_TAGE_INVARIANT(tage);

                page = tage->page;
                p = page_address(page);
                while (p < ((char *)page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;

                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        len = hdr->ph_len - (int)(p - (char *)hdr);

                        cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);

                        p += len;
                }

                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
}

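/*
 * Collect every trace page (including the daemon lists) and write them
 * synchronously to @filename; used for debug dumps requested from user
 * space.  Returns 0 on success or a negative errno.
 */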
int cfs_tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        struct file *filp;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        char *buf;
        mm_segment_t __oldfs;
        int rc;

        cfs_tracefile_write_lock();

        filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
                         0600);
        if (IS_ERR(filp)) {
                rc = PTR_ERR(filp);
                filp = NULL;
                pr_err("LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }
        __oldfs = get_fs();
        set_fs(get_ds());

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO
         */
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                __LASSERT_TAGE_INVARIANT(tage);

                buf = kmap(tage->page);
                rc = kernel_write(filp, buf, tage->used, &filp->f_pos);
                kunmap(tage->page);

                if (rc != (int)tage->used) {
                        pr_warn("wanted to write %u but wrote %d\n", tage->used,
                                rc);
                        put_pages_back(&pc);
                        __LASSERT(list_empty(&pc.pc_pages));
                        break;
                }
                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
        set_fs(__oldfs);
        rc = vfs_fsync(filp, 1);
        if (rc)
                pr_err("sync returns %d\n", rc);
close:
        filp_close(filp, NULL);
out:
        cfs_tracefile_write_unlock();
        return rc;
}

void cfs_trace_flush_pages(void)
{
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                __LASSERT_TAGE_INVARIANT(tage);

                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
}

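/*
 * Copy a string in from user space, strip trailing whitespace and
 * NUL-terminate it.  Returns 0 on success, -EOVERFLOW when the user
 * buffer does not fit in the kernel buffer, -EINVAL when the stripped
 * string is empty, or -EFAULT on a faulting copy.
 */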
int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
                            const char __user *usr_buffer, int usr_buffer_nob)
{
        int nob;

        if (usr_buffer_nob > knl_buffer_nob)
                return -EOVERFLOW;

        if (copy_from_user((void *)knl_buffer,
                           usr_buffer, usr_buffer_nob))
                return -EFAULT;

        nob = strnlen(knl_buffer, usr_buffer_nob);
        while (--nob >= 0)                      /* strip trailing whitespace */
                if (!isspace(knl_buffer[nob]))
                        break;

        if (nob < 0)                            /* empty string */
                return -EINVAL;

        if (nob == knl_buffer_nob)              /* no space to terminate */
                return -EOVERFLOW;

        knl_buffer[nob + 1] = 0;                /* terminate */
        return 0;
}
EXPORT_SYMBOL(cfs_trace_copyin_string);

int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
                             const char *knl_buffer, char *append)
{
        /*
         * NB if 'append' != NULL, it's a single character to append to the
         * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
         */
        int nob = strlen(knl_buffer);

        if (nob > usr_buffer_nob)
                nob = usr_buffer_nob;

        if (copy_to_user(usr_buffer, knl_buffer, nob))
                return -EFAULT;

        if (append && nob < usr_buffer_nob) {
                if (copy_to_user(usr_buffer + nob, append, 1))
                        return -EFAULT;

                nob++;
        }

        return nob;
}
EXPORT_SYMBOL(cfs_trace_copyout_string);

int cfs_trace_allocate_string_buffer(char **str, int nob)
{
        if (nob > 2 * PAGE_SIZE)                /* string must be "sensible" */
                return -EINVAL;

        *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
        if (!*str)
                return -ENOMEM;

        return 0;
}

int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
{
        char *str;
        int rc;

        rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
        if (rc)
                return rc;

        rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
                                     usr_str, usr_str_nob);
        if (rc)
                goto out;

        if (str[0] != '/') {
                rc = -EINVAL;
                goto out;
        }
        rc = cfs_tracefile_dump_all_pages(str);
out:
        kfree(str);
        return rc;
}

int cfs_trace_daemon_command(char *str)
{
        int rc = 0;

        cfs_tracefile_write_lock();

        if (!strcmp(str, "stop")) {
                cfs_tracefile_write_unlock();
                cfs_trace_stop_thread();
                cfs_tracefile_write_lock();
                memset(cfs_tracefile, 0, sizeof(cfs_tracefile));

        } else if (!strncmp(str, "size=", 5)) {
                unsigned long tmp;

                rc = kstrtoul(str + 5, 10, &tmp);
                if (!rc) {
                        if (tmp < 10 || tmp > 20480)
                                cfs_tracefile_size = CFS_TRACEFILE_SIZE;
                        else
                                cfs_tracefile_size = tmp << 20;
                }
        } else if (strlen(str) >= sizeof(cfs_tracefile)) {
                rc = -ENAMETOOLONG;
        } else if (str[0] != '/') {
                rc = -EINVAL;
        } else {
                strcpy(cfs_tracefile, str);

                pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n",
                        cfs_tracefile,
                        (unsigned long)(cfs_tracefile_size >> 10));

                cfs_trace_start_thread();
        }

        cfs_tracefile_write_unlock();
        return rc;
}
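
/*
 * Commands accepted above (illustrative examples):
 *
 *      cfs_trace_daemon_command("/tmp/lustre-log");    start the daemon
 *      cfs_trace_daemon_command("size=64");            64MB tracefile cap
 *      cfs_trace_daemon_command("stop");               stop the daemon
 */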

int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
{
        char *str;
        int rc;

        rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
        if (rc)
                return rc;

        rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
                                     usr_str, usr_str_nob);
        if (!rc)
                rc = cfs_trace_daemon_command(str);

        kfree(str);
        return rc;
}

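/*
 * Resize the trace buffers to @mb megabytes in total: the budget is
 * divided evenly across the possible CPUs and then among the TCD types
 * according to each type's tcd_pages_factor, after clamping @mb to
 * [num_possible_cpus(), cfs_trace_max_debug_mb()].
 */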
int cfs_trace_set_debug_mb(int mb)
{
        int i;
        int j;
        int pages;
        int limit = cfs_trace_max_debug_mb();
        struct cfs_trace_cpu_data *tcd;

        if (mb < num_possible_cpus()) {
                pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n",
                        mb, num_possible_cpus());
                mb = num_possible_cpus();
        }

        if (mb > limit) {
                pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n",
                        mb, limit);
                mb = limit;
        }

        mb /= num_possible_cpus();
        pages = mb << (20 - PAGE_SHIFT);

        cfs_tracefile_write_lock();

        cfs_tcd_for_each(tcd, i, j)
                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;

        cfs_tracefile_write_unlock();

        return 0;
}

int cfs_trace_get_debug_mb(void)
{
        int i;
        int j;
        struct cfs_trace_cpu_data *tcd;
        int total_pages = 0;

        cfs_tracefile_read_lock();

        cfs_tcd_for_each(tcd, i, j)
                total_pages += tcd->tcd_max_pages;

        cfs_tracefile_read_unlock();

        return (total_pages >> (20 - PAGE_SHIFT)) + 1;
}

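/*
 * Main loop of the debug daemon: once a second, or sooner when woken
 * by cfs_trace_get_tage_try(), collect the per-CPU trace pages and
 * append them to cfs_tracefile, wrapping the file offset once it
 * reaches cfs_tracefile_size.  Written pages are recycled onto the
 * per-CPU daemon lists; on a short write the rest are put back where
 * they came from.
 */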
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        struct file *filp;
        char *buf;
        int last_loop = 0;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */

        complete(&tctl->tctl_start);

        while (1) {
                wait_queue_entry_t __wait;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        goto end_loop;

                filp = NULL;
                cfs_tracefile_read_lock();
                if (cfs_tracefile[0]) {
                        filp = filp_open(cfs_tracefile,
                                         O_CREAT | O_RDWR | O_LARGEFILE,
                                         0600);
                        if (IS_ERR(filp)) {
                                rc = PTR_ERR(filp);
                                filp = NULL;
                                pr_warn("couldn't open %s: %d\n", cfs_tracefile,
                                        rc);
                        }
                }
                cfs_tracefile_read_unlock();
                if (!filp) {
                        put_pages_on_daemon_list(&pc);
                        __LASSERT(list_empty(&pc.pc_pages));
                        goto end_loop;
                }

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        static loff_t f_pos;

                        __LASSERT_TAGE_INVARIANT(tage);

                        if (f_pos >= (off_t)cfs_tracefile_size)
                                f_pos = 0;
                        else if (f_pos > i_size_read(file_inode(filp)))
                                f_pos = i_size_read(file_inode(filp));

                        buf = kmap(tage->page);
                        rc = kernel_write(filp, buf, tage->used, &f_pos);
                        kunmap(tage->page);

                        if (rc != (int)tage->used) {
                                pr_warn("wanted to write %u but wrote %d\n",
                                        tage->used, rc);
                                put_pages_back(&pc);
                                __LASSERT(list_empty(&pc.pc_pages));
                                break;
                        }
                }

                filp_close(filp, NULL);
                put_pages_on_daemon_list(&pc);
                if (!list_empty(&pc.pc_pages)) {
                        int i;

                        pr_alert("trace pages aren't empty\n");
                        pr_err("total cpus(%d): ", num_possible_cpus());
                        for (i = 0; i < num_possible_cpus(); i++)
                                if (cpu_online(i))
                                        pr_cont("%d(on) ", i);
                                else
                                        pr_cont("%d(off) ", i);
                        pr_cont("\n");

                        i = 0;
                        list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
                                                 linkage)
                                pr_err("page %d belongs to cpu %d\n",
                                       ++i, tage->cpu);
                        pr_err("There are %d pages unwritten\n", i);
                }
                __LASSERT(list_empty(&pc.pc_pages));
end_loop:
                if (atomic_read(&tctl->tctl_shutdown)) {
                        if (!last_loop) {
                                last_loop = 1;
                                continue;
                        } else {
                                break;
                        }
                }
                init_waitqueue_entry(&__wait, current);
                add_wait_queue(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(cfs_time_seconds(1));
                remove_wait_queue(&tctl->tctl_waitq, &__wait);
        }
        complete(&tctl->tctl_stop);
        return 0;
}

int cfs_trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        struct task_struct *task;
        int rc = 0;

        mutex_lock(&cfs_trace_thread_mutex);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        init_waitqueue_head(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        task = kthread_run(tracefiled, tctl, "ktracefiled");
        if (IS_ERR(task)) {
                rc = PTR_ERR(task);
                goto out;
        }

        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        mutex_unlock(&cfs_trace_thread_mutex);
        return rc;
}

void cfs_trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        mutex_lock(&cfs_trace_thread_mutex);
        if (thread_running) {
                pr_info("shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        mutex_unlock(&cfs_trace_thread_mutex);
}

int cfs_tracefile_init(int max_pages)
{
        struct cfs_trace_cpu_data *tcd;
        int i;
        int j;
        int rc;
        int factor;

        rc = cfs_tracefile_init_arch();
        if (rc)
                return rc;

        cfs_tcd_for_each(tcd, i, j) {
                /* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
                factor = tcd->tcd_pages_factor;
                INIT_LIST_HEAD(&tcd->tcd_pages);
                INIT_LIST_HEAD(&tcd->tcd_stock_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_stock_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = (max_pages * factor) / 100;
                LASSERT(tcd->tcd_max_pages > 0);
                tcd->tcd_shutting_down = 0;
        }

        return 0;
}

static void trace_cleanup_on_all_cpus(void)
{
        struct cfs_trace_cpu_data *tcd;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        tcd->tcd_shutting_down = 1;

                        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
                                                 linkage) {
                                __LASSERT_TAGE_INVARIANT(tage);

                                list_del(&tage->linkage);
                                cfs_tage_free(tage);
                        }

                        tcd->tcd_cur_pages = 0;
                }
        }
}

static void cfs_trace_cleanup(void)
{
        struct page_collection pc;

        INIT_LIST_HEAD(&pc.pc_pages);

        trace_cleanup_on_all_cpus();

        cfs_tracefile_fini_arch();
}

void cfs_tracefile_exit(void)
{
        cfs_trace_stop_thread();
        cfs_trace_cleanup();
}