/* linux/drivers/staging/lustre/lustre/libcfs/tracefile.c */
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  19 *
  20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  21 * CA 95054 USA or visit www.sun.com if you need additional information or
  22 * have any questions.
  23 *
  24 * GPL HEADER END
  25 */
  26/*
  27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  28 * Use is subject to license terms.
  29 *
  30 * Copyright (c) 2012, Intel Corporation.
  31 */
  32/*
  33 * This file is part of Lustre, http://www.lustre.org/
  34 * Lustre is a trademark of Sun Microsystems, Inc.
  35 *
  36 * libcfs/libcfs/tracefile.c
  37 *
  38 * Author: Zach Brown <zab@clusterfs.com>
  39 * Author: Phil Schwan <phil@clusterfs.com>
  40 */
  41
  42
  43#define DEBUG_SUBSYSTEM S_LNET
  44#define LUSTRE_TRACEFILE_PRIVATE
  45#include "tracefile.h"
  46
  47#include <linux/libcfs/libcfs.h>
  48
/* XXX move things up to the top, comment */
/* One per-CPU array of trace data per TCD type (indexed
 * [type][cpu]); __cacheline_aligned to avoid false sharing. */
union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;

/* Path the trace daemon writes to; cleared when the daemon is stopped. */
char cfs_tracefile[TRACEFILE_NAME_SIZE];
/* Upper bound, in bytes, for the trace file written by the daemon. */
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
/* Control block (wait queue etc.) for the tracefiled daemon thread. */
static struct tracefiled_ctl trace_tctl;
/* Serializes starting/stopping of the tracefiled daemon thread. */
struct mutex cfs_trace_thread_mutex;
/* Non-zero while the tracefiled daemon thread is running. */
static int thread_running = 0;

/* Count of cfs_trace_page structures currently allocated. */
atomic_t cfs_tage_allocated = ATOMIC_INIT(0);

static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
                                         struct cfs_trace_cpu_data *tcd);
  62
  63static inline struct cfs_trace_page *
  64cfs_tage_from_list(struct list_head *list)
  65{
  66        return list_entry(list, struct cfs_trace_page, linkage);
  67}
  68
  69static struct cfs_trace_page *cfs_tage_alloc(int gfp)
  70{
  71        struct page         *page;
  72        struct cfs_trace_page *tage;
  73
  74        /* My caller is trying to free memory */
  75        if (!in_interrupt() && memory_pressure_get())
  76                return NULL;
  77
  78        /*
  79         * Don't spam console with allocation failures: they will be reported
  80         * by upper layer anyway.
  81         */
  82        gfp |= __GFP_NOWARN;
  83        page = alloc_page(gfp);
  84        if (page == NULL)
  85                return NULL;
  86
  87        tage = kmalloc(sizeof(*tage), gfp);
  88        if (tage == NULL) {
  89                __free_page(page);
  90                return NULL;
  91        }
  92
  93        tage->page = page;
  94        atomic_inc(&cfs_tage_allocated);
  95        return tage;
  96}
  97
  98static void cfs_tage_free(struct cfs_trace_page *tage)
  99{
 100        __LASSERT(tage != NULL);
 101        __LASSERT(tage->page != NULL);
 102
 103        __free_page(tage->page);
 104        kfree(tage);
 105        atomic_dec(&cfs_tage_allocated);
 106}
 107
 108static void cfs_tage_to_tail(struct cfs_trace_page *tage,
 109                             struct list_head *queue)
 110{
 111        __LASSERT(tage != NULL);
 112        __LASSERT(queue != NULL);
 113
 114        list_move_tail(&tage->linkage, queue);
 115}
 116
 117int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, int gfp,
 118                           struct list_head *stock)
 119{
 120        int i;
 121
 122        /*
 123         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
 124         * from here: this will lead to infinite recursion.
 125         */
 126
 127        for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
 128                struct cfs_trace_page *tage;
 129
 130                tage = cfs_tage_alloc(gfp);
 131                if (tage == NULL)
 132                        break;
 133                list_add_tail(&tage->linkage, stock);
 134        }
 135        return i;
 136}
 137
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
	struct cfs_trace_page *tage;

	/* Reuse the most recently filled page if it still has room. */
	if (tcd->tcd_cur_pages > 0) {
		__LASSERT(!list_empty(&tcd->tcd_pages));
		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
		if (tage->used + len <= PAGE_CACHE_SIZE)
			return tage;
	}

	if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
		/* Prefer a page from the pre-allocated stock; otherwise
		 * fall back to an atomic allocation. */
		if (tcd->tcd_cur_stock_pages > 0) {
			tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
			--tcd->tcd_cur_stock_pages;
			list_del_init(&tage->linkage);
		} else {
			tage = cfs_tage_alloc(GFP_ATOMIC);
			if (unlikely(tage == NULL)) {
				/* Stay quiet under memory pressure (unless in
				 * interrupt) and rate-limit the warning. */
				if ((!memory_pressure_get() ||
				     in_interrupt()) && printk_ratelimit())
					printk(KERN_WARNING
					       "cannot allocate a tage (%ld)\n",
					       tcd->tcd_cur_pages);
				return NULL;
			}
		}

		tage->used = 0;
		tage->cpu = smp_processor_id();
		tage->type = tcd->tcd_type;
		list_add_tail(&tage->linkage, &tcd->tcd_pages);
		tcd->tcd_cur_pages++;

		if (tcd->tcd_cur_pages > 8 && thread_running) {
			struct tracefiled_ctl *tctl = &trace_tctl;
			/*
			 * wake up tracefiled to process some pages.
			 */
			wake_up(&tctl->tctl_waitq);
		}
		return tage;
	}
	/* buffer is full: caller must shrink or recycle */
	return NULL;
}
 185
/* Buffer overflow handling: move the oldest ~10% of @tcd's trace pages
 * onto its daemon ring buffer via put_pages_on_tcd_daemon_list(). */
static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
{
	int pgcount = tcd->tcd_cur_pages / 10;
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	if (printk_ratelimit())
		printk(KERN_WARNING "debug daemon buffer overflowed; "
		       "discarding 10%% of pages (%d of %ld)\n",
		       pgcount + 1, tcd->tcd_cur_pages);

	INIT_LIST_HEAD(&pc.pc_pages);
	spin_lock_init(&pc.pc_lock);

	/* the oldest pages sit at the head of tcd_pages */
	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
		if (pgcount-- == 0)
			break;

		list_move_tail(&tage->linkage, &pc.pc_pages);
		tcd->tcd_cur_pages--;
	}
	put_pages_on_tcd_daemon_list(&pc, tcd);
}
 215
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
						 unsigned long len)
{
	struct cfs_trace_page *tage;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	/* a single trace record never spans pages */
	if (len > PAGE_CACHE_SIZE) {
		printk(KERN_ERR
		       "cowardly refusing to write %lu bytes in a page\n", len);
		return NULL;
	}

	tage = cfs_trace_get_tage_try(tcd, len);
	if (tage != NULL)
		return tage;
	/* Buffer full: let the daemon discard old pages, then recycle the
	 * oldest remaining page (its previous contents are overwritten). */
	if (thread_running)
		cfs_tcd_shrink(tcd);
	if (tcd->tcd_cur_pages > 0) {
		tage = cfs_tage_from_list(tcd->tcd_pages.next);
		tage->used = 0;
		cfs_tage_to_tail(tage, &tcd->tcd_pages);
	}
	/* NULL here only if there are no pages at all */
	return tage;
}
 245
/* Varargs front end for libcfs_debug_vmsg2() with no secondary format;
 * returns its result (0 = printed to console, 1 = console skipped). */
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
		     const char *format, ...)
{
	va_list args;
	int     rc;

	va_start(args, format);
	rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
	va_end(args);

	return rc;
}
EXPORT_SYMBOL(libcfs_debug_msg);
 259
/**
 * libcfs_debug_vmsg2() - format a debug message into the per-CPU trace
 * buffer and/or echo it to the console, subject to rate limiting.
 * @msgdata: source location, debug mask and rate-limit state
 * @format1: primary printf format (may be NULL), consumed from @args
 * @format2: optional secondary format using this function's own varargs
 *
 * Returns 0 when the message was printed to the console, 1 when console
 * output was not requested or was rate-limited.
 */
int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
		       const char *format1, va_list args,
		       const char *format2, ...)
{
	struct cfs_trace_cpu_data *tcd = NULL;
	struct ptldebug_header     header = {0};
	struct cfs_trace_page     *tage;
	/* string_buf is used only if tcd != NULL, and is always set then */
	char                  *string_buf = NULL;
	char                  *debug_buf;
	int                     known_size;
	int                     needed = 85; /* average message length */
	int                     max_nob;
	va_list             ap;
	int                     depth;
	int                     i;
	int                     remain;
	int                     mask = msgdata->msg_mask;
	char                  *file = (char *)msgdata->msg_file;
	cfs_debug_limit_state_t   *cdls = msgdata->msg_cdls;

	/* log only the basename of the source file */
	if (strchr(file, '/'))
		file = strrchr(file, '/') + 1;

	tcd = cfs_trace_get_tcd();

	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
	 * pins us to a particular CPU.  This avoids an smp_processor_id()
	 * warning on Linux when debugging is enabled. */
	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());

	if (tcd == NULL)                /* arch may not log in IRQ context */
		goto console;

	if (tcd->tcd_cur_pages == 0)
		header.ph_flags |= PH_FLAG_FIRST_RECORD;

	if (tcd->tcd_shutting_down) {
		cfs_trace_put_tcd(tcd);
		tcd = NULL;
		goto console;
	}

	depth = __current_nesting_level();
	known_size = strlen(file) + 1 + depth;
	if (msgdata->msg_fn)
		known_size += strlen(msgdata->msg_fn) + 1;

	if (libcfs_debug_binary)
		known_size += sizeof(header);

	/*
	 * Two passes at most: vsnprintf() returns the size really required
	 * for the output (without the terminating NUL), so if the guessed
	 * 'needed' is too small the second pass uses the exact size.
	 */
	for (i = 0; i < 2; i++) {
		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
		if (tage == NULL) {
			if (needed + known_size > PAGE_CACHE_SIZE)
				mask |= D_ERROR;

			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		/* message text goes after the fixed-size prefix */
		string_buf = (char *)page_address(tage->page) +
					tage->used + known_size;

		max_nob = PAGE_CACHE_SIZE - tage->used - known_size;
		if (max_nob <= 0) {
			printk(KERN_EMERG "negative max_nob: %d\n",
			       max_nob);
			mask |= D_ERROR;
			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		needed = 0;
		if (format1) {
			va_copy(ap, args);
			needed = vsnprintf(string_buf, max_nob, format1, ap);
			va_end(ap);
		}

		if (format2) {
			remain = max_nob - needed;
			if (remain < 0)
				remain = 0;

			va_start(ap, format2);
			needed += vsnprintf(string_buf + needed, remain,
					    format2, ap);
			va_end(ap);
		}

		if (needed < max_nob) /* well. printing ok.. */
			break;
	}

	/* NOTE(review): if both formats produced 0 bytes this reads
	 * string_buf[-1]; presumably callers always log something -
	 * worth confirming. */
	if (*(string_buf+needed-1) != '\n')
		printk(KERN_INFO "format at %s:%d:%s doesn't end in "
		       "newline\n", file, msgdata->msg_line, msgdata->msg_fn);

	header.ph_len = known_size + needed;
	debug_buf = (char *)page_address(tage->page) + tage->used;

	if (libcfs_debug_binary) {
		memcpy(debug_buf, &header, sizeof(header));
		tage->used += sizeof(header);
		debug_buf += sizeof(header);
	}

	/* indent message according to the nesting level */
	while (depth-- > 0) {
		*(debug_buf++) = '.';
		++ tage->used;
	}

	/* record layout: [header][dots][file\0][fn\0][text] */
	strcpy(debug_buf, file);
	tage->used += strlen(file) + 1;
	debug_buf += strlen(file) + 1;

	if (msgdata->msg_fn) {
		strcpy(debug_buf, msgdata->msg_fn);
		tage->used += strlen(msgdata->msg_fn) + 1;
		debug_buf += strlen(msgdata->msg_fn) + 1;
	}

	__LASSERT(debug_buf == string_buf);

	tage->used += needed;
	__LASSERT (tage->used <= PAGE_CACHE_SIZE);

console:
	if ((mask & libcfs_printk) == 0) {
		/* no console output requested */
		if (tcd != NULL)
			cfs_trace_put_tcd(tcd);
		return 1;
	}

	if (cdls != NULL) {
		if (libcfs_console_ratelimit &&
		    cdls->cdls_next != 0 &&     /* not first time ever */
		    !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
			/* skipping a console message */
			cdls->cdls_count++;
			if (tcd != NULL)
				cfs_trace_put_tcd(tcd);
			return 1;
		}

		if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
						       libcfs_console_max_delay
						       + cfs_time_seconds(10))) {
			/* last timeout was a long time ago */
			cdls->cdls_delay /= libcfs_console_backoff * 4;
		} else {
			/* message repeated quickly: back off, clamped to
			 * [min_delay, max_delay] */
			cdls->cdls_delay *= libcfs_console_backoff;

			if (cdls->cdls_delay < libcfs_console_min_delay)
				cdls->cdls_delay = libcfs_console_min_delay;
			else if (cdls->cdls_delay > libcfs_console_max_delay)
				cdls->cdls_delay = libcfs_console_max_delay;
		}

		/* ensure cdls_next is never zero after it's been seen */
		cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
	}

	if (tcd != NULL) {
		/* message already formatted in the trace page */
		cfs_print_to_console(&header, mask, string_buf, needed, file,
				     msgdata->msg_fn);
		cfs_trace_put_tcd(tcd);
	} else {
		/* no trace page: format again into the console buffer */
		string_buf = cfs_trace_get_console_buffer();

		needed = 0;
		if (format1 != NULL) {
			va_copy(ap, args);
			needed = vsnprintf(string_buf,
					   CFS_TRACE_CONSOLE_BUFFER_SIZE,
					   format1, ap);
			va_end(ap);
		}
		if (format2 != NULL) {
			remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
			if (remain > 0) {
				va_start(ap, format2);
				needed += vsnprintf(string_buf+needed, remain,
						    format2, ap);
				va_end(ap);
			}
		}
		cfs_print_to_console(&header, mask,
				     string_buf, needed, file, msgdata->msg_fn);

		cfs_trace_put_console_buffer(string_buf);
	}

	/* report how many similar messages the rate limiter swallowed */
	if (cdls != NULL && cdls->cdls_count != 0) {
		string_buf = cfs_trace_get_console_buffer();

		needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
				  "Skipped %d previous similar message%s\n",
				  cdls->cdls_count,
				  (cdls->cdls_count > 1) ? "s" : "");

		cfs_print_to_console(&header, mask,
				     string_buf, needed, file, msgdata->msg_fn);

		cfs_trace_put_console_buffer(string_buf);
		cdls->cdls_count = 0;
	}

	return 0;
}
EXPORT_SYMBOL(libcfs_debug_vmsg2);
 481
/* Print an assertion-failure message to the console and panic; never
 * returns.  The panic/catastrophe flags are raised first. */
void
cfs_trace_assertion_failed(const char *str,
			   struct libcfs_debug_msg_data *msgdata)
{
	struct ptldebug_header hdr;

	libcfs_panic_in_progress = 1;
	libcfs_catastrophe = 1;
	/* publish the flags before doing anything else */
	mb();

	cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());

	cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
			     msgdata->msg_file, msgdata->msg_fn);

	panic("Lustre debug assertion failure\n");

	/* not reached */
}
 501
static void
panic_collect_pages(struct page_collection *pc)
{
	/* Do the collect_pages job on a single CPU: assumes that all other
	 * CPUs have been stopped during a panic.  If this isn't true for some
	 * arch, this will have to be implemented separately in each arch.  */
	int                     i;
	int                     j;
	struct cfs_trace_cpu_data *tcd;

	INIT_LIST_HEAD(&pc->pc_pages);

	/* no locking: see the assumption above */
	cfs_tcd_for_each(tcd, i, j) {
		list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
		tcd->tcd_cur_pages = 0;

		if (pc->pc_want_daemon_pages) {
			list_splice_init(&tcd->tcd_daemon_pages,
					     &pc->pc_pages);
			tcd->tcd_cur_daemon_pages = 0;
		}
	}
}
 525
/* Splice every CPU's trace pages (and daemon pages, if requested) onto
 * @pc->pc_pages, holding pc_lock and the per-type tcd locks. */
static void collect_pages_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	int i, cpu;

	spin_lock(&pc->pc_lock);
	cfs_for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
			tcd->tcd_cur_pages = 0;
			if (pc->pc_want_daemon_pages) {
				list_splice_init(&tcd->tcd_daemon_pages,
						     &pc->pc_pages);
				tcd->tcd_cur_daemon_pages = 0;
			}
		}
	}
	spin_unlock(&pc->pc_lock);
}
 545
 546static void collect_pages(struct page_collection *pc)
 547{
 548        INIT_LIST_HEAD(&pc->pc_pages);
 549
 550        if (libcfs_panic_in_progress)
 551                panic_collect_pages(pc);
 552        else
 553                collect_pages_on_all_cpus(pc);
 554}
 555
/* Return collected pages to the per-CPU lists they were taken from,
 * matching each page by its recorded cpu and type. */
static void put_pages_back_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	struct list_head *cur_head;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	int i, cpu;

	spin_lock(&pc->pc_lock);
	cfs_for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			/* capture the current head first: pages are inserted
			 * before it, so they land ahead of newer entries in
			 * their original relative order */
			cur_head = tcd->tcd_pages.next;

			list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
						 linkage) {

				__LASSERT_TAGE_INVARIANT(tage);

				if (tage->cpu != cpu || tage->type != i)
					continue;

				cfs_tage_to_tail(tage, cur_head);
				tcd->tcd_cur_pages++;
			}
		}
	}
	spin_unlock(&pc->pc_lock);
}
 584
 585static void put_pages_back(struct page_collection *pc)
 586{
 587        if (!libcfs_panic_in_progress)
 588                put_pages_back_on_all_cpus(pc);
 589}
 590
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
					 struct cfs_trace_cpu_data *tcd)
{
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	spin_lock(&pc->pc_lock);
	list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

		__LASSERT_TAGE_INVARIANT(tage);

		/* only take pages that belong to this tcd */
		if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
			continue;

		cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
		tcd->tcd_cur_daemon_pages++;

		/* ring behaviour: free the oldest page once over capacity */
		if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
			struct cfs_trace_page *victim;

			__LASSERT(!list_empty(&tcd->tcd_daemon_pages));
			victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);

			__LASSERT_TAGE_INVARIANT(victim);

			list_del(&victim->linkage);
			cfs_tage_free(victim);
			tcd->tcd_cur_daemon_pages--;
		}
	}
	spin_unlock(&pc->pc_lock);
}
 627
 628static void put_pages_on_daemon_list(struct page_collection *pc)
 629{
 630        struct cfs_trace_cpu_data *tcd;
 631        int i, cpu;
 632
 633        cfs_for_each_possible_cpu(cpu) {
 634                cfs_tcd_for_each_type_lock(tcd, i, cpu)
 635                        put_pages_on_tcd_daemon_list(pc, tcd);
 636        }
 637}
 638
/* Dump every buffered trace record (daemon pages included) straight to
 * the console and free the pages. */
void cfs_trace_debug_print(void)
{
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	spin_lock_init(&pc.pc_lock);

	pc.pc_want_daemon_pages = 1;
	collect_pages(&pc);
	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
		char *p, *file, *fn;
		struct page *page;

		__LASSERT_TAGE_INVARIANT(tage);

		page = tage->page;
		p = page_address(page);
		/* walk the records in the page; each is a ptldebug_header
		 * followed by NUL-terminated file and function names, then
		 * the message text (ph_len covers the whole record) */
		while (p < ((char *)page_address(page) + tage->used)) {
			struct ptldebug_header *hdr;
			int len;
			hdr = (void *)p;
			p += sizeof(*hdr);
			file = p;
			p += strlen(file) + 1;
			fn = p;
			p += strlen(fn) + 1;
			len = hdr->ph_len - (int)(p - (char *)hdr);

			cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);

			p += len;
		}

		list_del(&tage->linkage);
		cfs_tage_free(tage);
	}
}
 677
 678int cfs_tracefile_dump_all_pages(char *filename)
 679{
 680        struct page_collection  pc;
 681        struct file             *filp;
 682        struct cfs_trace_page   *tage;
 683        struct cfs_trace_page   *tmp;
 684        int rc;
 685
 686        DECL_MMSPACE;
 687
 688        cfs_tracefile_write_lock();
 689
 690        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
 691        if (IS_ERR(filp)) {
 692                rc = PTR_ERR(filp);
 693                filp = NULL;
 694                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
 695                      filename, rc);
 696                goto out;
 697        }
 698
 699        spin_lock_init(&pc.pc_lock);
 700        pc.pc_want_daemon_pages = 1;
 701        collect_pages(&pc);
 702        if (list_empty(&pc.pc_pages)) {
 703                rc = 0;
 704                goto close;
 705        }
 706
 707        /* ok, for now, just write the pages.  in the future we'll be building
 708         * iobufs with the pages and calling generic_direct_IO */
 709        MMSPACE_OPEN;
 710        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 711
 712                __LASSERT_TAGE_INVARIANT(tage);
 713
 714                rc = filp_write(filp, page_address(tage->page),
 715                                tage->used, filp_poff(filp));
 716                if (rc != (int)tage->used) {
 717                        printk(KERN_WARNING "wanted to write %u but wrote "
 718                               "%d\n", tage->used, rc);
 719                        put_pages_back(&pc);
 720                        __LASSERT(list_empty(&pc.pc_pages));
 721                        break;
 722                }
 723                list_del(&tage->linkage);
 724                cfs_tage_free(tage);
 725        }
 726        MMSPACE_CLOSE;
 727        rc = filp_fsync(filp);
 728        if (rc)
 729                printk(KERN_ERR "sync returns %d\n", rc);
 730close:
 731        filp_close(filp, NULL);
 732out:
 733        cfs_tracefile_write_unlock();
 734        return rc;
 735}
 736
 737void cfs_trace_flush_pages(void)
 738{
 739        struct page_collection pc;
 740        struct cfs_trace_page *tage;
 741        struct cfs_trace_page *tmp;
 742
 743        spin_lock_init(&pc.pc_lock);
 744
 745        pc.pc_want_daemon_pages = 1;
 746        collect_pages(&pc);
 747        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 748
 749                __LASSERT_TAGE_INVARIANT(tage);
 750
 751                list_del(&tage->linkage);
 752                cfs_tage_free(tage);
 753        }
 754}
 755
 756int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
 757                            const char *usr_buffer, int usr_buffer_nob)
 758{
 759        int    nob;
 760
 761        if (usr_buffer_nob > knl_buffer_nob)
 762                return -EOVERFLOW;
 763
 764        if (copy_from_user((void *)knl_buffer,
 765                           (void *)usr_buffer, usr_buffer_nob))
 766                return -EFAULT;
 767
 768        nob = strnlen(knl_buffer, usr_buffer_nob);
 769        while (nob-- >= 0)                    /* strip trailing whitespace */
 770                if (!isspace(knl_buffer[nob]))
 771                        break;
 772
 773        if (nob < 0)                        /* empty string */
 774                return -EINVAL;
 775
 776        if (nob == knl_buffer_nob)            /* no space to terminate */
 777                return -EOVERFLOW;
 778
 779        knl_buffer[nob + 1] = 0;                /* terminate */
 780        return 0;
 781}
 782EXPORT_SYMBOL(cfs_trace_copyin_string);
 783
/* Copy @knl_buffer out to user space, optionally appending one extra
 * character.  Returns the number of bytes copied, or -EFAULT. */
int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
			     const char *knl_buffer, char *append)
{
	/* NB if 'append' != NULL, it's a single character to append to the
	 * copied out string - usually "\n", for /proc entries and "" (i.e. a
	 * terminating zero byte) for sysctl entries */
	int   nob = strlen(knl_buffer);

	/* silently truncate to the user buffer size */
	if (nob > usr_buffer_nob)
		nob = usr_buffer_nob;

	if (copy_to_user(usr_buffer, knl_buffer, nob))
		return -EFAULT;

	/* append only when there is room left */
	if (append != NULL && nob < usr_buffer_nob) {
		if (copy_to_user(usr_buffer + nob, append, 1))
			return -EFAULT;

		nob++;
	}

	return nob;
}
EXPORT_SYMBOL(cfs_trace_copyout_string);
 808
 809int cfs_trace_allocate_string_buffer(char **str, int nob)
 810{
 811        if (nob > 2 * PAGE_CACHE_SIZE)      /* string must be "sensible" */
 812                return -EINVAL;
 813
 814        *str = kmalloc(nob, GFP_IOFS | __GFP_ZERO);
 815        if (*str == NULL)
 816                return -ENOMEM;
 817
 818        return 0;
 819}
 820
/* Free a buffer from cfs_trace_allocate_string_buffer().  @nob is
 * unused; kept for symmetry with the allocator. */
void cfs_trace_free_string_buffer(char *str, int nob)
{
	kfree(str);
}
 825
/* Copy a file name in from user space and dump all trace pages to it.
 * Returns 0 or a negative errno. */
int cfs_trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
{
	char     *str;
	int        rc;

	/* +1 leaves room for the terminating NUL added by copyin */
	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
	if (rc != 0)
		return rc;

	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
				     usr_str, usr_str_nob);
	if (rc != 0)
		goto out;

	/* only absolute paths are accepted */
	if (str[0] != '/') {
		rc = -EINVAL;
		goto out;
	}
	rc = cfs_tracefile_dump_all_pages(str);
out:
	cfs_trace_free_string_buffer(str, usr_str_nob + 1);
	return rc;
}
 849
/* Execute a trace-daemon control command:
 *   "stop"      - stop the daemon and clear the output file name
 *   "size=<MB>" - set the trace file size limit (10..20480 MB; values
 *                 out of range reset it to the default)
 *   "/path"     - set the output file and start the daemon
 * Returns 0 or a negative errno. */
int cfs_trace_daemon_command(char *str)
{
	int       rc = 0;

	cfs_tracefile_write_lock();

	if (strcmp(str, "stop") == 0) {
		/* drop the lock while stopping the thread - presumably it
		 * needs the lock to exit; confirm against tracefiled */
		cfs_tracefile_write_unlock();
		cfs_trace_stop_thread();
		cfs_tracefile_write_lock();
		memset(cfs_tracefile, 0, sizeof(cfs_tracefile));

	} else if (strncmp(str, "size=", 5) == 0) {
		cfs_tracefile_size = simple_strtoul(str + 5, NULL, 0);
		if (cfs_tracefile_size < 10 || cfs_tracefile_size > 20480)
			cfs_tracefile_size = CFS_TRACEFILE_SIZE;
		else
			cfs_tracefile_size <<= 20; /* MB -> bytes */

	} else if (strlen(str) >= sizeof(cfs_tracefile)) {
		rc = -ENAMETOOLONG;
	} else if (str[0] != '/') {
		rc = -EINVAL;
	} else {
		strcpy(cfs_tracefile, str);

		printk(KERN_INFO
		       "Lustre: debug daemon will attempt to start writing "
		       "to %s (%lukB max)\n", cfs_tracefile,
		       (long)(cfs_tracefile_size >> 10));

		cfs_trace_start_thread();
	}

	cfs_tracefile_write_unlock();
	return rc;
}
 887
 888int cfs_trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
 889{
 890        char *str;
 891        int   rc;
 892
 893        rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
 894        if (rc != 0)
 895                return rc;
 896
 897        rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
 898                                 usr_str, usr_str_nob);
 899        if (rc == 0)
 900                rc = cfs_trace_daemon_command(str);
 901
 902        cfs_trace_free_string_buffer(str, usr_str_nob + 1);
 903        return rc;
 904}
 905
 906int cfs_trace_set_debug_mb(int mb)
 907{
 908        int i;
 909        int j;
 910        int pages;
 911        int limit = cfs_trace_max_debug_mb();
 912        struct cfs_trace_cpu_data *tcd;
 913
 914        if (mb < num_possible_cpus()) {
 915                printk(KERN_WARNING
 916                       "Lustre: %d MB is too small for debug buffer size, "
 917                       "setting it to %d MB.\n", mb, num_possible_cpus());
 918                mb = num_possible_cpus();
 919        }
 920
 921        if (mb > limit) {
 922                printk(KERN_WARNING
 923                       "Lustre: %d MB is too large for debug buffer size, "
 924                       "setting it to %d MB.\n", mb, limit);
 925                mb = limit;
 926        }
 927
 928        mb /= num_possible_cpus();
 929        pages = mb << (20 - PAGE_CACHE_SHIFT);
 930
 931        cfs_tracefile_write_lock();
 932
 933        cfs_tcd_for_each(tcd, i, j)
 934                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
 935
 936        cfs_tracefile_write_unlock();
 937
 938        return 0;
 939}
 940
 941int cfs_trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
 942{
 943        char     str[32];
 944        int      rc;
 945
 946        rc = cfs_trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
 947        if (rc < 0)
 948                return rc;
 949
 950        return cfs_trace_set_debug_mb(simple_strtoul(str, NULL, 0));
 951}
 952
 953int cfs_trace_get_debug_mb(void)
 954{
 955        int i;
 956        int j;
 957        struct cfs_trace_cpu_data *tcd;
 958        int total_pages = 0;
 959
 960        cfs_tracefile_read_lock();
 961
 962        cfs_tcd_for_each(tcd, i, j)
 963                total_pages += tcd->tcd_max_pages;
 964
 965        cfs_tracefile_read_unlock();
 966
 967        return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1;
 968}
 969
/* Debug daemon thread body: loops until tctl_shutdown is set, collecting
 * filled trace pages and appending them to the file named in
 * cfs_tracefile.  Pages that cannot be written are parked on the daemon
 * list so they can be retried on a later pass. */
static int tracefiled(void *arg)
{
	struct page_collection pc;
	struct tracefiled_ctl *tctl = arg;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	struct file *filp;
	int last_loop = 0;
	int rc;

	DECL_MMSPACE;

	/* we're started late enough that we pick up init's fs context */
	/* this is so broken in uml?  what on earth is going on? */

	spin_lock_init(&pc.pc_lock);
	complete(&tctl->tctl_start);	/* unblock cfs_trace_start_thread() */

	while (1) {
		wait_queue_t __wait;

		/* grab the currently filled pages (not the daemon list) */
		pc.pc_want_daemon_pages = 0;
		collect_pages(&pc);
		if (list_empty(&pc.pc_pages))
			goto end_loop;

		/* open the configured output file; the name is read under
		 * the tracefile lock */
		filp = NULL;
		cfs_tracefile_read_lock();
		if (cfs_tracefile[0] != 0) {
			filp = filp_open(cfs_tracefile,
					 O_CREAT | O_RDWR | O_LARGEFILE,
					 0600);
			if (IS_ERR(filp)) {
				rc = PTR_ERR(filp);
				filp = NULL;
				printk(KERN_WARNING "couldn't open %s: "
				       "%d\n", cfs_tracefile, rc);
			}
		}
		cfs_tracefile_read_unlock();
		if (filp == NULL) {
			/* no usable output file: park the pages on the
			 * daemon list for a later attempt */
			put_pages_on_daemon_list(&pc);
			__LASSERT(list_empty(&pc.pc_pages));
			goto end_loop;
		}

		MMSPACE_OPEN;

		list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
						   linkage) {
			/* NOTE(review): f_pos is static, so the write offset
			 * persists across loop passes and daemon restarts;
			 * it wraps to 0 once cfs_tracefile_size is reached */
			static loff_t f_pos;

			__LASSERT_TAGE_INVARIANT(tage);

			if (f_pos >= (off_t)cfs_tracefile_size)
				f_pos = 0;
			else if (f_pos > (off_t)filp_size(filp))
				f_pos = filp_size(filp);

			rc = filp_write(filp, page_address(tage->page),
					tage->used, &f_pos);
			if (rc != (int)tage->used) {
				/* short write: hand the remaining pages back
				 * so the trace data is not silently dropped */
				printk(KERN_WARNING "wanted to write %u "
				       "but wrote %d\n", tage->used, rc);
				put_pages_back(&pc);
				__LASSERT(list_empty(&pc.pc_pages));
			}
		}
		MMSPACE_CLOSE;

		filp_close(filp, NULL);
		put_pages_on_daemon_list(&pc);
		if (!list_empty(&pc.pc_pages)) {
			int i;

			/* should never happen: report which CPUs still own
			 * unwritten pages */
			printk(KERN_ALERT "Lustre: trace pages aren't "
			       " empty\n");
			printk(KERN_ERR "total cpus(%d): ",
			       num_possible_cpus());
			for (i = 0; i < num_possible_cpus(); i++)
				if (cpu_online(i))
					printk(KERN_ERR "%d(on) ", i);
				else
					printk(KERN_ERR "%d(off) ", i);
			printk(KERN_ERR "\n");

			i = 0;
			list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
						     linkage)
				printk(KERN_ERR "page %d belongs to cpu "
				       "%d\n", ++i, tage->cpu);
			printk(KERN_ERR "There are %d pages unwritten\n",
			       i);
		}
		__LASSERT(list_empty(&pc.pc_pages));
end_loop:
		if (atomic_read(&tctl->tctl_shutdown)) {
			/* make one extra pass after shutdown is requested
			 * to flush anything queued in the meantime */
			if (last_loop == 0) {
				last_loop = 1;
				continue;
			} else {
				break;
			}
		}
		/* otherwise sleep for up to a second, or until woken */
		init_waitqueue_entry_current(&__wait);
		add_wait_queue(&tctl->tctl_waitq, &__wait);
		set_current_state(TASK_INTERRUPTIBLE);
		waitq_timedwait(&__wait, TASK_INTERRUPTIBLE,
				    cfs_time_seconds(1));
		remove_wait_queue(&tctl->tctl_waitq, &__wait);
	}
	complete(&tctl->tctl_stop);	/* unblock cfs_trace_stop_thread() */
	return 0;
}
1084
1085int cfs_trace_start_thread(void)
1086{
1087        struct tracefiled_ctl *tctl = &trace_tctl;
1088        int rc = 0;
1089
1090        mutex_lock(&cfs_trace_thread_mutex);
1091        if (thread_running)
1092                goto out;
1093
1094        init_completion(&tctl->tctl_start);
1095        init_completion(&tctl->tctl_stop);
1096        init_waitqueue_head(&tctl->tctl_waitq);
1097        atomic_set(&tctl->tctl_shutdown, 0);
1098
1099        if (IS_ERR(kthread_run(tracefiled, tctl, "ktracefiled"))) {
1100                rc = -ECHILD;
1101                goto out;
1102        }
1103
1104        wait_for_completion(&tctl->tctl_start);
1105        thread_running = 1;
1106out:
1107        mutex_unlock(&cfs_trace_thread_mutex);
1108        return rc;
1109}
1110
1111void cfs_trace_stop_thread(void)
1112{
1113        struct tracefiled_ctl *tctl = &trace_tctl;
1114
1115        mutex_lock(&cfs_trace_thread_mutex);
1116        if (thread_running) {
1117                printk(KERN_INFO
1118                       "Lustre: shutting down debug daemon thread...\n");
1119                atomic_set(&tctl->tctl_shutdown, 1);
1120                wait_for_completion(&tctl->tctl_stop);
1121                thread_running = 0;
1122        }
1123        mutex_unlock(&cfs_trace_thread_mutex);
1124}
1125
1126int cfs_tracefile_init(int max_pages)
1127{
1128        struct cfs_trace_cpu_data *tcd;
1129        int                 i;
1130        int                 j;
1131        int                 rc;
1132        int                 factor;
1133
1134        rc = cfs_tracefile_init_arch();
1135        if (rc != 0)
1136                return rc;
1137
1138        cfs_tcd_for_each(tcd, i, j) {
1139                /* tcd_pages_factor is initialized int tracefile_init_arch. */
1140                factor = tcd->tcd_pages_factor;
1141                INIT_LIST_HEAD(&tcd->tcd_pages);
1142                INIT_LIST_HEAD(&tcd->tcd_stock_pages);
1143                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
1144                tcd->tcd_cur_pages = 0;
1145                tcd->tcd_cur_stock_pages = 0;
1146                tcd->tcd_cur_daemon_pages = 0;
1147                tcd->tcd_max_pages = (max_pages * factor) / 100;
1148                LASSERT(tcd->tcd_max_pages > 0);
1149                tcd->tcd_shutting_down = 0;
1150        }
1151
1152        return 0;
1153}
1154
1155static void trace_cleanup_on_all_cpus(void)
1156{
1157        struct cfs_trace_cpu_data *tcd;
1158        struct cfs_trace_page *tage;
1159        struct cfs_trace_page *tmp;
1160        int i, cpu;
1161
1162        cfs_for_each_possible_cpu(cpu) {
1163                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
1164                        tcd->tcd_shutting_down = 1;
1165
1166                        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
1167                                                           linkage) {
1168                                __LASSERT_TAGE_INVARIANT(tage);
1169
1170                                list_del(&tage->linkage);
1171                                cfs_tage_free(tage);
1172                        }
1173
1174                        tcd->tcd_cur_pages = 0;
1175                }
1176        }
1177}
1178
/* Free all trace pages on every CPU, then undo the arch-specific setup
 * done by cfs_tracefile_init_arch().
 * The local page_collection that used to be initialized here was never
 * passed to anything — dead code, removed. */
static void cfs_trace_cleanup(void)
{
	trace_cleanup_on_all_cpus();

	cfs_tracefile_fini_arch();
}
1190
/* Tear down the trace subsystem: stop the daemon thread first so no one
 * is writing, then free all pages and the arch-specific state. */
void cfs_tracefile_exit(void)
{
	cfs_trace_stop_thread();
	cfs_trace_cleanup();
}
1196