linux/kernel/trace/trace_syscalls.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <trace/syscall.h>
   3#include <trace/events/syscalls.h>
   4#include <linux/syscalls.h>
   5#include <linux/slab.h>
   6#include <linux/kernel.h>
   7#include <linux/module.h>       /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
   8#include <linux/ftrace.h>
   9#include <linux/perf_event.h>
  10#include <asm/syscall.h>
  11
  12#include "trace_output.h"
  13#include "trace.h"
  14
  /*
   * Held around tracepoint (un)registration and the per-syscall enable
   * state and reference counts updated in this file.
   */
  static DEFINE_MUTEX(syscall_trace_lock);

  /*
   * Forward declarations: the event classes defined further down in this
   * file reference these callbacks before their bodies appear.
   */
  static int syscall_enter_register(struct trace_event_call *event,
                                    enum trace_reg type, void *data);
  static int syscall_exit_register(struct trace_event_call *event,
                                   enum trace_reg type, void *data);
  21
  22static struct list_head *
  23syscall_get_enter_fields(struct trace_event_call *call)
  24{
  25        struct syscall_metadata *entry = call->data;
  26
  27        return &entry->enter_fields;
  28}
  29
  /*
   * Start and end of the array of syscall metadata pointers that
   * find_syscall_meta() walks; both symbols are defined outside this file.
   */
  extern struct syscall_metadata *__start_syscalls_metadata[];
  extern struct syscall_metadata *__stop_syscalls_metadata[];

  /*
   * Metadata indexed by syscall number. Allocated and filled by
   * init_ftrace_syscalls(); stays NULL if that allocation failed.
   */
  static struct syscall_metadata **syscalls_metadata;
  34
  #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
  /*
   * Default symbol-name matcher, used unless the architecture provides
   * its own by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME.
   *
   * The first three characters of both strings are skipped and only the
   * remainder is compared. Architectures that use syscall wrappers may
   * have symbol aliases prefixed with ".SyS" or ".sys" instead of "sys",
   * which would otherwise cause an unwanted mismatch.
   *
   * Both strings must be at least three characters long.
   */
  static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
  {
          const char *sym_tail = sym + 3;
          const char *name_tail = name + 3;

          return strcmp(sym_tail, name_tail) == 0;
  }
  #endif
  47
  #ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
  /*
   * Some architectures that let 32bit applications run on a 64bit kernel
   * do not map the syscalls of 32bit tasks the same way as those of
   * 64bit tasks (x86 being one of them).
   *
   * Rather than reporting the wrong syscall in that case, ignore it.
   *
   * An architecture opts in by defining ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
   * and providing arch_trace_is_compat_syscall(), which tells the tracing
   * code which syscalls to ignore.
   *
   * Returns -1 for a compat syscall, otherwise the result of
   * syscall_get_nr().
   */
  static int
  trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
  {
          return unlikely(arch_trace_is_compat_syscall(regs)) ?
                  -1 : syscall_get_nr(task, regs);
  }
  #else
  /* No compat filtering: report whatever syscall_get_nr() returns. */
  static inline int
  trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
  {
          return syscall_get_nr(task, regs);
  }
  #endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
  79
  80static __init struct syscall_metadata *
  81find_syscall_meta(unsigned long syscall)
  82{
  83        struct syscall_metadata **start;
  84        struct syscall_metadata **stop;
  85        char str[KSYM_SYMBOL_LEN];
  86
  87
  88        start = __start_syscalls_metadata;
  89        stop = __stop_syscalls_metadata;
  90        kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  91
  92        if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
  93                return NULL;
  94
  95        for ( ; start < stop; start++) {
  96                if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
  97                        return *start;
  98        }
  99        return NULL;
 100}
 101
 102static struct syscall_metadata *syscall_nr_to_meta(int nr)
 103{
 104        if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
 105                return NULL;
 106
 107        return syscalls_metadata[nr];
 108}
 109
 110const char *get_syscall_name(int syscall)
 111{
 112        struct syscall_metadata *entry;
 113
 114        entry = syscall_nr_to_meta(syscall);
 115        if (!entry)
 116                return NULL;
 117
 118        return entry->name;
 119}
 120
 121static enum print_line_t
 122print_syscall_enter(struct trace_iterator *iter, int flags,
 123                    struct trace_event *event)
 124{
 125        struct trace_array *tr = iter->tr;
 126        struct trace_seq *s = &iter->seq;
 127        struct trace_entry *ent = iter->ent;
 128        struct syscall_trace_enter *trace;
 129        struct syscall_metadata *entry;
 130        int i, syscall;
 131
 132        trace = (typeof(trace))ent;
 133        syscall = trace->nr;
 134        entry = syscall_nr_to_meta(syscall);
 135
 136        if (!entry)
 137                goto end;
 138
 139        if (entry->enter_event->event.type != ent->type) {
 140                WARN_ON_ONCE(1);
 141                goto end;
 142        }
 143
 144        trace_seq_printf(s, "%s(", entry->name);
 145
 146        for (i = 0; i < entry->nb_args; i++) {
 147
 148                if (trace_seq_has_overflowed(s))
 149                        goto end;
 150
 151                /* parameter types */
 152                if (tr->trace_flags & TRACE_ITER_VERBOSE)
 153                        trace_seq_printf(s, "%s ", entry->types[i]);
 154
 155                /* parameter values */
 156                trace_seq_printf(s, "%s: %lx%s", entry->args[i],
 157                                 trace->args[i],
 158                                 i == entry->nb_args - 1 ? "" : ", ");
 159        }
 160
 161        trace_seq_putc(s, ')');
 162end:
 163        trace_seq_putc(s, '\n');
 164
 165        return trace_handle_return(s);
 166}
 167
 168static enum print_line_t
 169print_syscall_exit(struct trace_iterator *iter, int flags,
 170                   struct trace_event *event)
 171{
 172        struct trace_seq *s = &iter->seq;
 173        struct trace_entry *ent = iter->ent;
 174        struct syscall_trace_exit *trace;
 175        int syscall;
 176        struct syscall_metadata *entry;
 177
 178        trace = (typeof(trace))ent;
 179        syscall = trace->nr;
 180        entry = syscall_nr_to_meta(syscall);
 181
 182        if (!entry) {
 183                trace_seq_putc(s, '\n');
 184                goto out;
 185        }
 186
 187        if (entry->exit_event->event.type != ent->type) {
 188                WARN_ON_ONCE(1);
 189                return TRACE_TYPE_UNHANDLED;
 190        }
 191
 192        trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
 193                                trace->ret);
 194
 195 out:
 196        return trace_handle_return(s);
 197}
 198
 /*
  * Only declared here; SYSCALL_FIELD() calls it when the requested type
  * and the record field differ in size.
  * NOTE(review): presumably left undefined on purpose so that such a
  * mismatch is caught at build time -- confirm.
  */
 extern char *__bad_type_size(void);

 /*
  * Expand to the (type string, field name, offset, size, signedness)
  * arguments of trace_define_field() for member @field of a local
  * variable that must be named "trace". @name is the field name to
  * report.
  */
 #define SYSCALL_FIELD(type, field, name)                                \
         sizeof(type) != sizeof(trace.field) ?                           \
                 __bad_type_size() :                                     \
                 #type, #name, offsetof(typeof(trace), field),           \
                 sizeof(trace.field), is_signed_type(type)
 206
 207static int __init
 208__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
 209{
 210        int i;
 211        int pos = 0;
 212
 213        /* When len=0, we just calculate the needed length */
 214#define LEN_OR_ZERO (len ? len - pos : 0)
 215
 216        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 217        for (i = 0; i < entry->nb_args; i++) {
 218                pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
 219                                entry->args[i], sizeof(unsigned long),
 220                                i == entry->nb_args - 1 ? "" : ", ");
 221        }
 222        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 223
 224        for (i = 0; i < entry->nb_args; i++) {
 225                pos += snprintf(buf + pos, LEN_OR_ZERO,
 226                                ", ((unsigned long)(REC->%s))", entry->args[i]);
 227        }
 228
 229#undef LEN_OR_ZERO
 230
 231        /* return the length of print_fmt */
 232        return pos;
 233}
 234
 235static int __init set_syscall_print_fmt(struct trace_event_call *call)
 236{
 237        char *print_fmt;
 238        int len;
 239        struct syscall_metadata *entry = call->data;
 240
 241        if (entry->enter_event != call) {
 242                call->print_fmt = "\"0x%lx\", REC->ret";
 243                return 0;
 244        }
 245
 246        /* First: called with 0 length to calculate the needed length */
 247        len = __set_enter_print_fmt(entry, NULL, 0);
 248
 249        print_fmt = kmalloc(len + 1, GFP_KERNEL);
 250        if (!print_fmt)
 251                return -ENOMEM;
 252
 253        /* Second: actually write the @print_fmt */
 254        __set_enter_print_fmt(entry, print_fmt, len + 1);
 255        call->print_fmt = print_fmt;
 256
 257        return 0;
 258}
 259
 260static void __init free_syscall_print_fmt(struct trace_event_call *call)
 261{
 262        struct syscall_metadata *entry = call->data;
 263
 264        if (entry->enter_event == call)
 265                kfree(call->print_fmt);
 266}
 267
 268static int __init syscall_enter_define_fields(struct trace_event_call *call)
 269{
 270        struct syscall_trace_enter trace;
 271        struct syscall_metadata *meta = call->data;
 272        int ret;
 273        int i;
 274        int offset = offsetof(typeof(trace), args);
 275
 276        ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
 277                                 FILTER_OTHER);
 278        if (ret)
 279                return ret;
 280
 281        for (i = 0; i < meta->nb_args; i++) {
 282                ret = trace_define_field(call, meta->types[i],
 283                                         meta->args[i], offset,
 284                                         sizeof(unsigned long), 0,
 285                                         FILTER_OTHER);
 286                offset += sizeof(unsigned long);
 287        }
 288
 289        return ret;
 290}
 291
 292static int __init syscall_exit_define_fields(struct trace_event_call *call)
 293{
 294        struct syscall_trace_exit trace;
 295        int ret;
 296
 297        ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
 298                                 FILTER_OTHER);
 299        if (ret)
 300                return ret;
 301
 302        ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
 303                                 FILTER_OTHER);
 304
 305        return ret;
 306}
 307
 308static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 309{
 310        struct trace_array *tr = data;
 311        struct trace_event_file *trace_file;
 312        struct syscall_trace_enter *entry;
 313        struct syscall_metadata *sys_data;
 314        struct ring_buffer_event *event;
 315        struct ring_buffer *buffer;
 316        unsigned long irq_flags;
 317        int pc;
 318        int syscall_nr;
 319        int size;
 320
 321        syscall_nr = trace_get_syscall_nr(current, regs);
 322        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 323                return;
 324
 325        /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
 326        trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
 327        if (!trace_file)
 328                return;
 329
 330        if (trace_trigger_soft_disabled(trace_file))
 331                return;
 332
 333        sys_data = syscall_nr_to_meta(syscall_nr);
 334        if (!sys_data)
 335                return;
 336
 337        size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
 338
 339        local_save_flags(irq_flags);
 340        pc = preempt_count();
 341
 342        buffer = tr->trace_buffer.buffer;
 343        event = trace_buffer_lock_reserve(buffer,
 344                        sys_data->enter_event->event.type, size, irq_flags, pc);
 345        if (!event)
 346                return;
 347
 348        entry = ring_buffer_event_data(event);
 349        entry->nr = syscall_nr;
 350        syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
 351
 352        event_trigger_unlock_commit(trace_file, buffer, event, entry,
 353                                    irq_flags, pc);
 354}
 355
 356static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
 357{
 358        struct trace_array *tr = data;
 359        struct trace_event_file *trace_file;
 360        struct syscall_trace_exit *entry;
 361        struct syscall_metadata *sys_data;
 362        struct ring_buffer_event *event;
 363        struct ring_buffer *buffer;
 364        unsigned long irq_flags;
 365        int pc;
 366        int syscall_nr;
 367
 368        syscall_nr = trace_get_syscall_nr(current, regs);
 369        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 370                return;
 371
 372        /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
 373        trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
 374        if (!trace_file)
 375                return;
 376
 377        if (trace_trigger_soft_disabled(trace_file))
 378                return;
 379
 380        sys_data = syscall_nr_to_meta(syscall_nr);
 381        if (!sys_data)
 382                return;
 383
 384        local_save_flags(irq_flags);
 385        pc = preempt_count();
 386
 387        buffer = tr->trace_buffer.buffer;
 388        event = trace_buffer_lock_reserve(buffer,
 389                        sys_data->exit_event->event.type, sizeof(*entry),
 390                        irq_flags, pc);
 391        if (!event)
 392                return;
 393
 394        entry = ring_buffer_event_data(event);
 395        entry->nr = syscall_nr;
 396        entry->ret = syscall_get_return_value(current, regs);
 397
 398        event_trigger_unlock_commit(trace_file, buffer, event, entry,
 399                                    irq_flags, pc);
 400}
 401
 402static int reg_event_syscall_enter(struct trace_event_file *file,
 403                                   struct trace_event_call *call)
 404{
 405        struct trace_array *tr = file->tr;
 406        int ret = 0;
 407        int num;
 408
 409        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 410        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
 411                return -ENOSYS;
 412        mutex_lock(&syscall_trace_lock);
 413        if (!tr->sys_refcount_enter)
 414                ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
 415        if (!ret) {
 416                rcu_assign_pointer(tr->enter_syscall_files[num], file);
 417                tr->sys_refcount_enter++;
 418        }
 419        mutex_unlock(&syscall_trace_lock);
 420        return ret;
 421}
 422
 423static void unreg_event_syscall_enter(struct trace_event_file *file,
 424                                      struct trace_event_call *call)
 425{
 426        struct trace_array *tr = file->tr;
 427        int num;
 428
 429        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 430        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
 431                return;
 432        mutex_lock(&syscall_trace_lock);
 433        tr->sys_refcount_enter--;
 434        RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
 435        if (!tr->sys_refcount_enter)
 436                unregister_trace_sys_enter(ftrace_syscall_enter, tr);
 437        mutex_unlock(&syscall_trace_lock);
 438}
 439
 440static int reg_event_syscall_exit(struct trace_event_file *file,
 441                                  struct trace_event_call *call)
 442{
 443        struct trace_array *tr = file->tr;
 444        int ret = 0;
 445        int num;
 446
 447        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 448        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
 449                return -ENOSYS;
 450        mutex_lock(&syscall_trace_lock);
 451        if (!tr->sys_refcount_exit)
 452                ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
 453        if (!ret) {
 454                rcu_assign_pointer(tr->exit_syscall_files[num], file);
 455                tr->sys_refcount_exit++;
 456        }
 457        mutex_unlock(&syscall_trace_lock);
 458        return ret;
 459}
 460
 461static void unreg_event_syscall_exit(struct trace_event_file *file,
 462                                     struct trace_event_call *call)
 463{
 464        struct trace_array *tr = file->tr;
 465        int num;
 466
 467        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 468        if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
 469                return;
 470        mutex_lock(&syscall_trace_lock);
 471        tr->sys_refcount_exit--;
 472        RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
 473        if (!tr->sys_refcount_exit)
 474                unregister_trace_sys_exit(ftrace_syscall_exit, tr);
 475        mutex_unlock(&syscall_trace_lock);
 476}
 477
 478static int __init init_syscall_trace(struct trace_event_call *call)
 479{
 480        int id;
 481        int num;
 482
 483        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 484        if (num < 0 || num >= NR_syscalls) {
 485                pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
 486                                ((struct syscall_metadata *)call->data)->name);
 487                return -ENOSYS;
 488        }
 489
 490        if (set_syscall_print_fmt(call) < 0)
 491                return -ENOMEM;
 492
 493        id = trace_event_raw_init(call);
 494
 495        if (id < 0) {
 496                free_syscall_print_fmt(call);
 497                return id;
 498        }
 499
 500        return id;
 501}
 502
 503struct trace_event_functions enter_syscall_print_funcs = {
 504        .trace          = print_syscall_enter,
 505};
 506
 507struct trace_event_functions exit_syscall_print_funcs = {
 508        .trace          = print_syscall_exit,
 509};
 510
 511struct trace_event_class __refdata event_class_syscall_enter = {
 512        .system         = "syscalls",
 513        .reg            = syscall_enter_register,
 514        .define_fields  = syscall_enter_define_fields,
 515        .get_fields     = syscall_get_enter_fields,
 516        .raw_init       = init_syscall_trace,
 517};
 518
 519struct trace_event_class __refdata event_class_syscall_exit = {
 520        .system         = "syscalls",
 521        .reg            = syscall_exit_register,
 522        .define_fields  = syscall_exit_define_fields,
 523        .fields         = LIST_HEAD_INIT(event_class_syscall_exit.fields),
 524        .raw_init       = init_syscall_trace,
 525};
 526
 527unsigned long __init __weak arch_syscall_addr(int nr)
 528{
 529        return (unsigned long)sys_call_table[nr];
 530}
 531
 532void __init init_ftrace_syscalls(void)
 533{
 534        struct syscall_metadata *meta;
 535        unsigned long addr;
 536        int i;
 537
 538        syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
 539                                    GFP_KERNEL);
 540        if (!syscalls_metadata) {
 541                WARN_ON(1);
 542                return;
 543        }
 544
 545        for (i = 0; i < NR_syscalls; i++) {
 546                addr = arch_syscall_addr(i);
 547                meta = find_syscall_meta(addr);
 548                if (!meta)
 549                        continue;
 550
 551                meta->syscall_nr = i;
 552                syscalls_metadata[i] = meta;
 553        }
 554}
 555
 556#ifdef CONFIG_PERF_EVENTS
 557
 /* One bit per syscall number: set while perf has that enter/exit event enabled. */
 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);

 /*
  * Number of perf-enabled enter/exit events; the matching tracepoint
  * probe is registered while the count is non-zero. Updated under
  * syscall_trace_lock.
  */
 static int sys_perf_refcount_enter;
 static int sys_perf_refcount_exit;
 562
 563static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
 564                              struct syscall_metadata *sys_data,
 565                              struct syscall_trace_enter *rec) {
 566        struct syscall_tp_t {
 567                unsigned long long regs;
 568                unsigned long syscall_nr;
 569                unsigned long args[SYSCALL_DEFINE_MAXARGS];
 570        } param;
 571        int i;
 572
 573        *(struct pt_regs **)&param = regs;
 574        param.syscall_nr = rec->nr;
 575        for (i = 0; i < sys_data->nb_args; i++)
 576                param.args[i] = rec->args[i];
 577        return trace_call_bpf(prog, &param);
 578}
 579
 580static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 581{
 582        struct syscall_metadata *sys_data;
 583        struct syscall_trace_enter *rec;
 584        struct hlist_head *head;
 585        struct bpf_prog *prog;
 586        int syscall_nr;
 587        int rctx;
 588        int size;
 589
 590        syscall_nr = trace_get_syscall_nr(current, regs);
 591        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 592                return;
 593        if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 594                return;
 595
 596        sys_data = syscall_nr_to_meta(syscall_nr);
 597        if (!sys_data)
 598                return;
 599
 600        prog = READ_ONCE(sys_data->enter_event->prog);
 601        head = this_cpu_ptr(sys_data->enter_event->perf_events);
 602        if (!prog && hlist_empty(head))
 603                return;
 604
 605        /* get the size after alignment with the u32 buffer size field */
 606        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
 607        size = ALIGN(size + sizeof(u32), sizeof(u64));
 608        size -= sizeof(u32);
 609
 610        rec = perf_trace_buf_alloc(size, NULL, &rctx);
 611        if (!rec)
 612                return;
 613
 614        rec->nr = syscall_nr;
 615        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 616                               (unsigned long *)&rec->args);
 617
 618        if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
 619            hlist_empty(head)) {
 620                perf_swevent_put_recursion_context(rctx);
 621                return;
 622        }
 623
 624        perf_trace_buf_submit(rec, size, rctx,
 625                              sys_data->enter_event->event.type, 1, regs,
 626                              head, NULL, NULL);
 627}
 628
 629static int perf_sysenter_enable(struct trace_event_call *call)
 630{
 631        int ret = 0;
 632        int num;
 633
 634        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 635
 636        mutex_lock(&syscall_trace_lock);
 637        if (!sys_perf_refcount_enter)
 638                ret = register_trace_sys_enter(perf_syscall_enter, NULL);
 639        if (ret) {
 640                pr_info("event trace: Could not activate syscall entry trace point");
 641        } else {
 642                set_bit(num, enabled_perf_enter_syscalls);
 643                sys_perf_refcount_enter++;
 644        }
 645        mutex_unlock(&syscall_trace_lock);
 646        return ret;
 647}
 648
 649static void perf_sysenter_disable(struct trace_event_call *call)
 650{
 651        int num;
 652
 653        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 654
 655        mutex_lock(&syscall_trace_lock);
 656        sys_perf_refcount_enter--;
 657        clear_bit(num, enabled_perf_enter_syscalls);
 658        if (!sys_perf_refcount_enter)
 659                unregister_trace_sys_enter(perf_syscall_enter, NULL);
 660        mutex_unlock(&syscall_trace_lock);
 661}
 662
 663static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
 664                              struct syscall_trace_exit *rec) {
 665        struct syscall_tp_t {
 666                unsigned long long regs;
 667                unsigned long syscall_nr;
 668                unsigned long ret;
 669        } param;
 670
 671        *(struct pt_regs **)&param = regs;
 672        param.syscall_nr = rec->nr;
 673        param.ret = rec->ret;
 674        return trace_call_bpf(prog, &param);
 675}
 676
 677static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 678{
 679        struct syscall_metadata *sys_data;
 680        struct syscall_trace_exit *rec;
 681        struct hlist_head *head;
 682        struct bpf_prog *prog;
 683        int syscall_nr;
 684        int rctx;
 685        int size;
 686
 687        syscall_nr = trace_get_syscall_nr(current, regs);
 688        if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
 689                return;
 690        if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 691                return;
 692
 693        sys_data = syscall_nr_to_meta(syscall_nr);
 694        if (!sys_data)
 695                return;
 696
 697        prog = READ_ONCE(sys_data->exit_event->prog);
 698        head = this_cpu_ptr(sys_data->exit_event->perf_events);
 699        if (!prog && hlist_empty(head))
 700                return;
 701
 702        /* We can probably do that at build time */
 703        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
 704        size -= sizeof(u32);
 705
 706        rec = perf_trace_buf_alloc(size, NULL, &rctx);
 707        if (!rec)
 708                return;
 709
 710        rec->nr = syscall_nr;
 711        rec->ret = syscall_get_return_value(current, regs);
 712
 713        if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
 714            hlist_empty(head)) {
 715                perf_swevent_put_recursion_context(rctx);
 716                return;
 717        }
 718
 719        perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
 720                              1, regs, head, NULL, NULL);
 721}
 722
 723static int perf_sysexit_enable(struct trace_event_call *call)
 724{
 725        int ret = 0;
 726        int num;
 727
 728        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 729
 730        mutex_lock(&syscall_trace_lock);
 731        if (!sys_perf_refcount_exit)
 732                ret = register_trace_sys_exit(perf_syscall_exit, NULL);
 733        if (ret) {
 734                pr_info("event trace: Could not activate syscall exit trace point");
 735        } else {
 736                set_bit(num, enabled_perf_exit_syscalls);
 737                sys_perf_refcount_exit++;
 738        }
 739        mutex_unlock(&syscall_trace_lock);
 740        return ret;
 741}
 742
 743static void perf_sysexit_disable(struct trace_event_call *call)
 744{
 745        int num;
 746
 747        num = ((struct syscall_metadata *)call->data)->syscall_nr;
 748
 749        mutex_lock(&syscall_trace_lock);
 750        sys_perf_refcount_exit--;
 751        clear_bit(num, enabled_perf_exit_syscalls);
 752        if (!sys_perf_refcount_exit)
 753                unregister_trace_sys_exit(perf_syscall_exit, NULL);
 754        mutex_unlock(&syscall_trace_lock);
 755}
 756
 757#endif /* CONFIG_PERF_EVENTS */
 758
 759static int syscall_enter_register(struct trace_event_call *event,
 760                                 enum trace_reg type, void *data)
 761{
 762        struct trace_event_file *file = data;
 763
 764        switch (type) {
 765        case TRACE_REG_REGISTER:
 766                return reg_event_syscall_enter(file, event);
 767        case TRACE_REG_UNREGISTER:
 768                unreg_event_syscall_enter(file, event);
 769                return 0;
 770
 771#ifdef CONFIG_PERF_EVENTS
 772        case TRACE_REG_PERF_REGISTER:
 773                return perf_sysenter_enable(event);
 774        case TRACE_REG_PERF_UNREGISTER:
 775                perf_sysenter_disable(event);
 776                return 0;
 777        case TRACE_REG_PERF_OPEN:
 778        case TRACE_REG_PERF_CLOSE:
 779        case TRACE_REG_PERF_ADD:
 780        case TRACE_REG_PERF_DEL:
 781                return 0;
 782#endif
 783        }
 784        return 0;
 785}
 786
 787static int syscall_exit_register(struct trace_event_call *event,
 788                                 enum trace_reg type, void *data)
 789{
 790        struct trace_event_file *file = data;
 791
 792        switch (type) {
 793        case TRACE_REG_REGISTER:
 794                return reg_event_syscall_exit(file, event);
 795        case TRACE_REG_UNREGISTER:
 796                unreg_event_syscall_exit(file, event);
 797                return 0;
 798
 799#ifdef CONFIG_PERF_EVENTS
 800        case TRACE_REG_PERF_REGISTER:
 801                return perf_sysexit_enable(event);
 802        case TRACE_REG_PERF_UNREGISTER:
 803                perf_sysexit_disable(event);
 804                return 0;
 805        case TRACE_REG_PERF_OPEN:
 806        case TRACE_REG_PERF_CLOSE:
 807        case TRACE_REG_PERF_ADD:
 808        case TRACE_REG_PERF_DEL:
 809                return 0;
 810#endif
 811        }
 812        return 0;
 813}
 814