linux/tools/accounting/getdelays.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* getdelays.c
   3 *
   4 * Utility to get per-pid and per-tgid delay accounting statistics
   5 * Also illustrates usage of the taskstats interface
   6 *
   7 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
   8 * Copyright (C) Balbir Singh, IBM Corp. 2006
   9 * Copyright (c) Jay Lan, SGI. 2006
  10 *
  11 * Compile with
  12 *      gcc -I/usr/src/linux/include getdelays.c -o getdelays
  13 */
  14
  15#include <stdio.h>
  16#include <stdlib.h>
  17#include <errno.h>
  18#include <unistd.h>
  19#include <poll.h>
  20#include <string.h>
  21#include <fcntl.h>
  22#include <sys/types.h>
  23#include <sys/stat.h>
  24#include <sys/socket.h>
  25#include <sys/wait.h>
  26#include <signal.h>
  27
  28#include <linux/genetlink.h>
  29#include <linux/taskstats.h>
  30#include <linux/cgroupstats.h>
  31
  32/*
  33 * Generic macros for dealing with netlink sockets. Might be duplicated
  34 * elsewhere. It is recommended that commercial grade applications use
  35 * libnl or libnetlink and use the interfaces provided by the library
  36 */
  37#define GENLMSG_DATA(glh)       ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
  38#define GENLMSG_PAYLOAD(glh)    (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
  39#define NLA_DATA(na)            ((void *)((char*)(na) + NLA_HDRLEN))
  40#define NLA_PAYLOAD(len)        (len - NLA_HDRLEN)
  41
  42#define err(code, fmt, arg...)                  \
  43        do {                                    \
  44                fprintf(stderr, fmt, ##arg);    \
  45                exit(code);                     \
  46        } while (0)
  47
/* Reset to 0 by main() before each nested attribute walk; not read in this file. */
int done;
/* -r: requested SO_RCVBUF size for the netlink socket; 0 keeps the default. */
int rcvbufsz;
/* Buffer holding the generic netlink family name sent by get_family_id(). */
char name[100];
/* -v: when non-zero, PRINTF() emits debug output. */
int dbg;
/* -d: print delay accounting statistics. */
int print_delays;
/* -i: print IO accounting. */
int print_io_accounting;
/* -q: print voluntary/nonvoluntary context switch counts. */
int print_task_context_switch_counts;
  55
  56#define PRINTF(fmt, arg...) {                   \
  57            if (dbg) {                          \
  58                printf(fmt, ##arg);             \
  59            }                                   \
  60        }
  61
/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE    1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS        32

/*
 * Buffer layout used for every generic netlink message this tool sends or
 * receives: netlink header, generic netlink header, then up to MAX_MSG_SIZE
 * bytes of attributes.
 */
struct msgtemplate {
        struct nlmsghdr n;
        struct genlmsghdr g;
        char buf[MAX_MSG_SIZE];
};

/* cpumask string given with -m; sent verbatim in the register/deregister commands */
char cpumask[100+6*MAX_CPUS];
  74
  75static void usage(void)
  76{
  77        fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
  78                        "[-m cpumask] [-t tgid] [-p pid]\n");
  79        fprintf(stderr, "  -d: print delayacct stats\n");
  80        fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
  81        fprintf(stderr, "  -l: listen forever\n");
  82        fprintf(stderr, "  -v: debug on\n");
  83        fprintf(stderr, "  -C: container path\n");
  84}
  85
  86/*
  87 * Create a raw netlink socket and bind
  88 */
  89static int create_nl_socket(int protocol)
  90{
  91        int fd;
  92        struct sockaddr_nl local;
  93
  94        fd = socket(AF_NETLINK, SOCK_RAW, protocol);
  95        if (fd < 0)
  96                return -1;
  97
  98        if (rcvbufsz)
  99                if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
 100                                &rcvbufsz, sizeof(rcvbufsz)) < 0) {
 101                        fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
 102                                rcvbufsz);
 103                        goto error;
 104                }
 105
 106        memset(&local, 0, sizeof(local));
 107        local.nl_family = AF_NETLINK;
 108
 109        if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
 110                goto error;
 111
 112        return fd;
 113error:
 114        close(fd);
 115        return -1;
 116}
 117
 118
 119static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
 120             __u8 genl_cmd, __u16 nla_type,
 121             void *nla_data, int nla_len)
 122{
 123        struct nlattr *na;
 124        struct sockaddr_nl nladdr;
 125        int r, buflen;
 126        char *buf;
 127
 128        struct msgtemplate msg;
 129
 130        msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
 131        msg.n.nlmsg_type = nlmsg_type;
 132        msg.n.nlmsg_flags = NLM_F_REQUEST;
 133        msg.n.nlmsg_seq = 0;
 134        msg.n.nlmsg_pid = nlmsg_pid;
 135        msg.g.cmd = genl_cmd;
 136        msg.g.version = 0x1;
 137        na = (struct nlattr *) GENLMSG_DATA(&msg);
 138        na->nla_type = nla_type;
 139        na->nla_len = nla_len + 1 + NLA_HDRLEN;
 140        memcpy(NLA_DATA(na), nla_data, nla_len);
 141        msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
 142
 143        buf = (char *) &msg;
 144        buflen = msg.n.nlmsg_len ;
 145        memset(&nladdr, 0, sizeof(nladdr));
 146        nladdr.nl_family = AF_NETLINK;
 147        while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
 148                           sizeof(nladdr))) < buflen) {
 149                if (r > 0) {
 150                        buf += r;
 151                        buflen -= r;
 152                } else if (errno != EAGAIN)
 153                        return -1;
 154        }
 155        return 0;
 156}
 157
 158
 159/*
 160 * Probe the controller in genetlink to find the family id
 161 * for the TASKSTATS family
 162 */
 163static int get_family_id(int sd)
 164{
 165        struct {
 166                struct nlmsghdr n;
 167                struct genlmsghdr g;
 168                char buf[256];
 169        } ans;
 170
 171        int id = 0, rc;
 172        struct nlattr *na;
 173        int rep_len;
 174
 175        strcpy(name, TASKSTATS_GENL_NAME);
 176        rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
 177                        CTRL_ATTR_FAMILY_NAME, (void *)name,
 178                        strlen(TASKSTATS_GENL_NAME)+1);
 179        if (rc < 0)
 180                return 0;       /* sendto() failure? */
 181
 182        rep_len = recv(sd, &ans, sizeof(ans), 0);
 183        if (ans.n.nlmsg_type == NLMSG_ERROR ||
 184            (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
 185                return 0;
 186
 187        na = (struct nlattr *) GENLMSG_DATA(&ans);
 188        na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
 189        if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
 190                id = *(__u16 *) NLA_DATA(na);
 191        }
 192        return id;
 193}
 194
 195#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
 196
 197static void print_delayacct(struct taskstats *t)
 198{
 199        printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
 200               "      %15llu%15llu%15llu%15llu%15.3fms\n"
 201               "IO    %15s%15s%15s\n"
 202               "      %15llu%15llu%15llums\n"
 203               "SWAP  %15s%15s%15s\n"
 204               "      %15llu%15llu%15llums\n"
 205               "RECLAIM  %12s%15s%15s\n"
 206               "      %15llu%15llu%15llums\n"
 207               "THRASHING%12s%15s%15s\n"
 208               "      %15llu%15llu%15llums\n",
 209               "count", "real total", "virtual total",
 210               "delay total", "delay average",
 211               (unsigned long long)t->cpu_count,
 212               (unsigned long long)t->cpu_run_real_total,
 213               (unsigned long long)t->cpu_run_virtual_total,
 214               (unsigned long long)t->cpu_delay_total,
 215               average_ms((double)t->cpu_delay_total, t->cpu_count),
 216               "count", "delay total", "delay average",
 217               (unsigned long long)t->blkio_count,
 218               (unsigned long long)t->blkio_delay_total,
 219               average_ms(t->blkio_delay_total, t->blkio_count),
 220               "count", "delay total", "delay average",
 221               (unsigned long long)t->swapin_count,
 222               (unsigned long long)t->swapin_delay_total,
 223               average_ms(t->swapin_delay_total, t->swapin_count),
 224               "count", "delay total", "delay average",
 225               (unsigned long long)t->freepages_count,
 226               (unsigned long long)t->freepages_delay_total,
 227               average_ms(t->freepages_delay_total, t->freepages_count),
 228               "count", "delay total", "delay average",
 229               (unsigned long long)t->thrashing_count,
 230               (unsigned long long)t->thrashing_delay_total,
 231               average_ms(t->thrashing_delay_total, t->thrashing_count));
 232}
 233
 234static void task_context_switch_counts(struct taskstats *t)
 235{
 236        printf("\n\nTask   %15s%15s\n"
 237               "       %15llu%15llu\n",
 238               "voluntary", "nonvoluntary",
 239               (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
 240}
 241
 242static void print_cgroupstats(struct cgroupstats *c)
 243{
 244        printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
 245                "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
 246                (unsigned long long)c->nr_io_wait,
 247                (unsigned long long)c->nr_running,
 248                (unsigned long long)c->nr_stopped,
 249                (unsigned long long)c->nr_uninterruptible);
 250}
 251
 252
 253static void print_ioacct(struct taskstats *t)
 254{
 255        printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
 256                t->ac_comm,
 257                (unsigned long long)t->read_bytes,
 258                (unsigned long long)t->write_bytes,
 259                (unsigned long long)t->cancelled_write_bytes);
 260}
 261
 262int main(int argc, char *argv[])
 263{
 264        int c, rc, rep_len, aggr_len, len2;
 265        int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
 266        __u16 id;
 267        __u32 mypid;
 268
 269        struct nlattr *na;
 270        int nl_sd = -1;
 271        int len = 0;
 272        pid_t tid = 0;
 273        pid_t rtid = 0;
 274
 275        int fd = 0;
 276        int count = 0;
 277        int write_file = 0;
 278        int maskset = 0;
 279        char *logfile = NULL;
 280        int loop = 0;
 281        int containerset = 0;
 282        char *containerpath = NULL;
 283        int cfd = 0;
 284        int forking = 0;
 285        sigset_t sigset;
 286
 287        struct msgtemplate msg;
 288
 289        while (!forking) {
 290                c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
 291                if (c < 0)
 292                        break;
 293
 294                switch (c) {
 295                case 'd':
 296                        printf("print delayacct stats ON\n");
 297                        print_delays = 1;
 298                        break;
 299                case 'i':
 300                        printf("printing IO accounting\n");
 301                        print_io_accounting = 1;
 302                        break;
 303                case 'q':
 304                        printf("printing task/process context switch rates\n");
 305                        print_task_context_switch_counts = 1;
 306                        break;
 307                case 'C':
 308                        containerset = 1;
 309                        containerpath = optarg;
 310                        break;
 311                case 'w':
 312                        logfile = strdup(optarg);
 313                        printf("write to file %s\n", logfile);
 314                        write_file = 1;
 315                        break;
 316                case 'r':
 317                        rcvbufsz = atoi(optarg);
 318                        printf("receive buf size %d\n", rcvbufsz);
 319                        if (rcvbufsz < 0)
 320                                err(1, "Invalid rcv buf size\n");
 321                        break;
 322                case 'm':
 323                        strncpy(cpumask, optarg, sizeof(cpumask));
 324                        cpumask[sizeof(cpumask) - 1] = '\0';
 325                        maskset = 1;
 326                        printf("cpumask %s maskset %d\n", cpumask, maskset);
 327                        break;
 328                case 't':
 329                        tid = atoi(optarg);
 330                        if (!tid)
 331                                err(1, "Invalid tgid\n");
 332                        cmd_type = TASKSTATS_CMD_ATTR_TGID;
 333                        break;
 334                case 'p':
 335                        tid = atoi(optarg);
 336                        if (!tid)
 337                                err(1, "Invalid pid\n");
 338                        cmd_type = TASKSTATS_CMD_ATTR_PID;
 339                        break;
 340                case 'c':
 341
 342                        /* Block SIGCHLD for sigwait() later */
 343                        if (sigemptyset(&sigset) == -1)
 344                                err(1, "Failed to empty sigset");
 345                        if (sigaddset(&sigset, SIGCHLD))
 346                                err(1, "Failed to set sigchld in sigset");
 347                        sigprocmask(SIG_BLOCK, &sigset, NULL);
 348
 349                        /* fork/exec a child */
 350                        tid = fork();
 351                        if (tid < 0)
 352                                err(1, "Fork failed\n");
 353                        if (tid == 0)
 354                                if (execvp(argv[optind - 1],
 355                                    &argv[optind - 1]) < 0)
 356                                        exit(-1);
 357
 358                        /* Set the command type and avoid further processing */
 359                        cmd_type = TASKSTATS_CMD_ATTR_PID;
 360                        forking = 1;
 361                        break;
 362                case 'v':
 363                        printf("debug on\n");
 364                        dbg = 1;
 365                        break;
 366                case 'l':
 367                        printf("listen forever\n");
 368                        loop = 1;
 369                        break;
 370                default:
 371                        usage();
 372                        exit(-1);
 373                }
 374        }
 375
 376        if (write_file) {
 377                fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
 378                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
 379                if (fd == -1) {
 380                        perror("Cannot open output file\n");
 381                        exit(1);
 382                }
 383        }
 384
 385        nl_sd = create_nl_socket(NETLINK_GENERIC);
 386        if (nl_sd < 0)
 387                err(1, "error creating Netlink socket\n");
 388
 389
 390        mypid = getpid();
 391        id = get_family_id(nl_sd);
 392        if (!id) {
 393                fprintf(stderr, "Error getting family id, errno %d\n", errno);
 394                goto err;
 395        }
 396        PRINTF("family id %d\n", id);
 397
 398        if (maskset) {
 399                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 400                              TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
 401                              &cpumask, strlen(cpumask) + 1);
 402                PRINTF("Sent register cpumask, retval %d\n", rc);
 403                if (rc < 0) {
 404                        fprintf(stderr, "error sending register cpumask\n");
 405                        goto err;
 406                }
 407        }
 408
 409        if (tid && containerset) {
 410                fprintf(stderr, "Select either -t or -C, not both\n");
 411                goto err;
 412        }
 413
 414        /*
 415         * If we forked a child, wait for it to exit. Cannot use waitpid()
 416         * as all the delicious data would be reaped as part of the wait
 417         */
 418        if (tid && forking) {
 419                int sig_received;
 420                sigwait(&sigset, &sig_received);
 421        }
 422
 423        if (tid) {
 424                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 425                              cmd_type, &tid, sizeof(__u32));
 426                PRINTF("Sent pid/tgid, retval %d\n", rc);
 427                if (rc < 0) {
 428                        fprintf(stderr, "error sending tid/tgid cmd\n");
 429                        goto done;
 430                }
 431        }
 432
 433        if (containerset) {
 434                cfd = open(containerpath, O_RDONLY);
 435                if (cfd < 0) {
 436                        perror("error opening container file");
 437                        goto err;
 438                }
 439                rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
 440                              CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
 441                if (rc < 0) {
 442                        perror("error sending cgroupstats command");
 443                        goto err;
 444                }
 445        }
 446        if (!maskset && !tid && !containerset) {
 447                usage();
 448                goto err;
 449        }
 450
 451        do {
 452                rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
 453                PRINTF("received %d bytes\n", rep_len);
 454
 455                if (rep_len < 0) {
 456                        fprintf(stderr, "nonfatal reply error: errno %d\n",
 457                                errno);
 458                        continue;
 459                }
 460                if (msg.n.nlmsg_type == NLMSG_ERROR ||
 461                    !NLMSG_OK((&msg.n), rep_len)) {
 462                        struct nlmsgerr *err = NLMSG_DATA(&msg);
 463                        fprintf(stderr, "fatal reply error,  errno %d\n",
 464                                err->error);
 465                        goto done;
 466                }
 467
 468                PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
 469                       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
 470
 471
 472                rep_len = GENLMSG_PAYLOAD(&msg.n);
 473
 474                na = (struct nlattr *) GENLMSG_DATA(&msg);
 475                len = 0;
 476                while (len < rep_len) {
 477                        len += NLA_ALIGN(na->nla_len);
 478                        switch (na->nla_type) {
 479                        case TASKSTATS_TYPE_AGGR_TGID:
 480                                /* Fall through */
 481                        case TASKSTATS_TYPE_AGGR_PID:
 482                                aggr_len = NLA_PAYLOAD(na->nla_len);
 483                                len2 = 0;
 484                                /* For nested attributes, na follows */
 485                                na = (struct nlattr *) NLA_DATA(na);
 486                                done = 0;
 487                                while (len2 < aggr_len) {
 488                                        switch (na->nla_type) {
 489                                        case TASKSTATS_TYPE_PID:
 490                                                rtid = *(int *) NLA_DATA(na);
 491                                                if (print_delays)
 492                                                        printf("PID\t%d\n", rtid);
 493                                                break;
 494                                        case TASKSTATS_TYPE_TGID:
 495                                                rtid = *(int *) NLA_DATA(na);
 496                                                if (print_delays)
 497                                                        printf("TGID\t%d\n", rtid);
 498                                                break;
 499                                        case TASKSTATS_TYPE_STATS:
 500                                                count++;
 501                                                if (print_delays)
 502                                                        print_delayacct((struct taskstats *) NLA_DATA(na));
 503                                                if (print_io_accounting)
 504                                                        print_ioacct((struct taskstats *) NLA_DATA(na));
 505                                                if (print_task_context_switch_counts)
 506                                                        task_context_switch_counts((struct taskstats *) NLA_DATA(na));
 507                                                if (fd) {
 508                                                        if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
 509                                                                err(1,"write error\n");
 510                                                        }
 511                                                }
 512                                                if (!loop)
 513                                                        goto done;
 514                                                break;
 515                                        case TASKSTATS_TYPE_NULL:
 516                                                break;
 517                                        default:
 518                                                fprintf(stderr, "Unknown nested"
 519                                                        " nla_type %d\n",
 520                                                        na->nla_type);
 521                                                break;
 522                                        }
 523                                        len2 += NLA_ALIGN(na->nla_len);
 524                                        na = (struct nlattr *)((char *)na +
 525                                                               NLA_ALIGN(na->nla_len));
 526                                }
 527                                break;
 528
 529                        case CGROUPSTATS_TYPE_CGROUP_STATS:
 530                                print_cgroupstats(NLA_DATA(na));
 531                                break;
 532                        default:
 533                                fprintf(stderr, "Unknown nla_type %d\n",
 534                                        na->nla_type);
 535                        case TASKSTATS_TYPE_NULL:
 536                                break;
 537                        }
 538                        na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
 539                }
 540        } while (loop);
 541done:
 542        if (maskset) {
 543                rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
 544                              TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
 545                              &cpumask, strlen(cpumask) + 1);
 546                printf("Sent deregister mask, retval %d\n", rc);
 547                if (rc < 0)
 548                        err(rc, "error sending deregister cpumask\n");
 549        }
 550err:
 551        close(nl_sd);
 552        if (fd)
 553                close(fd);
 554        if (cfd)
 555                close(cfd);
 556        return 0;
 557}
 558