iproute2/misc/ss.c
<<
>>
Prefs
   1/*
   2 * ss.c         "sockstat", socket statistics
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 */
  11
  12#include <stdio.h>
  13#include <stdlib.h>
  14#include <unistd.h>
  15#include <fcntl.h>
  16#include <sys/ioctl.h>
  17#include <sys/socket.h>
  18#include <sys/uio.h>
  19#include <sys/sysmacros.h>
  20#include <netinet/in.h>
  21#include <string.h>
  22#include <errno.h>
  23#include <netdb.h>
  24#include <arpa/inet.h>
  25#include <dirent.h>
  26#include <fnmatch.h>
  27#include <getopt.h>
  28#include <stdbool.h>
  29#include <limits.h>
  30#include <stdarg.h>
  31
  32#include "ss_util.h"
  33#include "utils.h"
  34#include "rt_names.h"
  35#include "ll_map.h"
  36#include "libnetlink.h"
  37#include "namespace.h"
  38#include "version.h"
  39#include "rt_names.h"
  40#include "cg_map.h"
  41
  42#include <linux/tcp.h>
  43#include <linux/unix_diag.h>
  44#include <linux/netdevice.h>    /* for MAX_ADDR_LEN */
  45#include <linux/filter.h>
  46#include <linux/xdp_diag.h>
  47#include <linux/packet_diag.h>
  48#include <linux/netlink_diag.h>
  49#include <linux/sctp.h>
  50#include <linux/vm_sockets_diag.h>
  51#include <linux/net.h>
  52#include <linux/tipc.h>
  53#include <linux/tipc_netlink.h>
  54#include <linux/tipc_sockets_diag.h>
  55#include <linux/tls.h>
  56#include <linux/mptcp.h>
  57
  58#if HAVE_RPC
  59#include <rpc/rpc.h>
  60#include <rpc/xdr.h>
  61#endif
  62
  63/* AF_VSOCK/PF_VSOCK is only provided since glibc 2.18 */
  64#ifndef PF_VSOCK
  65#define PF_VSOCK 40
  66#endif
  67#ifndef AF_VSOCK
  68#define AF_VSOCK PF_VSOCK
  69#endif
  70
  71#ifndef IPPROTO_MPTCP
  72#define IPPROTO_MPTCP 262
  73#endif
  74
  75#define BUF_CHUNK (1024 * 1024) /* Buffer chunk allocation size */
  76#define BUF_CHUNKS_MAX 5        /* Maximum number of allocated buffer chunks */
  77#define LEN_ALIGN(x) (((x) + 1) & ~1)
  78
  79#if HAVE_SELINUX
  80#include <selinux/selinux.h>
  81#else
  82/* Stubs for SELinux functions */
  83static int is_selinux_enabled(void)
  84{
  85        return -1;
  86}
  87
  88static int getpidcon(pid_t pid, char **context)
  89{
  90        *context = NULL;
  91        return -1;
  92}
  93
  94static int getfilecon(char *path, char **context)
  95{
  96        *context = NULL;
  97        return -1;
  98}
  99
 100static int security_get_initial_context(char *name,  char **context)
 101{
 102        *context = NULL;
 103        return -1;
 104}
 105
 106static void freecon(char *context)
 107{
 108        free(context);
 109}
 110#endif
 111
/*
 * Run-time option state. Variables declared without "static" have external
 * linkage; the rest are private to this file. All flags start at zero
 * except show_header.
 */
int preferred_family = AF_UNSPEC;       /* overwritten by filter_af_set() */
static int show_options;
int show_details;
static int show_users;
static int show_mem;
static int show_tcpinfo;
static int show_bpf;
static int show_proc_ctx;
static int show_sock_ctx;
static int show_header = 1;             /* the only flag that defaults to 1 */
static int follow_events;
static int sctp_ino;                    /* compared with sockstat.ino in is_sctp_assoc() */
static int show_tipcinfo;
static int show_tos;
static int show_cgroup;
static int show_inet_sockopt;
int oneline;
 129
 130enum col_id {
 131        COL_NETID,
 132        COL_STATE,
 133        COL_RECVQ,
 134        COL_SENDQ,
 135        COL_ADDR,
 136        COL_SERV,
 137        COL_RADDR,
 138        COL_RSERV,
 139        COL_EXT,
 140        COL_PROC,
 141        COL_MAX
 142};
 143
 144enum col_align {
 145        ALIGN_LEFT,
 146        ALIGN_CENTER,
 147        ALIGN_RIGHT
 148};
 149
 150struct column {
 151        const enum col_align align;
 152        const char *header;
 153        const char *ldelim;
 154        int disabled;
 155        int width;      /* Calculated, including additional layout spacing */
 156        int max_len;    /* Measured maximum field length in this column */
 157};
 158
 159static struct column columns[] = {
 160        { ALIGN_LEFT,   "Netid",                "",     0, 0, 0 },
 161        { ALIGN_LEFT,   "State",                " ",    0, 0, 0 },
 162        { ALIGN_LEFT,   "Recv-Q",               " ",    0, 0, 0 },
 163        { ALIGN_LEFT,   "Send-Q",               " ",    0, 0, 0 },
 164        { ALIGN_RIGHT,  "Local Address:",       " ",    0, 0, 0 },
 165        { ALIGN_LEFT,   "Port",                 "",     0, 0, 0 },
 166        { ALIGN_RIGHT,  "Peer Address:",        " ",    0, 0, 0 },
 167        { ALIGN_LEFT,   "Port",                 "",     0, 0, 0 },
 168        { ALIGN_LEFT,   "Process",              "",     0, 0, 0 },
 169        { ALIGN_LEFT,   "",                     "",     0, 0, 0 },
 170};
 171
 172static struct column *current_field = columns;
 173
 174/* Output buffer: chained chunks of BUF_CHUNK bytes. Each field is written to
 175 * the buffer as a variable size token. A token consists of a 16 bits length
 176 * field, followed by a string which is not NULL-terminated.
 177 *
 178 * A new chunk is allocated and linked when the current chunk doesn't have
 179 * enough room to store the current token as a whole.
 180 */
 181struct buf_chunk {
 182        struct buf_chunk *next; /* Next chained chunk */
 183        char *end;              /* Current end of content */
 184        char data[0];
 185};
 186
 187struct buf_token {
 188        uint16_t len;           /* Data length, excluding length descriptor */
 189        char data[0];
 190};
 191
 192static struct {
 193        struct buf_token *cur;  /* Position of current token in chunk */
 194        struct buf_chunk *head; /* First chunk */
 195        struct buf_chunk *tail; /* Current chunk */
 196        int chunks;             /* Number of allocated chunks */
 197} buffer;
 198
/*
 * Protocol name strings. The tcp6/udp6/sctp variants are only declared when
 * HAVE_RPC is defined. dg_proto has no initializer here, so it starts NULL.
 */
static const char *TCP_PROTO = "tcp";
static const char *UDP_PROTO = "udp";
#ifdef HAVE_RPC
static const char *TCP6_PROTO = "tcp6";
static const char *UDP6_PROTO = "udp6";
static const char *SCTP_PROTO = "sctp";
#endif
static const char *RAW_PROTO = "raw";
static const char *dg_proto;
 208
/*
 * Socket table ("database") identifiers. Each value is used as a bit
 * position (1 << id) in struct filter.dbs and as an index into
 * default_dbs[]. MAX_DB is the number of tables and doubles as the list
 * terminator in filter_db_parse().
 */
enum {
        TCP_DB,
        MPTCP_DB,
        DCCP_DB,
        UDP_DB,
        RAW_DB,
        UNIX_DG_DB,
        UNIX_ST_DB,
        UNIX_SQ_DB,
        PACKET_DG_DB,
        PACKET_R_DB,
        NETLINK_DB,
        SCTP_DB,
        VSOCK_ST_DB,
        VSOCK_DG_DB,
        TIPC_DB,
        XDP_DB,
        MAX_DB
};

/* Bit masks grouping related tables */
#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB))
#define ALL_DB ((1<<MAX_DB)-1)
#define INET_L4_DBM ((1<<TCP_DB)|(1<<MPTCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB))
#define INET_DBM (INET_L4_DBM | (1<<RAW_DB))
#define VSOCK_DBM ((1<<VSOCK_ST_DB)|(1<<VSOCK_DG_DB))
 235
/*
 * Socket states. Each value is used as a bit position (1 << state) in
 * state masks such as struct filter.states; SS_MAX is the number of states.
 */
enum {
        SS_UNKNOWN,
        SS_ESTABLISHED,
        SS_SYN_SENT,
        SS_SYN_RECV,
        SS_FIN_WAIT1,
        SS_FIN_WAIT2,
        SS_TIME_WAIT,
        SS_CLOSE,
        SS_CLOSE_WAIT,
        SS_LAST_ACK,
        SS_LISTEN,
        SS_CLOSING,
        SS_MAX
};

/* SCTP association states; these index sctp_sstate_name[] */
enum {
        SCTP_STATE_CLOSED               = 0,
        SCTP_STATE_COOKIE_WAIT          = 1,
        SCTP_STATE_COOKIE_ECHOED        = 2,
        SCTP_STATE_ESTABLISHED          = 3,
        SCTP_STATE_SHUTDOWN_PENDING     = 4,
        SCTP_STATE_SHUTDOWN_SENT        = 5,
        SCTP_STATE_SHUTDOWN_RECEIVED    = 6,
        SCTP_STATE_SHUTDOWN_ACK_SENT    = 7,
};

/* SS_ALL: every state bit set */
#define SS_ALL ((1 << SS_MAX) - 1)
/* SS_CONN: every state except LISTEN, CLOSE, TIME_WAIT and SYN_RECV */
#define SS_CONN (SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)))
/* TIPC_SS_CONN: only ESTABLISHED, LISTEN and CLOSE */
#define TIPC_SS_CONN ((1<<SS_ESTABLISHED)|(1<<SS_LISTEN)|(1<<SS_CLOSE))
 266
 267#include "ssfilter.h"
 268
/* Socket selection criteria; each integer field is a bit mask. */
struct filter {
        int dbs;                /* Mask of (1 << *_DB) table bits */
        int states;             /* Mask of (1 << SS_*) state bits */
        uint64_t families;      /* Mask of FAMILY_MASK(AF_*) bits */
        struct ssfilter *f;     /* Type comes from ssfilter.h; not used in the code visible here */
        bool kill;
        struct rtnl_handle *rth_for_killing;
};

/* Bit representing address family "family" in struct filter.families */
#define FAMILY_MASK(family) ((uint64_t)1 << (family))
 279
/*
 * Per-table defaults, indexed by *_DB identifier: the state mask that
 * filter_db_set() adds when a table is enabled, and the address families
 * that filter_merge_defaults() adds when a selected table has none of its
 * families chosen.
 */
static const struct filter default_dbs[MAX_DB] = {
        [TCP_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [MPTCP_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [DCCP_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [UDP_DB] = {
                .states   = (1 << SS_ESTABLISHED),
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [RAW_DB] = {
                .states   = (1 << SS_ESTABLISHED),
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [UNIX_DG_DB] = {
                .states   = (1 << SS_CLOSE),
                .families = FAMILY_MASK(AF_UNIX),
        },
        [UNIX_ST_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_UNIX),
        },
        [UNIX_SQ_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_UNIX),
        },
        [PACKET_DG_DB] = {
                .states   = (1 << SS_CLOSE),
                .families = FAMILY_MASK(AF_PACKET),
        },
        [PACKET_R_DB] = {
                .states   = (1 << SS_CLOSE),
                .families = FAMILY_MASK(AF_PACKET),
        },
        [NETLINK_DB] = {
                .states   = (1 << SS_CLOSE),
                .families = FAMILY_MASK(AF_NETLINK),
        },
        [SCTP_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
        },
        [VSOCK_ST_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_VSOCK),
        },
        [VSOCK_DG_DB] = {
                .states   = SS_CONN,
                .families = FAMILY_MASK(AF_VSOCK),
        },
        [TIPC_DB] = {
                .states   = TIPC_SS_CONN,
                .families = FAMILY_MASK(AF_TIPC),
        },
        [XDP_DB] = {
                .states   = (1 << SS_CLOSE),
                .families = FAMILY_MASK(AF_XDP),
        },
};
 346
/*
 * Per-address-family defaults, indexed by AF_* value: the state mask that
 * filter_af_set() adds, and the tables that filter_merge_defaults() adds
 * when a selected family has none of its tables chosen. Families not listed
 * here are zero-initialized.
 */
static const struct filter default_afs[AF_MAX] = {
        [AF_INET] = {
                .dbs    = INET_DBM,
                .states = SS_CONN,
        },
        [AF_INET6] = {
                .dbs    = INET_DBM,
                .states = SS_CONN,
        },
        [AF_UNIX] = {
                .dbs    = UNIX_DBM,
                .states = SS_CONN,
        },
        [AF_PACKET] = {
                .dbs    = PACKET_DBM,
                .states = (1 << SS_CLOSE),
        },
        [AF_NETLINK] = {
                .dbs    = (1 << NETLINK_DB),
                .states = (1 << SS_CLOSE),
        },
        [AF_VSOCK] = {
                .dbs    = VSOCK_DBM,
                .states = SS_CONN,
        },
        [AF_TIPC] = {
                .dbs    = (1 << TIPC_DB),
                .states = TIPC_SS_CONN,
        },
        [AF_XDP] = {
                .dbs    = (1 << XDP_DB),
                .states = (1 << SS_CLOSE),
        },
};
 381
/* Starts at 1; cleared by filter_db_set() and filter_af_set() */
static int do_default = 1;
/* File-scope filter instance, zero-initialized */
static struct filter current_filter;
 384
 385static void filter_db_set(struct filter *f, int db, bool enable)
 386{
 387        if (enable) {
 388                f->states   |= default_dbs[db].states;
 389                f->dbs      |= 1 << db;
 390        } else {
 391                f->dbs &= ~(1 << db);
 392        }
 393        do_default   = 0;
 394}
 395
 396static int filter_db_parse(struct filter *f, const char *s)
 397{
 398        const struct {
 399                const char *name;
 400                int dbs[MAX_DB + 1];
 401        } db_name_tbl[] = {
 402#define ENTRY(name, ...) { #name, { __VA_ARGS__, MAX_DB } }
 403                ENTRY(all, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, RAW_DB,
 404                           UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB,
 405                           PACKET_R_DB, PACKET_DG_DB, NETLINK_DB,
 406                           SCTP_DB, VSOCK_ST_DB, VSOCK_DG_DB, XDP_DB),
 407                ENTRY(inet, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, SCTP_DB, RAW_DB),
 408                ENTRY(udp, UDP_DB),
 409                ENTRY(dccp, DCCP_DB),
 410                ENTRY(tcp, TCP_DB),
 411                ENTRY(mptcp, MPTCP_DB),
 412                ENTRY(sctp, SCTP_DB),
 413                ENTRY(raw, RAW_DB),
 414                ENTRY(unix, UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB),
 415                ENTRY(unix_stream, UNIX_ST_DB),
 416                ENTRY(u_str, UNIX_ST_DB),       /* alias for unix_stream */
 417                ENTRY(unix_dgram, UNIX_DG_DB),
 418                ENTRY(u_dgr, UNIX_DG_DB),       /* alias for unix_dgram */
 419                ENTRY(unix_seqpacket, UNIX_SQ_DB),
 420                ENTRY(u_seq, UNIX_SQ_DB),       /* alias for unix_seqpacket */
 421                ENTRY(packet, PACKET_R_DB, PACKET_DG_DB),
 422                ENTRY(packet_raw, PACKET_R_DB),
 423                ENTRY(p_raw, PACKET_R_DB),      /* alias for packet_raw */
 424                ENTRY(packet_dgram, PACKET_DG_DB),
 425                ENTRY(p_dgr, PACKET_DG_DB),     /* alias for packet_dgram */
 426                ENTRY(netlink, NETLINK_DB),
 427                ENTRY(vsock, VSOCK_ST_DB, VSOCK_DG_DB),
 428                ENTRY(vsock_stream, VSOCK_ST_DB),
 429                ENTRY(v_str, VSOCK_ST_DB),      /* alias for vsock_stream */
 430                ENTRY(vsock_dgram, VSOCK_DG_DB),
 431                ENTRY(v_dgr, VSOCK_DG_DB),      /* alias for vsock_dgram */
 432                ENTRY(xdp, XDP_DB),
 433#undef ENTRY
 434        };
 435        bool enable = true;
 436        unsigned int i;
 437        const int *dbp;
 438
 439        if (s[0] == '!') {
 440                enable = false;
 441                s++;
 442        }
 443        for (i = 0; i < ARRAY_SIZE(db_name_tbl); i++) {
 444                if (strcmp(s, db_name_tbl[i].name))
 445                        continue;
 446                for (dbp = db_name_tbl[i].dbs; *dbp != MAX_DB; dbp++)
 447                        filter_db_set(f, *dbp, enable);
 448                return 0;
 449        }
 450        return -1;
 451}
 452
 453static void filter_af_set(struct filter *f, int af)
 454{
 455        f->states          |= default_afs[af].states;
 456        f->families        |= FAMILY_MASK(af);
 457        do_default          = 0;
 458        preferred_family    = af;
 459}
 460
 461static int filter_af_get(struct filter *f, int af)
 462{
 463        return !!(f->families & FAMILY_MASK(af));
 464}
 465
 466static void filter_states_set(struct filter *f, int states)
 467{
 468        if (states)
 469                f->states = states;
 470}
 471
 472static void filter_merge_defaults(struct filter *f)
 473{
 474        int db;
 475        int af;
 476
 477        for (db = 0; db < MAX_DB; db++) {
 478                if (!(f->dbs & (1 << db)))
 479                        continue;
 480
 481                if (!(default_dbs[db].families & f->families))
 482                        f->families |= default_dbs[db].families;
 483        }
 484        for (af = 0; af < AF_MAX; af++) {
 485                if (!(f->families & FAMILY_MASK(af)))
 486                        continue;
 487
 488                if (!(default_afs[af].dbs & f->dbs))
 489                        f->dbs |= default_afs[af].dbs;
 490        }
 491}
 492
 493static FILE *generic_proc_open(const char *env, const char *name)
 494{
 495        const char *p = getenv(env);
 496        char store[128];
 497
 498        if (!p) {
 499                p = getenv("PROC_ROOT") ? : "/proc";
 500                snprintf(store, sizeof(store)-1, "%s/%s", p, name);
 501                p = store;
 502        }
 503
 504        return fopen(p, "r");
 505}
/*
 * Convenience openers built on generic_proc_open(): the first argument is
 * the environment variable that can override the full path, the second the
 * default path relative to the proc root.
 */
#define net_tcp_open()          generic_proc_open("PROC_NET_TCP", "net/tcp")
#define net_tcp6_open()         generic_proc_open("PROC_NET_TCP6", "net/tcp6")
#define net_udp_open()          generic_proc_open("PROC_NET_UDP", "net/udp")
#define net_udp6_open()         generic_proc_open("PROC_NET_UDP6", "net/udp6")
#define net_raw_open()          generic_proc_open("PROC_NET_RAW", "net/raw")
#define net_raw6_open()         generic_proc_open("PROC_NET_RAW6", "net/raw6")
#define net_unix_open()         generic_proc_open("PROC_NET_UNIX", "net/unix")
#define net_packet_open()       generic_proc_open("PROC_NET_PACKET", \
                                                        "net/packet")
#define net_netlink_open()      generic_proc_open("PROC_NET_NETLINK", \
                                                        "net/netlink")
#define net_sockstat_open()     generic_proc_open("PROC_NET_SOCKSTAT", \
                                                        "net/sockstat")
#define net_sockstat6_open()    generic_proc_open("PROC_NET_SOCKSTAT6", \
                                                        "net/sockstat6")
#define net_snmp_open()         generic_proc_open("PROC_NET_SNMP", "net/snmp")
#define ephemeral_ports_open()  generic_proc_open("PROC_IP_LOCAL_PORT_RANGE", \
                                        "sys/net/ipv4/ip_local_port_range")
 524
/*
 * One (process, file descriptor) pair that refers to a socket inode.
 * Entries with the same hash are chained through "next". The three strings
 * are heap copies owned by the entry (see user_ent_add() and
 * user_ent_destroy()).
 */
struct user_ent {
        struct user_ent *next;
        unsigned int    ino;            /* Socket inode number */
        int             pid;
        int             fd;
        char            *process;       /* Process name */
        char            *process_ctx;   /* Process security context */
        char            *socket_ctx;    /* Socket security context */
};
 534
 535#define USER_ENT_HASH_SIZE      256
 536static struct user_ent *user_ent_hash[USER_ENT_HASH_SIZE];
 537
 538static int user_ent_hashfn(unsigned int ino)
 539{
 540        int val = (ino >> 24) ^ (ino >> 16) ^ (ino >> 8) ^ ino;
 541
 542        return val & (USER_ENT_HASH_SIZE - 1);
 543}
 544
 545static void user_ent_add(unsigned int ino, char *process,
 546                                        int pid, int fd,
 547                                        char *proc_ctx,
 548                                        char *sock_ctx)
 549{
 550        struct user_ent *p, **pp;
 551
 552        p = malloc(sizeof(struct user_ent));
 553        if (!p) {
 554                fprintf(stderr, "ss: failed to malloc buffer\n");
 555                abort();
 556        }
 557        p->next = NULL;
 558        p->ino = ino;
 559        p->pid = pid;
 560        p->fd = fd;
 561        p->process = strdup(process);
 562        p->process_ctx = strdup(proc_ctx);
 563        p->socket_ctx = strdup(sock_ctx);
 564
 565        pp = &user_ent_hash[user_ent_hashfn(ino)];
 566        p->next = *pp;
 567        *pp = p;
 568}
 569
 570static void user_ent_destroy(void)
 571{
 572        struct user_ent *p, *p_next;
 573        int cnt = 0;
 574
 575        while (cnt != USER_ENT_HASH_SIZE) {
 576                p = user_ent_hash[cnt];
 577                while (p) {
 578                        free(p->process);
 579                        free(p->process_ctx);
 580                        free(p->socket_ctx);
 581                        p_next = p->next;
 582                        free(p);
 583                        p = p_next;
 584                }
 585                cnt++;
 586        }
 587}
 588
 589static void user_ent_hash_build(void)
 590{
 591        const char *root = getenv("PROC_ROOT") ? : "/proc/";
 592        struct dirent *d;
 593        char name[1024];
 594        int nameoff;
 595        DIR *dir;
 596        char *pid_context;
 597        char *sock_context;
 598        const char *no_ctx = "unavailable";
 599        static int user_ent_hash_build_init;
 600
 601        /* If show_users & show_proc_ctx set only do this once */
 602        if (user_ent_hash_build_init != 0)
 603                return;
 604
 605        user_ent_hash_build_init = 1;
 606
 607        strlcpy(name, root, sizeof(name));
 608
 609        if (strlen(name) == 0 || name[strlen(name)-1] != '/')
 610                strcat(name, "/");
 611
 612        nameoff = strlen(name);
 613
 614        dir = opendir(name);
 615        if (!dir)
 616                return;
 617
 618        while ((d = readdir(dir)) != NULL) {
 619                struct dirent *d1;
 620                char process[16];
 621                char *p;
 622                int pid, pos;
 623                DIR *dir1;
 624                char crap;
 625
 626                if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
 627                        continue;
 628
 629                if (getpidcon(pid, &pid_context) != 0)
 630                        pid_context = strdup(no_ctx);
 631
 632                snprintf(name + nameoff, sizeof(name) - nameoff, "%d/fd/", pid);
 633                pos = strlen(name);
 634                if ((dir1 = opendir(name)) == NULL) {
 635                        freecon(pid_context);
 636                        continue;
 637                }
 638
 639                process[0] = '\0';
 640                p = process;
 641
 642                while ((d1 = readdir(dir1)) != NULL) {
 643                        const char *pattern = "socket:[";
 644                        unsigned int ino;
 645                        char lnk[64];
 646                        int fd;
 647                        ssize_t link_len;
 648                        char tmp[1024];
 649
 650                        if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
 651                                continue;
 652
 653                        snprintf(name+pos, sizeof(name) - pos, "%d", fd);
 654
 655                        link_len = readlink(name, lnk, sizeof(lnk)-1);
 656                        if (link_len == -1)
 657                                continue;
 658                        lnk[link_len] = '\0';
 659
 660                        if (strncmp(lnk, pattern, strlen(pattern)))
 661                                continue;
 662
 663                        sscanf(lnk, "socket:[%u]", &ino);
 664
 665                        snprintf(tmp, sizeof(tmp), "%s/%d/fd/%s",
 666                                        root, pid, d1->d_name);
 667
 668                        if (getfilecon(tmp, &sock_context) <= 0)
 669                                sock_context = strdup(no_ctx);
 670
 671                        if (*p == '\0') {
 672                                FILE *fp;
 673
 674                                snprintf(tmp, sizeof(tmp), "%s/%d/stat",
 675                                        root, pid);
 676                                if ((fp = fopen(tmp, "r")) != NULL) {
 677                                        if (fscanf(fp, "%*d (%[^)])", p) < 1)
 678                                                ; /* ignore */
 679                                        fclose(fp);
 680                                }
 681                        }
 682                        user_ent_add(ino, p, pid, fd,
 683                                        pid_context, sock_context);
 684                        freecon(sock_context);
 685                }
 686                freecon(pid_context);
 687                closedir(dir1);
 688        }
 689        closedir(dir);
 690}
 691
/* Output formats understood by find_entry() */
enum entry_types {
        USERS,          /* ("name",pid=..,fd=..) */
        PROC_CTX,       /* adds proc_ctx=.. */
        PROC_SOCK_CTX   /* adds proc_ctx=.. and sock_ctx=.. */
};
 697
 698#define ENTRY_BUF_SIZE 512
 699static int find_entry(unsigned int ino, char **buf, int type)
 700{
 701        struct user_ent *p;
 702        int cnt = 0;
 703        char *ptr;
 704        char *new_buf;
 705        int len, new_buf_len;
 706        int buf_used = 0;
 707        int buf_len = 0;
 708
 709        if (!ino)
 710                return 0;
 711
 712        p = user_ent_hash[user_ent_hashfn(ino)];
 713        ptr = *buf = NULL;
 714        while (p) {
 715                if (p->ino != ino)
 716                        goto next;
 717
 718                while (1) {
 719                        ptr = *buf + buf_used;
 720                        switch (type) {
 721                        case USERS:
 722                                len = snprintf(ptr, buf_len - buf_used,
 723                                        "(\"%s\",pid=%d,fd=%d),",
 724                                        p->process, p->pid, p->fd);
 725                                break;
 726                        case PROC_CTX:
 727                                len = snprintf(ptr, buf_len - buf_used,
 728                                        "(\"%s\",pid=%d,proc_ctx=%s,fd=%d),",
 729                                        p->process, p->pid,
 730                                        p->process_ctx, p->fd);
 731                                break;
 732                        case PROC_SOCK_CTX:
 733                                len = snprintf(ptr, buf_len - buf_used,
 734                                        "(\"%s\",pid=%d,proc_ctx=%s,fd=%d,sock_ctx=%s),",
 735                                        p->process, p->pid,
 736                                        p->process_ctx, p->fd,
 737                                        p->socket_ctx);
 738                                break;
 739                        default:
 740                                fprintf(stderr, "ss: invalid type: %d\n", type);
 741                                abort();
 742                        }
 743
 744                        if (len < 0 || len >= buf_len - buf_used) {
 745                                new_buf_len = buf_len + ENTRY_BUF_SIZE;
 746                                new_buf = realloc(*buf, new_buf_len);
 747                                if (!new_buf) {
 748                                        fprintf(stderr, "ss: failed to malloc buffer\n");
 749                                        abort();
 750                                }
 751                                *buf = new_buf;
 752                                buf_len = new_buf_len;
 753                                continue;
 754                        } else {
 755                                buf_used += len;
 756                                break;
 757                        }
 758                }
 759                cnt++;
 760next:
 761                p = p->next;
 762        }
 763        if (buf_used) {
 764                ptr = *buf + buf_used;
 765                ptr[-1] = '\0';
 766        }
 767        return cnt;
 768}
 769
 770static unsigned long long cookie_sk_get(const uint32_t *cookie)
 771{
 772        return (((unsigned long long)cookie[1] << 31) << 1) | cookie[0];
 773}
 774
/* Display names for SCTP association states, indexed by SCTP_STATE_* */
static const char *sctp_sstate_name[] = {
        [SCTP_STATE_CLOSED] = "CLOSED",
        [SCTP_STATE_COOKIE_WAIT] = "COOKIE_WAIT",
        [SCTP_STATE_COOKIE_ECHOED] = "COOKIE_ECHOED",
        [SCTP_STATE_ESTABLISHED] = "ESTAB",
        [SCTP_STATE_SHUTDOWN_PENDING] = "SHUTDOWN_PENDING",
        [SCTP_STATE_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
        [SCTP_STATE_SHUTDOWN_RECEIVED] = "SHUTDOWN_RECEIVED",
        [SCTP_STATE_SHUTDOWN_ACK_SENT] = "ACK_SENT",
};

/*
 * Display names for socket types, indexed by SOCK_* value. Index 0 is
 * "UNKNOWN"; indexes without an initializer are NULL.
 */
static const char * const stype_nameg[] = {
        "UNKNOWN",
        [SOCK_STREAM] = "STREAM",
        [SOCK_DGRAM] = "DGRAM",
        [SOCK_RDM] = "RDM",
        [SOCK_SEQPACKET] = "SEQPACKET",
};
 793
/*
 * Protocol-independent description of one socket. Embedded as the first
 * member of struct tcpstat.
 */
struct sockstat {
        struct sockstat    *next;
        unsigned int        type;
        uint16_t            prot;
        uint16_t            raw_prot;
        inet_prefix         local;
        inet_prefix         remote;
        int                 lport;
        int                 rport;
        int                 state;
        int                 rq, wq;
        unsigned int ino;       /* Compared with sctp_ino in is_sctp_assoc() */
        unsigned int uid;
        int                 refcnt;
        unsigned int        iface;
        unsigned long long  sk;
        char *name;
        char *peer_name;
        __u32               mark;
        __u64               cgroup_id;
};
 815
/* DCTCP-specific values; referenced from struct tcpstat through a pointer */
struct dctcpstat {
        unsigned int    ce_state;
        unsigned int    alpha;
        unsigned int    ab_ecn;
        unsigned int    ab_tot;
        bool            enabled;
};
 823
/*
 * Per-socket TCP statistics. The generic socket description comes first
 * ("ss"); the dctcp and bbr_info members are pointers to separate objects.
 */
struct tcpstat {
        struct sockstat     ss;
        unsigned int        timer;
        unsigned int        timeout;
        int                 probes;
        char                cong_alg[16];
        double              rto, ato, rtt, rttvar;
        int                 qack, ssthresh, backoff;
        double              send_bps;
        int                 snd_wscale;
        int                 rcv_wscale;
        int                 mss;
        int                 rcv_mss;
        int                 advmss;
        unsigned int        pmtu;
        unsigned int        cwnd;
        unsigned int        lastsnd;
        unsigned int        lastrcv;
        unsigned int        lastack;
        double              pacing_rate;
        double              pacing_rate_max;
        double              delivery_rate;
        unsigned long long  bytes_acked;
        unsigned long long  bytes_received;
        unsigned int        segs_out;
        unsigned int        segs_in;
        unsigned int        data_segs_out;
        unsigned int        data_segs_in;
        unsigned int        unacked;
        unsigned int        retrans;
        unsigned int        retrans_total;
        unsigned int        lost;
        unsigned int        sacked;
        unsigned int        fackets;
        unsigned int        reordering;
        unsigned int        not_sent;
        unsigned int        delivered;
        unsigned int        delivered_ce;
        unsigned int        dsack_dups;
        unsigned int        reord_seen;
        double              rcv_rtt;
        double              min_rtt;
        unsigned int        rcv_ooopack;
        unsigned int        snd_wnd;
        int                 rcv_space;
        unsigned int        rcv_ssthresh;
        unsigned long long  busy_time;
        unsigned long long  rwnd_limited;
        unsigned long long  sndbuf_limited;
        unsigned long long  bytes_sent;
        unsigned long long  bytes_retrans;
        bool                has_ts_opt;
        bool                has_sack_opt;
        bool                has_ecn_opt;
        bool                has_ecnseen_opt;
        bool                has_fastopen_opt;
        bool                has_wscale_opt;
        bool                app_limited;
        struct dctcpstat    *dctcp;
        struct tcp_bbr_info *bbr_info;
};
 885
 886/* SCTP assocs share the same inode number with their parent endpoint. So if we
 887 * have seen the inode number before, it must be an assoc instead of the next
 888 * endpoint. */
 889static bool is_sctp_assoc(struct sockstat *s, const char *sock_name)
 890{
 891        if (strcmp(sock_name, "sctp"))
 892                return false;
 893        if (!sctp_ino || sctp_ino != s->ino)
 894                return false;
 895        return true;
 896}
 897
 898static const char *unix_netid_name(int type)
 899{
 900        switch (type) {
 901        case SOCK_STREAM:
 902                return "u_str";
 903        case SOCK_SEQPACKET:
 904                return "u_seq";
 905        case SOCK_DGRAM:
 906        default:
 907                return "u_dgr";
 908        }
 909}
 910
 911static const char *proto_name(int protocol)
 912{
 913        switch (protocol) {
 914        case 0:
 915                return "raw";
 916        case IPPROTO_UDP:
 917                return "udp";
 918        case IPPROTO_TCP:
 919                return "tcp";
 920        case IPPROTO_MPTCP:
 921                return "mptcp";
 922        case IPPROTO_SCTP:
 923                return "sctp";
 924        case IPPROTO_DCCP:
 925                return "dccp";
 926        case IPPROTO_ICMPV6:
 927                return "icmp6";
 928        }
 929
 930        return "???";
 931}
 932
 933static const char *vsock_netid_name(int type)
 934{
 935        switch (type) {
 936        case SOCK_STREAM:
 937                return "v_str";
 938        case SOCK_DGRAM:
 939                return "v_dgr";
 940        default:
 941                return "???";
 942        }
 943}
 944
 945static const char *tipc_netid_name(int type)
 946{
 947        switch (type) {
 948        case SOCK_STREAM:
 949                return "ti_st";
 950        case SOCK_DGRAM:
 951                return "ti_dg";
 952        case SOCK_RDM:
 953                return "ti_rd";
 954        case SOCK_SEQPACKET:
 955                return "ti_sq";
 956        default:
 957                return "???";
 958        }
 959}
 960
 961/* Allocate and initialize a new buffer chunk */
 962static struct buf_chunk *buf_chunk_new(void)
 963{
 964        struct buf_chunk *new = malloc(BUF_CHUNK);
 965
 966        if (!new)
 967                abort();
 968
 969        new->next = NULL;
 970
 971        /* This is also the last block */
 972        buffer.tail = new;
 973
 974        /* Next token will be stored at the beginning of chunk data area, and
 975         * its initial length is zero.
 976         */
 977        buffer.cur = (struct buf_token *)new->data;
 978        buffer.cur->len = 0;
 979
 980        new->end = buffer.cur->data;
 981
 982        buffer.chunks++;
 983
 984        return new;
 985}
 986
 987/* Return available tail room in given chunk */
 988static int buf_chunk_avail(struct buf_chunk *chunk)
 989{
 990        return BUF_CHUNK - offsetof(struct buf_chunk, data) -
 991               (chunk->end - chunk->data);
 992}
 993
/* Update end pointer and token length, link new chunk if we hit the end of the
 * current one. Return -EAGAIN if we got a new chunk, caller has to print again.
 *
 * len is the vsnprintf() return value for the text that out() just formatted
 * at the end of the current token. Returns 0 once that text has been accounted
 * for in the current chunk (possibly truncated).
 */
static int buf_update(int len)
{
        struct buf_chunk *chunk = buffer.tail;
        struct buf_token *t = buffer.cur;

        /* Claim success if new content fits in the current chunk, and anyway
         * if this is the first token in the chunk: in the latter case,
         * allocating a new chunk won't help, so we'll just cut the output.
         */
        if ((len < buf_chunk_avail(chunk) && len != -1 /* glibc < 2.0.6 */) ||
            t == (struct buf_token *)chunk->data) {
                /* Only reachable with len >= avail for a first token: account
                 * for no more than the room that was available.
                 */
                len = min(len, buf_chunk_avail(chunk));

                /* Total field length can't exceed 2^16 bytes, cut as needed */
                len = min(len, USHRT_MAX - t->len);

                chunk->end += len;
                t->len += len;
                return 0;
        }

        /* Content truncated, time to allocate more. buf_chunk_new() also makes
         * the new chunk the buffer tail and points buffer.cur at its start.
         */
        chunk->next = buf_chunk_new();

        /* Copy current token over to new chunk, including length descriptor */
        memcpy(chunk->next->data, t, sizeof(t->len) + t->len);
        chunk->next->end += t->len;

        /* Discard partially written field in old chunk */
        chunk->end -= t->len + sizeof(t->len);

        return -EAGAIN;
}
1030
1031/* Append content to buffer as part of the current field */
1032__attribute__((format(printf, 1, 2)))
1033static void out(const char *fmt, ...)
1034{
1035        struct column *f = current_field;
1036        va_list args;
1037        char *pos;
1038        int len;
1039
1040        if (f->disabled)
1041                return;
1042
1043        if (!buffer.head)
1044                buffer.head = buf_chunk_new();
1045
1046again:  /* Append to buffer: if we have a new chunk, print again */
1047
1048        pos = buffer.cur->data + buffer.cur->len;
1049        va_start(args, fmt);
1050
1051        /* Limit to tail room. If we hit the limit, buf_update() will tell us */
1052        len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, args);
1053        va_end(args);
1054
1055        if (buf_update(len))
1056                goto again;
1057}
1058
1059static int print_left_spacing(struct column *f, int stored, int printed)
1060{
1061        int s;
1062
1063        if (!f->width || f->align == ALIGN_LEFT)
1064                return 0;
1065
1066        s = f->width - stored - printed;
1067        if (f->align == ALIGN_CENTER)
1068                /* If count of total spacing is odd, shift right by one */
1069                s = (s + 1) / 2;
1070
1071        if (s > 0)
1072                return printf("%*c", s, ' ');
1073
1074        return 0;
1075}
1076
1077static void print_right_spacing(struct column *f, int printed)
1078{
1079        int s;
1080
1081        if (!f->width || f->align == ALIGN_RIGHT)
1082                return;
1083
1084        s = f->width - printed;
1085        if (f->align == ALIGN_CENTER)
1086                s /= 2;
1087
1088        if (s > 0)
1089                printf("%*c", s, ' ');
1090}
1091
/* Done with field: update buffer pointer, start new token after current one.
 *
 * Also records the longest content seen so far for column f in f->max_len.
 * Does nothing for disabled columns.
 */
static void field_flush(struct column *f)
{
        struct buf_chunk *chunk;
        unsigned int pad;

        if (f->disabled)
                return;

        chunk = buffer.tail;
        /* One byte of padding is needed after odd-length content */
        pad = buffer.cur->len % 2;

        if (buffer.cur->len > f->max_len)
                f->max_len = buffer.cur->len;

        /* We need a new chunk if we can't store the next length descriptor.
         * Mind the gap between end of previous token and next aligned position
         * for length descriptor.
         *
         * NOTE(review): pad is unsigned, so this subtraction and comparison
         * are carried out in unsigned arithmetic; confirm the available room
         * can never be smaller than pad here.
         */
        if (buf_chunk_avail(chunk) - pad < sizeof(buffer.cur->len)) {
                chunk->end += pad;
                /* buf_chunk_new() already starts an empty token in the chunk */
                chunk->next = buf_chunk_new();
                return;
        }

        /* Start an empty token right after the (aligned) current one */
        buffer.cur = (struct buf_token *)(buffer.cur->data +
                                          LEN_ALIGN(buffer.cur->len));
        buffer.cur->len = 0;
        buffer.tail->end = buffer.cur->data;
}
1122
1123static int field_is_last(struct column *f)
1124{
1125        return f - columns == COL_MAX - 1;
1126}
1127
1128/* Get the next available token in the buffer starting from the current token */
1129static struct buf_token *buf_token_next(struct buf_token *cur)
1130{
1131        struct buf_chunk *chunk = buffer.tail;
1132
1133        /* If we reached the end of chunk contents, get token from next chunk */
1134        if (cur->data + LEN_ALIGN(cur->len) == chunk->end) {
1135                buffer.tail = chunk = chunk->next;
1136                return chunk ? (struct buf_token *)chunk->data : NULL;
1137        }
1138
1139        return (struct buf_token *)(cur->data + LEN_ALIGN(cur->len));
1140}
1141
1142/* Free up all allocated buffer chunks */
1143static void buf_free_all(void)
1144{
1145        struct buf_chunk *tmp;
1146
1147        for (buffer.tail = buffer.head; buffer.tail; ) {
1148                tmp = buffer.tail;
1149                buffer.tail = buffer.tail->next;
1150                free(tmp);
1151        }
1152        buffer.head = NULL;
1153        buffer.chunks = 0;
1154}
1155
/* Terminal width in columns, or -1 when stdout is not a terminal, the
 * TIOCGWINSZ ioctl fails, or the reported width is zero.
 */
static int render_screen_width(void)
{
        struct winsize ws;

        if (!isatty(STDOUT_FILENO))
                return -1;

        if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == -1)
                return -1;

        return ws.ws_col > 0 ? ws.ws_col : -1;
}
1172
/* Calculate column width from contents length. If columns don't fit on one
 * line, break them into the least possible amount of lines and keep them
 * aligned across lines. Available screen space is equally spread between fields
 * as additional spacing.
 *
 * When the screen width cannot be determined (render_screen_width() returns
 * -1), widths are left at the measured content length and no extra spacing is
 * distributed.
 */
static void render_calc_width(void)
{
        int screen_width, first, len = 0, linecols = 0;
        /* eol marks the last column of the previous output line; it starts
         * just before the first column so the first line covers columns[0].
         */
        struct column *c, *eol = columns - 1;
        bool compact_output = false;

        screen_width = render_screen_width();
        if (screen_width == -1) {
                /* No usable width: never clamp, and skip the second pass */
                screen_width = INT_MAX;
                compact_output = true;
        }

        /* First pass: set width for each column to measured content length */
        for (first = 1, c = columns; c - columns < COL_MAX; c++) {
                if (c->disabled)
                        continue;

                /* Every non-empty column after the first non-empty one also
                 * needs room for its left delimiter.
                 */
                if (!first && c->max_len)
                        c->width = c->max_len + strlen(c->ldelim);
                else
                        c->width = c->max_len;

                /* But don't exceed screen size. If we exceed the screen size
                 * for even a single field, it will just start on a line of its
                 * own and then naturally wrap.
                 */
                c->width = min(c->width, screen_width);

                if (c->width)
                        first = 0;
        }

        if (compact_output) {
                /* Compact output, skip extending columns. */
                return;
        }

        /* Second pass: find out newlines and distribute available spacing */
        for (c = columns; c - columns < COL_MAX; c++) {
                int pad, spacing, rem, last;
                struct column *tmp;

                if (!c->width)
                        continue;

                linecols++;
                len += c->width;

                /* Is c the last column with a non-zero width? */
                for (last = 1, tmp = c + 1; tmp - columns < COL_MAX; tmp++) {
                        if (tmp->width) {
                                last = 0;
                                break;
                        }
                }

                if (!last && len < screen_width) {
                        /* Columns fit on screen so far, nothing to do yet */
                        continue;
                }

                if (len == screen_width) {
                        /* Exact fit, just start with new line */
                        goto newline;
                }

                if (len > screen_width) {
                        /* Screen width exceeded: go back one column. The loop
                         * increment will visit the dropped column again as the
                         * first one of the next line.
                         */
                        len -= c->width;
                        c--;
                        linecols--;
                }

                /* Distribute remaining space to columns on this line */
                pad = screen_width - len;
                spacing = pad / linecols;
                rem = pad % linecols;
                /* Walk back to the previous end-of-line; the remainder goes,
                 * one blank each, to the columns visited first.
                 */
                for (tmp = c; tmp > eol; tmp--) {
                        if (!tmp->width)
                                continue;

                        tmp->width += spacing;
                        if (rem) {
                                tmp->width++;
                                rem--;
                        }
                }

newline:
                /* Line break: reset line counters, mark end-of-line */
                eol = c;
                len = 0;
                linecols = 0;
        }
}
1272
/* Render buffered output with spacing and delimiters, then free up buffers.
 *
 * Does nothing if no chunk has been allocated yet. On return the buffer is
 * empty and current_field points at the first column again.
 */
static void render(void)
{
        struct buf_token *token;
        int printed, line_started = 0;
        struct column *f;

        if (!buffer.head)
                return;

        token = (struct buf_token *)buffer.head->data;

        /* Ensure end alignment of last token, it wasn't necessarily flushed */
        buffer.tail->end += buffer.cur->len % 2;

        render_calc_width();

        /* Rewind and replay: buf_token_next() uses buffer.tail as the chunk
         * currently being read.
         */
        buffer.tail = buffer.head;

        /* Start at the first column that was given a width */
        f = columns;
        while (!f->width)
                f++;

        while (token) {
                /* Print left delimiter only if we already started a line */
                if (line_started++)
                        printed = printf("%s", f->ldelim);
                else
                        printed = 0;

                /* Print field content from token data with spacing */
                printed += print_left_spacing(f, token->len, printed);
                printed += fwrite(token->data, 1, token->len, stdout);
                print_right_spacing(f, printed);

                /* Go to next non-empty field, deal with end-of-line */
                do {
                        if (field_is_last(f)) {
                                printf("\n");
                                f = columns;
                                line_started = 0;
                        } else {
                                f++;
                        }
                } while (f->disabled);

                token = buf_token_next(token);
        }
        /* Deal with final end-of-line when the last non-empty field printed
         * is not the last field.
         */
        if (line_started)
                printf("\n");

        buf_free_all();
        current_field = columns;
}
1331
1332/* Move to next field, and render buffer if we reached the maximum number of
1333 * chunks, at the last field in a line.
1334 */
1335static void field_next(void)
1336{
1337        if (field_is_last(current_field) && buffer.chunks >= BUF_CHUNKS_MAX) {
1338                render();
1339                return;
1340        }
1341
1342        field_flush(current_field);
1343        if (field_is_last(current_field))
1344                current_field = columns;
1345        else
1346                current_field++;
1347}
1348
1349/* Walk through fields and flush them until we reach the desired one */
1350static void field_set(enum col_id id)
1351{
1352        while (id != current_field - columns)
1353                field_next();
1354}
1355
1356/* Print header for all non-empty columns */
1357static void print_header(void)
1358{
1359        while (!field_is_last(current_field)) {
1360                if (!current_field->disabled)
1361                        out("%s", current_field->header);
1362                field_next();
1363        }
1364}
1365
1366static void sock_state_print(struct sockstat *s)
1367{
1368        const char *sock_name;
1369        static const char * const sstate_name[] = {
1370                "UNKNOWN",
1371                [SS_ESTABLISHED] = "ESTAB",
1372                [SS_SYN_SENT] = "SYN-SENT",
1373                [SS_SYN_RECV] = "SYN-RECV",
1374                [SS_FIN_WAIT1] = "FIN-WAIT-1",
1375                [SS_FIN_WAIT2] = "FIN-WAIT-2",
1376                [SS_TIME_WAIT] = "TIME-WAIT",
1377                [SS_CLOSE] = "UNCONN",
1378                [SS_CLOSE_WAIT] = "CLOSE-WAIT",
1379                [SS_LAST_ACK] = "LAST-ACK",
1380                [SS_LISTEN] =   "LISTEN",
1381                [SS_CLOSING] = "CLOSING",
1382        };
1383
1384        switch (s->local.family) {
1385        case AF_UNIX:
1386                sock_name = unix_netid_name(s->type);
1387                break;
1388        case AF_INET:
1389        case AF_INET6:
1390                sock_name = proto_name(s->type);
1391                break;
1392        case AF_PACKET:
1393                sock_name = s->type == SOCK_RAW ? "p_raw" : "p_dgr";
1394                break;
1395        case AF_NETLINK:
1396                sock_name = "nl";
1397                break;
1398        case AF_TIPC:
1399                sock_name = tipc_netid_name(s->type);
1400                break;
1401        case AF_VSOCK:
1402                sock_name = vsock_netid_name(s->type);
1403                break;
1404        case AF_XDP:
1405                sock_name = "xdp";
1406                break;
1407        default:
1408                sock_name = "unknown";
1409        }
1410
1411        if (is_sctp_assoc(s, sock_name)) {
1412                field_set(COL_STATE);           /* Empty Netid field */
1413                out("`- %s", sctp_sstate_name[s->state]);
1414        } else {
1415                field_set(COL_NETID);
1416                out("%s", sock_name);
1417                field_set(COL_STATE);
1418                out("%s", sstate_name[s->state]);
1419        }
1420
1421        field_set(COL_RECVQ);
1422        out("%-6d", s->rq);
1423        field_set(COL_SENDQ);
1424        out("%-6d", s->wq);
1425        field_set(COL_ADDR);
1426}
1427
1428static void sock_details_print(struct sockstat *s)
1429{
1430        if (s->uid)
1431                out(" uid:%u", s->uid);
1432
1433        out(" ino:%u", s->ino);
1434        out(" sk:%llx", s->sk);
1435
1436        if (s->mark)
1437                out(" fwmark:0x%x", s->mark);
1438
1439        if (s->cgroup_id)
1440                out(" cgroup:%s", cg_id_to_path(s->cgroup_id));
1441}
1442
/* Write an address into the current field and its port into the following
 * one, then move past the port field. When ifname is non-NULL it is appended
 * to the address after a '%' sign. delim is printed at the end of the address
 * field.
 */
static void sock_addr_print(const char *addr, char *delim, const char *port,
                const char *ifname)
{
        if (ifname == NULL)
                out("%s%s", addr, delim);
        else
                out("%s" "%%" "%s%s", addr, ifname, delim);
        field_next();

        out("%s", port);
        field_next();
}
1455
/* Format a timeout given in milliseconds as a short human readable string.
 *
 * The precision shrinks as the value grows: milliseconds are shown only below
 * ten seconds, seconds only below ten minutes. A zero timeout gives an empty
 * string. The result lives in a static buffer that the next call overwrites.
 */
static const char *print_ms_timer(unsigned int timeout)
{
        static char buf[64];
        int whole_secs = timeout / 1000;
        int msecs = timeout % 1000;
        int minutes = whole_secs / 60;
        int secs = whole_secs % 60;
        /* Seconds are dropped once the timeout reaches ten minutes */
        int show_secs = secs != 0 && minutes <= 9;
        /* Milliseconds only accompany values below ten seconds */
        int show_msecs = msecs != 0 && minutes == 0 && secs <= 9;
        int used = 0;

        buf[0] = '\0';

        if (minutes)
                used += snprintf(buf, sizeof(buf) - 16, "%dmin", minutes);

        if (show_secs)
                used += snprintf(buf + used, sizeof(buf) - used, "%d%s",
                                 secs, show_msecs ? "." : "sec");

        if (show_msecs)
                snprintf(buf + used, sizeof(buf) - used, "%03dms", msecs);

        return buf;
}
1481
/* One cached port-to-service-name mapping, kept on singly linked lists. */
struct scache {
        struct scache *next;    /* next entry on the same list, or NULL */
        int port;               /* port number the entry describes */
        char *name;             /* service name, NULL if none is known */
        const char *proto;      /* protocol tag, compared by pointer against
                                 * dg_proto on lookup
                                 */
};

/* Entries filled in by init_service_resolver() when HAVE_RPC is defined;
 * searched first by __resolve_service().
 */
static struct scache *rlist;
1490
1491#ifdef HAVE_RPC
1492static CLIENT *rpc_client_create(rpcprog_t prog, rpcvers_t vers)
1493{
1494        struct netbuf nbuf;
1495        struct sockaddr_un saddr;
1496        int sock;
1497
1498        memset(&saddr, 0, sizeof(saddr));
1499        sock = socket(AF_LOCAL, SOCK_STREAM, 0);
1500        if (sock < 0)
1501                return NULL;
1502
1503        saddr.sun_family = AF_LOCAL;
1504        strcpy(saddr.sun_path, _PATH_RPCBINDSOCK);
1505        nbuf.len = SUN_LEN(&saddr);
1506        nbuf.maxlen = sizeof(struct sockaddr_un);
1507        nbuf.buf = &saddr;
1508
1509        return clnt_vc_create(sock, &nbuf, prog, vers, 0, 0);
1510}
1511
1512static void init_service_resolver(void)
1513{
1514        struct rpcblist *rhead = NULL;
1515        struct timeval timeout;
1516        struct rpcent *rpc;
1517        enum clnt_stat res;
1518        CLIENT *client;
1519
1520        timeout.tv_sec = 5;
1521        timeout.tv_usec = 0;
1522
1523        client = rpc_client_create(PMAPPROG, RPCBVERS4);
1524        if (!client)
1525                return;
1526
1527        res = clnt_call(client, RPCBPROC_DUMP, (xdrproc_t)xdr_void, NULL,
1528                        (xdrproc_t)xdr_rpcblist_ptr, (char *)&rhead,
1529                        timeout);
1530        if (res != RPC_SUCCESS)
1531                return;
1532
1533        for (; rhead; rhead = rhead->rpcb_next) {
1534                char prog[128] = "rpc.";
1535                struct scache *c;
1536                int hport, lport, ok;
1537
1538                c = malloc(sizeof(*c));
1539                if (!c)
1540                        continue;
1541
1542                ok = sscanf(rhead->rpcb_map.r_addr, "::.%d.%d", &hport, &lport);
1543                if (!ok)
1544                        ok = sscanf(rhead->rpcb_map.r_addr, "0.0.0.0.%d.%d",
1545                                    &hport, &lport);
1546                if (!ok)
1547                        continue;
1548                c->port = hport << 8 | lport;
1549
1550                if (strcmp(rhead->rpcb_map.r_netid, TCP_PROTO) == 0 ||
1551                    strcmp(rhead->rpcb_map.r_netid, TCP6_PROTO) == 0)
1552                        c->proto = TCP_PROTO;
1553                else if (strcmp(rhead->rpcb_map.r_netid, UDP_PROTO) == 0 ||
1554                         strcmp(rhead->rpcb_map.r_netid, UDP6_PROTO) == 0)
1555                        c->proto = UDP_PROTO;
1556                else if (strcmp(rhead->rpcb_map.r_netid, SCTP_PROTO) == 0)
1557                        c->proto = SCTP_PROTO;
1558                else
1559                        continue;
1560
1561                rpc = getrpcbynumber(rhead->rpcb_map.r_prog);
1562                if (rpc) {
1563                        strncat(prog, rpc->r_name, 128 - strlen(prog));
1564                        c->name = strdup(prog);
1565                }
1566
1567                c->next = rlist;
1568                rlist = c;
1569        }
1570}
1571#endif
1572
/* Tell whether port lies in the ephemeral port range.
 *
 * Ports in that range are deliberately not resolved to service names: the
 * default /etc/services assigns many names there, and seeing "socks" or
 * "cfinger" on what is just a dynamically allocated port is misleading.
 *
 * The range is read once through ephemeral_ports_open(); if that fails or the
 * two numbers cannot be parsed, 1024-4999 is used.
 */
static int is_ephemeral(int port)
{
        static int range_lo, range_hi;

        if (range_lo == 0) {
                FILE *f = ephemeral_ports_open();
                int parsed = 0;

                if (f)
                        parsed = fscanf(f, "%d %d", &range_lo, &range_hi);

                if (parsed < 2) {
                        range_lo = 1024;
                        range_hi = 4999;
                }

                if (f)
                        fclose(f);
        }

        return range_lo <= port && port <= range_hi;
}
1595
1596
1597static const char *__resolve_service(int port)
1598{
1599        struct scache *c;
1600
1601        for (c = rlist; c; c = c->next) {
1602                if (c->port == port && c->proto == dg_proto)
1603                        return c->name;
1604        }
1605
1606        if (!is_ephemeral(port)) {
1607                static int notfirst;
1608                struct servent *se;
1609
1610                if (!notfirst) {
1611                        setservent(1);
1612                        notfirst = 1;
1613                }
1614                se = getservbyport(htons(port), dg_proto);
1615                if (se)
1616                        return se->s_name;
1617        }
1618
1619        return NULL;
1620}
1621
/* Hash table used by resolve_service() to remember lookup results, including
 * failed ones. Each bucket is a singly linked list of struct scache.
 */
#define SCACHE_BUCKETS 1024
static struct scache *cache_htab[SCACHE_BUCKETS];
1624
1625static const char *resolve_service(int port)
1626{
1627        static char buf[128];
1628        struct scache *c;
1629        const char *res;
1630        int hash;
1631
1632        if (port == 0) {
1633                buf[0] = '*';
1634                buf[1] = 0;
1635                return buf;
1636        }
1637
1638        if (numeric)
1639                goto do_numeric;
1640
1641        if (dg_proto == RAW_PROTO)
1642                return inet_proto_n2a(port, buf, sizeof(buf));
1643
1644
1645        hash = (port^(((unsigned long)dg_proto)>>2)) % SCACHE_BUCKETS;
1646
1647        for (c = cache_htab[hash]; c; c = c->next) {
1648                if (c->port == port && c->proto == dg_proto)
1649                        goto do_cache;
1650        }
1651
1652        c = malloc(sizeof(*c));
1653        if (!c)
1654                goto do_numeric;
1655        res = __resolve_service(port);
1656        c->port = port;
1657        c->name = res ? strdup(res) : NULL;
1658        c->proto = dg_proto;
1659        c->next = cache_htab[hash];
1660        cache_htab[hash] = c;
1661
1662do_cache:
1663        if (c->name)
1664                return c->name;
1665
1666do_numeric:
1667        sprintf(buf, "%u", port);
1668        return buf;
1669}
1670
1671static void inet_addr_print(const inet_prefix *a, int port,
1672                            unsigned int ifindex, bool v6only)
1673{
1674        char buf[1024];
1675        const char *ap = buf;
1676        const char *ifname = NULL;
1677
1678        if (a->family == AF_INET) {
1679                ap = format_host(AF_INET, 4, a->data);
1680        } else {
1681                if (!v6only &&
1682                    !memcmp(a->data, &in6addr_any, sizeof(in6addr_any))) {
1683                        buf[0] = '*';
1684                        buf[1] = 0;
1685                } else {
1686                        ap = format_host(a->family, 16, a->data);
1687
1688                        /* Numeric IPv6 addresses should be bracketed */
1689                        if (strchr(ap, ':')) {
1690                                snprintf(buf, sizeof(buf),
1691                                         "[%s]", ap);
1692                                ap = buf;
1693                        }
1694                }
1695        }
1696
1697        if (ifindex)
1698                ifname = ll_index_to_name(ifindex);
1699
1700        sock_addr_print(ap, ":", resolve_service(port), ifname);
1701}
1702
/* Operand of an address, port, device, mark or cgroup filter predicate (see
 * run_ssfilter()). Which members are meaningful depends on the predicate type.
 */
struct aafilter {
        inet_prefix     addr;       /* address or prefix; for AF_UNIX it is
                                     * handed to unix_match() as a pattern
                                     */
        int             port;       /* port to compare, -1 matches any port */
        unsigned int    iface;      /* compared with the socket's iface */
        __u32           mark;       /* expected value of (socket mark & mask) */
        __u32           mask;       /* bits of the socket mark to compare */
        __u64           cgroup_id;  /* compared with the socket's cgroup_id */
        struct aafilter *next;      /* further address alternative, or NULL */
};
1712
1713static int inet2_addr_match(const inet_prefix *a, const inet_prefix *p,
1714                            int plen)
1715{
1716        if (!inet_addr_match(a, p, plen))
1717                return 0;
1718
1719        /* Cursed "v4 mapped" addresses: v4 mapped socket matches
1720         * pure IPv4 rule, but v4-mapped rule selects only v4-mapped
1721         * sockets. Fair? */
1722        if (p->family == AF_INET && a->family == AF_INET6) {
1723                if (a->data[0] == 0 && a->data[1] == 0 &&
1724                    a->data[2] == htonl(0xffff)) {
1725                        inet_prefix tmp = *a;
1726
1727                        tmp.data[0] = a->data[3];
1728                        return inet_addr_match(&tmp, p, plen);
1729                }
1730        }
1731        return 1;
1732}
1733
1734static int unix_match(const inet_prefix *a, const inet_prefix *p)
1735{
1736        char *addr, *pattern;
1737
1738        memcpy(&addr, a->data, sizeof(addr));
1739        memcpy(&pattern, p->data, sizeof(pattern));
1740        if (pattern == NULL)
1741                return 1;
1742        if (addr == NULL)
1743                addr = "";
1744        return !fnmatch(pattern, addr, FNM_CASEFOLD);
1745}
1746
1747static int run_ssfilter(struct ssfilter *f, struct sockstat *s)
1748{
1749        switch (f->type) {
1750                case SSF_S_AUTO:
1751        {
1752                if (s->local.family == AF_UNIX) {
1753                        char *p;
1754
1755                        memcpy(&p, s->local.data, sizeof(p));
1756                        return p == NULL || (p[0] == '@' && strlen(p) == 6 &&
1757                                             strspn(p+1, "0123456789abcdef") == 5);
1758                }
1759                if (s->local.family == AF_PACKET)
1760                        return s->lport == 0 && s->local.data[0] == 0;
1761                if (s->local.family == AF_NETLINK)
1762                        return s->lport < 0;
1763                if (s->local.family == AF_VSOCK)
1764                        return s->lport > 1023;
1765
1766                return is_ephemeral(s->lport);
1767        }
1768                case SSF_DCOND:
1769        {
1770                struct aafilter *a = (void *)f->pred;
1771
1772                if (a->addr.family == AF_UNIX)
1773                        return unix_match(&s->remote, &a->addr);
1774                if (a->port != -1 && a->port != s->rport)
1775                        return 0;
1776                if (a->addr.bitlen) {
1777                        do {
1778                                if (!inet2_addr_match(&s->remote, &a->addr, a->addr.bitlen))
1779                                        return 1;
1780                        } while ((a = a->next) != NULL);
1781                        return 0;
1782                }
1783                return 1;
1784        }
1785                case SSF_SCOND:
1786        {
1787                struct aafilter *a = (void *)f->pred;
1788
1789                if (a->addr.family == AF_UNIX)
1790                        return unix_match(&s->local, &a->addr);
1791                if (a->port != -1 && a->port != s->lport)
1792                        return 0;
1793                if (a->addr.bitlen) {
1794                        do {
1795                                if (!inet2_addr_match(&s->local, &a->addr, a->addr.bitlen))
1796                                        return 1;
1797                        } while ((a = a->next) != NULL);
1798                        return 0;
1799                }
1800                return 1;
1801        }
1802                case SSF_D_GE:
1803        {
1804                struct aafilter *a = (void *)f->pred;
1805
1806                return s->rport >= a->port;
1807        }
1808                case SSF_D_LE:
1809        {
1810                struct aafilter *a = (void *)f->pred;
1811
1812                return s->rport <= a->port;
1813        }
1814                case SSF_S_GE:
1815        {
1816                struct aafilter *a = (void *)f->pred;
1817
1818                return s->lport >= a->port;
1819        }
1820                case SSF_S_LE:
1821        {
1822                struct aafilter *a = (void *)f->pred;
1823
1824                return s->lport <= a->port;
1825        }
1826                case SSF_DEVCOND:
1827        {
1828                struct aafilter *a = (void *)f->pred;
1829
1830                return s->iface == a->iface;
1831        }
1832                case SSF_MARKMASK:
1833        {
1834                struct aafilter *a = (void *)f->pred;
1835
1836                return (s->mark & a->mask) == a->mark;
1837        }
1838                case SSF_CGROUPCOND:
1839        {
1840                struct aafilter *a = (void *)f->pred;
1841
1842                return s->cgroup_id == a->cgroup_id;
1843        }
1844                /* Yup. It is recursion. Sorry. */
1845                case SSF_AND:
1846                return run_ssfilter(f->pred, s) && run_ssfilter(f->post, s);
1847                case SSF_OR:
1848                return run_ssfilter(f->pred, s) || run_ssfilter(f->post, s);
1849                case SSF_NOT:
1850                return !run_ssfilter(f->pred, s);
1851                default:
1852                abort();
1853        }
1854}
1855
1856/* Relocate external jumps by reloc. */
1857static void ssfilter_patch(char *a, int len, int reloc)
1858{
1859        while (len > 0) {
1860                struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)a;
1861
1862                if (op->no == len+4)
1863                        op->no += reloc;
1864                len -= op->yes;
1865                a += op->yes;
1866        }
1867        if (len < 0)
1868                abort();
1869}
1870
1871static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
1872{
1873        switch (f->type) {
1874                case SSF_S_AUTO:
1875        {
1876                if (!(*bytecode = malloc(4))) abort();
1877                ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_AUTO, 4, 8 };
1878                return 4;
1879        }
1880                case SSF_DCOND:
1881                case SSF_SCOND:
1882        {
1883                struct aafilter *a = (void *)f->pred;
1884                struct aafilter *b;
1885                char *ptr;
1886                int  code = (f->type == SSF_DCOND ? INET_DIAG_BC_D_COND : INET_DIAG_BC_S_COND);
1887                int len = 0;
1888
1889                for (b = a; b; b = b->next) {
1890                        len += 4 + sizeof(struct inet_diag_hostcond);
1891                        if (a->addr.family == AF_INET6)
1892                                len += 16;
1893                        else
1894                                len += 4;
1895                        if (b->next)
1896                                len += 4;
1897                }
1898                if (!(ptr = malloc(len))) abort();
1899                *bytecode = ptr;
1900                for (b = a; b; b = b->next) {
1901                        struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)ptr;
1902                        int alen = (a->addr.family == AF_INET6 ? 16 : 4);
1903                        int oplen = alen + 4 + sizeof(struct inet_diag_hostcond);
1904                        struct inet_diag_hostcond *cond = (struct inet_diag_hostcond *)(ptr+4);
1905
1906                        *op = (struct inet_diag_bc_op){ code, oplen, oplen+4 };
1907                        cond->family = a->addr.family;
1908                        cond->port = a->port;
1909                        cond->prefix_len = a->addr.bitlen;
1910                        memcpy(cond->addr, a->addr.data, alen);
1911                        ptr += oplen;
1912                        if (b->next) {
1913                                op = (struct inet_diag_bc_op *)ptr;
1914                                *op = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, len - (ptr-*bytecode)};
1915                                ptr += 4;
1916                        }
1917                }
1918                return ptr - *bytecode;
1919        }
1920                case SSF_D_GE:
1921        {
1922                struct aafilter *x = (void *)f->pred;
1923
1924                if (!(*bytecode = malloc(8))) abort();
1925                ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_GE, 8, 12 };
1926                ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
1927                return 8;
1928        }
1929                case SSF_D_LE:
1930        {
1931                struct aafilter *x = (void *)f->pred;
1932
1933                if (!(*bytecode = malloc(8))) abort();
1934                ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_LE, 8, 12 };
1935                ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
1936                return 8;
1937        }
1938                case SSF_S_GE:
1939        {
1940                struct aafilter *x = (void *)f->pred;
1941
1942                if (!(*bytecode = malloc(8))) abort();
1943                ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_GE, 8, 12 };
1944                ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
1945                return 8;
1946        }
1947                case SSF_S_LE:
1948        {
1949                struct aafilter *x = (void *)f->pred;
1950
1951                if (!(*bytecode = malloc(8))) abort();
1952                ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_LE, 8, 12 };
1953                ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
1954                return 8;
1955        }
1956
1957                case SSF_AND:
1958        {
1959                char *a1 = NULL, *a2 = NULL, *a;
1960                int l1, l2;
1961
1962                l1 = ssfilter_bytecompile(f->pred, &a1);
1963                l2 = ssfilter_bytecompile(f->post, &a2);
1964                if (!l1 || !l2) {
1965                        free(a1);
1966                        free(a2);
1967                        return 0;
1968                }
1969                if (!(a = malloc(l1+l2))) abort();
1970                memcpy(a, a1, l1);
1971                memcpy(a+l1, a2, l2);
1972                free(a1); free(a2);
1973                ssfilter_patch(a, l1, l2);
1974                *bytecode = a;
1975                return l1+l2;
1976        }
1977                case SSF_OR:
1978        {
1979                char *a1 = NULL, *a2 = NULL, *a;
1980                int l1, l2;
1981
1982                l1 = ssfilter_bytecompile(f->pred, &a1);
1983                l2 = ssfilter_bytecompile(f->post, &a2);
1984                if (!l1 || !l2) {
1985                        free(a1);
1986                        free(a2);
1987                        return 0;
1988                }
1989                if (!(a = malloc(l1+l2+4))) abort();
1990                memcpy(a, a1, l1);
1991                memcpy(a+l1+4, a2, l2);
1992                free(a1); free(a2);
1993                *(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, l2+4 };
1994                *bytecode = a;
1995                return l1+l2+4;
1996        }
1997                case SSF_NOT:
1998        {
1999                char *a1 = NULL, *a;
2000                int l1;
2001
2002                l1 = ssfilter_bytecompile(f->pred, &a1);
2003                if (!l1) {
2004                        free(a1);
2005                        return 0;
2006                }
2007                if (!(a = malloc(l1+4))) abort();
2008                memcpy(a, a1, l1);
2009                free(a1);
2010                *(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, 8 };
2011                *bytecode = a;
2012                return l1+4;
2013        }
2014                case SSF_DEVCOND:
2015        {
2016                /* bytecompile for SSF_DEVCOND not supported yet */
2017                return 0;
2018        }
2019                case SSF_MARKMASK:
2020        {
2021                struct aafilter *a = (void *)f->pred;
2022                struct instr {
2023                        struct inet_diag_bc_op op;
2024                        struct inet_diag_markcond cond;
2025                };
2026                int inslen = sizeof(struct instr);
2027
2028                if (!(*bytecode = malloc(inslen))) abort();
2029                ((struct instr *)*bytecode)[0] = (struct instr) {
2030                        { INET_DIAG_BC_MARK_COND, inslen, inslen + 4 },
2031                        { a->mark, a->mask},
2032                };
2033
2034                return inslen;
2035        }
2036                case SSF_CGROUPCOND:
2037        {
2038                struct aafilter *a = (void *)f->pred;
2039                struct instr {
2040                        struct inet_diag_bc_op op;
2041                        __u64 cgroup_id;
2042                } __attribute__((packed));
2043                int inslen = sizeof(struct instr);
2044
2045                if (!(*bytecode = malloc(inslen))) abort();
2046                ((struct instr *)*bytecode)[0] = (struct instr) {
2047                        { INET_DIAG_BC_CGROUP_COND, inslen, inslen + 4 },
2048                        a->cgroup_id,
2049                };
2050
2051                return inslen;
2052        }
2053                default:
2054                abort();
2055        }
2056}
2057
2058static int remember_he(struct aafilter *a, struct hostent *he)
2059{
2060        char **ptr = he->h_addr_list;
2061        int cnt = 0;
2062        int len;
2063
2064        if (he->h_addrtype == AF_INET)
2065                len = 4;
2066        else if (he->h_addrtype == AF_INET6)
2067                len = 16;
2068        else
2069                return 0;
2070
2071        while (*ptr) {
2072                struct aafilter *b = a;
2073
2074                if (a->addr.bitlen) {
2075                        if ((b = malloc(sizeof(*b))) == NULL)
2076                                return cnt;
2077                        *b = *a;
2078                        a->next = b;
2079                }
2080                memcpy(b->addr.data, *ptr, len);
2081                b->addr.bytelen = len;
2082                b->addr.bitlen = len*8;
2083                b->addr.family = he->h_addrtype;
2084                ptr++;
2085                cnt++;
2086        }
2087        return cnt;
2088}
2089
2090static int get_dns_host(struct aafilter *a, const char *addr, int fam)
2091{
2092        static int notfirst;
2093        int cnt = 0;
2094        struct hostent *he;
2095
2096        a->addr.bitlen = 0;
2097        if (!notfirst) {
2098                sethostent(1);
2099                notfirst = 1;
2100        }
2101        he = gethostbyname2(addr, fam == AF_UNSPEC ? AF_INET : fam);
2102        if (he)
2103                cnt = remember_he(a, he);
2104        if (fam == AF_UNSPEC) {
2105                he = gethostbyname2(addr, AF_INET6);
2106                if (he)
2107                        cnt += remember_he(a, he);
2108        }
2109        return !cnt;
2110}
2111
2112static int xll_initted;
2113
2114static void xll_init(void)
2115{
2116        struct rtnl_handle rth;
2117
2118        if (rtnl_open(&rth, 0) < 0)
2119                exit(1);
2120
2121        ll_init_map(&rth);
2122        rtnl_close(&rth);
2123        xll_initted = 1;
2124}
2125
2126static const char *xll_index_to_name(int index)
2127{
2128        if (!xll_initted)
2129                xll_init();
2130        return ll_index_to_name(index);
2131}
2132
2133static int xll_name_to_index(const char *dev)
2134{
2135        if (!xll_initted)
2136                xll_init();
2137        return ll_name_to_index(dev);
2138}
2139
2140void *parse_devcond(char *name)
2141{
2142        struct aafilter a = { .iface = 0 };
2143        struct aafilter *res;
2144
2145        a.iface = xll_name_to_index(name);
2146        if (a.iface == 0) {
2147                char *end;
2148                unsigned long n;
2149
2150                n = strtoul(name, &end, 0);
2151                if (!end || end == name || *end || n > UINT_MAX)
2152                        return NULL;
2153
2154                a.iface = n;
2155        }
2156
2157        res = malloc(sizeof(*res));
2158        *res = a;
2159
2160        return res;
2161}
2162
2163static void vsock_set_inet_prefix(inet_prefix *a, __u32 cid)
2164{
2165        *a = (inet_prefix){
2166                .bytelen = sizeof(cid),
2167                .family = AF_VSOCK,
2168        };
2169        memcpy(a->data, &cid, sizeof(cid));
2170}
2171
/*
 * Locate the port part of addr.
 *
 * With is_port set the whole string is the port; otherwise the port starts
 * at the first ':' in addr.  If the located position holds a ':', it is
 * overwritten with NUL (splitting addr in place) and the character after it
 * is returned.  Returns NULL when is_port is false and addr has no ':'.
 */
static char* find_port(char *addr, bool is_port)
{
        char *port = is_port ? addr : strchr(addr, ':');

        if (port == NULL || *port != ':')
                return port;

        *port = '\0';
        return port + 1;
}
2183
2184void *parse_hostcond(char *addr, bool is_port)
2185{
2186        char *port = NULL;
2187        struct aafilter a = { .port = -1 };
2188        struct aafilter *res;
2189        int fam = preferred_family;
2190        struct filter *f = &current_filter;
2191
2192        if (strncmp(addr, "unix:", 5) == 0) {
2193                fam = AF_UNIX;
2194                addr += 5;
2195        } else if (strncmp(addr, "link:", 5) == 0) {
2196                fam = AF_PACKET;
2197                addr += 5;
2198        } else if (strncmp(addr, "netlink:", 8) == 0) {
2199                fam = AF_NETLINK;
2200                addr += 8;
2201        } else if (strncmp(addr, "vsock:", 6) == 0) {
2202                fam = AF_VSOCK;
2203                addr += 6;
2204        } else if (strncmp(addr, "inet:", 5) == 0) {
2205                fam = AF_INET;
2206                addr += 5;
2207        } else if (strncmp(addr, "inet6:", 6) == 0) {
2208                fam = AF_INET6;
2209                addr += 6;
2210        }
2211
2212        if (fam == AF_UNIX) {
2213                char *p;
2214
2215                a.addr.family = AF_UNIX;
2216                p = strdup(addr);
2217                a.addr.bitlen = 8*strlen(p);
2218                memcpy(a.addr.data, &p, sizeof(p));
2219                goto out;
2220        }
2221
2222        if (fam == AF_PACKET) {
2223                a.addr.family = AF_PACKET;
2224                a.addr.bitlen = 0;
2225                port = find_port(addr, is_port);
2226                if (port) {
2227                        if (*port && strcmp(port, "*")) {
2228                                if (get_integer(&a.port, port, 0)) {
2229                                        if ((a.port = xll_name_to_index(port)) <= 0)
2230                                                return NULL;
2231                                }
2232                        }
2233                }
2234                if (!is_port && addr[0] && strcmp(addr, "*")) {
2235                        unsigned short tmp;
2236
2237                        a.addr.bitlen = 32;
2238                        if (ll_proto_a2n(&tmp, addr))
2239                                return NULL;
2240                        a.addr.data[0] = ntohs(tmp);
2241                }
2242                goto out;
2243        }
2244
2245        if (fam == AF_NETLINK) {
2246                a.addr.family = AF_NETLINK;
2247                a.addr.bitlen = 0;
2248                port = find_port(addr, is_port);
2249                if (port) {
2250                        if (*port && strcmp(port, "*")) {
2251                                if (get_integer(&a.port, port, 0)) {
2252                                        if (strcmp(port, "kernel") == 0)
2253                                                a.port = 0;
2254                                        else
2255                                                return NULL;
2256                                }
2257                        }
2258                }
2259                if (!is_port && addr[0] && strcmp(addr, "*")) {
2260                        a.addr.bitlen = 32;
2261                        if (nl_proto_a2n(&a.addr.data[0], addr) == -1)
2262                                return NULL;
2263                }
2264                goto out;
2265        }
2266
2267        if (fam == AF_VSOCK) {
2268                __u32 cid = ~(__u32)0;
2269
2270                a.addr.family = AF_VSOCK;
2271
2272                port = find_port(addr, is_port);
2273
2274                if (port && strcmp(port, "*") &&
2275                    get_u32((__u32 *)&a.port, port, 0))
2276                        return NULL;
2277
2278                if (!is_port && addr[0] && strcmp(addr, "*")) {
2279                        a.addr.bitlen = 32;
2280                        if (get_u32(&cid, addr, 0))
2281                                return NULL;
2282                }
2283                vsock_set_inet_prefix(&a.addr, cid);
2284                goto out;
2285        }
2286
2287        /* URL-like literal [] */
2288        if (addr[0] == '[') {
2289                addr++;
2290                if ((port = strchr(addr, ']')) == NULL)
2291                        return NULL;
2292                *port++ = 0;
2293        } else if (addr[0] == '*') {
2294                port = addr+1;
2295        } else {
2296                port = strrchr(strchr(addr, '/') ? : addr, ':');
2297        }
2298
2299        if (is_port)
2300                port = addr;
2301
2302        if (port && *port) {
2303                if (*port == ':')
2304                        *port++ = 0;
2305
2306                if (*port && *port != '*') {
2307                        if (get_integer(&a.port, port, 0)) {
2308                                struct servent *se1 = NULL;
2309                                struct servent *se2 = NULL;
2310
2311                                if (current_filter.dbs&(1<<UDP_DB))
2312                                        se1 = getservbyname(port, UDP_PROTO);
2313                                if (current_filter.dbs&(1<<TCP_DB))
2314                                        se2 = getservbyname(port, TCP_PROTO);
2315                                if (se1 && se2 && se1->s_port != se2->s_port) {
2316                                        fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
2317                                        return NULL;
2318                                }
2319                                if (!se1)
2320                                        se1 = se2;
2321                                if (se1) {
2322                                        a.port = ntohs(se1->s_port);
2323                                } else {
2324                                        struct scache *s;
2325
2326                                        for (s = rlist; s; s = s->next) {
2327                                                if ((s->proto == UDP_PROTO &&
2328                                                     (current_filter.dbs&(1<<UDP_DB))) ||
2329                                                    (s->proto == TCP_PROTO &&
2330                                                     (current_filter.dbs&(1<<TCP_DB)))) {
2331                                                        if (s->name && strcmp(s->name, port) == 0) {
2332                                                                if (a.port > 0 && a.port != s->port) {
2333                                                                        fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
2334                                                                        return NULL;
2335                                                                }
2336                                                                a.port = s->port;
2337                                                        }
2338                                                }
2339                                        }
2340                                        if (a.port <= 0) {
2341                                                fprintf(stderr, "Error: \"%s\" does not look like a port.\n", port);
2342                                                return NULL;
2343                                        }
2344                                }
2345                        }
2346                }
2347        }
2348        if (!is_port && *addr && *addr != '*') {
2349                if (get_prefix_1(&a.addr, addr, fam)) {
2350                        if (get_dns_host(&a, addr, fam)) {
2351                                fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr);
2352                                return NULL;
2353                        }
2354                }
2355        }
2356
2357out:
2358        if (fam != AF_UNSPEC) {
2359                int states = f->states;
2360                f->families = 0;
2361                filter_af_set(f, fam);
2362                filter_states_set(f, states);
2363        }
2364
2365        res = malloc(sizeof(*res));
2366        if (res)
2367                memcpy(res, &a, sizeof(a));
2368        return res;
2369}
2370
2371void *parse_markmask(const char *markmask)
2372{
2373        struct aafilter a, *res;
2374
2375        if (strchr(markmask, '/')) {
2376                if (sscanf(markmask, "%i/%i", &a.mark, &a.mask) != 2)
2377                        return NULL;
2378        } else {
2379                a.mask = 0xffffffff;
2380                if (sscanf(markmask, "%i", &a.mark) != 1)
2381                        return NULL;
2382        }
2383
2384        res = malloc(sizeof(*res));
2385        if (res)
2386                memcpy(res, &a, sizeof(a));
2387        return res;
2388}
2389
2390void *parse_cgroupcond(const char *path)
2391{
2392        struct aafilter *res;
2393        __u64 id;
2394
2395        id = get_cgroup2_id(path);
2396        if (!id)
2397                return NULL;
2398
2399        res = malloc(sizeof(*res));
2400        if (res)
2401                res->cgroup_id = id;
2402
2403        return res;
2404}
2405
2406static void proc_ctx_print(struct sockstat *s)
2407{
2408        char *buf;
2409
2410        if (show_proc_ctx || show_sock_ctx) {
2411                if (find_entry(s->ino, &buf,
2412                                (show_proc_ctx & show_sock_ctx) ?
2413                                PROC_SOCK_CTX : PROC_CTX) > 0) {
2414                        out(" users:(%s)", buf);
2415                        free(buf);
2416                }
2417        } else if (show_users) {
2418                if (find_entry(s->ino, &buf, USERS) > 0) {
2419                        out(" users:(%s)", buf);
2420                        free(buf);
2421                }
2422        }
2423}
2424
2425static void inet_stats_print(struct sockstat *s, bool v6only)
2426{
2427        sock_state_print(s);
2428
2429        inet_addr_print(&s->local, s->lport, s->iface, v6only);
2430        inet_addr_print(&s->remote, s->rport, 0, v6only);
2431
2432        proc_ctx_print(s);
2433}
2434
2435static int proc_parse_inet_addr(char *loc, char *rem, int family, struct
2436                sockstat * s)
2437{
2438        s->local.family = s->remote.family = family;
2439        if (family == AF_INET) {
2440                sscanf(loc, "%x:%x", s->local.data, (unsigned *)&s->lport);
2441                sscanf(rem, "%x:%x", s->remote.data, (unsigned *)&s->rport);
2442                s->local.bytelen = s->remote.bytelen = 4;
2443                return 0;
2444        } else {
2445                sscanf(loc, "%08x%08x%08x%08x:%x",
2446                       s->local.data,
2447                       s->local.data + 1,
2448                       s->local.data + 2,
2449                       s->local.data + 3,
2450                       &s->lport);
2451                sscanf(rem, "%08x%08x%08x%08x:%x",
2452                       s->remote.data,
2453                       s->remote.data + 1,
2454                       s->remote.data + 2,
2455                       s->remote.data + 3,
2456                       &s->rport);
2457                s->local.bytelen = s->remote.bytelen = 16;
2458                return 0;
2459        }
2460        return -1;
2461}
2462
/* Return nonzero when at least n characters precede the terminator of p. */
static int proc_has_chars(const char *p, int n)
{
        while (n-- > 0) {
                if (*p++ == '\0')
                        return 0;
        }
        return 1;
}

/*
 * Split one socket table line in place into its local address, remote
 * address and trailing data parts.
 *
 * The line is expected to look like "<sl>: <addr>:<port> <addr>:<port> <rest>"
 * with 4-character ports: after each address' ':' the character at offset 5
 * is overwritten with NUL and the next field starts at offset 6.
 *
 * On success returns 0 with *loc, *rem and *data pointing into line.
 * Returns -1 if a ':' is missing or the line is too short for the fixed
 * offsets (the line may already have been partially split in that case).
 */
static int proc_inet_split_line(char *line, char **loc, char **rem, char **data)
{
        char *p;

        /* Entry number, followed by ": ". */
        p = strchr(line, ':');
        if (p == NULL || !proc_has_chars(p, 2))
                return -1;
        *loc = p + 2;

        /* ':' inside the local address; needs 5 more chars and a separator. */
        p = strchr(*loc, ':');
        if (p == NULL || !proc_has_chars(p, 6))
                return -1;
        p[5] = 0;
        *rem = p + 6;

        /* Same layout for the remote address. */
        p = strchr(*rem, ':');
        if (p == NULL || !proc_has_chars(p, 6))
                return -1;
        p[5] = 0;
        *data = p + 6;

        return 0;
}
2483
2484/*
2485 * Display bandwidth in standard units
2486 * See: https://en.wikipedia.org/wiki/Data-rate_units
2487 * bw is in bits per second
2488 */
2489static char *sprint_bw(char *buf, double bw)
2490{
2491        if (numeric)
2492                sprintf(buf, "%.0f", bw);
2493        else if (bw >= 1e12)
2494                sprintf(buf, "%.3gT", bw / 1e12);
2495        else if (bw >= 1e9)
2496                sprintf(buf, "%.3gG", bw / 1e9);
2497        else if (bw >= 1e6)
2498                sprintf(buf, "%.3gM", bw / 1e6);
2499        else if (bw >= 1e3)
2500                sprintf(buf, "%.3gk", bw / 1e3);
2501        else
2502                sprintf(buf, "%g", bw);
2503
2504        return buf;
2505}
2506
2507static void sctp_stats_print(struct sctp_info *s)
2508{
2509        if (s->sctpi_tag)
2510                out(" tag:%x", s->sctpi_tag);
2511        if (s->sctpi_state)
2512                out(" state:%s", sctp_sstate_name[s->sctpi_state]);
2513        if (s->sctpi_rwnd)
2514                out(" rwnd:%d", s->sctpi_rwnd);
2515        if (s->sctpi_unackdata)
2516                out(" unackdata:%d", s->sctpi_unackdata);
2517        if (s->sctpi_penddata)
2518                out(" penddata:%d", s->sctpi_penddata);
2519        if (s->sctpi_instrms)
2520                out(" instrms:%d", s->sctpi_instrms);
2521        if (s->sctpi_outstrms)
2522                out(" outstrms:%d", s->sctpi_outstrms);
2523        if (s->sctpi_inqueue)
2524                out(" inqueue:%d", s->sctpi_inqueue);
2525        if (s->sctpi_outqueue)
2526                out(" outqueue:%d", s->sctpi_outqueue);
2527        if (s->sctpi_overall_error)
2528                out(" overerr:%d", s->sctpi_overall_error);
2529        if (s->sctpi_max_burst)
2530                out(" maxburst:%d", s->sctpi_max_burst);
2531        if (s->sctpi_maxseg)
2532                out(" maxseg:%d", s->sctpi_maxseg);
2533        if (s->sctpi_peer_rwnd)
2534                out(" prwnd:%d", s->sctpi_peer_rwnd);
2535        if (s->sctpi_peer_tag)
2536                out(" ptag:%x", s->sctpi_peer_tag);
2537        if (s->sctpi_peer_capable)
2538                out(" pcapable:%d", s->sctpi_peer_capable);
2539        if (s->sctpi_peer_sack)
2540                out(" psack:%d", s->sctpi_peer_sack);
2541        if (s->sctpi_s_autoclose)
2542                out(" autoclose:%d", s->sctpi_s_autoclose);
2543        if (s->sctpi_s_adaptation_ind)
2544                out(" adapind:%d", s->sctpi_s_adaptation_ind);
2545        if (s->sctpi_s_pd_point)
2546                out(" pdpoint:%d", s->sctpi_s_pd_point);
2547        if (s->sctpi_s_nodelay)
2548                out(" nodelay:%d", s->sctpi_s_nodelay);
2549        if (s->sctpi_s_disable_fragments)
2550                out(" nofrag:%d", s->sctpi_s_disable_fragments);
2551        if (s->sctpi_s_v4mapped)
2552                out(" v4mapped:%d", s->sctpi_s_v4mapped);
2553        if (s->sctpi_s_frag_interleave)
2554                out(" fraginl:%d", s->sctpi_s_frag_interleave);
2555}
2556
2557static void tcp_stats_print(struct tcpstat *s)
2558{
2559        char b1[64];
2560
2561        if (s->has_ts_opt)
2562                out(" ts");
2563        if (s->has_sack_opt)
2564                out(" sack");
2565        if (s->has_ecn_opt)
2566                out(" ecn");
2567        if (s->has_ecnseen_opt)
2568                out(" ecnseen");
2569        if (s->has_fastopen_opt)
2570                out(" fastopen");
2571        if (s->cong_alg[0])
2572                out(" %s", s->cong_alg);
2573        if (s->has_wscale_opt)
2574                out(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale);
2575        if (s->rto)
2576                out(" rto:%g", s->rto);
2577        if (s->backoff)
2578                out(" backoff:%u", s->backoff);
2579        if (s->rtt)
2580                out(" rtt:%g/%g", s->rtt, s->rttvar);
2581        if (s->ato)
2582                out(" ato:%g", s->ato);
2583
2584        if (s->qack)
2585                out(" qack:%d", s->qack);
2586        if (s->qack & 1)
2587                out(" bidir");
2588
2589        if (s->mss)
2590                out(" mss:%d", s->mss);
2591        if (s->pmtu)
2592                out(" pmtu:%u", s->pmtu);
2593        if (s->rcv_mss)
2594                out(" rcvmss:%d", s->rcv_mss);
2595        if (s->advmss)
2596                out(" advmss:%d", s->advmss);
2597        if (s->cwnd)
2598                out(" cwnd:%u", s->cwnd);
2599        if (s->ssthresh)
2600                out(" ssthresh:%d", s->ssthresh);
2601
2602        if (s->bytes_sent)
2603                out(" bytes_sent:%llu", s->bytes_sent);
2604        if (s->bytes_retrans)
2605                out(" bytes_retrans:%llu", s->bytes_retrans);
2606        if (s->bytes_acked)
2607                out(" bytes_acked:%llu", s->bytes_acked);
2608        if (s->bytes_received)
2609                out(" bytes_received:%llu", s->bytes_received);
2610        if (s->segs_out)
2611                out(" segs_out:%u", s->segs_out);
2612        if (s->segs_in)
2613                out(" segs_in:%u", s->segs_in);
2614        if (s->data_segs_out)
2615                out(" data_segs_out:%u", s->data_segs_out);
2616        if (s->data_segs_in)
2617                out(" data_segs_in:%u", s->data_segs_in);
2618
2619        if (s->dctcp && s->dctcp->enabled) {
2620                struct dctcpstat *dctcp = s->dctcp;
2621
2622                out(" dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)",
2623                             dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn,
2624                             dctcp->ab_tot);
2625        } else if (s->dctcp) {
2626                out(" dctcp:fallback_mode");
2627        }
2628
2629        if (s->bbr_info) {
2630                __u64 bw;
2631
2632                bw = s->bbr_info->bbr_bw_hi;
2633                bw <<= 32;
2634                bw |= s->bbr_info->bbr_bw_lo;
2635
2636                out(" bbr:(bw:%sbps,mrtt:%g",
2637                    sprint_bw(b1, bw * 8.0),
2638                    (double)s->bbr_info->bbr_min_rtt / 1000.0);
2639                if (s->bbr_info->bbr_pacing_gain)
2640                        out(",pacing_gain:%g",
2641                            (double)s->bbr_info->bbr_pacing_gain / 256.0);
2642                if (s->bbr_info->bbr_cwnd_gain)
2643                        out(",cwnd_gain:%g",
2644                            (double)s->bbr_info->bbr_cwnd_gain / 256.0);
2645                out(")");
2646        }
2647
2648        if (s->send_bps)
2649                out(" send %sbps", sprint_bw(b1, s->send_bps));
2650        if (s->lastsnd)
2651                out(" lastsnd:%u", s->lastsnd);
2652        if (s->lastrcv)
2653                out(" lastrcv:%u", s->lastrcv);
2654        if (s->lastack)
2655                out(" lastack:%u", s->lastack);
2656
2657        if (s->pacing_rate) {
2658                out(" pacing_rate %sbps", sprint_bw(b1, s->pacing_rate));
2659                if (s->pacing_rate_max)
2660                        out("/%sbps", sprint_bw(b1, s->pacing_rate_max));
2661        }
2662
2663        if (s->delivery_rate)
2664                out(" delivery_rate %sbps", sprint_bw(b1, s->delivery_rate));
2665        if (s->delivered)
2666                out(" delivered:%u", s->delivered);
2667        if (s->delivered_ce)
2668                out(" delivered_ce:%u", s->delivered_ce);
2669        if (s->app_limited)
2670                out(" app_limited");
2671
2672        if (s->busy_time) {
2673                out(" busy:%llums", s->busy_time / 1000);
2674                if (s->rwnd_limited)
2675                        out(" rwnd_limited:%llums(%.1f%%)",
2676                            s->rwnd_limited / 1000,
2677                            100.0 * s->rwnd_limited / s->busy_time);
2678                if (s->sndbuf_limited)
2679                        out(" sndbuf_limited:%llums(%.1f%%)",
2680                            s->sndbuf_limited / 1000,
2681                            100.0 * s->sndbuf_limited / s->busy_time);
2682        }
2683
2684        if (s->unacked)
2685                out(" unacked:%u", s->unacked);
2686        if (s->retrans || s->retrans_total)
2687                out(" retrans:%u/%u", s->retrans, s->retrans_total);
2688        if (s->lost)
2689                out(" lost:%u", s->lost);
2690        if (s->sacked && s->ss.state != SS_LISTEN)
2691                out(" sacked:%u", s->sacked);
2692        if (s->dsack_dups)
2693                out(" dsack_dups:%u", s->dsack_dups);
2694        if (s->fackets)
2695                out(" fackets:%u", s->fackets);
2696        if (s->reordering != 3)
2697                out(" reordering:%d", s->reordering);
2698        if (s->reord_seen)
2699                out(" reord_seen:%d", s->reord_seen);
2700        if (s->rcv_rtt)
2701                out(" rcv_rtt:%g", s->rcv_rtt);
2702        if (s->rcv_space)
2703                out(" rcv_space:%d", s->rcv_space);
2704        if (s->rcv_ssthresh)
2705                out(" rcv_ssthresh:%u", s->rcv_ssthresh);
2706        if (s->not_sent)
2707                out(" notsent:%u", s->not_sent);
2708        if (s->min_rtt)
2709                out(" minrtt:%g", s->min_rtt);
2710        if (s->rcv_ooopack)
2711                out(" rcv_ooopack:%u", s->rcv_ooopack);
2712        if (s->snd_wnd)
2713                out(" snd_wnd:%u", s->snd_wnd);
2714}
2715
2716static void tcp_timer_print(struct tcpstat *s)
2717{
2718        static const char * const tmr_name[] = {
2719                "off",
2720                "on",
2721                "keepalive",
2722                "timewait",
2723                "persist",
2724                "unknown"
2725        };
2726
2727        if (s->timer) {
2728                if (s->timer > 4)
2729                        s->timer = 5;
2730                out(" timer:(%s,%s,%d)",
2731                             tmr_name[s->timer],
2732                             print_ms_timer(s->timeout),
2733                             s->retrans);
2734        }
2735}
2736
2737static void sctp_timer_print(struct tcpstat *s)
2738{
2739        if (s->timer)
2740                out(" timer:(T3_RTX,%s,%d)",
2741                    print_ms_timer(s->timeout), s->retrans);
2742}
2743
2744static int tcp_show_line(char *line, const struct filter *f, int family)
2745{
2746        int rto = 0, ato = 0;
2747        struct tcpstat s = {};
2748        char *loc, *rem, *data;
2749        char opt[256];
2750        int n;
2751        int hz = get_user_hz();
2752
2753        if (proc_inet_split_line(line, &loc, &rem, &data))
2754                return -1;
2755
2756        int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0');
2757
2758        if (!(f->states & (1 << state)))
2759                return 0;
2760
2761        proc_parse_inet_addr(loc, rem, family, &s.ss);
2762
2763        if (f->f && run_ssfilter(f->f, &s.ss) == 0)
2764                return 0;
2765
2766        opt[0] = 0;
2767        n = sscanf(data, "%x %x:%x %x:%x %x %d %d %u %d %llx %d %d %d %u %d %[^\n]\n",
2768                   &s.ss.state, &s.ss.wq, &s.ss.rq,
2769                   &s.timer, &s.timeout, &s.retrans, &s.ss.uid, &s.probes,
2770                   &s.ss.ino, &s.ss.refcnt, &s.ss.sk, &rto, &ato, &s.qack, &s.cwnd,
2771                   &s.ssthresh, opt);
2772
2773        if (n < 17)
2774                opt[0] = 0;
2775
2776        if (n < 12) {
2777                rto = 0;
2778                s.cwnd = 2;
2779                s.ssthresh = -1;
2780                ato = s.qack = 0;
2781        }
2782
2783        s.retrans   = s.timer != 1 ? s.probes : s.retrans;
2784        s.timeout   = (s.timeout * 1000 + hz - 1) / hz;
2785        s.ato       = (double)ato / hz;
2786        s.qack     /= 2;
2787        s.rto       = (double)rto;
2788        s.ssthresh  = s.ssthresh == -1 ? 0 : s.ssthresh;
2789        s.rto       = s.rto != 3 * hz  ? s.rto / hz : 0;
2790        s.ss.type   = IPPROTO_TCP;
2791
2792        inet_stats_print(&s.ss, false);
2793
2794        if (show_options)
2795                tcp_timer_print(&s);
2796
2797        if (show_details) {
2798                sock_details_print(&s.ss);
2799                if (opt[0])
2800                        out(" opt:\"%s\"", opt);
2801        }
2802
2803        if (show_tcpinfo)
2804                tcp_stats_print(&s);
2805
2806        return 0;
2807}
2808
/*
 * Read a line-oriented table from fp and hand every record to worker().
 *
 * The first line is treated as a header and discarded.  Each following
 * line must end in '\n' and fit in the internal 256-byte buffer; the
 * newline is stripped before worker(line, f, fam) is called.
 *
 * Returns:
 *   -1 with errno set to EINVAL if a line is not newline-terminated
 *      (too long or truncated input),
 *   -1 if the stream is in an error state once reading stops,
 *    0 otherwise.  A negative return from worker() stops the loop early
 *      and is still reported as 0.
 */
static int generic_record_read(FILE *fp,
                               int (*worker)(char*, const struct filter *, int),
                               const struct filter *f, int fam)
{
        char line[256];

        /* skip header */
        if (fgets(line, sizeof(line), fp) == NULL)
                goto outerr;

        while (fgets(line, sizeof(line), fp) != NULL) {
                size_t n = strlen(line);

                if (n == 0 || line[n-1] != '\n') {
                        /* errno values are positive; -EINVAL would be bogus */
                        errno = EINVAL;
                        return -1;
                }
                line[n-1] = 0;

                if (worker(line, f, fam) < 0)
                        return 0;
        }
outerr:

        return ferror(fp) ? -1 : 0;
}
2835
2836static void print_skmeminfo(struct rtattr *tb[], int attrtype)
2837{
2838        const __u32 *skmeminfo;
2839
2840        if (!tb[attrtype]) {
2841                if (attrtype == INET_DIAG_SKMEMINFO) {
2842                        if (!tb[INET_DIAG_MEMINFO])
2843                                return;
2844
2845                        const struct inet_diag_meminfo *minfo =
2846                                RTA_DATA(tb[INET_DIAG_MEMINFO]);
2847
2848                        out(" mem:(r%u,w%u,f%u,t%u)",
2849                                   minfo->idiag_rmem,
2850                                   minfo->idiag_wmem,
2851                                   minfo->idiag_fmem,
2852                                   minfo->idiag_tmem);
2853                }
2854                return;
2855        }
2856
2857        skmeminfo = RTA_DATA(tb[attrtype]);
2858
2859        out(" skmem:(r%u,rb%u,t%u,tb%u,f%u,w%u,o%u",
2860                     skmeminfo[SK_MEMINFO_RMEM_ALLOC],
2861                     skmeminfo[SK_MEMINFO_RCVBUF],
2862                     skmeminfo[SK_MEMINFO_WMEM_ALLOC],
2863                     skmeminfo[SK_MEMINFO_SNDBUF],
2864                     skmeminfo[SK_MEMINFO_FWD_ALLOC],
2865                     skmeminfo[SK_MEMINFO_WMEM_QUEUED],
2866                     skmeminfo[SK_MEMINFO_OPTMEM]);
2867
2868        if (RTA_PAYLOAD(tb[attrtype]) >=
2869                (SK_MEMINFO_BACKLOG + 1) * sizeof(__u32))
2870                out(",bl%u", skmeminfo[SK_MEMINFO_BACKLOG]);
2871
2872        if (RTA_PAYLOAD(tb[attrtype]) >=
2873                (SK_MEMINFO_DROPS + 1) * sizeof(__u32))
2874                out(",d%u", skmeminfo[SK_MEMINFO_DROPS]);
2875
2876        out(")");
2877}
2878
2879static void print_md5sig(struct tcp_diag_md5sig *sig)
2880{
2881        out("%s/%d=",
2882            format_host(sig->tcpm_family,
2883                        sig->tcpm_family == AF_INET6 ? 16 : 4,
2884                        &sig->tcpm_addr),
2885            sig->tcpm_prefixlen);
2886        print_escape_buf(sig->tcpm_key, sig->tcpm_keylen, " ,");
2887}
2888
2889static void tcp_tls_version(struct rtattr *attr)
2890{
2891        u_int16_t val;
2892
2893        if (!attr)
2894                return;
2895        val = rta_getattr_u16(attr);
2896
2897        switch (val) {
2898        case TLS_1_2_VERSION:
2899                out(" version: 1.2");
2900                break;
2901        case TLS_1_3_VERSION:
2902                out(" version: 1.3");
2903                break;
2904        default:
2905                out(" version: unknown(%hu)", val);
2906                break;
2907        }
2908}
2909
2910static void tcp_tls_cipher(struct rtattr *attr)
2911{
2912        u_int16_t val;
2913
2914        if (!attr)
2915                return;
2916        val = rta_getattr_u16(attr);
2917
2918        switch (val) {
2919        case TLS_CIPHER_AES_GCM_128:
2920                out(" cipher: aes-gcm-128");
2921                break;
2922        case TLS_CIPHER_AES_GCM_256:
2923                out(" cipher: aes-gcm-256");
2924                break;
2925        }
2926}
2927
2928static void tcp_tls_conf(const char *name, struct rtattr *attr)
2929{
2930        u_int16_t val;
2931
2932        if (!attr)
2933                return;
2934        val = rta_getattr_u16(attr);
2935
2936        switch (val) {
2937        case TLS_CONF_BASE:
2938                out(" %s: none", name);
2939                break;
2940        case TLS_CONF_SW:
2941                out(" %s: sw", name);
2942                break;
2943        case TLS_CONF_HW:
2944                out(" %s: hw", name);
2945                break;
2946        case TLS_CONF_HW_RECORD:
2947                out(" %s: hw-record", name);
2948                break;
2949        default:
2950                out(" %s: unknown(%hu)", name, val);
2951                break;
2952        }
2953}
2954
2955static void mptcp_subflow_info(struct rtattr *tb[])
2956{
2957        u_int32_t flags = 0;
2958
2959        if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
2960                char caps[32 + 1] = { 0 }, *cap = &caps[0];
2961
2962                flags = rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
2963
2964                if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
2965                        *cap++ = 'M';
2966                if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
2967                        *cap++ = 'm';
2968                if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
2969                        *cap++ = 'J';
2970                if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
2971                        *cap++ = 'j';
2972                if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
2973                        *cap++ = 'B';
2974                if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
2975                        *cap++ = 'b';
2976                if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
2977                        *cap++ = 'e';
2978                if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
2979                        *cap++ = 'c';
2980                if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
2981                        *cap++ = 'v';
2982                if (flags)
2983                        out(" flags:%s", caps);
2984        }
2985        if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
2986            tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
2987            tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
2988            tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
2989                out(" token:%04x(id:%hhu)/%04x(id:%hhu)",
2990                    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
2991                    rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
2992                    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
2993                    rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
2994        if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
2995                out(" seq:%llx",
2996                    rta_getattr_u64(tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
2997        if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
2998                out(" sfseq:%x",
2999                    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
3000        if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
3001                out(" ssnoff:%x",
3002                    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
3003        if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
3004                out(" maplen:%x",
3005                    rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
3006}
3007
/*
 * Evaluates to 1 if any bit of opt is set in info->tcpi_options, else 0.
 * Both arguments and the whole expansion are parenthesised so the macro
 * is safe with expression arguments and inside larger expressions.
 */
#define TCPI_HAS_OPT(info, opt) (!!((info)->tcpi_options & (opt)))
3009
3010static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
3011                struct rtattr *tb[])
3012{
3013        double rtt = 0;
3014        struct tcpstat s = {};
3015
3016        s.ss.state = r->idiag_state;
3017
3018        print_skmeminfo(tb, INET_DIAG_SKMEMINFO);
3019
3020        if (tb[INET_DIAG_INFO]) {
3021                struct tcp_info *info;
3022                int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
3023
3024                /* workaround for older kernels with less fields */
3025                if (len < sizeof(*info)) {
3026                        info = alloca(sizeof(*info));
3027                        memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
3028                        memset((char *)info + len, 0, sizeof(*info) - len);
3029                } else
3030                        info = RTA_DATA(tb[INET_DIAG_INFO]);
3031
3032                if (show_options) {
3033                        s.has_ts_opt       = TCPI_HAS_OPT(info, TCPI_OPT_TIMESTAMPS);
3034                        s.has_sack_opt     = TCPI_HAS_OPT(info, TCPI_OPT_SACK);
3035                        s.has_ecn_opt      = TCPI_HAS_OPT(info, TCPI_OPT_ECN);
3036                        s.has_ecnseen_opt  = TCPI_HAS_OPT(info, TCPI_OPT_ECN_SEEN);
3037                        s.has_fastopen_opt = TCPI_HAS_OPT(info, TCPI_OPT_SYN_DATA);
3038                }
3039
3040                if (tb[INET_DIAG_CONG])
3041                        strncpy(s.cong_alg,
3042                                rta_getattr_str(tb[INET_DIAG_CONG]),
3043                                sizeof(s.cong_alg) - 1);
3044
3045                if (TCPI_HAS_OPT(info, TCPI_OPT_WSCALE)) {
3046                        s.has_wscale_opt  = true;
3047                        s.snd_wscale      = info->tcpi_snd_wscale;
3048                        s.rcv_wscale      = info->tcpi_rcv_wscale;
3049                }
3050
3051                if (info->tcpi_rto && info->tcpi_rto != 3000000)
3052                        s.rto = (double)info->tcpi_rto / 1000;
3053
3054                s.backoff        = info->tcpi_backoff;
3055                s.rtt            = (double)info->tcpi_rtt / 1000;
3056                s.rttvar         = (double)info->tcpi_rttvar / 1000;
3057                s.ato            = (double)info->tcpi_ato / 1000;
3058                s.mss            = info->tcpi_snd_mss;
3059                s.rcv_mss        = info->tcpi_rcv_mss;
3060                s.advmss         = info->tcpi_advmss;
3061                s.rcv_space      = info->tcpi_rcv_space;
3062                s.rcv_rtt        = (double)info->tcpi_rcv_rtt / 1000;
3063                s.lastsnd        = info->tcpi_last_data_sent;
3064                s.lastrcv        = info->tcpi_last_data_recv;
3065                s.lastack        = info->tcpi_last_ack_recv;
3066                s.unacked        = info->tcpi_unacked;
3067                s.retrans        = info->tcpi_retrans;
3068                s.retrans_total  = info->tcpi_total_retrans;
3069                s.lost           = info->tcpi_lost;
3070                s.sacked         = info->tcpi_sacked;
3071                s.fackets        = info->tcpi_fackets;
3072                s.reordering     = info->tcpi_reordering;
3073                s.rcv_ssthresh   = info->tcpi_rcv_ssthresh;
3074                s.cwnd           = info->tcpi_snd_cwnd;
3075                s.pmtu           = info->tcpi_pmtu;
3076
3077                if (info->tcpi_snd_ssthresh < 0xFFFF)
3078                        s.ssthresh = info->tcpi_snd_ssthresh;
3079
3080                rtt = (double) info->tcpi_rtt;
3081                if (tb[INET_DIAG_VEGASINFO]) {
3082                        const struct tcpvegas_info *vinfo
3083                                = RTA_DATA(tb[INET_DIAG_VEGASINFO]);
3084
3085                        if (vinfo->tcpv_enabled &&
3086                                        vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff)
3087                                rtt =  vinfo->tcpv_rtt;
3088                }
3089
3090                if (tb[INET_DIAG_DCTCPINFO]) {
3091                        struct dctcpstat *dctcp = malloc(sizeof(struct
3092                                                dctcpstat));
3093
3094                        const struct tcp_dctcp_info *dinfo
3095                                = RTA_DATA(tb[INET_DIAG_DCTCPINFO]);
3096
3097                        dctcp->enabled  = !!dinfo->dctcp_enabled;
3098                        dctcp->ce_state = dinfo->dctcp_ce_state;
3099                        dctcp->alpha    = dinfo->dctcp_alpha;
3100                        dctcp->ab_ecn   = dinfo->dctcp_ab_ecn;
3101                        dctcp->ab_tot   = dinfo->dctcp_ab_tot;
3102                        s.dctcp         = dctcp;
3103                }
3104
3105                if (tb[INET_DIAG_BBRINFO]) {
3106                        const void *bbr_info = RTA_DATA(tb[INET_DIAG_BBRINFO]);
3107                        int len = min(RTA_PAYLOAD(tb[INET_DIAG_BBRINFO]),
3108                                      sizeof(*s.bbr_info));
3109
3110                        s.bbr_info = calloc(1, sizeof(*s.bbr_info));
3111                        if (s.bbr_info && bbr_info)
3112                                memcpy(s.bbr_info, bbr_info, len);
3113                }
3114
3115                if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) {
3116                        s.send_bps = (double) info->tcpi_snd_cwnd *
3117                                (double)info->tcpi_snd_mss * 8000000. / rtt;
3118                }
3119
3120                if (info->tcpi_pacing_rate &&
3121                                info->tcpi_pacing_rate != ~0ULL) {
3122                        s.pacing_rate = info->tcpi_pacing_rate * 8.;
3123
3124                        if (info->tcpi_max_pacing_rate &&
3125                                        info->tcpi_max_pacing_rate != ~0ULL)
3126                                s.pacing_rate_max = info->tcpi_max_pacing_rate * 8.;
3127                }
3128                s.bytes_acked = info->tcpi_bytes_acked;
3129                s.bytes_received = info->tcpi_bytes_received;
3130                s.segs_out = info->tcpi_segs_out;
3131                s.segs_in = info->tcpi_segs_in;
3132                s.data_segs_out = info->tcpi_data_segs_out;
3133                s.data_segs_in = info->tcpi_data_segs_in;
3134                s.not_sent = info->tcpi_notsent_bytes;
3135                if (info->tcpi_min_rtt && info->tcpi_min_rtt != ~0U)
3136                        s.min_rtt = (double) info->tcpi_min_rtt / 1000;
3137                s.delivery_rate = info->tcpi_delivery_rate * 8.;
3138                s.app_limited = info->tcpi_delivery_rate_app_limited;
3139                s.busy_time = info->tcpi_busy_time;
3140                s.rwnd_limited = info->tcpi_rwnd_limited;
3141                s.sndbuf_limited = info->tcpi_sndbuf_limited;
3142                s.delivered = info->tcpi_delivered;
3143                s.delivered_ce = info->tcpi_delivered_ce;
3144                s.dsack_dups = info->tcpi_dsack_dups;
3145                s.reord_seen = info->tcpi_reord_seen;
3146                s.bytes_sent = info->tcpi_bytes_sent;
3147                s.bytes_retrans = info->tcpi_bytes_retrans;
3148                s.rcv_ooopack = info->tcpi_rcv_ooopack;
3149                s.snd_wnd = info->tcpi_snd_wnd;
3150                tcp_stats_print(&s);
3151                free(s.dctcp);
3152                free(s.bbr_info);
3153        }
3154        if (tb[INET_DIAG_MD5SIG]) {
3155                struct tcp_diag_md5sig *sig = RTA_DATA(tb[INET_DIAG_MD5SIG]);
3156                int len = RTA_PAYLOAD(tb[INET_DIAG_MD5SIG]);
3157
3158                out(" md5keys:");
3159                print_md5sig(sig++);
3160                for (len -= sizeof(*sig); len > 0; len -= sizeof(*sig)) {
3161                        out(",");
3162                        print_md5sig(sig++);
3163                }
3164        }
3165        if (tb[INET_DIAG_ULP_INFO]) {
3166                struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
3167
3168                parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
3169                                    tb[INET_DIAG_ULP_INFO]);
3170
3171                if (ulpinfo[INET_ULP_INFO_NAME])
3172                        out(" tcp-ulp-%s",
3173                            rta_getattr_str(ulpinfo[INET_ULP_INFO_NAME]));
3174
3175                if (ulpinfo[INET_ULP_INFO_TLS]) {
3176                        struct rtattr *tlsinfo[TLS_INFO_MAX + 1] = { 0 };
3177
3178                        parse_rtattr_nested(tlsinfo, TLS_INFO_MAX,
3179                                            ulpinfo[INET_ULP_INFO_TLS]);
3180
3181                        tcp_tls_version(tlsinfo[TLS_INFO_VERSION]);
3182                        tcp_tls_cipher(tlsinfo[TLS_INFO_CIPHER]);
3183                        tcp_tls_conf("rxconf", tlsinfo[TLS_INFO_RXCONF]);
3184                        tcp_tls_conf("txconf", tlsinfo[TLS_INFO_TXCONF]);
3185                }
3186                if (ulpinfo[INET_ULP_INFO_MPTCP]) {
3187                        struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] =
3188                                { 0 };
3189
3190                        parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
3191                                            ulpinfo[INET_ULP_INFO_MPTCP]);
3192                        mptcp_subflow_info(sfinfo);
3193                }
3194        }
3195}
3196
3197static void mptcp_stats_print(struct mptcp_info *s)
3198{
3199        if (s->mptcpi_subflows)
3200                out(" subflows:%d", s->mptcpi_subflows);
3201        if (s->mptcpi_add_addr_signal)
3202                out(" add_addr_signal:%d", s->mptcpi_add_addr_signal);
3203        if (s->mptcpi_add_addr_accepted)
3204                out(" add_addr_accepted:%d", s->mptcpi_add_addr_accepted);
3205        if (s->mptcpi_subflows_max)
3206                out(" subflows_max:%d", s->mptcpi_subflows_max);
3207        if (s->mptcpi_add_addr_signal_max)
3208                out(" add_addr_signal_max:%d", s->mptcpi_add_addr_signal_max);
3209        if (s->mptcpi_add_addr_accepted_max)
3210                out(" add_addr_accepted_max:%d", s->mptcpi_add_addr_accepted_max);
3211        if (s->mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK)
3212                out(" fallback");
3213        if (s->mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED)
3214                out(" remote_key");
3215        if (s->mptcpi_token)
3216                out(" token:%x", s->mptcpi_token);
3217        if (s->mptcpi_write_seq)
3218                out(" write_seq:%llx", s->mptcpi_write_seq);
3219        if (s->mptcpi_snd_una)
3220                out(" snd_una:%llx", s->mptcpi_snd_una);
3221        if (s->mptcpi_rcv_nxt)
3222                out(" rcv_nxt:%llx", s->mptcpi_rcv_nxt);
3223}
3224
3225static void mptcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
3226                            struct rtattr *tb[])
3227{
3228        print_skmeminfo(tb, INET_DIAG_SKMEMINFO);
3229
3230        if (tb[INET_DIAG_INFO]) {
3231                struct mptcp_info *info;
3232                int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
3233
3234                /* workaround for older kernels with less fields */
3235                if (len < sizeof(*info)) {
3236                        info = alloca(sizeof(*info));
3237                        memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
3238                        memset((char *)info + len, 0, sizeof(*info) - len);
3239                } else
3240                        info = RTA_DATA(tb[INET_DIAG_INFO]);
3241
3242                mptcp_stats_print(info);
3243        }
3244}
3245
/*
 * Format the address held in a sockaddr_storage via format_host().
 * AF_INET uses the 4-byte sin_addr, AF_INET6 the 16-byte sin6_addr;
 * any other family yields the empty string.
 */
static const char *format_host_sa(struct sockaddr_storage *sa)
{
        union {
                struct sockaddr_in sin;
                struct sockaddr_in6 sin6;
        } *addr = (void *)sa;

        if (sa->ss_family == AF_INET)
                return format_host(AF_INET, 4, &addr->sin.sin_addr);

        if (sa->ss_family == AF_INET6)
                return format_host(AF_INET6, 16, &addr->sin6.sin6_addr);

        return "";
}
3262
3263static void sctp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
3264                struct rtattr *tb[])
3265{
3266        struct sockaddr_storage *sa;
3267        int len;
3268
3269        print_skmeminfo(tb, INET_DIAG_SKMEMINFO);
3270
3271        if (tb[INET_DIAG_LOCALS]) {
3272                len = RTA_PAYLOAD(tb[INET_DIAG_LOCALS]);
3273                sa = RTA_DATA(tb[INET_DIAG_LOCALS]);
3274
3275                out(" locals:%s", format_host_sa(sa));
3276                for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa))
3277                        out(",%s", format_host_sa(sa));
3278
3279        }
3280        if (tb[INET_DIAG_PEERS]) {
3281                len = RTA_PAYLOAD(tb[INET_DIAG_PEERS]);
3282                sa = RTA_DATA(tb[INET_DIAG_PEERS]);
3283
3284                out(" peers:%s", format_host_sa(sa));
3285                for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa))
3286                        out(",%s", format_host_sa(sa));
3287        }
3288        if (tb[INET_DIAG_INFO]) {
3289                struct sctp_info *info;
3290                len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
3291
3292                /* workaround for older kernels with less fields */
3293                if (len < sizeof(*info)) {
3294                        info = alloca(sizeof(*info));
3295                        memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
3296                        memset((char *)info + len, 0, sizeof(*info) - len);
3297                } else
3298                        info = RTA_DATA(tb[INET_DIAG_INFO]);
3299
3300                sctp_stats_print(info);
3301        }
3302}
3303
3304static void parse_diag_msg(struct nlmsghdr *nlh, struct sockstat *s)
3305{
3306        struct rtattr *tb[INET_DIAG_MAX+1];
3307        struct inet_diag_msg *r = NLMSG_DATA(nlh);
3308
3309        parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
3310                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
3311
3312        s->state        = r->idiag_state;
3313        s->local.family = s->remote.family = r->idiag_family;
3314        s->lport        = ntohs(r->id.idiag_sport);
3315        s->rport        = ntohs(r->id.idiag_dport);
3316        s->wq           = r->idiag_wqueue;
3317        s->rq           = r->idiag_rqueue;
3318        s->ino          = r->idiag_inode;
3319        s->uid          = r->idiag_uid;
3320        s->iface        = r->id.idiag_if;
3321        s->sk           = cookie_sk_get(&r->id.idiag_cookie[0]);
3322
3323        s->mark = 0;
3324        if (tb[INET_DIAG_MARK])
3325                s->mark = rta_getattr_u32(tb[INET_DIAG_MARK]);
3326        s->cgroup_id = 0;
3327        if (tb[INET_DIAG_CGROUP_ID])
3328                s->cgroup_id = rta_getattr_u64(tb[INET_DIAG_CGROUP_ID]);
3329        if (tb[INET_DIAG_PROTOCOL])
3330                s->raw_prot = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]);
3331        else
3332                s->raw_prot = 0;
3333
3334        if (s->local.family == AF_INET)
3335                s->local.bytelen = s->remote.bytelen = 4;
3336        else
3337                s->local.bytelen = s->remote.bytelen = 16;
3338
3339        memcpy(s->local.data, r->id.idiag_src, s->local.bytelen);
3340        memcpy(s->remote.data, r->id.idiag_dst, s->local.bytelen);
3341}
3342
3343static int inet_show_sock(struct nlmsghdr *nlh,
3344                          struct sockstat *s)
3345{
3346        struct rtattr *tb[INET_DIAG_MAX+1];
3347        struct inet_diag_msg *r = NLMSG_DATA(nlh);
3348        unsigned char v6only = 0;
3349
3350        parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1),
3351                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
3352
3353        if (tb[INET_DIAG_PROTOCOL])
3354                s->type = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]);
3355
3356        if (s->local.family == AF_INET6 && tb[INET_DIAG_SKV6ONLY])
3357                v6only = rta_getattr_u8(tb[INET_DIAG_SKV6ONLY]);
3358
3359        inet_stats_print(s, v6only);
3360
3361        if (show_options) {
3362                struct tcpstat t = {};
3363
3364                t.timer = r->idiag_timer;
3365                t.timeout = r->idiag_expires;
3366                t.retrans = r->idiag_retrans;
3367                if (s->type == IPPROTO_SCTP)
3368                        sctp_timer_print(&t);
3369                else
3370                        tcp_timer_print(&t);
3371        }
3372
3373        if (show_details) {
3374                sock_details_print(s);
3375                if (s->local.family == AF_INET6 && tb[INET_DIAG_SKV6ONLY])
3376                        out(" v6only:%u", v6only);
3377
3378                if (tb[INET_DIAG_SHUTDOWN]) {
3379                        unsigned char mask;
3380
3381                        mask = rta_getattr_u8(tb[INET_DIAG_SHUTDOWN]);
3382                        out(" %c-%c",
3383                            mask & 1 ? '-' : '<', mask & 2 ? '-' : '>');
3384                }
3385        }
3386
3387        if (show_tos) {
3388                if (tb[INET_DIAG_TOS])
3389                        out(" tos:%#x", rta_getattr_u8(tb[INET_DIAG_TOS]));
3390                if (tb[INET_DIAG_TCLASS])
3391                        out(" tclass:%#x", rta_getattr_u8(tb[INET_DIAG_TCLASS]));
3392                if (tb[INET_DIAG_CLASS_ID])
3393                        out(" class_id:%#x", rta_getattr_u32(tb[INET_DIAG_CLASS_ID]));
3394        }
3395
3396        if (show_cgroup) {
3397                if (tb[INET_DIAG_CGROUP_ID])
3398                        out(" cgroup:%s", cg_id_to_path(rta_getattr_u64(tb[INET_DIAG_CGROUP_ID])));
3399        }
3400
3401        if (show_inet_sockopt) {
3402                if (tb[INET_DIAG_SOCKOPT] && RTA_PAYLOAD(tb[INET_DIAG_SOCKOPT]) >=
3403                    sizeof(struct inet_diag_sockopt)) {
3404                        const struct inet_diag_sockopt *sockopt =
3405                                        RTA_DATA(tb[INET_DIAG_SOCKOPT]);
3406                        if (!oneline)
3407                                out("\n\tinet-sockopt: (");
3408                        else
3409                                out(" inet-sockopt: (");
3410                        if (sockopt->recverr)
3411                                out(" recverr");
3412                        if (sockopt->is_icsk)
3413                                out(" is_icsk");
3414                        if (sockopt->freebind)
3415                                out(" freebind");
3416                        if (sockopt->hdrincl)
3417                                out(" hdrincl");
3418                        if (sockopt->mc_loop)
3419                                out(" mc_loop");
3420                        if (sockopt->transparent)
3421                                out(" transparent");
3422                        if (sockopt->mc_all)
3423                                out(" mc_all");
3424                        if (sockopt->nodefrag)
3425                                out(" nodefrag");
3426                        if (sockopt->bind_address_no_port)
3427                                out(" bind_addr_no_port");
3428                        if (sockopt->recverr_rfc4884)
3429                                out(" recverr_rfc4884");
3430                        if (sockopt->defer_connect)
3431                                out(" defer_connect");
3432                        out(")");
3433                }
3434        }
3435
3436        if (show_mem || (show_tcpinfo && s->type != IPPROTO_UDP)) {
3437                if (!oneline)
3438                        out("\n\t");
3439                if (s->type == IPPROTO_SCTP)
3440                        sctp_show_info(nlh, r, tb);
3441                else if (s->type == IPPROTO_MPTCP)
3442                        mptcp_show_info(nlh, r, tb);
3443                else
3444                        tcp_show_info(nlh, r, tb);
3445        }
3446        sctp_ino = s->ino;
3447
3448        return 0;
3449}
3450
3451static int tcpdiag_send(int fd, int protocol, struct filter *f)
3452{
3453        struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
3454        struct {
3455                struct nlmsghdr nlh;
3456                struct inet_diag_req r;
3457        } req = {
3458                .nlh.nlmsg_len = sizeof(req),
3459                .nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST,
3460                .nlh.nlmsg_seq = MAGIC_SEQ,
3461                .r.idiag_family = AF_INET,
3462                .r.idiag_states = f->states,
3463        };
3464        char    *bc = NULL;
3465        int     bclen;
3466        struct msghdr msg;
3467        struct rtattr rta;
3468        struct iovec iov[3];
3469        int iovlen = 1;
3470
3471        if (protocol == IPPROTO_TCP)
3472                req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
3473        else if (protocol == IPPROTO_DCCP)
3474                req.nlh.nlmsg_type = DCCPDIAG_GETSOCK;
3475        else
3476                return -1;
3477
3478        if (show_mem) {
3479                req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1));
3480                req.r.idiag_ext |= (1<<(INET_DIAG_SKMEMINFO-1));
3481        }
3482
3483        if (show_tcpinfo) {
3484                req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1));
3485                req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1));
3486                req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1));
3487        }
3488
3489        if (show_tos) {
3490                req.r.idiag_ext |= (1<<(INET_DIAG_TOS-1));
3491                req.r.idiag_ext |= (1<<(INET_DIAG_TCLASS-1));
3492        }
3493
3494        iov[0] = (struct iovec){
3495                .iov_base = &req,
3496                .iov_len = sizeof(req)
3497        };
3498        if (f->f) {
3499                bclen = ssfilter_bytecompile(f->f, &bc);
3500                if (bclen) {
3501                        rta.rta_type = INET_DIAG_REQ_BYTECODE;
3502                        rta.rta_len = RTA_LENGTH(bclen);
3503                        iov[1] = (struct iovec){ &rta, sizeof(rta) };
3504                        iov[2] = (struct iovec){ bc, bclen };
3505                        req.nlh.nlmsg_len += RTA_LENGTH(bclen);
3506                        iovlen = 3;
3507                }
3508        }
3509
3510        msg = (struct msghdr) {
3511                .msg_name = (void *)&nladdr,
3512                .msg_namelen = sizeof(nladdr),
3513                .msg_iov = iov,
3514                .msg_iovlen = iovlen,
3515        };
3516
3517        if (sendmsg(fd, &msg, 0) < 0) {
3518                close(fd);
3519                return -1;
3520        }
3521
3522        return 0;
3523}
3524
3525static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
3526{
3527        struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
3528        DIAG_REQUEST(req, struct inet_diag_req_v2 r);
3529        char    *bc = NULL;
3530        int     bclen;
3531        __u32   proto;
3532        struct msghdr msg;
3533        struct rtattr rta_bc;
3534        struct rtattr rta_proto;
3535        struct iovec iov[5];
3536        int iovlen = 1;
3537
3538        if (family == PF_UNSPEC)
3539                return tcpdiag_send(fd, protocol, f);
3540
3541        memset(&req.r, 0, sizeof(req.r));
3542        req.r.sdiag_family = family;
3543        req.r.sdiag_protocol = protocol;
3544        req.r.idiag_states = f->states;
3545        if (show_mem) {
3546                req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1));
3547                req.r.idiag_ext |= (1<<(INET_DIAG_SKMEMINFO-1));
3548        }
3549
3550        if (show_tcpinfo) {
3551                req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1));
3552                req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1));
3553                req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1));
3554        }
3555
3556        if (show_tos) {
3557                req.r.idiag_ext |= (1<<(INET_DIAG_TOS-1));
3558                req.r.idiag_ext |= (1<<(INET_DIAG_TCLASS-1));
3559        }
3560
3561        iov[0] = (struct iovec){
3562                .iov_base = &req,
3563                .iov_len = sizeof(req)
3564        };
3565        if (f->f) {
3566                bclen = ssfilter_bytecompile(f->f, &bc);
3567                if (bclen) {
3568                        rta_bc.rta_type = INET_DIAG_REQ_BYTECODE;
3569                        rta_bc.rta_len = RTA_LENGTH(bclen);
3570                        iov[1] = (struct iovec){ &rta_bc, sizeof(rta_bc) };
3571                        iov[2] = (struct iovec){ bc, bclen };
3572                        req.nlh.nlmsg_len += RTA_LENGTH(bclen);
3573                        iovlen = 3;
3574                }
3575        }
3576
3577        /* put extended protocol attribute, if required */
3578        if (protocol > 255) {
3579                rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
3580                rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
3581                proto = protocol;
3582                iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto) };
3583                iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto) };
3584                req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
3585                iovlen += 2;
3586        }
3587
3588        msg = (struct msghdr) {
3589                .msg_name = (void *)&nladdr,
3590                .msg_namelen = sizeof(nladdr),
3591                .msg_iov = iov,
3592                .msg_iovlen = iovlen,
3593        };
3594
3595        if (sendmsg(fd, &msg, 0) < 0) {
3596                close(fd);
3597                return -1;
3598        }
3599
3600        return 0;
3601}
3602
/*
 * Context handed to show_one_inet_sock() (and on to kill_inet_sock())
 * for every message of an inet diag dump.
 */
struct inet_diag_arg {
	struct filter *f;		/* filter the dump is run with */
	int protocol;			/* protocol being dumped; copied into sockstat.type */
	struct rtnl_handle *rth;	/* handle used for SOCK_DESTROY requests; only set when f->kill */
};
3608
3609static int kill_inet_sock(struct nlmsghdr *h, void *arg, struct sockstat *s)
3610{
3611        struct inet_diag_msg *d = NLMSG_DATA(h);
3612        struct inet_diag_arg *diag_arg = arg;
3613        struct rtnl_handle *rth = diag_arg->rth;
3614
3615        DIAG_REQUEST(req, struct inet_diag_req_v2 r);
3616
3617        req.nlh.nlmsg_type = SOCK_DESTROY;
3618        req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3619        req.nlh.nlmsg_seq = ++rth->seq;
3620        req.r.sdiag_family = d->idiag_family;
3621        req.r.sdiag_protocol = diag_arg->protocol;
3622        req.r.id = d->id;
3623
3624        if (diag_arg->protocol == IPPROTO_RAW) {
3625                struct inet_diag_req_raw *raw = (void *)&req.r;
3626
3627                BUILD_BUG_ON(sizeof(req.r) != sizeof(*raw));
3628                raw->sdiag_raw_protocol = s->raw_prot;
3629        }
3630
3631        return rtnl_talk(rth, &req.nlh, NULL);
3632}
3633
3634static int show_one_inet_sock(struct nlmsghdr *h, void *arg)
3635{
3636        int err;
3637        struct inet_diag_arg *diag_arg = arg;
3638        struct inet_diag_msg *r = NLMSG_DATA(h);
3639        struct sockstat s = {};
3640
3641        if (!(diag_arg->f->families & FAMILY_MASK(r->idiag_family)))
3642                return 0;
3643
3644        parse_diag_msg(h, &s);
3645        s.type = diag_arg->protocol;
3646
3647        if (diag_arg->f->f && run_ssfilter(diag_arg->f->f, &s) == 0)
3648                return 0;
3649
3650        if (diag_arg->f->kill && kill_inet_sock(h, arg, &s) != 0) {
3651                if (errno == EOPNOTSUPP || errno == ENOENT) {
3652                        /* Socket can't be closed, or is already closed. */
3653                        return 0;
3654                } else {
3655                        perror("SOCK_DESTROY answers");
3656                        return -1;
3657                }
3658        }
3659
3660        err = inet_show_sock(h, &s);
3661        if (err < 0)
3662                return err;
3663
3664        return 0;
3665}
3666
3667static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
3668{
3669        int err = 0;
3670        struct rtnl_handle rth, rth2;
3671        int family = PF_INET;
3672        struct inet_diag_arg arg = { .f = f, .protocol = protocol };
3673
3674        if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
3675                return -1;
3676
3677        if (f->kill) {
3678                if (rtnl_open_byproto(&rth2, 0, NETLINK_SOCK_DIAG)) {
3679                        rtnl_close(&rth);
3680                        return -1;
3681                }
3682                arg.rth = &rth2;
3683        }
3684
3685        rth.dump = MAGIC_SEQ;
3686        rth.dump_fp = dump_fp;
3687        if (preferred_family == PF_INET6)
3688                family = PF_INET6;
3689
3690        /* extended protocol will use INET_DIAG_REQ_PROTOCOL,
3691         * not supported by older kernels. On such kernel
3692         * rtnl_dump will bail with rtnl_dump_error().
3693         * Suppress the error to avoid confusing the user
3694         */
3695        if (protocol > 255)
3696                rth.flags |= RTNL_HANDLE_F_SUPPRESS_NLERR;
3697
3698again:
3699        if ((err = sockdiag_send(family, rth.fd, protocol, f)))
3700                goto Exit;
3701
3702        if ((err = rtnl_dump_filter(&rth, show_one_inet_sock, &arg))) {
3703                if (family != PF_UNSPEC) {
3704                        family = PF_UNSPEC;
3705                        goto again;
3706                }
3707                goto Exit;
3708        }
3709        if (family == PF_INET && preferred_family != PF_INET) {
3710                family = PF_INET6;
3711                goto again;
3712        }
3713
3714Exit:
3715        rtnl_close(&rth);
3716        if (arg.rth)
3717                rtnl_close(arg.rth);
3718        return err;
3719}
3720
3721static int tcp_show_netlink_file(struct filter *f)
3722{
3723        FILE    *fp;
3724        char    buf[16384];
3725        int     err = -1;
3726
3727        if ((fp = fopen(getenv("TCPDIAG_FILE"), "r")) == NULL) {
3728                perror("fopen($TCPDIAG_FILE)");
3729                return err;
3730        }
3731
3732        while (1) {
3733                int err2;
3734                size_t status, nitems;
3735                struct nlmsghdr *h = (struct nlmsghdr *)buf;
3736                struct sockstat s = {};
3737
3738                status = fread(buf, 1, sizeof(*h), fp);
3739                if (status != sizeof(*h)) {
3740                        if (ferror(fp))
3741                                perror("Reading header from $TCPDIAG_FILE");
3742                        if (feof(fp))
3743                                fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE");
3744                        break;
3745                }
3746
3747                nitems = NLMSG_ALIGN(h->nlmsg_len - sizeof(*h));
3748                status = fread(h+1, 1, nitems, fp);
3749
3750                if (status != nitems) {
3751                        if (ferror(fp))
3752                                perror("Reading $TCPDIAG_FILE");
3753                        if (feof(fp))
3754                                fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE");
3755                        break;
3756                }
3757
3758                /* The only legal exit point */
3759                if (h->nlmsg_type == NLMSG_DONE) {
3760                        err = 0;
3761                        break;
3762                }
3763
3764                if (h->nlmsg_type == NLMSG_ERROR) {
3765                        struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
3766
3767                        if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
3768                                fprintf(stderr, "ERROR truncated\n");
3769                        } else {
3770                                errno = -err->error;
3771                                perror("TCPDIAG answered");
3772                        }
3773                        break;
3774                }
3775
3776                parse_diag_msg(h, &s);
3777                s.type = IPPROTO_TCP;
3778
3779                if (f && f->f && run_ssfilter(f->f, &s) == 0)
3780                        continue;
3781
3782                err2 = inet_show_sock(h, &s);
3783                if (err2 < 0) {
3784                        err = err2;
3785                        break;
3786                }
3787        }
3788
3789        fclose(fp);
3790        return err;
3791}
3792
3793static int tcp_show(struct filter *f)
3794{
3795        FILE *fp = NULL;
3796        char *buf = NULL;
3797        int bufsize = 1024*1024;
3798
3799        if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
3800                return 0;
3801
3802        dg_proto = TCP_PROTO;
3803
3804        if (getenv("TCPDIAG_FILE"))
3805                return tcp_show_netlink_file(f);
3806
3807        if (!getenv("PROC_NET_TCP") && !getenv("PROC_ROOT")
3808            && inet_show_netlink(f, NULL, IPPROTO_TCP) == 0)
3809                return 0;
3810
3811        /* Sigh... We have to parse /proc/net/tcp... */
3812        while (bufsize >= 64*1024) {
3813                if ((buf = malloc(bufsize)) != NULL)
3814                        break;
3815                bufsize /= 2;
3816        }
3817        if (buf == NULL) {
3818                errno = ENOMEM;
3819                return -1;
3820        }
3821
3822        if (f->families & FAMILY_MASK(AF_INET)) {
3823                if ((fp = net_tcp_open()) == NULL)
3824                        goto outerr;
3825
3826                setbuffer(fp, buf, bufsize);
3827                if (generic_record_read(fp, tcp_show_line, f, AF_INET))
3828                        goto outerr;
3829                fclose(fp);
3830        }
3831
3832        if ((f->families & FAMILY_MASK(AF_INET6)) &&
3833            (fp = net_tcp6_open()) != NULL) {
3834                setbuffer(fp, buf, bufsize);
3835                if (generic_record_read(fp, tcp_show_line, f, AF_INET6))
3836                        goto outerr;
3837                fclose(fp);
3838        }
3839
3840        free(buf);
3841        return 0;
3842
3843outerr:
3844        do {
3845                int saved_errno = errno;
3846
3847                free(buf);
3848                if (fp)
3849                        fclose(fp);
3850                errno = saved_errno;
3851                return -1;
3852        } while (0);
3853}
3854
3855static int mptcp_show(struct filter *f)
3856{
3857        if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
3858                return 0;
3859
3860        if (!getenv("PROC_NET_MPTCP") && !getenv("PROC_ROOT")
3861            && inet_show_netlink(f, NULL, IPPROTO_MPTCP) == 0)
3862                return 0;
3863
3864        return 0;
3865}
3866
/*
 * Show DCCP sockets selected by @f via a netlink dump.
 *
 * Nothing is done when neither AF_INET nor AF_INET6 is enabled in the
 * filter, or when $PROC_NET_DCCP or $PROC_ROOT is set.  The outcome of the
 * dump is not reported: the function always returns 0.
 */
static int dccp_show(struct filter *f)
{
	if (!(filter_af_get(f, AF_INET) || filter_af_get(f, AF_INET6)))
		return 0;

	if (getenv("PROC_NET_DCCP") || getenv("PROC_ROOT"))
		return 0;

	inet_show_netlink(f, NULL, IPPROTO_DCCP);
	return 0;
}
3878
/*
 * Show SCTP sockets selected by @f via a netlink dump.
 *
 * Nothing is done when neither AF_INET nor AF_INET6 is enabled in the
 * filter, or when $PROC_NET_SCTP or $PROC_ROOT is set.  The outcome of the
 * dump is not reported: the function always returns 0.
 */
static int sctp_show(struct filter *f)
{
	if (!(filter_af_get(f, AF_INET) || filter_af_get(f, AF_INET6)))
		return 0;

	if (getenv("PROC_NET_SCTP") || getenv("PROC_ROOT"))
		return 0;

	inet_show_netlink(f, NULL, IPPROTO_SCTP);
	return 0;
}
3890
3891static int dgram_show_line(char *line, const struct filter *f, int family)
3892{
3893        struct sockstat s = {};
3894        char *loc, *rem, *data;
3895        char opt[256];
3896        int n;
3897
3898        if (proc_inet_split_line(line, &loc, &rem, &data))
3899                return -1;
3900
3901        int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0');
3902
3903        if (!(f->states & (1 << state)))
3904                return 0;
3905
3906        proc_parse_inet_addr(loc, rem, family, &s);
3907
3908        if (f->f && run_ssfilter(f->f, &s) == 0)
3909                return 0;
3910
3911        opt[0] = 0;
3912        n = sscanf(data, "%x %x:%x %*x:%*x %*x %d %*d %u %d %llx %[^\n]\n",
3913               &s.state, &s.wq, &s.rq,
3914               &s.uid, &s.ino,
3915               &s.refcnt, &s.sk, opt);
3916
3917        if (n < 9)
3918                opt[0] = 0;
3919
3920        s.type = dg_proto == UDP_PROTO ? IPPROTO_UDP : 0;
3921        inet_stats_print(&s, false);
3922
3923        if (show_details && opt[0])
3924                out(" opt:\"%s\"", opt);
3925
3926        return 0;
3927}
3928
3929static int udp_show(struct filter *f)
3930{
3931        FILE *fp = NULL;
3932
3933        if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
3934                return 0;
3935
3936        dg_proto = UDP_PROTO;
3937
3938        if (!getenv("PROC_NET_UDP") && !getenv("PROC_ROOT")
3939            && inet_show_netlink(f, NULL, IPPROTO_UDP) == 0)
3940                return 0;
3941
3942        if (f->families&FAMILY_MASK(AF_INET)) {
3943                if ((fp = net_udp_open()) == NULL)
3944                        goto outerr;
3945                if (generic_record_read(fp, dgram_show_line, f, AF_INET))
3946                        goto outerr;
3947                fclose(fp);
3948        }
3949
3950        if ((f->families&FAMILY_MASK(AF_INET6)) &&
3951            (fp = net_udp6_open()) != NULL) {
3952                if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
3953                        goto outerr;
3954                fclose(fp);
3955        }
3956        return 0;
3957
3958outerr:
3959        do {
3960                int saved_errno = errno;
3961
3962                if (fp)
3963                        fclose(fp);
3964                errno = saved_errno;
3965                return -1;
3966        } while (0);
3967}
3968
3969static int raw_show(struct filter *f)
3970{
3971        FILE *fp = NULL;
3972
3973        if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
3974                return 0;
3975
3976        dg_proto = RAW_PROTO;
3977
3978        if (!getenv("PROC_NET_RAW") && !getenv("PROC_ROOT") &&
3979            inet_show_netlink(f, NULL, IPPROTO_RAW) == 0)
3980                return 0;
3981
3982        if (f->families&FAMILY_MASK(AF_INET)) {
3983                if ((fp = net_raw_open()) == NULL)
3984                        goto outerr;
3985                if (generic_record_read(fp, dgram_show_line, f, AF_INET))
3986                        goto outerr;
3987                fclose(fp);
3988        }
3989
3990        if ((f->families&FAMILY_MASK(AF_INET6)) &&
3991            (fp = net_raw6_open()) != NULL) {
3992                if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
3993                        goto outerr;
3994                fclose(fp);
3995        }
3996        return 0;
3997
3998outerr:
3999        do {
4000                int saved_errno = errno;
4001
4002                if (fp)
4003                        fclose(fp);
4004                errno = saved_errno;
4005                return -1;
4006        } while (0);
4007}
4008
4009#define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct sockstat))
4010
4011static void unix_list_drop_first(struct sockstat **list)
4012{
4013        struct sockstat *s = *list;
4014
4015        (*list) = (*list)->next;
4016        free(s->name);
4017        free(s);
4018}
4019
4020static bool unix_type_skip(struct sockstat *s, struct filter *f)
4021{
4022        if (s->type == SOCK_STREAM && !(f->dbs&(1<<UNIX_ST_DB)))
4023                return true;
4024        if (s->type == SOCK_DGRAM && !(f->dbs&(1<<UNIX_DG_DB)))
4025                return true;
4026        if (s->type == SOCK_SEQPACKET && !(f->dbs&(1<<UNIX_SQ_DB)))
4027                return true;
4028        return false;
4029}
4030
4031static void unix_stats_print(struct sockstat *s, struct filter *f)
4032{
4033        char port_name[30] = {};
4034
4035        sock_state_print(s);
4036
4037        sock_addr_print(s->name ?: "*", " ",
4038                        int_to_str(s->lport, port_name), NULL);
4039        sock_addr_print(s->peer_name ?: "*", " ",
4040                        int_to_str(s->rport, port_name), NULL);
4041
4042        proc_ctx_print(s);
4043}
4044
4045static int unix_show_sock(struct nlmsghdr *nlh, void *arg)
4046{
4047        struct filter *f = (struct filter *)arg;
4048        struct unix_diag_msg *r = NLMSG_DATA(nlh);
4049        struct rtattr *tb[UNIX_DIAG_MAX+1];
4050        char name[128];
4051        struct sockstat stat = { .name = "*", .peer_name = "*" };
4052
4053        parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(r+1),
4054                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
4055
4056        stat.type  = r->udiag_type;
4057        stat.state = r->udiag_state;
4058        stat.ino   = stat.lport = r->udiag_ino;
4059        stat.local.family = stat.remote.family = AF_UNIX;
4060
4061        if (unix_type_skip(&stat, f))
4062                return 0;
4063
4064        if (tb[UNIX_DIAG_RQLEN]) {
4065                struct unix_diag_rqlen *rql = RTA_DATA(tb[UNIX_DIAG_RQLEN]);
4066
4067                stat.rq = rql->udiag_rqueue;
4068                stat.wq = rql->udiag_wqueue;
4069        }
4070        if (tb[UNIX_DIAG_NAME]) {
4071                int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
4072
4073                memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
4074                name[len] = '\0';
4075                if (name[0] == '\0') {
4076                        int i;
4077                        for (i = 0; i < len; i++)
4078                                if (name[i] == '\0')
4079                                        name[i] = '@';
4080                }
4081                stat.name = &name[0];
4082                memcpy(stat.local.data, &stat.name, sizeof(stat.name));
4083        }
4084        if (tb[UNIX_DIAG_PEER])
4085                stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]);
4086
4087        if (f->f && run_ssfilter(f->f, &stat) == 0)
4088                return 0;
4089
4090        unix_stats_print(&stat, f);
4091
4092        if (show_mem)
4093                print_skmeminfo(tb, UNIX_DIAG_MEMINFO);
4094        if (show_details) {
4095                if (tb[UNIX_DIAG_SHUTDOWN]) {
4096                        unsigned char mask;
4097
4098                        mask = rta_getattr_u8(tb[UNIX_DIAG_SHUTDOWN]);
4099                        out(" %c-%c",
4100                            mask & 1 ? '-' : '<', mask & 2 ? '-' : '>');
4101                }
4102                if (tb[UNIX_DIAG_VFS]) {
4103                        struct unix_diag_vfs *uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
4104
4105                        out(" ino:%u dev:%u/%u", uv->udiag_vfs_ino, major(uv->udiag_vfs_dev),
4106                                                 minor(uv->udiag_vfs_dev));
4107                }
4108                if (tb[UNIX_DIAG_ICONS]) {
4109                        int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);
4110                        __u32 *peers = RTA_DATA(tb[UNIX_DIAG_ICONS]);
4111                        int i;
4112
4113                        out(" peers:");
4114                        for (i = 0; i < len / sizeof(__u32); i++)
4115                                out(" %u", peers[i]);
4116                }
4117        }
4118
4119        return 0;
4120}
4121
4122static int handle_netlink_request(struct filter *f, struct nlmsghdr *req,
4123                size_t size, rtnl_filter_t show_one_sock)
4124{
4125        int ret = -1;
4126        struct rtnl_handle rth;
4127
4128        if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
4129                return -1;
4130
4131        rth.dump = MAGIC_SEQ;
4132
4133        if (rtnl_send(&rth, req, size) < 0)
4134                goto Exit;
4135
4136        if (rtnl_dump_filter(&rth, show_one_sock, f))
4137                goto Exit;
4138
4139        ret = 0;
4140Exit:
4141        rtnl_close(&rth);
4142        return ret;
4143}
4144
4145static int unix_show_netlink(struct filter *f)
4146{
4147        DIAG_REQUEST(req, struct unix_diag_req r);
4148
4149        req.r.sdiag_family = AF_UNIX;
4150        req.r.udiag_states = f->states;
4151        req.r.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER | UDIAG_SHOW_RQLEN;
4152        if (show_mem)
4153                req.r.udiag_show |= UDIAG_SHOW_MEMINFO;
4154        if (show_details)
4155                req.r.udiag_show |= UDIAG_SHOW_VFS | UDIAG_SHOW_ICONS;
4156
4157        return handle_netlink_request(f, &req.nlh, sizeof(req), unix_show_sock);
4158}
4159
4160static int unix_show(struct filter *f)
4161{
4162        FILE *fp;
4163        char buf[256];
4164        char name[128];
4165        int  newformat = 0;
4166        int  cnt;
4167        struct sockstat *list = NULL;
4168        const int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT,
4169                                       SS_ESTABLISHED, SS_CLOSING };
4170
4171        if (!filter_af_get(f, AF_UNIX))
4172                return 0;
4173
4174        if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT")
4175            && unix_show_netlink(f) == 0)
4176                return 0;
4177
4178        if ((fp = net_unix_open()) == NULL)
4179                return -1;
4180        if (!fgets(buf, sizeof(buf), fp)) {
4181                fclose(fp);
4182                return -1;
4183        }
4184
4185        if (memcmp(buf, "Peer", 4) == 0)
4186                newformat = 1;
4187        cnt = 0;
4188
4189        while (fgets(buf, sizeof(buf), fp)) {
4190                struct sockstat *u, **insp;
4191                int flags;
4192
4193                if (!(u = calloc(1, sizeof(*u))))
4194                        break;
4195
4196                if (sscanf(buf, "%x: %x %x %x %x %x %d %s",
4197                           &u->rport, &u->rq, &u->wq, &flags, &u->type,
4198                           &u->state, &u->ino, name) < 8)
4199                        name[0] = 0;
4200
4201                u->lport = u->ino;
4202                u->local.family = u->remote.family = AF_UNIX;
4203
4204                if (flags & (1 << 16)) {
4205                        u->state = SS_LISTEN;
4206                } else if (u->state > 0 &&
4207                           u->state <= ARRAY_SIZE(unix_state_map)) {
4208                        u->state = unix_state_map[u->state-1];
4209                        if (u->type == SOCK_DGRAM && u->state == SS_CLOSE && u->rport)
4210                                u->state = SS_ESTABLISHED;
4211                }
4212                if (unix_type_skip(u, f) ||
4213                    !(f->states & (1 << u->state))) {
4214                        free(u);
4215                        continue;
4216                }
4217
4218                if (!newformat) {
4219                        u->rport = 0;
4220                        u->rq = 0;
4221                        u->wq = 0;
4222                }
4223
4224                if (name[0]) {
4225                        u->name = strdup(name);
4226                        if (!u->name) {
4227                                free(u);
4228                                break;
4229                        }
4230                }
4231
4232                if (u->rport) {
4233                        struct sockstat *p;
4234
4235                        for (p = list; p; p = p->next) {
4236                                if (u->rport == p->lport)
4237                                        break;
4238                        }
4239                        if (!p)
4240                                u->peer_name = "?";
4241                        else
4242                                u->peer_name = p->name ? : "*";
4243                }
4244
4245                if (f->f) {
4246                        struct sockstat st = {
4247                                .local.family = AF_UNIX,
4248                                .remote.family = AF_UNIX,
4249                        };
4250
4251                        memcpy(st.local.data, &u->name, sizeof(u->name));
4252                        /* when parsing the old format rport is set to 0 and
4253                         * therefore peer_name remains NULL
4254                         */
4255                        if (u->peer_name && strcmp(u->peer_name, "*"))
4256                                memcpy(st.remote.data, &u->peer_name,
4257                                       sizeof(u->peer_name));
4258                        if (run_ssfilter(f->f, &st) == 0) {
4259                                free(u->name);
4260                                free(u);
4261                                continue;
4262                        }
4263                }
4264
4265                insp = &list;
4266                while (*insp) {
4267                        if (u->type < (*insp)->type ||
4268                            (u->type == (*insp)->type &&
4269                             u->ino < (*insp)->ino))
4270                                break;
4271                        insp = &(*insp)->next;
4272                }
4273                u->next = *insp;
4274                *insp = u;
4275
4276                if (++cnt > MAX_UNIX_REMEMBER) {
4277                        while (list) {
4278                                unix_stats_print(list, f);
4279                                unix_list_drop_first(&list);
4280                        }
4281                        cnt = 0;
4282                }
4283        }
4284        fclose(fp);
4285        while (list) {
4286                unix_stats_print(list, f);
4287                unix_list_drop_first(&list);
4288        }
4289
4290        return 0;
4291}
4292
4293static int packet_stats_print(struct sockstat *s, const struct filter *f)
4294{
4295        const char *addr, *port;
4296        char ll_name[16];
4297
4298        s->local.family = s->remote.family = AF_PACKET;
4299
4300        if (f->f) {
4301                s->local.data[0] = s->prot;
4302                if (run_ssfilter(f->f, s) == 0)
4303                        return 1;
4304        }
4305
4306        sock_state_print(s);
4307
4308        if (s->prot == 3)
4309                addr = "*";
4310        else
4311                addr = ll_proto_n2a(htons(s->prot), ll_name, sizeof(ll_name));
4312
4313        if (s->iface == 0)
4314                port = "*";
4315        else
4316                port = xll_index_to_name(s->iface);
4317
4318        sock_addr_print(addr, ":", port, NULL);
4319        sock_addr_print("", "*", "", NULL);
4320
4321        proc_ctx_print(s);
4322
4323        if (show_details)
4324                sock_details_print(s);
4325
4326        return 0;
4327}
4328
4329static void packet_show_ring(struct packet_diag_ring *ring)
4330{
4331        out("blk_size:%d", ring->pdr_block_size);
4332        out(",blk_nr:%d", ring->pdr_block_nr);
4333        out(",frm_size:%d", ring->pdr_frame_size);
4334        out(",frm_nr:%d", ring->pdr_frame_nr);
4335        out(",tmo:%d", ring->pdr_retire_tmo);
4336        out(",features:0x%x", ring->pdr_features);
4337}
4338
4339static int packet_show_sock(struct nlmsghdr *nlh, void *arg)
4340{
4341        const struct filter *f = arg;
4342        struct packet_diag_msg *r = NLMSG_DATA(nlh);
4343        struct packet_diag_info *pinfo = NULL;
4344        struct packet_diag_ring *ring_rx = NULL, *ring_tx = NULL;
4345        struct rtattr *tb[PACKET_DIAG_MAX+1];
4346        struct sockstat stat = {};
4347        uint32_t fanout = 0;
4348        bool has_fanout = false;
4349
4350        parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr *)(r+1),
4351                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
4352
4353        /* use /proc/net/packet if all info are not available */
4354        if (!tb[PACKET_DIAG_MEMINFO])
4355                return -1;
4356
4357        stat.type   = r->pdiag_type;
4358        stat.prot   = r->pdiag_num;
4359        stat.ino    = r->pdiag_ino;
4360        stat.state  = SS_CLOSE;
4361        stat.sk     = cookie_sk_get(&r->pdiag_cookie[0]);
4362
4363        if (tb[PACKET_DIAG_MEMINFO]) {
4364                __u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]);
4365
4366                stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC];
4367        }
4368
4369        if (tb[PACKET_DIAG_INFO]) {
4370                pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]);
4371                stat.lport = stat.iface = pinfo->pdi_index;
4372        }
4373
4374        if (tb[PACKET_DIAG_UID])
4375                stat.uid = rta_getattr_u32(tb[PACKET_DIAG_UID]);
4376
4377        if (tb[PACKET_DIAG_RX_RING])
4378                ring_rx = RTA_DATA(tb[PACKET_DIAG_RX_RING]);
4379
4380        if (tb[PACKET_DIAG_TX_RING])
4381                ring_tx = RTA_DATA(tb[PACKET_DIAG_TX_RING]);
4382
4383        if (tb[PACKET_DIAG_FANOUT]) {
4384                has_fanout = true;
4385                fanout = rta_getattr_u32(tb[PACKET_DIAG_FANOUT]);
4386        }
4387
4388        if (packet_stats_print(&stat, f))
4389                return 0;
4390
4391        if (show_details) {
4392                if (pinfo) {
4393                        if (oneline)
4394                                out(" ver:%d", pinfo->pdi_version);
4395                        else
4396                                out("\n\tver:%d", pinfo->pdi_version);
4397                        out(" cpy_thresh:%d", pinfo->pdi_copy_thresh);
4398                        out(" flags( ");
4399                        if (pinfo->pdi_flags & PDI_RUNNING)
4400                                out("running");
4401                        if (pinfo->pdi_flags & PDI_AUXDATA)
4402                                out(" auxdata");
4403                        if (pinfo->pdi_flags & PDI_ORIGDEV)
4404                                out(" origdev");
4405                        if (pinfo->pdi_flags & PDI_VNETHDR)
4406                                out(" vnethdr");
4407                        if (pinfo->pdi_flags & PDI_LOSS)
4408                                out(" loss");
4409                        if (!pinfo->pdi_flags)
4410                                out("0");
4411                        out(" )");
4412                }
4413                if (ring_rx) {
4414                        if (oneline)
4415                                out(" ring_rx(");
4416                        else
4417                                out("\n\tring_rx(");
4418                        packet_show_ring(ring_rx);
4419                        out(")");
4420                }
4421                if (ring_tx) {
4422                        if (oneline)
4423                                out(" ring_tx(");
4424                        else
4425                                out("\n\tring_tx(");
4426                        packet_show_ring(ring_tx);
4427                        out(")");
4428                }
4429                if (has_fanout) {
4430                        uint16_t type = (fanout >> 16) & 0xffff;
4431
4432                        if (oneline)
4433                                out(" fanout(");
4434                        else
4435                                out("\n\tfanout(");
4436                        out("id:%d,", fanout & 0xffff);
4437                        out("type:");
4438
4439                        if (type == 0)
4440                                out("hash");
4441                        else if (type == 1)
4442                                out("lb");
4443                        else if (type == 2)
4444                                out("cpu");
4445                        else if (type == 3)
4446                                out("roll");
4447                        else if (type == 4)
4448                                out("random");
4449                        else if (type == 5)
4450                                out("qm");
4451                        else
4452                                out("0x%x", type);
4453
4454                        out(")");
4455                }
4456        }
4457
4458        if (show_bpf && tb[PACKET_DIAG_FILTER]) {
4459                struct sock_filter *fil =
4460                       RTA_DATA(tb[PACKET_DIAG_FILTER]);
4461                int num = RTA_PAYLOAD(tb[PACKET_DIAG_FILTER]) /
4462                          sizeof(struct sock_filter);
4463
4464                if (oneline)
4465                        out(" bpf filter (%d): ", num);
4466                else
4467                        out("\n\tbpf filter (%d): ", num);
4468                while (num) {
4469                        out(" 0x%02x %u %u %u,",
4470                            fil->code, fil->jt, fil->jf, fil->k);
4471                        num--;
4472                        fil++;
4473                }
4474        }
4475
4476        if (show_mem)
4477                print_skmeminfo(tb, PACKET_DIAG_MEMINFO);
4478        return 0;
4479}
4480
4481static int packet_show_netlink(struct filter *f)
4482{
4483        DIAG_REQUEST(req, struct packet_diag_req r);
4484
4485        req.r.sdiag_family = AF_PACKET;
4486        req.r.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MEMINFO |
4487                PACKET_SHOW_FILTER | PACKET_SHOW_RING_CFG | PACKET_SHOW_FANOUT;
4488
4489        return handle_netlink_request(f, &req.nlh, sizeof(req), packet_show_sock);
4490}
4491
4492static int packet_show_line(char *buf, const struct filter *f, int fam)
4493{
4494        unsigned long long sk;
4495        struct sockstat stat = {};
4496        int type, prot, iface, state, rq, uid, ino;
4497
4498        sscanf(buf, "%llx %*d %d %x %d %d %u %u %u",
4499                        &sk,
4500                        &type, &prot, &iface, &state,
4501                        &rq, &uid, &ino);
4502
4503        if (stat.type == SOCK_RAW && !(f->dbs&(1<<PACKET_R_DB)))
4504                return 0;
4505        if (stat.type == SOCK_DGRAM && !(f->dbs&(1<<PACKET_DG_DB)))
4506                return 0;
4507
4508        stat.type  = type;
4509        stat.prot  = prot;
4510        stat.lport = stat.iface = iface;
4511        stat.state = state;
4512        stat.rq    = rq;
4513        stat.uid   = uid;
4514        stat.ino   = ino;
4515        stat.state = SS_CLOSE;
4516
4517        if (packet_stats_print(&stat, f))
4518                return 0;
4519
4520        return 0;
4521}
4522
4523static int packet_show(struct filter *f)
4524{
4525        FILE *fp;
4526        int rc = 0;
4527
4528        if (!filter_af_get(f, AF_PACKET) || !(f->states & (1 << SS_CLOSE)))
4529                return 0;
4530
4531        if (!getenv("PROC_NET_PACKET") && !getenv("PROC_ROOT") &&
4532                        packet_show_netlink(f) == 0)
4533                return 0;
4534
4535        if ((fp = net_packet_open()) == NULL)
4536                return -1;
4537        if (generic_record_read(fp, packet_show_line, f, AF_PACKET))
4538                rc = -1;
4539
4540        fclose(fp);
4541        return rc;
4542}
4543
4544static int xdp_stats_print(struct sockstat *s, const struct filter *f)
4545{
4546        const char *addr, *port;
4547        char q_str[16];
4548
4549        s->local.family = s->remote.family = AF_XDP;
4550
4551        if (f->f) {
4552                if (run_ssfilter(f->f, s) == 0)
4553                        return 1;
4554        }
4555
4556        sock_state_print(s);
4557
4558        if (s->iface) {
4559                addr = xll_index_to_name(s->iface);
4560                snprintf(q_str, sizeof(q_str), "q%d", s->lport);
4561                port = q_str;
4562                sock_addr_print(addr, ":", port, NULL);
4563        } else {
4564                sock_addr_print("", "*", "", NULL);
4565        }
4566
4567        sock_addr_print("", "*", "", NULL);
4568
4569        proc_ctx_print(s);
4570
4571        if (show_details)
4572                sock_details_print(s);
4573
4574        return 0;
4575}
4576
4577static void xdp_show_ring(const char *name, struct xdp_diag_ring *ring)
4578{
4579        if (oneline)
4580                out(" %s(", name);
4581        else
4582                out("\n\t%s(", name);
4583        out("entries:%u", ring->entries);
4584        out(")");
4585}
4586
4587static void xdp_show_umem(struct xdp_diag_umem *umem, struct xdp_diag_ring *fr,
4588                          struct xdp_diag_ring *cr)
4589{
4590        if (oneline)
4591                out(" tumem(");
4592        else
4593                out("\n\tumem(");
4594        out("id:%u", umem->id);
4595        out(",size:%llu", umem->size);
4596        out(",num_pages:%u", umem->num_pages);
4597        out(",chunk_size:%u", umem->chunk_size);
4598        out(",headroom:%u", umem->headroom);
4599        out(",ifindex:%u", umem->ifindex);
4600        out(",qid:%u", umem->queue_id);
4601        out(",zc:%u", umem->flags & XDP_DU_F_ZEROCOPY);
4602        out(",refs:%u", umem->refs);
4603        out(")");
4604
4605        if (fr)
4606                xdp_show_ring("fr", fr);
4607        if (cr)
4608                xdp_show_ring("cr", cr);
4609}
4610
4611static void xdp_show_stats(struct xdp_diag_stats *stats)
4612{
4613        if (oneline)
4614                out(" stats(");
4615        else
4616                out("\n\tstats(");
4617        out("rx dropped:%llu", stats->n_rx_dropped);
4618        out(",rx invalid:%llu", stats->n_rx_invalid);
4619        out(",rx queue full:%llu", stats->n_rx_full);
4620        out(",rx fill ring empty:%llu", stats->n_fill_ring_empty);
4621        out(",tx invalid:%llu", stats->n_tx_invalid);
4622        out(",tx ring empty:%llu", stats->n_tx_ring_empty);
4623        out(")");
4624}
4625
4626static int xdp_show_sock(struct nlmsghdr *nlh, void *arg)
4627{
4628        struct xdp_diag_ring *rx = NULL, *tx = NULL, *fr = NULL, *cr = NULL;
4629        struct xdp_diag_msg *msg = NLMSG_DATA(nlh);
4630        struct rtattr *tb[XDP_DIAG_MAX + 1];
4631        struct xdp_diag_info *info = NULL;
4632        struct xdp_diag_umem *umem = NULL;
4633        struct xdp_diag_stats *stats = NULL;
4634        const struct filter *f = arg;
4635        struct sockstat stat = {};
4636
4637        parse_rtattr(tb, XDP_DIAG_MAX, (struct rtattr *)(msg + 1),
4638                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*msg)));
4639
4640        stat.type = msg->xdiag_type;
4641        stat.ino = msg->xdiag_ino;
4642        stat.state = SS_CLOSE;
4643        stat.sk = cookie_sk_get(&msg->xdiag_cookie[0]);
4644
4645        if (tb[XDP_DIAG_INFO]) {
4646                info = RTA_DATA(tb[XDP_DIAG_INFO]);
4647                stat.iface = info->ifindex;
4648                stat.lport = info->queue_id;
4649        }
4650
4651        if (tb[XDP_DIAG_UID])
4652                stat.uid = rta_getattr_u32(tb[XDP_DIAG_UID]);
4653        if (tb[XDP_DIAG_RX_RING])
4654                rx = RTA_DATA(tb[XDP_DIAG_RX_RING]);
4655        if (tb[XDP_DIAG_TX_RING])
4656                tx = RTA_DATA(tb[XDP_DIAG_TX_RING]);
4657        if (tb[XDP_DIAG_UMEM])
4658                umem = RTA_DATA(tb[XDP_DIAG_UMEM]);
4659        if (tb[XDP_DIAG_UMEM_FILL_RING])
4660                fr = RTA_DATA(tb[XDP_DIAG_UMEM_FILL_RING]);
4661        if (tb[XDP_DIAG_UMEM_COMPLETION_RING])
4662                cr = RTA_DATA(tb[XDP_DIAG_UMEM_COMPLETION_RING]);
4663        if (tb[XDP_DIAG_MEMINFO]) {
4664                __u32 *skmeminfo = RTA_DATA(tb[XDP_DIAG_MEMINFO]);
4665
4666                stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC];
4667        }
4668        if (tb[XDP_DIAG_STATS])
4669                stats = RTA_DATA(tb[XDP_DIAG_STATS]);
4670
4671        if (xdp_stats_print(&stat, f))
4672                return 0;
4673
4674        if (show_details) {
4675                if (rx)
4676                        xdp_show_ring("rx", rx);
4677                if (tx)
4678                        xdp_show_ring("tx", tx);
4679                if (umem)
4680                        xdp_show_umem(umem, fr, cr);
4681                if (stats)
4682                        xdp_show_stats(stats);
4683        }
4684
4685        if (show_mem)
4686                print_skmeminfo(tb, XDP_DIAG_MEMINFO); // really?
4687
4688
4689        return 0;
4690}
4691
4692static int xdp_show(struct filter *f)
4693{
4694        DIAG_REQUEST(req, struct xdp_diag_req r);
4695
4696        if (!filter_af_get(f, AF_XDP) || !(f->states & (1 << SS_CLOSE)))
4697                return 0;
4698
4699        req.r.sdiag_family = AF_XDP;
4700        req.r.xdiag_show = XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM |
4701                           XDP_SHOW_MEMINFO | XDP_SHOW_STATS;
4702
4703        return handle_netlink_request(f, &req.nlh, sizeof(req), xdp_show_sock);
4704}
4705
4706static int netlink_show_one(struct filter *f,
4707                                int prot, int pid, unsigned int groups,
4708                                int state, int dst_pid, unsigned int dst_group,
4709                                int rq, int wq,
4710                                unsigned long long sk, unsigned long long cb)
4711{
4712        struct sockstat st = {
4713                .state          = SS_CLOSE,
4714                .rq             = rq,
4715                .wq             = wq,
4716                .local.family   = AF_NETLINK,
4717                .remote.family  = AF_NETLINK,
4718        };
4719
4720        SPRINT_BUF(prot_buf) = {};
4721        const char *prot_name;
4722        char procname[64] = {};
4723
4724        if (f->f) {
4725                st.rport = -1;
4726                st.lport = pid;
4727                st.local.data[0] = prot;
4728                if (run_ssfilter(f->f, &st) == 0)
4729                        return 1;
4730        }
4731
4732        sock_state_print(&st);
4733
4734        prot_name = nl_proto_n2a(prot, prot_buf, sizeof(prot_buf));
4735
4736        if (pid == -1) {
4737                procname[0] = '*';
4738        } else if (!numeric) {
4739                int done = 0;
4740
4741                if (!pid) {
4742                        done = 1;
4743                        strncpy(procname, "kernel", 7);
4744                } else if (pid > 0) {
4745                        FILE *fp;
4746
4747                        snprintf(procname, sizeof(procname), "%s/%d/stat",
4748                                getenv("PROC_ROOT") ? : "/proc", pid);
4749                        if ((fp = fopen(procname, "r")) != NULL) {
4750                                if (fscanf(fp, "%*d (%[^)])", procname) == 1) {
4751                                        snprintf(procname+strlen(procname),
4752                                                sizeof(procname)-strlen(procname),
4753                                                "/%d", pid);
4754                                        done = 1;
4755                                }
4756                                fclose(fp);
4757                        }
4758                }
4759                if (!done)
4760                        int_to_str(pid, procname);
4761        } else {
4762                int_to_str(pid, procname);
4763        }
4764
4765        sock_addr_print(prot_name, ":", procname, NULL);
4766
4767        if (state == NETLINK_CONNECTED) {
4768                char dst_group_buf[30];
4769                char dst_pid_buf[30];
4770
4771                sock_addr_print(int_to_str(dst_group, dst_group_buf), ":",
4772                                int_to_str(dst_pid, dst_pid_buf), NULL);
4773        } else {
4774                sock_addr_print("", "*", "", NULL);
4775        }
4776
4777        char *pid_context = NULL;
4778
4779        if (show_proc_ctx) {
4780                /* The pid value will either be:
4781                 *   0 if destination kernel - show kernel initial context.
4782                 *   A valid process pid - use getpidcon.
4783                 *   A unique value allocated by the kernel or netlink user
4784                 *   to the process - show context as "not available".
4785                 */
4786                if (!pid)
4787                        security_get_initial_context("kernel", &pid_context);
4788                else if (pid > 0)
4789                        getpidcon(pid, &pid_context);
4790
4791                out(" proc_ctx=%s", pid_context ? : "unavailable");
4792                freecon(pid_context);
4793        }
4794
4795        if (show_details) {
4796                out(" sk=%llx cb=%llx groups=0x%08x", sk, cb, groups);
4797        }
4798
4799        return 0;
4800}
4801
4802static int netlink_show_sock(struct nlmsghdr *nlh, void *arg)
4803{
4804        struct filter *f = (struct filter *)arg;
4805        struct netlink_diag_msg *r = NLMSG_DATA(nlh);
4806        struct rtattr *tb[NETLINK_DIAG_MAX+1];
4807        int rq = 0, wq = 0;
4808        unsigned long groups = 0;
4809
4810        parse_rtattr(tb, NETLINK_DIAG_MAX, (struct rtattr *)(r+1),
4811                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
4812
4813        if (tb[NETLINK_DIAG_GROUPS] && RTA_PAYLOAD(tb[NETLINK_DIAG_GROUPS]))
4814                groups = *(unsigned long *) RTA_DATA(tb[NETLINK_DIAG_GROUPS]);
4815
4816        if (tb[NETLINK_DIAG_MEMINFO]) {
4817                const __u32 *skmeminfo;
4818
4819                skmeminfo = RTA_DATA(tb[NETLINK_DIAG_MEMINFO]);
4820
4821                rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC];
4822                wq = skmeminfo[SK_MEMINFO_WMEM_ALLOC];
4823        }
4824
4825        if (netlink_show_one(f, r->ndiag_protocol, r->ndiag_portid, groups,
4826                         r->ndiag_state, r->ndiag_dst_portid, r->ndiag_dst_group,
4827                         rq, wq, 0, 0)) {
4828                return 0;
4829        }
4830
4831        if (show_mem) {
4832                out("\t");
4833                print_skmeminfo(tb, NETLINK_DIAG_MEMINFO);
4834        }
4835
4836        return 0;
4837}
4838
4839static int netlink_show_netlink(struct filter *f)
4840{
4841        DIAG_REQUEST(req, struct netlink_diag_req r);
4842
4843        req.r.sdiag_family = AF_NETLINK;
4844        req.r.sdiag_protocol = NDIAG_PROTO_ALL;
4845        req.r.ndiag_show = NDIAG_SHOW_GROUPS | NDIAG_SHOW_MEMINFO;
4846
4847        return handle_netlink_request(f, &req.nlh, sizeof(req), netlink_show_sock);
4848}
4849
4850static int netlink_show(struct filter *f)
4851{
4852        FILE *fp;
4853        char buf[256];
4854        int prot, pid;
4855        unsigned int groups;
4856        int rq, wq, rc;
4857        unsigned long long sk, cb;
4858
4859        if (!filter_af_get(f, AF_NETLINK) || !(f->states & (1 << SS_CLOSE)))
4860                return 0;
4861
4862        if (!getenv("PROC_NET_NETLINK") && !getenv("PROC_ROOT") &&
4863                netlink_show_netlink(f) == 0)
4864                return 0;
4865
4866        if ((fp = net_netlink_open()) == NULL)
4867                return -1;
4868        if (!fgets(buf, sizeof(buf), fp)) {
4869                fclose(fp);
4870                return -1;
4871        }
4872
4873        while (fgets(buf, sizeof(buf), fp)) {
4874                sscanf(buf, "%llx %d %d %x %d %d %llx %d",
4875                       &sk,
4876                       &prot, &pid, &groups, &rq, &wq, &cb, &rc);
4877
4878                netlink_show_one(f, prot, pid, groups, 0, 0, 0, rq, wq, sk, cb);
4879        }
4880
4881        fclose(fp);
4882        return 0;
4883}
4884
4885static bool vsock_type_skip(struct sockstat *s, struct filter *f)
4886{
4887        if (s->type == SOCK_STREAM && !(f->dbs & (1 << VSOCK_ST_DB)))
4888                return true;
4889        if (s->type == SOCK_DGRAM && !(f->dbs & (1 << VSOCK_DG_DB)))
4890                return true;
4891        return false;
4892}
4893
4894static void vsock_addr_print(inet_prefix *a, __u32 port)
4895{
4896        char cid_str[sizeof("4294967295")];
4897        char port_str[sizeof("4294967295")];
4898        __u32 cid;
4899
4900        memcpy(&cid, a->data, sizeof(cid));
4901
4902        if (cid == ~(__u32)0)
4903                snprintf(cid_str, sizeof(cid_str), "*");
4904        else
4905                snprintf(cid_str, sizeof(cid_str), "%u", cid);
4906
4907        if (port == ~(__u32)0)
4908                snprintf(port_str, sizeof(port_str), "*");
4909        else
4910                snprintf(port_str, sizeof(port_str), "%u", port);
4911
4912        sock_addr_print(cid_str, ":", port_str, NULL);
4913}
4914
4915static void vsock_stats_print(struct sockstat *s, struct filter *f)
4916{
4917        sock_state_print(s);
4918
4919        vsock_addr_print(&s->local, s->lport);
4920        vsock_addr_print(&s->remote, s->rport);
4921
4922        proc_ctx_print(s);
4923}
4924
4925static int vsock_show_sock(struct nlmsghdr *nlh, void *arg)
4926{
4927        struct filter *f = (struct filter *)arg;
4928        struct vsock_diag_msg *r = NLMSG_DATA(nlh);
4929        struct sockstat stat = {
4930                .type = r->vdiag_type,
4931                .lport = r->vdiag_src_port,
4932                .rport = r->vdiag_dst_port,
4933                .state = r->vdiag_state,
4934                .ino = r->vdiag_ino,
4935        };
4936
4937        vsock_set_inet_prefix(&stat.local, r->vdiag_src_cid);
4938        vsock_set_inet_prefix(&stat.remote, r->vdiag_dst_cid);
4939
4940        if (vsock_type_skip(&stat, f))
4941                return 0;
4942
4943        if (f->f && run_ssfilter(f->f, &stat) == 0)
4944                return 0;
4945
4946        vsock_stats_print(&stat, f);
4947
4948        return 0;
4949}
4950
4951static int vsock_show(struct filter *f)
4952{
4953        DIAG_REQUEST(req, struct vsock_diag_req r);
4954
4955        if (!filter_af_get(f, AF_VSOCK))
4956                return 0;
4957
4958        req.r.sdiag_family = AF_VSOCK;
4959        req.r.vdiag_states = f->states;
4960
4961        return handle_netlink_request(f, &req.nlh, sizeof(req), vsock_show_sock);
4962}
4963
4964static void tipc_sock_addr_print(struct rtattr *net_addr, struct rtattr *id)
4965{
4966        uint32_t node = rta_getattr_u32(net_addr);
4967        uint32_t identity = rta_getattr_u32(id);
4968
4969        SPRINT_BUF(addr) = {};
4970        SPRINT_BUF(port) = {};
4971
4972        sprintf(addr, "%u", node);
4973        sprintf(port, "%u", identity);
4974        sock_addr_print(addr, ":", port, NULL);
4975
4976}
4977
4978static int tipc_show_sock(struct nlmsghdr *nlh, void *arg)
4979{
4980        struct rtattr *stat[TIPC_NLA_SOCK_STAT_MAX + 1] = {};
4981        struct rtattr *attrs[TIPC_NLA_SOCK_MAX + 1] = {};
4982        struct rtattr *con[TIPC_NLA_CON_MAX + 1] = {};
4983        struct rtattr *info[TIPC_NLA_MAX + 1] = {};
4984        struct rtattr *msg_ref;
4985        struct sockstat ss = {};
4986
4987        parse_rtattr(info, TIPC_NLA_MAX, NLMSG_DATA(nlh),
4988                     NLMSG_PAYLOAD(nlh, 0));
4989
4990        if (!info[TIPC_NLA_SOCK])
4991                return 0;
4992
4993        msg_ref = info[TIPC_NLA_SOCK];
4994        parse_rtattr(attrs, TIPC_NLA_SOCK_MAX, RTA_DATA(msg_ref),
4995                     RTA_PAYLOAD(msg_ref));
4996
4997        msg_ref = attrs[TIPC_NLA_SOCK_STAT];
4998        parse_rtattr(stat, TIPC_NLA_SOCK_STAT_MAX,
4999                     RTA_DATA(msg_ref), RTA_PAYLOAD(msg_ref));
5000
5001
5002        ss.local.family = AF_TIPC;
5003        ss.type = rta_getattr_u32(attrs[TIPC_NLA_SOCK_TYPE]);
5004        ss.state = rta_getattr_u32(attrs[TIPC_NLA_SOCK_TIPC_STATE]);
5005        ss.uid = rta_getattr_u32(attrs[TIPC_NLA_SOCK_UID]);
5006        ss.ino = rta_getattr_u32(attrs[TIPC_NLA_SOCK_INO]);
5007        ss.rq = rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_RCVQ]);
5008        ss.wq = rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_SENDQ]);
5009        ss.sk = rta_getattr_u64(attrs[TIPC_NLA_SOCK_COOKIE]);
5010
5011        sock_state_print (&ss);
5012
5013        tipc_sock_addr_print(attrs[TIPC_NLA_SOCK_ADDR],
5014                             attrs[TIPC_NLA_SOCK_REF]);
5015
5016        msg_ref = attrs[TIPC_NLA_SOCK_CON];
5017        if (msg_ref) {
5018                parse_rtattr(con, TIPC_NLA_CON_MAX,
5019                             RTA_DATA(msg_ref), RTA_PAYLOAD(msg_ref));
5020
5021                tipc_sock_addr_print(con[TIPC_NLA_CON_NODE],
5022                                     con[TIPC_NLA_CON_SOCK]);
5023        } else
5024                sock_addr_print("", "-", "", NULL);
5025
5026        if (show_details)
5027                sock_details_print(&ss);
5028
5029        proc_ctx_print(&ss);
5030
5031        if (show_tipcinfo) {
5032                if (oneline)
5033                        out(" type:%s", stype_nameg[ss.type]);
5034                else
5035                        out("\n type:%s", stype_nameg[ss.type]);
5036                out(" cong:%s ",
5037                       stat[TIPC_NLA_SOCK_STAT_LINK_CONG] ? "link" :
5038                       stat[TIPC_NLA_SOCK_STAT_CONN_CONG] ? "conn" : "none");
5039                out(" drop:%d ",
5040                       rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_DROP]));
5041
5042                if (attrs[TIPC_NLA_SOCK_HAS_PUBL])
5043                        out(" publ");
5044
5045                if (con[TIPC_NLA_CON_FLAG])
5046                        out(" via {%u,%u} ",
5047                               rta_getattr_u32(con[TIPC_NLA_CON_TYPE]),
5048                               rta_getattr_u32(con[TIPC_NLA_CON_INST]));
5049        }
5050
5051        return 0;
5052}
5053
5054static int tipc_show(struct filter *f)
5055{
5056        DIAG_REQUEST(req, struct tipc_sock_diag_req r);
5057
5058        memset(&req.r, 0, sizeof(req.r));
5059        req.r.sdiag_family = AF_TIPC;
5060        req.r.tidiag_states = f->states;
5061
5062        return handle_netlink_request(f, &req.nlh, sizeof(req), tipc_show_sock);
5063}
5064
5065struct sock_diag_msg {
5066        __u8 sdiag_family;
5067};
5068
5069static int generic_show_sock(struct nlmsghdr *nlh, void *arg)
5070{
5071        struct sock_diag_msg *r = NLMSG_DATA(nlh);
5072        struct inet_diag_arg inet_arg = { .f = arg, .protocol = IPPROTO_MAX };
5073        int ret;
5074
5075        switch (r->sdiag_family) {
5076        case AF_INET:
5077        case AF_INET6:
5078                inet_arg.rth = inet_arg.f->rth_for_killing;
5079                ret = show_one_inet_sock(nlh, &inet_arg);
5080                break;
5081        case AF_UNIX:
5082                ret = unix_show_sock(nlh, arg);
5083                break;
5084        case AF_PACKET:
5085                ret = packet_show_sock(nlh, arg);
5086                break;
5087        case AF_NETLINK:
5088                ret = netlink_show_sock(nlh, arg);
5089                break;
5090        case AF_VSOCK:
5091                ret = vsock_show_sock(nlh, arg);
5092                break;
5093        case AF_XDP:
5094                ret = xdp_show_sock(nlh, arg);
5095                break;
5096        default:
5097                ret = -1;
5098        }
5099
5100        render();
5101
5102        return ret;
5103}
5104
5105static int handle_follow_request(struct filter *f)
5106{
5107        int ret = 0;
5108        int groups = 0;
5109        struct rtnl_handle rth, rth2;
5110
5111        if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << TCP_DB))
5112                groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1);
5113        if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << UDP_DB))
5114                groups |= 1 << (SKNLGRP_INET_UDP_DESTROY - 1);
5115        if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << TCP_DB))
5116                groups |= 1 << (SKNLGRP_INET6_TCP_DESTROY - 1);
5117        if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << UDP_DB))
5118                groups |= 1 << (SKNLGRP_INET6_UDP_DESTROY - 1);
5119
5120        if (groups == 0)
5121                return -1;
5122
5123        if (rtnl_open_byproto(&rth, groups, NETLINK_SOCK_DIAG))
5124                return -1;
5125
5126        rth.dump = 0;
5127        rth.local.nl_pid = 0;
5128
5129        if (f->kill) {
5130                if (rtnl_open_byproto(&rth2, groups, NETLINK_SOCK_DIAG)) {
5131                        rtnl_close(&rth);
5132                        return -1;
5133                }
5134                f->rth_for_killing = &rth2;
5135        }
5136
5137        if (rtnl_dump_filter(&rth, generic_show_sock, f))
5138                ret = -1;
5139
5140        rtnl_close(&rth);
5141        if (f->rth_for_killing)
5142                rtnl_close(f->rth_for_killing);
5143        return ret;
5144}
5145
/*
 * Look up one integer counter in the SNMP statistics table.
 *
 * The table consists of line pairs that both start with the protocol tag:
 * a header line with space-separated column names followed by a line with
 * the matching values.
 *
 * @proto:  protocol tag the pair of lines starts with (e.g. "Tcp:").
 * @key:    column name to look for on the header line.
 * @result: receives the value; set to 0 on entry.
 *
 * Returns 0 on success.  Returns -1 when the table cannot be opened, or
 * with errno set to ESRCH when the protocol, the column or its value is
 * not found.
 */
static int get_snmp_int(char *proto, char *key, int *result)
{
	char buf[1024];
	FILE *fp;
	size_t protolen = strlen(proto);
	size_t keylen = strlen(key);

	*result = 0;

	if ((fp = net_snmp_open()) == NULL)
		return -1;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		char *p = buf;
		int  pos = 0;
		bool found = false;

		/* strncmp stops at the terminator, unlike memcmp */
		if (strncmp(buf, proto, protolen))
			continue;

		/* header line: find the 1-based column index of key */
		while ((p = strchr(p, ' ')) != NULL) {
			pos++;
			p++;
			if (strncmp(p, key, keylen) == 0 &&
			    (p[keylen] == ' ' || p[keylen] == '\n')) {
				found = true;
				break;
			}
		}
		/*
		 * Without this check a missing key would leave pos at the
		 * column count and the last value would be returned.
		 */
		if (!found)
			break;

		/* value line: must belong to the same protocol */
		if (fgets(buf, sizeof(buf), fp) == NULL)
			break;
		if (strncmp(buf, proto, protolen))
			break;
		p = buf;
		while ((p = strchr(p, ' ')) != NULL) {
			p++;
			if (--pos == 0) {
				if (sscanf(p, "%d", result) != 1)
					goto notfound;
				fclose(fp);
				return 0;
			}
		}
	}

notfound:
	fclose(fp);
	errno = ESRCH;
	return -1;
}
5190
5191
5192/* Get stats from sockstat */
5193
/*
 * Socket counters collected from the sockstat tables.  Each field is filled
 * from the line whose tag is named below; "Nth value" counts the numbers
 * that follow the alternating label/number pairs on that line.
 */
struct ssummary {
	int socks;		/* "sockets:" 1st value */
	int tcp_mem;		/* "TCP:" 5th value */
	int tcp_total;		/* "TCP:" 4th value */
	int tcp_orphans;	/* "TCP:" 2nd value */
	int tcp_tws;		/* "TCP:" 3rd value */
	int tcp4_hashed;	/* "TCP:" 1st value */
	int udp4;		/* "UDP:" 1st value */
	int raw4;		/* "RAW:" 1st value */
	int frag4;		/* "FRAG:" 1st value */
	int frag4_mem;		/* "FRAG:" 2nd value */
	int tcp6_hashed;	/* "TCP6:" 1st value */
	int udp6;		/* "UDP6:" 1st value */
	int raw6;		/* "RAW6:" 1st value */
	int frag6;		/* "FRAG6:" 1st value */
	int frag6_mem;		/* "FRAG6:" 2nd value */
};

/*
 * Parse one sockstat line of the form "<tag> <label> <n> [<label> <n>...]"
 * and store the numbers belonging to a known tag into *s.
 *
 * Lines without a space-separated remainder or with an unknown tag leave
 * *s untouched.  Both scan conversions are width-limited so that an
 * over-long token can never overrun the local buffers.
 */
static void get_sockstat_line(char *line, struct ssummary *s)
{
	char id[256], rem[256];

	if (sscanf(line, "%255[^ ] %255[^\n]\n", id, rem) != 2)
		return;

	if (strcmp(id, "sockets:") == 0)
		sscanf(rem, "%*s%d", &s->socks);
	else if (strcmp(id, "UDP:") == 0)
		sscanf(rem, "%*s%d", &s->udp4);
	else if (strcmp(id, "UDP6:") == 0)
		sscanf(rem, "%*s%d", &s->udp6);
	else if (strcmp(id, "RAW:") == 0)
		sscanf(rem, "%*s%d", &s->raw4);
	else if (strcmp(id, "RAW6:") == 0)
		sscanf(rem, "%*s%d", &s->raw6);
	else if (strcmp(id, "TCP6:") == 0)
		sscanf(rem, "%*s%d", &s->tcp6_hashed);
	else if (strcmp(id, "FRAG:") == 0)
		sscanf(rem, "%*s%d%*s%d", &s->frag4, &s->frag4_mem);
	else if (strcmp(id, "FRAG6:") == 0)
		sscanf(rem, "%*s%d%*s%d", &s->frag6, &s->frag6_mem);
	else if (strcmp(id, "TCP:") == 0)
		sscanf(rem, "%*s%d%*s%d%*s%d%*s%d%*s%d",
		       &s->tcp4_hashed,
		       &s->tcp_orphans, &s->tcp_tws, &s->tcp_total, &s->tcp_mem);
}
5240
5241static int get_sockstat(struct ssummary *s)
5242{
5243        char buf[256];
5244        FILE *fp;
5245
5246        memset(s, 0, sizeof(*s));
5247
5248        if ((fp = net_sockstat_open()) == NULL)
5249                return -1;
5250        while (fgets(buf, sizeof(buf), fp) != NULL)
5251                get_sockstat_line(buf, s);
5252        fclose(fp);
5253
5254        if ((fp = net_sockstat6_open()) == NULL)
5255                return 0;
5256        while (fgets(buf, sizeof(buf), fp) != NULL)
5257                get_sockstat_line(buf, s);
5258        fclose(fp);
5259
5260        return 0;
5261}
5262
5263static int print_summary(void)
5264{
5265        struct ssummary s;
5266        int tcp_estab;
5267
5268        if (get_sockstat(&s) < 0)
5269                perror("ss: get_sockstat");
5270        if (get_snmp_int("Tcp:", "CurrEstab", &tcp_estab) < 0)
5271                perror("ss: get_snmpstat");
5272
5273        printf("Total: %d\n", s.socks);
5274
5275        printf("TCP:   %d (estab %d, closed %d, orphaned %d, timewait %d)\n",
5276               s.tcp_total + s.tcp_tws, tcp_estab,
5277               s.tcp_total - (s.tcp4_hashed + s.tcp6_hashed - s.tcp_tws),
5278               s.tcp_orphans, s.tcp_tws);
5279
5280        printf("\n");
5281        printf("Transport Total     IP        IPv6\n");
5282        printf("RAW       %-9d %-9d %-9d\n", s.raw4+s.raw6, s.raw4, s.raw6);
5283        printf("UDP       %-9d %-9d %-9d\n", s.udp4+s.udp6, s.udp4, s.udp6);
5284        printf("TCP       %-9d %-9d %-9d\n", s.tcp4_hashed+s.tcp6_hashed, s.tcp4_hashed, s.tcp6_hashed);
5285        printf("INET      %-9d %-9d %-9d\n",
5286               s.raw4+s.udp4+s.tcp4_hashed+
5287               s.raw6+s.udp6+s.tcp6_hashed,
5288               s.raw4+s.udp4+s.tcp4_hashed,
5289               s.raw6+s.udp6+s.tcp6_hashed);
5290        printf("FRAG      %-9d %-9d %-9d\n", s.frag4+s.frag6, s.frag4, s.frag6);
5291
5292        printf("\n");
5293
5294        return 0;
5295}
5296
/*
 * Write the complete option/filter synopsis to @dest.
 *
 * The text contains no format conversions, so it is emitted verbatim.
 */
static void _usage(FILE *dest)
{
	static const char usage_text[] =
"Usage: ss [ OPTIONS ]\n"
"       ss [ OPTIONS ] [ FILTER ]\n"
"   -h, --help          this message\n"
"   -V, --version       output version information\n"
"   -n, --numeric       don't resolve service names\n"
"   -r, --resolve       resolve host names\n"
"   -a, --all           display all sockets\n"
"   -l, --listening     display listening sockets\n"
"   -o, --options       show timer information\n"
"   -e, --extended      show detailed socket information\n"
"   -m, --memory        show socket memory usage\n"
"   -p, --processes     show process using socket\n"
"   -i, --info          show internal TCP information\n"
"       --tipcinfo      show internal tipc socket information\n"
"   -s, --summary       show socket usage summary\n"
"       --tos           show tos and priority information\n"
"       --cgroup        show cgroup information\n"
"   -b, --bpf           show bpf filter socket information\n"
"   -E, --events        continually display sockets as they are destroyed\n"
"   -Z, --context       display process SELinux security contexts\n"
"   -z, --contexts      display process and socket SELinux security contexts\n"
"   -N, --net           switch to the specified network namespace name\n"
"\n"
"   -4, --ipv4          display only IP version 4 sockets\n"
"   -6, --ipv6          display only IP version 6 sockets\n"
"   -0, --packet        display PACKET sockets\n"
"   -t, --tcp           display only TCP sockets\n"
"   -M, --mptcp         display only MPTCP sockets\n"
"   -S, --sctp          display only SCTP sockets\n"
"   -u, --udp           display only UDP sockets\n"
"   -d, --dccp          display only DCCP sockets\n"
"   -w, --raw           display only RAW sockets\n"
"   -x, --unix          display only Unix domain sockets\n"
"       --tipc          display only TIPC sockets\n"
"       --vsock         display only vsock sockets\n"
"   -f, --family=FAMILY display sockets of type FAMILY\n"
"       FAMILY := {inet|inet6|link|unix|netlink|vsock|tipc|xdp|help}\n"
"\n"
"   -K, --kill          forcibly close sockets, display what was closed\n"
"   -H, --no-header     Suppress header line\n"
"   -O, --oneline       socket's data printed on a single line\n"
"       --inet-sockopt  show various inet socket options\n"
"\n"
"   -A, --query=QUERY, --socket=QUERY\n"
"       QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink|vsock_stream|vsock_dgram|tipc}[,QUERY]\n"
"\n"
"   -D, --diag=FILE     Dump raw information about TCP sockets to FILE\n"
"   -F, --filter=FILE   read filter information from FILE\n"
"       FILTER := [ state STATE-FILTER ] [ EXPRESSION ]\n"
"       STATE-FILTER := {all|connected|synchronized|bucket|big|TCP-STATES}\n"
"         TCP-STATES := {established|syn-sent|syn-recv|fin-wait-{1,2}|time-wait|closed|close-wait|last-ack|listening|closing}\n"
"          connected := {established|syn-sent|syn-recv|fin-wait-{1,2}|time-wait|close-wait|last-ack|closing}\n"
"       synchronized := {established|syn-recv|fin-wait-{1,2}|time-wait|close-wait|last-ack|closing}\n"
"             bucket := {syn-recv|time-wait}\n"
"                big := {established|syn-sent|fin-wait-{1,2}|closed|close-wait|last-ack|listening|closing}\n";

	fputs(usage_text, dest);
}
5357
/* Print the usage text on stdout and exit with status 0. Never returns. */
static __attribute__((noreturn)) void help(void)
{
	_usage(stdout);
	exit(0);
}
5364
/* Print the usage text on stderr and exit with status -1. Never returns. */
static __attribute__((noreturn)) void usage(void)
{
	_usage(stderr);
	exit(-1);
}
5371
5372
5373static int scan_state(const char *state)
5374{
5375        static const char * const sstate_namel[] = {
5376                "UNKNOWN",
5377                [SS_ESTABLISHED] = "established",
5378                [SS_SYN_SENT] = "syn-sent",
5379                [SS_SYN_RECV] = "syn-recv",
5380                [SS_FIN_WAIT1] = "fin-wait-1",
5381                [SS_FIN_WAIT2] = "fin-wait-2",
5382                [SS_TIME_WAIT] = "time-wait",
5383                [SS_CLOSE] = "unconnected",
5384                [SS_CLOSE_WAIT] = "close-wait",
5385                [SS_LAST_ACK] = "last-ack",
5386                [SS_LISTEN] =   "listening",
5387                [SS_CLOSING] = "closing",
5388        };
5389        int i;
5390
5391        if (strcasecmp(state, "close") == 0 ||
5392            strcasecmp(state, "closed") == 0)
5393                return (1<<SS_CLOSE);
5394        if (strcasecmp(state, "syn-rcv") == 0)
5395                return (1<<SS_SYN_RECV);
5396        if (strcasecmp(state, "established") == 0)
5397                return (1<<SS_ESTABLISHED);
5398        if (strcasecmp(state, "all") == 0)
5399                return SS_ALL;
5400        if (strcasecmp(state, "connected") == 0)
5401                return SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN));
5402        if (strcasecmp(state, "synchronized") == 0)
5403                return SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN)|(1<<SS_SYN_SENT));
5404        if (strcasecmp(state, "bucket") == 0)
5405                return (1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT);
5406        if (strcasecmp(state, "big") == 0)
5407                return SS_ALL & ~((1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT));
5408        for (i = 0; i < SS_MAX; i++) {
5409                if (strcasecmp(state, sstate_namel[i]) == 0)
5410                        return (1<<i);
5411        }
5412
5413        fprintf(stderr, "ss: wrong state name: %s\n", state);
5414        exit(-1);
5415}
5416
/*
 * getopt_long() return values for options whose natural short letter is
 * already taken or that have no short letter. They start at 256 so they
 * are non-character values.
 */
enum {
	OPT_VSOCK        = 256,	/* 'v' and 'V' are already used */
	OPT_TIPCSOCK     = 257,	/* 't' is already used */
	OPT_TIPCINFO     = 258,
	OPT_TOS          = 259,
	OPT_XDPSOCK      = 260,	/* 'x' is already used */
	OPT_CGROUP       = 261,
	OPT_INET_SOCKOPT = 262,
};
5432
5433static const struct option long_opts[] = {
5434        { "numeric", 0, 0, 'n' },
5435        { "resolve", 0, 0, 'r' },
5436        { "options", 0, 0, 'o' },
5437        { "extended", 0, 0, 'e' },
5438        { "memory", 0, 0, 'm' },
5439        { "info", 0, 0, 'i' },
5440        { "processes", 0, 0, 'p' },
5441        { "bpf", 0, 0, 'b' },
5442        { "events", 0, 0, 'E' },
5443        { "dccp", 0, 0, 'd' },
5444        { "tcp", 0, 0, 't' },
5445        { "sctp", 0, 0, 'S' },
5446        { "udp", 0, 0, 'u' },
5447        { "raw", 0, 0, 'w' },
5448        { "unix", 0, 0, 'x' },
5449        { "tipc", 0, 0, OPT_TIPCSOCK},
5450        { "vsock", 0, 0, OPT_VSOCK },
5451        { "all", 0, 0, 'a' },
5452        { "listening", 0, 0, 'l' },
5453        { "ipv4", 0, 0, '4' },
5454        { "ipv6", 0, 0, '6' },
5455        { "packet", 0, 0, '0' },
5456        { "family", 1, 0, 'f' },
5457        { "socket", 1, 0, 'A' },
5458        { "query", 1, 0, 'A' },
5459        { "summary", 0, 0, 's' },
5460        { "diag", 1, 0, 'D' },
5461        { "filter", 1, 0, 'F' },
5462        { "version", 0, 0, 'V' },
5463        { "help", 0, 0, 'h' },
5464        { "context", 0, 0, 'Z' },
5465        { "contexts", 0, 0, 'z' },
5466        { "net", 1, 0, 'N' },
5467        { "tipcinfo", 0, 0, OPT_TIPCINFO},
5468        { "tos", 0, 0, OPT_TOS },
5469        { "cgroup", 0, 0, OPT_CGROUP },
5470        { "kill", 0, 0, 'K' },
5471        { "no-header", 0, 0, 'H' },
5472        { "xdp", 0, 0, OPT_XDPSOCK},
5473        { "mptcp", 0, 0, 'M' },
5474        { "oneline", 0, 0, 'O' },
5475        { "inet-sockopt", 0, 0, OPT_INET_SOCKOPT },
5476        { 0 }
5477
5478};
5479
5480int main(int argc, char *argv[])
5481{
5482        int saw_states = 0;
5483        int saw_query = 0;
5484        int do_summary = 0;
5485        const char *dump_tcpdiag = NULL;
5486        FILE *filter_fp = NULL;
5487        int ch;
5488        int state_filter = 0;
5489
5490        while ((ch = getopt_long(argc, argv,
5491                                 "dhaletuwxnro460spbEf:mMiA:D:F:vVzZN:KHSO",
5492                                 long_opts, NULL)) != EOF) {
5493                switch (ch) {
5494                case 'n':
5495                        numeric = 1;
5496                        break;
5497                case 'r':
5498                        resolve_hosts = 1;
5499                        break;
5500                case 'o':
5501                        show_options = 1;
5502                        break;
5503                case 'e':
5504                        show_options = 1;
5505                        show_details++;
5506                        break;
5507                case 'm':
5508                        show_mem = 1;
5509                        break;
5510                case 'i':
5511                        show_tcpinfo = 1;
5512                        break;
5513                case 'p':
5514                        show_users++;
5515                        user_ent_hash_build();
5516                        break;
5517                case 'b':
5518                        show_options = 1;
5519                        show_bpf++;
5520                        break;
5521                case 'E':
5522                        follow_events = 1;
5523                        break;
5524                case 'd':
5525                        filter_db_set(&current_filter, DCCP_DB, true);
5526                        break;
5527                case 't':
5528                        filter_db_set(&current_filter, TCP_DB, true);
5529                        break;
5530                case 'S':
5531                        filter_db_set(&current_filter, SCTP_DB, true);
5532                        break;
5533                case 'u':
5534                        filter_db_set(&current_filter, UDP_DB, true);
5535                        break;
5536                case 'w':
5537                        filter_db_set(&current_filter, RAW_DB, true);
5538                        break;
5539                case 'x':
5540                        filter_af_set(&current_filter, AF_UNIX);
5541                        break;
5542                case OPT_VSOCK:
5543                        filter_af_set(&current_filter, AF_VSOCK);
5544                        break;
5545                case OPT_TIPCSOCK:
5546                        filter_af_set(&current_filter, AF_TIPC);
5547                        break;
5548                case 'a':
5549                        state_filter = SS_ALL;
5550                        break;
5551                case 'l':
5552                        state_filter = (1 << SS_LISTEN) | (1 << SS_CLOSE);
5553                        break;
5554                case '4':
5555                        filter_af_set(&current_filter, AF_INET);
5556                        break;
5557                case '6':
5558                        filter_af_set(&current_filter, AF_INET6);
5559                        break;
5560                case '0':
5561                        filter_af_set(&current_filter, AF_PACKET);
5562                        break;
5563                case OPT_XDPSOCK:
5564                        filter_af_set(&current_filter, AF_XDP);
5565                        break;
5566                case 'M':
5567                        filter_db_set(&current_filter, MPTCP_DB, true);
5568                        break;
5569                case 'f':
5570                        if (strcmp(optarg, "inet") == 0)
5571                                filter_af_set(&current_filter, AF_INET);
5572                        else if (strcmp(optarg, "inet6") == 0)
5573                                filter_af_set(&current_filter, AF_INET6);
5574                        else if (strcmp(optarg, "link") == 0)
5575                                filter_af_set(&current_filter, AF_PACKET);
5576                        else if (strcmp(optarg, "unix") == 0)
5577                                filter_af_set(&current_filter, AF_UNIX);
5578                        else if (strcmp(optarg, "netlink") == 0)
5579                                filter_af_set(&current_filter, AF_NETLINK);
5580                        else if (strcmp(optarg, "tipc") == 0)
5581                                filter_af_set(&current_filter, AF_TIPC);
5582                        else if (strcmp(optarg, "vsock") == 0)
5583                                filter_af_set(&current_filter, AF_VSOCK);
5584                        else if (strcmp(optarg, "xdp") == 0)
5585                                filter_af_set(&current_filter, AF_XDP);
5586                        else if (strcmp(optarg, "help") == 0)
5587                                help();
5588                        else {
5589                                fprintf(stderr, "ss: \"%s\" is invalid family\n",
5590                                                optarg);
5591                                usage();
5592                        }
5593                        break;
5594                case 'A':
5595                {
5596                        char *p, *p1;
5597
5598                        if (!saw_query) {
5599                                current_filter.dbs = 0;
5600                                state_filter = state_filter ?
5601                                               state_filter : SS_CONN;
5602                                saw_query = 1;
5603                                do_default = 0;
5604                        }
5605                        p = p1 = optarg;
5606                        do {
5607                                if ((p1 = strchr(p, ',')) != NULL)
5608                                        *p1 = 0;
5609                                if (filter_db_parse(&current_filter, p)) {
5610                                        fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p);
5611                                        usage();
5612                                }
5613                                p = p1 + 1;
5614                        } while (p1);
5615                        break;
5616                }
5617                case 's':
5618                        do_summary = 1;
5619                        break;
5620                case 'D':
5621                        dump_tcpdiag = optarg;
5622                        break;
5623                case 'F':
5624                        if (filter_fp) {
5625                                fprintf(stderr, "More than one filter file\n");
5626                                exit(-1);
5627                        }
5628                        if (optarg[0] == '-')
5629                                filter_fp = stdin;
5630                        else
5631                                filter_fp = fopen(optarg, "r");
5632                        if (!filter_fp) {
5633                                perror("fopen filter file");
5634                                exit(-1);
5635                        }
5636                        break;
5637                case 'v':
5638                case 'V':
5639                        printf("ss utility, iproute2-%s\n", version);
5640                        exit(0);
5641                case 'z':
5642                        show_sock_ctx++;
5643                        /* fall through */
5644                case 'Z':
5645                        if (is_selinux_enabled() <= 0) {
5646                                fprintf(stderr, "ss: SELinux is not enabled.\n");
5647                                exit(1);
5648                        }
5649                        show_proc_ctx++;
5650                        user_ent_hash_build();
5651                        break;
5652                case 'N':
5653                        if (netns_switch(optarg))
5654                                exit(1);
5655                        break;
5656                case OPT_TIPCINFO:
5657                        show_tipcinfo = 1;
5658                        break;
5659                case OPT_TOS:
5660                        show_tos = 1;
5661                        break;
5662                case OPT_CGROUP:
5663                        show_cgroup = 1;
5664                        break;
5665                case 'K':
5666                        current_filter.kill = 1;
5667                        break;
5668                case 'H':
5669                        show_header = 0;
5670                        break;
5671                case 'O':
5672                        oneline = 1;
5673                        break;
5674                case OPT_INET_SOCKOPT:
5675                        show_inet_sockopt = 1;
5676                        break;
5677                case 'h':
5678                        help();
5679                case '?':
5680                default:
5681                        usage();
5682                }
5683        }
5684
5685        argc -= optind;
5686        argv += optind;
5687
5688        if (do_summary) {
5689                print_summary();
5690                if (do_default && argc == 0)
5691                        exit(0);
5692        }
5693
5694        while (argc > 0) {
5695                if (strcmp(*argv, "state") == 0) {
5696                        NEXT_ARG();
5697                        if (!saw_states)
5698                                state_filter = 0;
5699                        state_filter |= scan_state(*argv);
5700                        saw_states = 1;
5701                } else if (strcmp(*argv, "exclude") == 0 ||
5702                           strcmp(*argv, "excl") == 0) {
5703                        NEXT_ARG();
5704                        if (!saw_states)
5705                                state_filter = SS_ALL;
5706                        state_filter &= ~scan_state(*argv);
5707                        saw_states = 1;
5708                } else {
5709                        break;
5710                }
5711                argc--; argv++;
5712        }
5713
5714        if (do_default) {
5715                state_filter = state_filter ? state_filter : SS_CONN;
5716                filter_db_parse(&current_filter, "all");
5717        }
5718
5719        filter_states_set(&current_filter, state_filter);
5720        filter_merge_defaults(&current_filter);
5721
5722#ifdef HAVE_RPC
5723        if (!numeric && resolve_hosts &&
5724            (current_filter.dbs & (UNIX_DBM|INET_L4_DBM)))
5725                init_service_resolver();
5726#endif
5727
5728        if (current_filter.dbs == 0) {
5729                fprintf(stderr, "ss: no socket tables to show with such filter.\n");
5730                exit(0);
5731        }
5732        if (current_filter.families == 0) {
5733                fprintf(stderr, "ss: no families to show with such filter.\n");
5734                exit(0);
5735        }
5736        if (current_filter.states == 0) {
5737                fprintf(stderr, "ss: no socket states to show with such filter.\n");
5738                exit(0);
5739        }
5740
5741        if (dump_tcpdiag) {
5742                FILE *dump_fp = stdout;
5743
5744                if (!(current_filter.dbs & (1<<TCP_DB))) {
5745                        fprintf(stderr, "ss: tcpdiag dump requested and no tcp in filter.\n");
5746                        exit(0);
5747                }
5748                if (dump_tcpdiag[0] != '-') {
5749                        dump_fp = fopen(dump_tcpdiag, "w");
5750                        if (!dump_tcpdiag) {
5751                                perror("fopen dump file");
5752                                exit(-1);
5753                        }
5754                }
5755                inet_show_netlink(&current_filter, dump_fp, IPPROTO_TCP);
5756                fflush(dump_fp);
5757                exit(0);
5758        }
5759
5760        if (ssfilter_parse(&current_filter.f, argc, argv, filter_fp))
5761                usage();
5762
5763        if (!(current_filter.dbs & (current_filter.dbs - 1)))
5764                columns[COL_NETID].disabled = 1;
5765
5766        if (!(current_filter.states & (current_filter.states - 1)))
5767                columns[COL_STATE].disabled = 1;
5768
5769        if (show_header)
5770                print_header();
5771
5772        fflush(stdout);
5773
5774        if (follow_events)
5775                exit(handle_follow_request(&current_filter));
5776
5777        if (current_filter.dbs & (1<<NETLINK_DB))
5778                netlink_show(&current_filter);
5779        if (current_filter.dbs & PACKET_DBM)
5780                packet_show(&current_filter);
5781        if (current_filter.dbs & UNIX_DBM)
5782                unix_show(&current_filter);
5783        if (current_filter.dbs & (1<<RAW_DB))
5784                raw_show(&current_filter);
5785        if (current_filter.dbs & (1<<UDP_DB))
5786                udp_show(&current_filter);
5787        if (current_filter.dbs & (1<<TCP_DB))
5788                tcp_show(&current_filter);
5789        if (current_filter.dbs & (1<<DCCP_DB))
5790                dccp_show(&current_filter);
5791        if (current_filter.dbs & (1<<SCTP_DB))
5792                sctp_show(&current_filter);
5793        if (current_filter.dbs & VSOCK_DBM)
5794                vsock_show(&current_filter);
5795        if (current_filter.dbs & (1<<TIPC_DB))
5796                tipc_show(&current_filter);
5797        if (current_filter.dbs & (1<<XDP_DB))
5798                xdp_show(&current_filter);
5799        if (current_filter.dbs & (1<<MPTCP_DB))
5800                mptcp_show(&current_filter);
5801
5802        if (show_users || show_proc_ctx || show_sock_ctx)
5803                user_ent_destroy();
5804
5805        render();
5806
5807        return 0;
5808}
5809