linux/net/ipv4/inet_diag.c
<<
>>
Prefs
   1/*
   2 * inet_diag.c  Module for monitoring INET transport protocols sockets.
   3 *
   4 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/fcntl.h>
  16#include <linux/random.h>
  17#include <linux/slab.h>
  18#include <linux/cache.h>
  19#include <linux/init.h>
  20#include <linux/time.h>
  21
  22#include <net/icmp.h>
  23#include <net/tcp.h>
  24#include <net/ipv6.h>
  25#include <net/inet_common.h>
  26#include <net/inet_connection_sock.h>
  27#include <net/inet_hashtables.h>
  28#include <net/inet_timewait_sock.h>
  29#include <net/inet6_hashtables.h>
  30#include <net/netlink.h>
  31
  32#include <linux/inet.h>
  33#include <linux/stddef.h>
  34
  35#include <linux/inet_diag.h>
  36#include <linux/sock_diag.h>
  37
/* Per-protocol handler table, indexed by protocol number.  A slot may be
 * NULL when no handler is present for that protocol; see the lookups in
 * inet_diag_lock_handler() and inet_sk_diag_fill().
 */
static const struct inet_diag_handler **inet_diag_table;
  39
/* Snapshot of the socket properties that the filter bytecode interpreter
 * (inet_diag_bc_run()) compares against.
 */
struct inet_diag_entry {
        /* First 32-bit word of the source address; set by entry_fill_addrs(). */
        const __be32 *saddr;
        /* First 32-bit word of the destination address; set by entry_fill_addrs(). */
        const __be32 *daddr;
        /* Source port, taken from inet_num by the callers in this file. */
        u16 sport;
        /* Destination port, converted with ntohs() by the callers in this file. */
        u16 dport;
        /* Socket address family, compared against the hostcond family. */
        u16 family;
        /* Socket userlocks; tested against SOCK_BINDPORT_LOCK by INET_DIAG_BC_AUTO. */
        u16 userlocks;
};
  48
/* Taken by inet_diag_lock_handler() and released by
 * inet_diag_unlock_handler().
 */
static DEFINE_MUTEX(inet_diag_table_mutex);
  50
  51static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
  52{
  53        if (!inet_diag_table[proto])
  54                sock_load_diag_module(AF_INET, proto);
  55
  56        mutex_lock(&inet_diag_table_mutex);
  57        if (!inet_diag_table[proto])
  58                return ERR_PTR(-ENOENT);
  59
  60        return inet_diag_table[proto];
  61}
  62
/* Release inet_diag_table_mutex taken by inet_diag_lock_handler().
 *
 * @handler is not used; callers pass whatever inet_diag_lock_handler()
 * returned, which may be an ERR_PTR value.
 */
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
        mutex_unlock(&inet_diag_table_mutex);
}
  67
  68void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
  69{
  70        r->idiag_family = sk->sk_family;
  71
  72        r->id.idiag_sport = htons(sk->sk_num);
  73        r->id.idiag_dport = sk->sk_dport;
  74        r->id.idiag_if = sk->sk_bound_dev_if;
  75        sock_diag_save_cookie(sk, r->id.idiag_cookie);
  76
  77#if IS_ENABLED(CONFIG_IPV6)
  78        if (sk->sk_family == AF_INET6) {
  79                *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
  80                *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
  81        } else
  82#endif
  83        {
  84        memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
  85        memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
  86
  87        r->id.idiag_src[0] = sk->sk_rcv_saddr;
  88        r->id.idiag_dst[0] = sk->sk_daddr;
  89        }
  90}
  91EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
  92
  93static size_t inet_sk_attr_size(void)
  94{
  95        return    nla_total_size(sizeof(struct tcp_info))
  96                + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
  97                + nla_total_size(1) /* INET_DIAG_TOS */
  98                + nla_total_size(1) /* INET_DIAG_TCLASS */
  99                + nla_total_size(sizeof(struct inet_diag_meminfo))
 100                + nla_total_size(sizeof(struct inet_diag_msg))
 101                + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
 102                + nla_total_size(TCP_CA_NAME_MAX)
 103                + nla_total_size(sizeof(struct tcpvegas_info))
 104                + 64;
 105}
 106
 107int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 108                             struct inet_diag_msg *r, int ext,
 109                             struct user_namespace *user_ns)
 110{
 111        const struct inet_sock *inet = inet_sk(sk);
 112
 113        if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
 114                goto errout;
 115
 116        /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
 117         * hence this needs to be included regardless of socket family.
 118         */
 119        if (ext & (1 << (INET_DIAG_TOS - 1)))
 120                if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
 121                        goto errout;
 122
 123#if IS_ENABLED(CONFIG_IPV6)
 124        if (r->idiag_family == AF_INET6) {
 125                if (ext & (1 << (INET_DIAG_TCLASS - 1)))
 126                        if (nla_put_u8(skb, INET_DIAG_TCLASS,
 127                                       inet6_sk(sk)->tclass) < 0)
 128                                goto errout;
 129
 130                if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
 131                    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
 132                        goto errout;
 133        }
 134#endif
 135
 136        r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 137        r->idiag_inode = sock_i_ino(sk);
 138
 139        return 0;
 140errout:
 141        return 1;
 142}
 143EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
 144
 145int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 146                      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 147                      struct user_namespace *user_ns,
 148                      u32 portid, u32 seq, u16 nlmsg_flags,
 149                      const struct nlmsghdr *unlh)
 150{
 151        const struct tcp_congestion_ops *ca_ops;
 152        const struct inet_diag_handler *handler;
 153        int ext = req->idiag_ext;
 154        struct inet_diag_msg *r;
 155        struct nlmsghdr  *nlh;
 156        struct nlattr *attr;
 157        void *info = NULL;
 158
 159        handler = inet_diag_table[req->sdiag_protocol];
 160        BUG_ON(!handler);
 161
 162        nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 163                        nlmsg_flags);
 164        if (!nlh)
 165                return -EMSGSIZE;
 166
 167        r = nlmsg_data(nlh);
 168        BUG_ON(sk->sk_state == TCP_TIME_WAIT);
 169
 170        inet_diag_msg_common_fill(r, sk);
 171        r->idiag_state = sk->sk_state;
 172        r->idiag_timer = 0;
 173        r->idiag_retrans = 0;
 174
 175
 176        if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
 177                goto errout;
 178
 179        if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
 180                struct inet_diag_meminfo minfo = {
 181                        .idiag_rmem = sk_rmem_alloc_get(sk),
 182                        .idiag_wmem = sk->sk_wmem_queued,
 183                        .idiag_fmem = sk->sk_forward_alloc,
 184                        .idiag_tmem = sk_wmem_alloc_get(sk),
 185                };
 186
 187                if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
 188                        goto errout;
 189        }
 190
 191        if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
 192                if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
 193                        goto errout;
 194
 195        if (!icsk) {
 196                handler->idiag_get_info(sk, r, NULL);
 197                goto out;
 198        }
 199
 200        if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 201            icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 202            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 203                r->idiag_timer = 1;
 204                r->idiag_retrans = icsk->icsk_retransmits;
 205                r->idiag_expires =
 206                        jiffies_to_msecs(icsk->icsk_timeout - jiffies);
 207        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 208                r->idiag_timer = 4;
 209                r->idiag_retrans = icsk->icsk_probes_out;
 210                r->idiag_expires =
 211                        jiffies_to_msecs(icsk->icsk_timeout - jiffies);
 212        } else if (timer_pending(&sk->sk_timer)) {
 213                r->idiag_timer = 2;
 214                r->idiag_retrans = icsk->icsk_probes_out;
 215                r->idiag_expires =
 216                        jiffies_to_msecs(sk->sk_timer.expires - jiffies);
 217        } else {
 218                r->idiag_timer = 0;
 219                r->idiag_expires = 0;
 220        }
 221
 222        if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
 223                attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
 224                                         handler->idiag_info_size,
 225                                         INET_DIAG_PAD);
 226                if (!attr)
 227                        goto errout;
 228
 229                info = nla_data(attr);
 230        }
 231
 232        if (ext & (1 << (INET_DIAG_CONG - 1))) {
 233                int err = 0;
 234
 235                rcu_read_lock();
 236                ca_ops = READ_ONCE(icsk->icsk_ca_ops);
 237                if (ca_ops)
 238                        err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
 239                rcu_read_unlock();
 240                if (err < 0)
 241                        goto errout;
 242        }
 243
 244        handler->idiag_get_info(sk, r, info);
 245
 246        if (sk->sk_state < TCP_TIME_WAIT) {
 247                int err = 0;
 248
 249                rcu_read_lock();
 250                ca_ops = READ_ONCE(icsk->icsk_ca_ops);
 251                if (ca_ops && ca_ops->get_info)
 252                        err = ca_ops->get_info(sk, ext, skb);
 253                rcu_read_unlock();
 254                if (err < 0)
 255                        goto errout;
 256        }
 257
 258out:
 259        nlmsg_end(skb, nlh);
 260        return 0;
 261
 262errout:
 263        nlmsg_cancel(skb, nlh);
 264        return -EMSGSIZE;
 265}
 266EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 267
 268static int inet_csk_diag_fill(struct sock *sk,
 269                              struct sk_buff *skb,
 270                              const struct inet_diag_req_v2 *req,
 271                              struct user_namespace *user_ns,
 272                              u32 portid, u32 seq, u16 nlmsg_flags,
 273                              const struct nlmsghdr *unlh)
 274{
 275        return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
 276                                 user_ns, portid, seq, nlmsg_flags, unlh);
 277}
 278
 279static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 280                               struct sk_buff *skb,
 281                               const struct inet_diag_req_v2 *req,
 282                               u32 portid, u32 seq, u16 nlmsg_flags,
 283                               const struct nlmsghdr *unlh)
 284{
 285        struct inet_diag_msg *r;
 286        struct nlmsghdr *nlh;
 287        s32 tmo;
 288
 289        nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 290                        nlmsg_flags);
 291        if (!nlh)
 292                return -EMSGSIZE;
 293
 294        r = nlmsg_data(nlh);
 295        BUG_ON(tw->tw_state != TCP_TIME_WAIT);
 296
 297        tmo = tw->tw_ttd - inet_tw_time_stamp();
 298        if (tmo < 0)
 299                tmo = 0;
 300
 301        inet_diag_msg_common_fill(r, (struct sock *)tw);
 302        r->idiag_retrans      = 0;
 303
 304        r->idiag_state        = tw->tw_substate;
 305        r->idiag_timer        = 3;
 306        r->idiag_expires      = jiffies_to_msecs(tmo);
 307        r->idiag_rqueue       = 0;
 308        r->idiag_wqueue       = 0;
 309        r->idiag_uid          = 0;
 310        r->idiag_inode        = 0;
 311
 312        nlmsg_end(skb, nlh);
 313        return 0;
 314}
 315
 316static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 317                        const struct inet_diag_req_v2 *r,
 318                        struct user_namespace *user_ns,
 319                        u32 portid, u32 seq, u16 nlmsg_flags,
 320                        const struct nlmsghdr *unlh)
 321{
 322        if (sk->sk_state == TCP_TIME_WAIT)
 323                return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
 324                                           nlmsg_flags, unlh);
 325
 326        return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
 327                                  nlmsg_flags, unlh);
 328}
 329
 330int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 331                            struct sk_buff *in_skb,
 332                            const struct nlmsghdr *nlh,
 333                            const struct inet_diag_req_v2 *req)
 334{
 335        struct net *net = sock_net(in_skb->sk);
 336        struct sk_buff *rep;
 337        struct sock *sk;
 338        int err;
 339
 340        err = -EINVAL;
 341        if (req->sdiag_family == AF_INET)
 342                sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 343                                 req->id.idiag_dport, req->id.idiag_src[0],
 344                                 req->id.idiag_sport, req->id.idiag_if);
 345#if IS_ENABLED(CONFIG_IPV6)
 346        else if (req->sdiag_family == AF_INET6)
 347                sk = inet6_lookup(net, hashinfo,
 348                                  (struct in6_addr *)req->id.idiag_dst,
 349                                  req->id.idiag_dport,
 350                                  (struct in6_addr *)req->id.idiag_src,
 351                                  req->id.idiag_sport,
 352                                  req->id.idiag_if);
 353#endif
 354        else
 355                goto out_nosk;
 356
 357        err = -ENOENT;
 358        if (!sk)
 359                goto out_nosk;
 360
 361        err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
 362        if (err)
 363                goto out;
 364
 365        rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 366        if (!rep) {
 367                err = -ENOMEM;
 368                goto out;
 369        }
 370
 371        err = sk_diag_fill(sk, rep, req,
 372                           sk_user_ns(NETLINK_CB(in_skb).sk),
 373                           NETLINK_CB(in_skb).portid,
 374                           nlh->nlmsg_seq, 0, nlh);
 375        if (err < 0) {
 376                WARN_ON(err == -EMSGSIZE);
 377                nlmsg_free(rep);
 378                goto out;
 379        }
 380        err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
 381                              MSG_DONTWAIT);
 382        if (err > 0)
 383                err = 0;
 384
 385out:
 386        if (sk)
 387                sock_gen_put(sk);
 388
 389out_nosk:
 390        return err;
 391}
 392EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 393
 394static int inet_diag_get_exact(struct sk_buff *in_skb,
 395                               const struct nlmsghdr *nlh,
 396                               const struct inet_diag_req_v2 *req)
 397{
 398        const struct inet_diag_handler *handler;
 399        int err;
 400
 401        handler = inet_diag_lock_handler(req->sdiag_protocol);
 402        if (IS_ERR(handler))
 403                err = PTR_ERR(handler);
 404        else
 405                err = handler->dump_one(in_skb, nlh, req);
 406        inet_diag_unlock_handler(handler);
 407
 408        return err;
 409}
 410
 411static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
 412{
 413        int words = bits >> 5;
 414
 415        bits &= 0x1f;
 416
 417        if (words) {
 418                if (memcmp(a1, a2, words << 2))
 419                        return 0;
 420        }
 421        if (bits) {
 422                __be32 w1, w2;
 423                __be32 mask;
 424
 425                w1 = a1[words];
 426                w2 = a2[words];
 427
 428                mask = htonl((0xffffffff) << (32 - bits));
 429
 430                if ((w1 ^ w2) & mask)
 431                        return 0;
 432        }
 433
 434        return 1;
 435}
 436
 437static int inet_diag_bc_run(const struct nlattr *_bc,
 438                            const struct inet_diag_entry *entry)
 439{
 440        const void *bc = nla_data(_bc);
 441        int len = nla_len(_bc);
 442
 443        while (len > 0) {
 444                int yes = 1;
 445                const struct inet_diag_bc_op *op = bc;
 446
 447                switch (op->code) {
 448                case INET_DIAG_BC_NOP:
 449                        break;
 450                case INET_DIAG_BC_JMP:
 451                        yes = 0;
 452                        break;
 453                case INET_DIAG_BC_S_GE:
 454                        yes = entry->sport >= op[1].no;
 455                        break;
 456                case INET_DIAG_BC_S_LE:
 457                        yes = entry->sport <= op[1].no;
 458                        break;
 459                case INET_DIAG_BC_D_GE:
 460                        yes = entry->dport >= op[1].no;
 461                        break;
 462                case INET_DIAG_BC_D_LE:
 463                        yes = entry->dport <= op[1].no;
 464                        break;
 465                case INET_DIAG_BC_AUTO:
 466                        yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
 467                        break;
 468                case INET_DIAG_BC_S_COND:
 469                case INET_DIAG_BC_D_COND: {
 470                        const struct inet_diag_hostcond *cond;
 471                        const __be32 *addr;
 472
 473                        cond = (const struct inet_diag_hostcond *)(op + 1);
 474                        if (cond->port != -1 &&
 475                            cond->port != (op->code == INET_DIAG_BC_S_COND ?
 476                                             entry->sport : entry->dport)) {
 477                                yes = 0;
 478                                break;
 479                        }
 480
 481                        if (op->code == INET_DIAG_BC_S_COND)
 482                                addr = entry->saddr;
 483                        else
 484                                addr = entry->daddr;
 485
 486                        if (cond->family != AF_UNSPEC &&
 487                            cond->family != entry->family) {
 488                                if (entry->family == AF_INET6 &&
 489                                    cond->family == AF_INET) {
 490                                        if (addr[0] == 0 && addr[1] == 0 &&
 491                                            addr[2] == htonl(0xffff) &&
 492                                            bitstring_match(addr + 3,
 493                                                            cond->addr,
 494                                                            cond->prefix_len))
 495                                                break;
 496                                }
 497                                yes = 0;
 498                                break;
 499                        }
 500
 501                        if (cond->prefix_len == 0)
 502                                break;
 503                        if (bitstring_match(addr, cond->addr,
 504                                            cond->prefix_len))
 505                                break;
 506                        yes = 0;
 507                        break;
 508                }
 509                }
 510
 511                if (yes) {
 512                        len -= op->yes;
 513                        bc += op->yes;
 514                } else {
 515                        len -= op->no;
 516                        bc += op->no;
 517                }
 518        }
 519        return len == 0;
 520}
 521
 522/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
 523 */
 524static void entry_fill_addrs(struct inet_diag_entry *entry,
 525                             const struct sock *sk)
 526{
 527#if IS_ENABLED(CONFIG_IPV6)
 528        if (sk->sk_family == AF_INET6) {
 529                entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
 530                entry->daddr = sk->sk_v6_daddr.s6_addr32;
 531        } else
 532#endif
 533        {
 534                entry->saddr = &sk->sk_rcv_saddr;
 535                entry->daddr = &sk->sk_daddr;
 536        }
 537}
 538
 539int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 540{
 541        struct inet_sock *inet = inet_sk(sk);
 542        struct inet_diag_entry entry;
 543
 544        if (!bc)
 545                return 1;
 546
 547        entry.family = sk->sk_family;
 548        entry_fill_addrs(&entry, sk);
 549        entry.sport = inet->inet_num;
 550        entry.dport = ntohs(inet->inet_dport);
 551        entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0;
 552
 553        return inet_diag_bc_run(bc, &entry);
 554}
 555EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
 556
 557static int valid_cc(const void *bc, int len, int cc)
 558{
 559        while (len >= 0) {
 560                const struct inet_diag_bc_op *op = bc;
 561
 562                if (cc > len)
 563                        return 0;
 564                if (cc == len)
 565                        return 1;
 566                if (op->yes < 4 || op->yes & 3)
 567                        return 0;
 568                len -= op->yes;
 569                bc  += op->yes;
 570        }
 571        return 0;
 572}
 573
 574/* Validate an inet_diag_hostcond. */
 575static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
 576                           int *min_len)
 577{
 578        struct inet_diag_hostcond *cond;
 579        int addr_len;
 580
 581        /* Check hostcond space. */
 582        *min_len += sizeof(struct inet_diag_hostcond);
 583        if (len < *min_len)
 584                return false;
 585        cond = (struct inet_diag_hostcond *)(op + 1);
 586
 587        /* Check address family and address length. */
 588        switch (cond->family) {
 589        case AF_UNSPEC:
 590                addr_len = 0;
 591                break;
 592        case AF_INET:
 593                addr_len = sizeof(struct in_addr);
 594                break;
 595        case AF_INET6:
 596                addr_len = sizeof(struct in6_addr);
 597                break;
 598        default:
 599                return false;
 600        }
 601        *min_len += addr_len;
 602        if (len < *min_len)
 603                return false;
 604
 605        /* Check prefix length (in bits) vs address length (in bytes). */
 606        if (cond->prefix_len > 8 * addr_len)
 607                return false;
 608
 609        return true;
 610}
 611
 612/* Validate a port comparison operator. */
 613static bool valid_port_comparison(const struct inet_diag_bc_op *op,
 614                                  int len, int *min_len)
 615{
 616        /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
 617        *min_len += sizeof(struct inet_diag_bc_op);
 618        if (len < *min_len)
 619                return false;
 620        return true;
 621}
 622
 623static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 624{
 625        const void *bc = bytecode;
 626        int  len = bytecode_len;
 627
 628        while (len > 0) {
 629                int min_len = sizeof(struct inet_diag_bc_op);
 630                const struct inet_diag_bc_op *op = bc;
 631
 632                switch (op->code) {
 633                case INET_DIAG_BC_S_COND:
 634                case INET_DIAG_BC_D_COND:
 635                        if (!valid_hostcond(bc, len, &min_len))
 636                                return -EINVAL;
 637                        break;
 638                case INET_DIAG_BC_S_GE:
 639                case INET_DIAG_BC_S_LE:
 640                case INET_DIAG_BC_D_GE:
 641                case INET_DIAG_BC_D_LE:
 642                        if (!valid_port_comparison(bc, len, &min_len))
 643                                return -EINVAL;
 644                        break;
 645                case INET_DIAG_BC_AUTO:
 646                case INET_DIAG_BC_JMP:
 647                case INET_DIAG_BC_NOP:
 648                        break;
 649                default:
 650                        return -EINVAL;
 651                }
 652
 653                if (op->code != INET_DIAG_BC_NOP) {
 654                        if (op->no < min_len || op->no > len + 4 || op->no & 3)
 655                                return -EINVAL;
 656                        if (op->no < len &&
 657                            !valid_cc(bytecode, bytecode_len, len - op->no))
 658                                return -EINVAL;
 659                }
 660
 661                if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
 662                        return -EINVAL;
 663                bc  += op->yes;
 664                len -= op->yes;
 665        }
 666        return len == 0 ? 0 : -EINVAL;
 667}
 668
 669static int inet_csk_diag_dump(struct sock *sk,
 670                              struct sk_buff *skb,
 671                              struct netlink_callback *cb,
 672                              const struct inet_diag_req_v2 *r,
 673                              const struct nlattr *bc)
 674{
 675        if (!inet_diag_bc_sk(bc, sk))
 676                return 0;
 677
 678        return inet_csk_diag_fill(sk, skb, r,
 679                                  sk_user_ns(NETLINK_CB(cb->skb).sk),
 680                                  NETLINK_CB(cb->skb).portid,
 681                                  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 682}
 683
 684static void twsk_build_assert(void)
 685{
 686        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
 687                     offsetof(struct sock, sk_family));
 688
 689        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
 690                     offsetof(struct inet_sock, inet_num));
 691
 692        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
 693                     offsetof(struct inet_sock, inet_dport));
 694
 695        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
 696                     offsetof(struct inet_sock, inet_rcv_saddr));
 697
 698        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
 699                     offsetof(struct inet_sock, inet_daddr));
 700
 701#if IS_ENABLED(CONFIG_IPV6)
 702        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
 703                     offsetof(struct sock, sk_v6_rcv_saddr));
 704
 705        BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
 706                     offsetof(struct sock, sk_v6_daddr));
 707#endif
 708}
 709
 710static int inet_twsk_diag_dump(struct sock *sk,
 711                               struct sk_buff *skb,
 712                               struct netlink_callback *cb,
 713                               const struct inet_diag_req_v2 *r,
 714                               const struct nlattr *bc)
 715{
 716        twsk_build_assert();
 717
 718        if (!inet_diag_bc_sk(bc, sk))
 719                return 0;
 720
 721        return inet_twsk_diag_fill(inet_twsk(sk), skb, r,
 722                                   NETLINK_CB(cb->skb).portid,
 723                                   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 724}
 725
 726static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 727                              struct request_sock *req,
 728                              struct user_namespace *user_ns,
 729                              u32 portid, u32 seq,
 730                              const struct nlmsghdr *unlh)
 731{
 732        const struct inet_request_sock *ireq = inet_rsk(req);
 733        struct inet_diag_msg *r;
 734        struct nlmsghdr *nlh;
 735        long tmo;
 736
 737        nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 738                        NLM_F_MULTI);
 739        if (!nlh)
 740                return -EMSGSIZE;
 741
 742        r = nlmsg_data(nlh);
 743        inet_diag_msg_common_fill(r, (struct sock *)ireq);
 744
 745        /* RHEL hack: detect TCP SYN-RECV pseudo sockets with IPv4-mapped-IPv6
 746         * addresses in listeners hash table with AF_INET6 family but only IPv4
 747         * part of the address filled in, and fix up the addresses in the diag
 748         * response.
 749         *
 750         * This doesn't need to be fixed upstream as SYN-RECV pseudo sockets
 751         * have been moved to the ehash table, together with fully initialized
 752         * address storage, by:
 753         *
 754         *      commit 079096f103faca2dd87342cca6f23d4b34da8871
 755         *      Author: Eric Dumazet <edumazet@google.com>
 756         *      Date:   Fri Oct 2 11:43:32 2015 -0700
 757         *
 758         *          tcp/dccp: install syn_recv requests into ehash table
 759         */
 760#if IS_ENABLED(CONFIG_IPV6)
 761        if (ireq->ireq_family == AF_INET6 && req->rsk_ops->family == AF_INET) {
 762                struct sock *req_sk = (struct sock *)ireq;
 763
 764                memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
 765                memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
 766
 767                r->id.idiag_src[2] = htonl(0xffff);
 768                r->id.idiag_src[3] = req_sk->sk_rcv_saddr;
 769                r->id.idiag_dst[2] = htonl(0xffff);
 770                r->id.idiag_dst[3] = req_sk->sk_daddr;
 771        }
 772#endif
 773
 774        r->idiag_state = TCP_SYN_RECV;
 775        r->idiag_timer = 1;
 776        r->idiag_retrans = req->num_retrans;
 777
 778        tmo = req->expires - jiffies;
 779        if (tmo < 0)
 780                tmo = 0;
 781
 782        r->idiag_expires = jiffies_to_msecs(tmo);
 783        r->idiag_rqueue = 0;
 784        r->idiag_wqueue = 0;
 785        r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 786        r->idiag_inode = 0;
 787
 788        nlmsg_end(skb, nlh);
 789        return 0;
 790}
 791
 792static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 793                               struct netlink_callback *cb,
 794                               const struct inet_diag_req_v2 *r,
 795                               const struct nlattr *bc)
 796{
 797        struct inet_connection_sock *icsk = inet_csk(sk);
 798        struct inet_sock *inet = inet_sk(sk);
 799        struct inet_diag_entry entry;
 800        int j, s_j, reqnum, s_reqnum;
 801        struct listen_sock *lopt;
 802        int err = 0;
 803
 804        s_j = cb->args[3];
 805        s_reqnum = cb->args[4];
 806
 807        if (s_j > 0)
 808                s_j--;
 809
 810        entry.family = sk->sk_family;
 811
 812        read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 813
 814        lopt = icsk->icsk_accept_queue.listen_opt;
 815        if (!lopt || !lopt->qlen)
 816                goto out;
 817
 818        if (bc) {
 819                entry.sport = inet->inet_num;
 820                entry.userlocks = sk->sk_userlocks;
 821        }
 822
 823        for (j = s_j; j < lopt->nr_table_entries; j++) {
 824                struct request_sock *req, *head = lopt->syn_table[j];
 825
 826                reqnum = 0;
 827                for (req = head; req; reqnum++, req = req->dl_next) {
 828                        struct inet_request_sock *ireq = inet_rsk(req);
 829
 830                        if (reqnum < s_reqnum)
 831                                continue;
 832                        if (r->id.idiag_dport != ireq->ir_rmt_port &&
 833                            r->id.idiag_dport)
 834                                continue;
 835
 836                        if (bc) {
 837                                /* Note: entry.sport and entry.userlocks are already set */
 838                                entry_fill_addrs(&entry, (struct sock *)req);
 839                                entry.dport = ntohs(ireq->ir_rmt_port);
 840
 841                                if (!inet_diag_bc_run(bc, &entry))
 842                                        continue;
 843                        }
 844
 845                        err = inet_diag_fill_req(skb, sk, req,
 846                                                 sk_user_ns(NETLINK_CB(cb->skb).sk),
 847                                                 NETLINK_CB(cb->skb).portid,
 848                                                 cb->nlh->nlmsg_seq, cb->nlh);
 849                        if (err < 0) {
 850                                cb->args[3] = j + 1;
 851                                cb->args[4] = reqnum;
 852                                goto out;
 853                        }
 854                }
 855
 856                s_reqnum = 0;
 857        }
 858
 859out:
 860        read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 861
 862        return err;
 863}
 864
 865void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 866                         struct netlink_callback *cb,
 867                         const struct inet_diag_req_v2 *r, struct nlattr *bc)
 868{
 869        struct net *net = sock_net(skb->sk);
 870        int i, num, s_i, s_num;
 871
 872        s_i = cb->args[1];
 873        s_num = num = cb->args[2];
 874
 875        if (cb->args[0] == 0) {
 876                if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
 877                        goto skip_listen_ht;
 878
 879                for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 880                        struct inet_listen_hashbucket *ilb;
 881                        struct hlist_nulls_node *node;
 882                        struct sock *sk;
 883
 884                        num = 0;
 885                        ilb = &hashinfo->listening_hash[i];
 886                        spin_lock_bh(&ilb->lock);
 887                        sk_nulls_for_each(sk, node, &ilb->head) {
 888                                struct inet_sock *inet = inet_sk(sk);
 889
 890                                if (!net_eq(sock_net(sk), net))
 891                                        continue;
 892
 893                                if (num < s_num) {
 894                                        num++;
 895                                        continue;
 896                                }
 897
 898                                if (r->sdiag_family != AF_UNSPEC &&
 899                                    sk->sk_family != r->sdiag_family)
 900                                        goto next_listen;
 901
 902                                if (r->id.idiag_sport != inet->inet_sport &&
 903                                    r->id.idiag_sport)
 904                                        goto next_listen;
 905
 906                                if (!(r->idiag_states & TCPF_LISTEN) ||
 907                                    r->id.idiag_dport ||
 908                                    cb->args[3] > 0)
 909                                        goto syn_recv;
 910
 911                                if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
 912                                        spin_unlock_bh(&ilb->lock);
 913                                        goto done;
 914                                }
 915
 916syn_recv:
 917                                if (!(r->idiag_states & TCPF_SYN_RECV))
 918                                        goto next_listen;
 919
 920                                if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
 921                                        spin_unlock_bh(&ilb->lock);
 922                                        goto done;
 923                                }
 924
 925next_listen:
 926                                cb->args[3] = 0;
 927                                cb->args[4] = 0;
 928                                ++num;
 929                        }
 930                        spin_unlock_bh(&ilb->lock);
 931
 932                        s_num = 0;
 933                        cb->args[3] = 0;
 934                        cb->args[4] = 0;
 935                }
 936skip_listen_ht:
 937                cb->args[0] = 1;
 938                s_i = num = s_num = 0;
 939        }
 940
 941        if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
 942                goto out;
 943
 944        for (i = s_i; i <= hashinfo->ehash_mask; i++) {
 945                struct inet_ehash_bucket *head = &hashinfo->ehash[i];
 946                spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
 947                struct hlist_nulls_node *node;
 948                struct sock *sk;
 949
 950                num = 0;
 951
 952                if (hlist_nulls_empty(&head->chain))
 953                        continue;
 954
 955                if (i > s_i)
 956                        s_num = 0;
 957
 958                spin_lock_bh(lock);
 959                sk_nulls_for_each(sk, node, &head->chain) {
 960                        int state, res;
 961
 962                        if (!net_eq(sock_net(sk), net))
 963                                continue;
 964                        if (num < s_num)
 965                                goto next_normal;
 966                        state = (sk->sk_state == TCP_TIME_WAIT) ?
 967                                inet_twsk(sk)->tw_substate : sk->sk_state;
 968                        if (!(r->idiag_states & (1 << state)))
 969                                goto next_normal;
 970                        if (r->sdiag_family != AF_UNSPEC &&
 971                            sk->sk_family != r->sdiag_family)
 972                                goto next_normal;
 973                        if (r->id.idiag_sport != htons(sk->sk_num) &&
 974                            r->id.idiag_sport)
 975                                goto next_normal;
 976                        if (r->id.idiag_dport != sk->sk_dport &&
 977                            r->id.idiag_dport)
 978                                goto next_normal;
 979                        if (sk->sk_state == TCP_TIME_WAIT)
 980                                res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
 981                        else
 982                                res = inet_csk_diag_dump(sk, skb, cb, r, bc);
 983                        if (res < 0) {
 984                                spin_unlock_bh(lock);
 985                                goto done;
 986                        }
 987next_normal:
 988                        ++num;
 989                }
 990
 991                spin_unlock_bh(lock);
 992        }
 993
 994done:
 995        cb->args[1] = i;
 996        cb->args[2] = num;
 997out:
 998        ;
 999}
1000EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
1001
1002static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
1003                            const struct inet_diag_req_v2 *r,
1004                            struct nlattr *bc)
1005{
1006        const struct inet_diag_handler *handler;
1007        int err = 0;
1008
1009        handler = inet_diag_lock_handler(r->sdiag_protocol);
1010        if (!IS_ERR(handler))
1011                handler->dump(skb, cb, r, bc);
1012        else
1013                err = PTR_ERR(handler);
1014        inet_diag_unlock_handler(handler);
1015
1016        return err ? : skb->len;
1017}
1018
1019static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
1020{
1021        int hdrlen = sizeof(struct inet_diag_req_v2);
1022        struct nlattr *bc = NULL;
1023
1024        if (nlmsg_attrlen(cb->nlh, hdrlen))
1025                bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
1026
1027        return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
1028}
1029
/*
 * Map a compat netlink message type to the IP protocol it addresses.
 * Returns 0 for any type other than TCPDIAG_GETSOCK and DCCPDIAG_GETSOCK.
 */
static int inet_diag_type2proto(int type)
{
	if (type == TCPDIAG_GETSOCK)
		return IPPROTO_TCP;

	if (type == DCCPDIAG_GETSOCK)
		return IPPROTO_DCCP;

	return 0;
}
1041
1042static int inet_diag_dump_compat(struct sk_buff *skb,
1043                                 struct netlink_callback *cb)
1044{
1045        struct inet_diag_req *rc = nlmsg_data(cb->nlh);
1046        int hdrlen = sizeof(struct inet_diag_req);
1047        struct inet_diag_req_v2 req;
1048        struct nlattr *bc = NULL;
1049
1050        req.sdiag_family = AF_UNSPEC; /* compatibility */
1051        req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
1052        req.idiag_ext = rc->idiag_ext;
1053        req.idiag_states = rc->idiag_states;
1054        req.id = rc->id;
1055
1056        if (nlmsg_attrlen(cb->nlh, hdrlen))
1057                bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
1058
1059        return __inet_diag_dump(skb, cb, &req, bc);
1060}
1061
1062static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
1063                                      const struct nlmsghdr *nlh)
1064{
1065        struct inet_diag_req *rc = nlmsg_data(nlh);
1066        struct inet_diag_req_v2 req;
1067
1068        req.sdiag_family = rc->idiag_family;
1069        req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
1070        req.idiag_ext = rc->idiag_ext;
1071        req.idiag_states = rc->idiag_states;
1072        req.id = rc->id;
1073
1074        return inet_diag_get_exact(in_skb, nlh, &req);
1075}
1076
1077static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
1078{
1079        int hdrlen = sizeof(struct inet_diag_req);
1080        struct net *net = sock_net(skb->sk);
1081
1082        if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
1083            nlmsg_len(nlh) < hdrlen)
1084                return -EINVAL;
1085
1086        if (nlh->nlmsg_flags & NLM_F_DUMP) {
1087                if (nlmsg_attrlen(nlh, hdrlen)) {
1088                        struct nlattr *attr;
1089
1090                        attr = nlmsg_find_attr(nlh, hdrlen,
1091                                               INET_DIAG_REQ_BYTECODE);
1092                        if (!attr ||
1093                            nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
1094                            inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
1095                                return -EINVAL;
1096                }
1097                {
1098                        struct netlink_dump_control c = {
1099                                .dump = inet_diag_dump_compat,
1100                        };
1101                        return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
1102                }
1103        }
1104
1105        return inet_diag_get_exact_compat(skb, nlh);
1106}
1107
1108static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
1109{
1110        int hdrlen = sizeof(struct inet_diag_req_v2);
1111        struct net *net = sock_net(skb->sk);
1112
1113        if (nlmsg_len(h) < hdrlen)
1114                return -EINVAL;
1115
1116        if (h->nlmsg_flags & NLM_F_DUMP) {
1117                if (nlmsg_attrlen(h, hdrlen)) {
1118                        struct nlattr *attr;
1119
1120                        attr = nlmsg_find_attr(h, hdrlen,
1121                                               INET_DIAG_REQ_BYTECODE);
1122                        if (!attr ||
1123                            nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
1124                            inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
1125                                return -EINVAL;
1126                }
1127                {
1128                        struct netlink_dump_control c = {
1129                                .dump = inet_diag_dump,
1130                        };
1131                        return netlink_dump_start(net->diag_nlsk, skb, h, &c);
1132                }
1133        }
1134
1135        return inet_diag_get_exact(skb, h, nlmsg_data(h));
1136}
1137
1138static const struct sock_diag_handler inet_diag_handler = {
1139        .family = AF_INET,
1140        .dump = inet_diag_handler_dump,
1141};
1142
1143static const struct sock_diag_handler inet6_diag_handler = {
1144        .family = AF_INET6,
1145        .dump = inet_diag_handler_dump,
1146};
1147
1148int inet_diag_register(const struct inet_diag_handler *h)
1149{
1150        const __u16 type = h->idiag_type;
1151        int err = -EINVAL;
1152
1153        if (type >= IPPROTO_MAX)
1154                goto out;
1155
1156        mutex_lock(&inet_diag_table_mutex);
1157        err = -EEXIST;
1158        if (!inet_diag_table[type]) {
1159                inet_diag_table[type] = h;
1160                err = 0;
1161        }
1162        mutex_unlock(&inet_diag_table_mutex);
1163out:
1164        return err;
1165}
1166EXPORT_SYMBOL_GPL(inet_diag_register);
1167
1168void inet_diag_unregister(const struct inet_diag_handler *h)
1169{
1170        const __u16 type = h->idiag_type;
1171
1172        if (type >= IPPROTO_MAX)
1173                return;
1174
1175        mutex_lock(&inet_diag_table_mutex);
1176        inet_diag_table[type] = NULL;
1177        mutex_unlock(&inet_diag_table_mutex);
1178}
1179EXPORT_SYMBOL_GPL(inet_diag_unregister);
1180
1181static int __init inet_diag_init(void)
1182{
1183        const int inet_diag_table_size = (IPPROTO_MAX *
1184                                          sizeof(struct inet_diag_handler *));
1185        int err = -ENOMEM;
1186
1187        inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
1188        if (!inet_diag_table)
1189                goto out;
1190
1191        err = sock_diag_register(&inet_diag_handler);
1192        if (err)
1193                goto out_free_nl;
1194
1195        err = sock_diag_register(&inet6_diag_handler);
1196        if (err)
1197                goto out_free_inet;
1198
1199        sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
1200out:
1201        return err;
1202
1203out_free_inet:
1204        sock_diag_unregister(&inet_diag_handler);
1205out_free_nl:
1206        kfree(inet_diag_table);
1207        goto out;
1208}
1209
1210static void __exit inet_diag_exit(void)
1211{
1212        sock_diag_unregister(&inet6_diag_handler);
1213        sock_diag_unregister(&inet_diag_handler);
1214        sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
1215        kfree(inet_diag_table);
1216}
1217
1218module_init(inet_diag_init);
1219module_exit(inet_diag_exit);
1220MODULE_LICENSE("GPL");
1221MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
1222MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
1223