linux/net/dccp/proto.c
<<
>>
Prefs
   1/*
   2 *  net/dccp/proto.c
   3 *
   4 *  An implementation of the DCCP protocol
   5 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   6 *
   7 *      This program is free software; you can redistribute it and/or modify it
   8 *      under the terms of the GNU General Public License version 2 as
   9 *      published by the Free Software Foundation.
  10 */
  11
  12#include <linux/dccp.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/skbuff.h>
  18#include <linux/netdevice.h>
  19#include <linux/in.h>
  20#include <linux/if_arp.h>
  21#include <linux/init.h>
  22#include <linux/random.h>
  23#include <linux/slab.h>
  24#include <net/checksum.h>
  25
  26#include <net/inet_sock.h>
  27#include <net/sock.h>
  28#include <net/xfrm.h>
  29
  30#include <asm/ioctls.h>
  31#include <linux/spinlock.h>
  32#include <linux/timer.h>
  33#include <linux/delay.h>
  34#include <linux/poll.h>
  35
  36#include "ccid.h"
  37#include "dccp.h"
  38#include "feat.h"
  39
  40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
  41
  42EXPORT_SYMBOL_GPL(dccp_statistics);
  43
  44struct percpu_counter dccp_orphan_count;
  45EXPORT_SYMBOL_GPL(dccp_orphan_count);
  46
  47struct inet_hashinfo dccp_hashinfo;
  48EXPORT_SYMBOL_GPL(dccp_hashinfo);
  49
  50/* the maximum queue length for tx in packets. 0 is no limit */
  51int sysctl_dccp_tx_qlen __read_mostly = 5;
  52
  53#ifdef CONFIG_IP_DCCP_DEBUG
  54static const char *dccp_state_name(const int state)
  55{
  56        static const char *const dccp_state_names[] = {
  57        [DCCP_OPEN]             = "OPEN",
  58        [DCCP_REQUESTING]       = "REQUESTING",
  59        [DCCP_PARTOPEN]         = "PARTOPEN",
  60        [DCCP_LISTEN]           = "LISTEN",
  61        [DCCP_RESPOND]          = "RESPOND",
  62        [DCCP_CLOSING]          = "CLOSING",
  63        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
  64        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
  65        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
  66        [DCCP_TIME_WAIT]        = "TIME_WAIT",
  67        [DCCP_CLOSED]           = "CLOSED",
  68        };
  69
  70        if (state >= DCCP_MAX_STATES)
  71                return "INVALID STATE!";
  72        else
  73                return dccp_state_names[state];
  74}
  75#endif
  76
  77void dccp_set_state(struct sock *sk, const int state)
  78{
  79        const int oldstate = sk->sk_state;
  80
  81        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
  82                      dccp_state_name(oldstate), dccp_state_name(state));
  83        WARN_ON(state == oldstate);
  84
  85        switch (state) {
  86        case DCCP_OPEN:
  87                if (oldstate != DCCP_OPEN)
  88                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
  89                /* Client retransmits all Confirm options until entering OPEN */
  90                if (oldstate == DCCP_PARTOPEN)
  91                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
  92                break;
  93
  94        case DCCP_CLOSED:
  95                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
  96                    oldstate == DCCP_CLOSING)
  97                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
  98
  99                sk->sk_prot->unhash(sk);
 100                if (inet_csk(sk)->icsk_bind_hash != NULL &&
 101                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
 102                        inet_put_port(sk);
 103                /* fall through */
 104        default:
 105                if (oldstate == DCCP_OPEN)
 106                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
 107        }
 108
 109        /* Change state AFTER socket is unhashed to avoid closed
 110         * socket sitting in hash tables.
 111         */
 112        sk->sk_state = state;
 113}
 114
 115EXPORT_SYMBOL_GPL(dccp_set_state);
 116
 117static void dccp_finish_passive_close(struct sock *sk)
 118{
 119        switch (sk->sk_state) {
 120        case DCCP_PASSIVE_CLOSE:
 121                /* Node (client or server) has received Close packet. */
 122                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
 123                dccp_set_state(sk, DCCP_CLOSED);
 124                break;
 125        case DCCP_PASSIVE_CLOSEREQ:
 126                /*
 127                 * Client received CloseReq. We set the `active' flag so that
 128                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
 129                 */
 130                dccp_send_close(sk, 1);
 131                dccp_set_state(sk, DCCP_CLOSING);
 132        }
 133}
 134
 135void dccp_done(struct sock *sk)
 136{
 137        dccp_set_state(sk, DCCP_CLOSED);
 138        dccp_clear_xmit_timers(sk);
 139
 140        sk->sk_shutdown = SHUTDOWN_MASK;
 141
 142        if (!sock_flag(sk, SOCK_DEAD))
 143                sk->sk_state_change(sk);
 144        else
 145                inet_csk_destroy_sock(sk);
 146}
 147
 148EXPORT_SYMBOL_GPL(dccp_done);
 149
 150const char *dccp_packet_name(const int type)
 151{
 152        static const char *const dccp_packet_names[] = {
 153                [DCCP_PKT_REQUEST]  = "REQUEST",
 154                [DCCP_PKT_RESPONSE] = "RESPONSE",
 155                [DCCP_PKT_DATA]     = "DATA",
 156                [DCCP_PKT_ACK]      = "ACK",
 157                [DCCP_PKT_DATAACK]  = "DATAACK",
 158                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
 159                [DCCP_PKT_CLOSE]    = "CLOSE",
 160                [DCCP_PKT_RESET]    = "RESET",
 161                [DCCP_PKT_SYNC]     = "SYNC",
 162                [DCCP_PKT_SYNCACK]  = "SYNCACK",
 163        };
 164
 165        if (type >= DCCP_NR_PKT_TYPES)
 166                return "INVALID";
 167        else
 168                return dccp_packet_names[type];
 169}
 170
 171EXPORT_SYMBOL_GPL(dccp_packet_name);
 172
 173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 174{
 175        struct dccp_sock *dp = dccp_sk(sk);
 176        struct inet_connection_sock *icsk = inet_csk(sk);
 177
 178        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
 179        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
 180        sk->sk_state            = DCCP_CLOSED;
 181        sk->sk_write_space      = dccp_write_space;
 182        icsk->icsk_sync_mss     = dccp_sync_mss;
 183        dp->dccps_mss_cache     = 536;
 184        dp->dccps_rate_last     = jiffies;
 185        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
 186        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
 187        dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
 188
 189        dccp_init_xmit_timers(sk);
 190
 191        INIT_LIST_HEAD(&dp->dccps_featneg);
 192        /* control socket doesn't need feat nego */
 193        if (likely(ctl_sock_initialized))
 194                return dccp_feat_init(sk);
 195        return 0;
 196}
 197
 198EXPORT_SYMBOL_GPL(dccp_init_sock);
 199
 200void dccp_destroy_sock(struct sock *sk)
 201{
 202        struct dccp_sock *dp = dccp_sk(sk);
 203
 204        /*
 205         * DCCP doesn't use sk_write_queue, just sk_send_head
 206         * for retransmissions
 207         */
 208        if (sk->sk_send_head != NULL) {
 209                kfree_skb(sk->sk_send_head);
 210                sk->sk_send_head = NULL;
 211        }
 212
 213        /* Clean up a referenced DCCP bind bucket. */
 214        if (inet_csk(sk)->icsk_bind_hash != NULL)
 215                inet_put_port(sk);
 216
 217        kfree(dp->dccps_service_list);
 218        dp->dccps_service_list = NULL;
 219
 220        if (dp->dccps_hc_rx_ackvec != NULL) {
 221                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 222                dp->dccps_hc_rx_ackvec = NULL;
 223        }
 224        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
 225        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 226        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 227
 228        /* clean up feature negotiation state */
 229        dccp_feat_list_purge(&dp->dccps_featneg);
 230}
 231
 232EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 233
 234static inline int dccp_listen_start(struct sock *sk, int backlog)
 235{
 236        struct dccp_sock *dp = dccp_sk(sk);
 237
 238        dp->dccps_role = DCCP_ROLE_LISTEN;
 239        /* do not start to listen if feature negotiation setup fails */
 240        if (dccp_feat_finalise_settings(dp))
 241                return -EPROTO;
 242        return inet_csk_listen_start(sk, backlog);
 243}
 244
 245static inline int dccp_need_reset(int state)
 246{
 247        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
 248               state != DCCP_REQUESTING;
 249}
 250
 251int dccp_disconnect(struct sock *sk, int flags)
 252{
 253        struct inet_connection_sock *icsk = inet_csk(sk);
 254        struct inet_sock *inet = inet_sk(sk);
 255        int err = 0;
 256        const int old_state = sk->sk_state;
 257
 258        if (old_state != DCCP_CLOSED)
 259                dccp_set_state(sk, DCCP_CLOSED);
 260
 261        /*
 262         * This corresponds to the ABORT function of RFC793, sec. 3.8
 263         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
 264         */
 265        if (old_state == DCCP_LISTEN) {
 266                inet_csk_listen_stop(sk);
 267        } else if (dccp_need_reset(old_state)) {
 268                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
 269                sk->sk_err = ECONNRESET;
 270        } else if (old_state == DCCP_REQUESTING)
 271                sk->sk_err = ECONNRESET;
 272
 273        dccp_clear_xmit_timers(sk);
 274
 275        __skb_queue_purge(&sk->sk_receive_queue);
 276        __skb_queue_purge(&sk->sk_write_queue);
 277        if (sk->sk_send_head != NULL) {
 278                __kfree_skb(sk->sk_send_head);
 279                sk->sk_send_head = NULL;
 280        }
 281
 282        inet->inet_dport = 0;
 283
 284        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 285                inet_reset_saddr(sk);
 286
 287        sk->sk_shutdown = 0;
 288        sock_reset_flag(sk, SOCK_DONE);
 289
 290        icsk->icsk_backoff = 0;
 291        inet_csk_delack_init(sk);
 292        __sk_dst_reset(sk);
 293
 294        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 295
 296        sk->sk_error_report(sk);
 297        return err;
 298}
 299
 300EXPORT_SYMBOL_GPL(dccp_disconnect);
 301
 302/*
 303 *      Wait for a DCCP event.
 304 *
 305 *      Note that we don't need to lock the socket, as the upper poll layers
 306 *      take care of normal races (between the test and the event) and we don't
 307 *      go look at any of the socket buffers directly.
 308 */
 309unsigned int dccp_poll(struct file *file, struct socket *sock,
 310                       poll_table *wait)
 311{
 312        unsigned int mask;
 313        struct sock *sk = sock->sk;
 314
 315        sock_poll_wait(file, sk_sleep(sk), wait);
 316        if (sk->sk_state == DCCP_LISTEN)
 317                return inet_csk_listen_poll(sk);
 318
 319        /* Socket is not locked. We are protected from async events
 320           by poll logic and correct handling of state changes
 321           made by another threads is impossible in any case.
 322         */
 323
 324        mask = 0;
 325        if (sk->sk_err)
 326                mask = POLLERR;
 327
 328        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
 329                mask |= POLLHUP;
 330        if (sk->sk_shutdown & RCV_SHUTDOWN)
 331                mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 332
 333        /* Connected? */
 334        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
 335                if (atomic_read(&sk->sk_rmem_alloc) > 0)
 336                        mask |= POLLIN | POLLRDNORM;
 337
 338                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 339                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 340                                mask |= POLLOUT | POLLWRNORM;
 341                        } else {  /* send SIGIO later */
 342                                set_bit(SOCK_ASYNC_NOSPACE,
 343                                        &sk->sk_socket->flags);
 344                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 345
 346                                /* Race breaker. If space is freed after
 347                                 * wspace test but before the flags are set,
 348                                 * IO signal will be lost.
 349                                 */
 350                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 351                                        mask |= POLLOUT | POLLWRNORM;
 352                        }
 353                }
 354        }
 355        return mask;
 356}
 357
 358EXPORT_SYMBOL_GPL(dccp_poll);
 359
 360int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 361{
 362        int rc = -ENOTCONN;
 363
 364        lock_sock(sk);
 365
 366        if (sk->sk_state == DCCP_LISTEN)
 367                goto out;
 368
 369        switch (cmd) {
 370        case SIOCINQ: {
 371                struct sk_buff *skb;
 372                unsigned long amount = 0;
 373
 374                skb = skb_peek(&sk->sk_receive_queue);
 375                if (skb != NULL) {
 376                        /*
 377                         * We will only return the amount of this packet since
 378                         * that is all that will be read.
 379                         */
 380                        amount = skb->len;
 381                }
 382                rc = put_user(amount, (int __user *)arg);
 383        }
 384                break;
 385        default:
 386                rc = -ENOIOCTLCMD;
 387                break;
 388        }
 389out:
 390        release_sock(sk);
 391        return rc;
 392}
 393
 394EXPORT_SYMBOL_GPL(dccp_ioctl);
 395
 396static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 397                                   char __user *optval, unsigned int optlen)
 398{
 399        struct dccp_sock *dp = dccp_sk(sk);
 400        struct dccp_service_list *sl = NULL;
 401
 402        if (service == DCCP_SERVICE_INVALID_VALUE ||
 403            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
 404                return -EINVAL;
 405
 406        if (optlen > sizeof(service)) {
 407                sl = kmalloc(optlen, GFP_KERNEL);
 408                if (sl == NULL)
 409                        return -ENOMEM;
 410
 411                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
 412                if (copy_from_user(sl->dccpsl_list,
 413                                   optval + sizeof(service),
 414                                   optlen - sizeof(service)) ||
 415                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
 416                        kfree(sl);
 417                        return -EFAULT;
 418                }
 419        }
 420
 421        lock_sock(sk);
 422        dp->dccps_service = service;
 423
 424        kfree(dp->dccps_service_list);
 425
 426        dp->dccps_service_list = sl;
 427        release_sock(sk);
 428        return 0;
 429}
 430
 431static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 432{
 433        u8 *list, len;
 434        int i, rc;
 435
 436        if (cscov < 0 || cscov > 15)
 437                return -EINVAL;
 438        /*
 439         * Populate a list of permissible values, in the range cscov...15. This
 440         * is necessary since feature negotiation of single values only works if
 441         * both sides incidentally choose the same value. Since the list starts
 442         * lowest-value first, negotiation will pick the smallest shared value.
 443         */
 444        if (cscov == 0)
 445                return 0;
 446        len = 16 - cscov;
 447
 448        list = kmalloc(len, GFP_KERNEL);
 449        if (list == NULL)
 450                return -ENOBUFS;
 451
 452        for (i = 0; i < len; i++)
 453                list[i] = cscov++;
 454
 455        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
 456
 457        if (rc == 0) {
 458                if (rx)
 459                        dccp_sk(sk)->dccps_pcrlen = cscov;
 460                else
 461                        dccp_sk(sk)->dccps_pcslen = cscov;
 462        }
 463        kfree(list);
 464        return rc;
 465}
 466
 467static int dccp_setsockopt_ccid(struct sock *sk, int type,
 468                                char __user *optval, unsigned int optlen)
 469{
 470        u8 *val;
 471        int rc = 0;
 472
 473        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
 474                return -EINVAL;
 475
 476        val = memdup_user(optval, optlen);
 477        if (IS_ERR(val))
 478                return PTR_ERR(val);
 479
 480        lock_sock(sk);
 481        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
 482                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
 483
 484        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
 485                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
 486        release_sock(sk);
 487
 488        kfree(val);
 489        return rc;
 490}
 491
 492static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 493                char __user *optval, unsigned int optlen)
 494{
 495        struct dccp_sock *dp = dccp_sk(sk);
 496        int val, err = 0;
 497
 498        switch (optname) {
 499        case DCCP_SOCKOPT_PACKET_SIZE:
 500                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 501                return 0;
 502        case DCCP_SOCKOPT_CHANGE_L:
 503        case DCCP_SOCKOPT_CHANGE_R:
 504                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
 505                return 0;
 506        case DCCP_SOCKOPT_CCID:
 507        case DCCP_SOCKOPT_RX_CCID:
 508        case DCCP_SOCKOPT_TX_CCID:
 509                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
 510        }
 511
 512        if (optlen < (int)sizeof(int))
 513                return -EINVAL;
 514
 515        if (get_user(val, (int __user *)optval))
 516                return -EFAULT;
 517
 518        if (optname == DCCP_SOCKOPT_SERVICE)
 519                return dccp_setsockopt_service(sk, val, optval, optlen);
 520
 521        lock_sock(sk);
 522        switch (optname) {
 523        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 524                if (dp->dccps_role != DCCP_ROLE_SERVER)
 525                        err = -EOPNOTSUPP;
 526                else
 527                        dp->dccps_server_timewait = (val != 0);
 528                break;
 529        case DCCP_SOCKOPT_SEND_CSCOV:
 530                err = dccp_setsockopt_cscov(sk, val, false);
 531                break;
 532        case DCCP_SOCKOPT_RECV_CSCOV:
 533                err = dccp_setsockopt_cscov(sk, val, true);
 534                break;
 535        case DCCP_SOCKOPT_QPOLICY_ID:
 536                if (sk->sk_state != DCCP_CLOSED)
 537                        err = -EISCONN;
 538                else if (val < 0 || val >= DCCPQ_POLICY_MAX)
 539                        err = -EINVAL;
 540                else
 541                        dp->dccps_qpolicy = val;
 542                break;
 543        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 544                if (val < 0)
 545                        err = -EINVAL;
 546                else
 547                        dp->dccps_tx_qlen = val;
 548                break;
 549        default:
 550                err = -ENOPROTOOPT;
 551                break;
 552        }
 553        release_sock(sk);
 554
 555        return err;
 556}
 557
 558int dccp_setsockopt(struct sock *sk, int level, int optname,
 559                    char __user *optval, unsigned int optlen)
 560{
 561        if (level != SOL_DCCP)
 562                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
 563                                                             optname, optval,
 564                                                             optlen);
 565        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 566}
 567
 568EXPORT_SYMBOL_GPL(dccp_setsockopt);
 569
 570#ifdef CONFIG_COMPAT
 571int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 572                           char __user *optval, unsigned int optlen)
 573{
 574        if (level != SOL_DCCP)
 575                return inet_csk_compat_setsockopt(sk, level, optname,
 576                                                  optval, optlen);
 577        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 578}
 579
 580EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
 581#endif
 582
 583static int dccp_getsockopt_service(struct sock *sk, int len,
 584                                   __be32 __user *optval,
 585                                   int __user *optlen)
 586{
 587        const struct dccp_sock *dp = dccp_sk(sk);
 588        const struct dccp_service_list *sl;
 589        int err = -ENOENT, slen = 0, total_len = sizeof(u32);
 590
 591        lock_sock(sk);
 592        if ((sl = dp->dccps_service_list) != NULL) {
 593                slen = sl->dccpsl_nr * sizeof(u32);
 594                total_len += slen;
 595        }
 596
 597        err = -EINVAL;
 598        if (total_len > len)
 599                goto out;
 600
 601        err = 0;
 602        if (put_user(total_len, optlen) ||
 603            put_user(dp->dccps_service, optval) ||
 604            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
 605                err = -EFAULT;
 606out:
 607        release_sock(sk);
 608        return err;
 609}
 610
 611static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 612                    char __user *optval, int __user *optlen)
 613{
 614        struct dccp_sock *dp;
 615        int val, len;
 616
 617        if (get_user(len, optlen))
 618                return -EFAULT;
 619
 620        if (len < (int)sizeof(int))
 621                return -EINVAL;
 622
 623        dp = dccp_sk(sk);
 624
 625        switch (optname) {
 626        case DCCP_SOCKOPT_PACKET_SIZE:
 627                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 628                return 0;
 629        case DCCP_SOCKOPT_SERVICE:
 630                return dccp_getsockopt_service(sk, len,
 631                                               (__be32 __user *)optval, optlen);
 632        case DCCP_SOCKOPT_GET_CUR_MPS:
 633                val = dp->dccps_mss_cache;
 634                break;
 635        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
 636                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
 637        case DCCP_SOCKOPT_TX_CCID:
 638                val = ccid_get_current_tx_ccid(dp);
 639                if (val < 0)
 640                        return -ENOPROTOOPT;
 641                break;
 642        case DCCP_SOCKOPT_RX_CCID:
 643                val = ccid_get_current_rx_ccid(dp);
 644                if (val < 0)
 645                        return -ENOPROTOOPT;
 646                break;
 647        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 648                val = dp->dccps_server_timewait;
 649                break;
 650        case DCCP_SOCKOPT_SEND_CSCOV:
 651                val = dp->dccps_pcslen;
 652                break;
 653        case DCCP_SOCKOPT_RECV_CSCOV:
 654                val = dp->dccps_pcrlen;
 655                break;
 656        case DCCP_SOCKOPT_QPOLICY_ID:
 657                val = dp->dccps_qpolicy;
 658                break;
 659        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
 660                val = dp->dccps_tx_qlen;
 661                break;
 662        case 128 ... 191:
 663                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 664                                             len, (u32 __user *)optval, optlen);
 665        case 192 ... 255:
 666                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
 667                                             len, (u32 __user *)optval, optlen);
 668        default:
 669                return -ENOPROTOOPT;
 670        }
 671
 672        len = sizeof(val);
 673        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 674                return -EFAULT;
 675
 676        return 0;
 677}
 678
 679int dccp_getsockopt(struct sock *sk, int level, int optname,
 680                    char __user *optval, int __user *optlen)
 681{
 682        if (level != SOL_DCCP)
 683                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
 684                                                             optname, optval,
 685                                                             optlen);
 686        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 687}
 688
 689EXPORT_SYMBOL_GPL(dccp_getsockopt);
 690
 691#ifdef CONFIG_COMPAT
 692int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 693                           char __user *optval, int __user *optlen)
 694{
 695        if (level != SOL_DCCP)
 696                return inet_csk_compat_getsockopt(sk, level, optname,
 697                                                  optval, optlen);
 698        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 699}
 700
 701EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 702#endif
 703
 704static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 705{
 706        struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
 707
 708        /*
 709         * Assign an (opaque) qpolicy priority value to skb->priority.
 710         *
 711         * We are overloading this skb field for use with the qpolicy subystem.
 712         * The skb->priority is normally used for the SO_PRIORITY option, which
 713         * is initialised from sk_priority. Since the assignment of sk_priority
 714         * to skb->priority happens later (on layer 3), we overload this field
 715         * for use with queueing priorities as long as the skb is on layer 4.
 716         * The default priority value (if nothing is set) is 0.
 717         */
 718        skb->priority = 0;
 719
 720        for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
 721
 722                if (!CMSG_OK(msg, cmsg))
 723                        return -EINVAL;
 724
 725                if (cmsg->cmsg_level != SOL_DCCP)
 726                        continue;
 727
 728                if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
 729                    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
 730                        return -EINVAL;
 731
 732                switch (cmsg->cmsg_type) {
 733                case DCCP_SCM_PRIORITY:
 734                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
 735                                return -EINVAL;
 736                        skb->priority = *(__u32 *)CMSG_DATA(cmsg);
 737                        break;
 738                default:
 739                        return -EINVAL;
 740                }
 741        }
 742        return 0;
 743}
 744
 745int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 746                 size_t len)
 747{
 748        const struct dccp_sock *dp = dccp_sk(sk);
 749        const int flags = msg->msg_flags;
 750        const int noblock = flags & MSG_DONTWAIT;
 751        struct sk_buff *skb;
 752        int rc, size;
 753        long timeo;
 754
 755        if (len > dp->dccps_mss_cache)
 756                return -EMSGSIZE;
 757
 758        lock_sock(sk);
 759
 760        if (dccp_qpolicy_full(sk)) {
 761                rc = -EAGAIN;
 762                goto out_release;
 763        }
 764
 765        timeo = sock_sndtimeo(sk, noblock);
 766
 767        /*
 768         * We have to use sk_stream_wait_connect here to set sk_write_pending,
 769         * so that the trick in dccp_rcv_request_sent_state_process.
 770         */
 771        /* Wait for a connection to finish. */
 772        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
 773                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
 774                        goto out_release;
 775
 776        size = sk->sk_prot->max_header + len;
 777        release_sock(sk);
 778        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
 779        lock_sock(sk);
 780        if (skb == NULL)
 781                goto out_release;
 782
 783        skb_reserve(skb, sk->sk_prot->max_header);
 784        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 785        if (rc != 0)
 786                goto out_discard;
 787
 788        rc = dccp_msghdr_parse(msg, skb);
 789        if (rc != 0)
 790                goto out_discard;
 791
 792        dccp_qpolicy_push(sk, skb);
 793        /*
 794         * The xmit_timer is set if the TX CCID is rate-based and will expire
 795         * when congestion control permits to release further packets into the
 796         * network. Window-based CCIDs do not use this timer.
 797         */
 798        if (!timer_pending(&dp->dccps_xmit_timer))
 799                dccp_write_xmit(sk);
 800out_release:
 801        release_sock(sk);
 802        return rc ? : len;
 803out_discard:
 804        kfree_skb(skb);
 805        goto out_release;
 806}
 807
 808EXPORT_SYMBOL_GPL(dccp_sendmsg);
 809
 810int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 811                 size_t len, int nonblock, int flags, int *addr_len)
 812{
 813        const struct dccp_hdr *dh;
 814        long timeo;
 815
 816        lock_sock(sk);
 817
 818        if (sk->sk_state == DCCP_LISTEN) {
 819                len = -ENOTCONN;
 820                goto out;
 821        }
 822
 823        timeo = sock_rcvtimeo(sk, nonblock);
 824
 825        do {
 826                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 827
 828                if (skb == NULL)
 829                        goto verify_sock_status;
 830
 831                dh = dccp_hdr(skb);
 832
 833                switch (dh->dccph_type) {
 834                case DCCP_PKT_DATA:
 835                case DCCP_PKT_DATAACK:
 836                        goto found_ok_skb;
 837
 838                case DCCP_PKT_CLOSE:
 839                case DCCP_PKT_CLOSEREQ:
 840                        if (!(flags & MSG_PEEK))
 841                                dccp_finish_passive_close(sk);
 842                        /* fall through */
 843                case DCCP_PKT_RESET:
 844                        dccp_pr_debug("found fin (%s) ok!\n",
 845                                      dccp_packet_name(dh->dccph_type));
 846                        len = 0;
 847                        goto found_fin_ok;
 848                default:
 849                        dccp_pr_debug("packet_type=%s\n",
 850                                      dccp_packet_name(dh->dccph_type));
 851                        sk_eat_skb(sk, skb, false);
 852                }
 853verify_sock_status:
 854                if (sock_flag(sk, SOCK_DONE)) {
 855                        len = 0;
 856                        break;
 857                }
 858
 859                if (sk->sk_err) {
 860                        len = sock_error(sk);
 861                        break;
 862                }
 863
 864                if (sk->sk_shutdown & RCV_SHUTDOWN) {
 865                        len = 0;
 866                        break;
 867                }
 868
 869                if (sk->sk_state == DCCP_CLOSED) {
 870                        if (!sock_flag(sk, SOCK_DONE)) {
 871                                /* This occurs when user tries to read
 872                                 * from never connected socket.
 873                                 */
 874                                len = -ENOTCONN;
 875                                break;
 876                        }
 877                        len = 0;
 878                        break;
 879                }
 880
 881                if (!timeo) {
 882                        len = -EAGAIN;
 883                        break;
 884                }
 885
 886                if (signal_pending(current)) {
 887                        len = sock_intr_errno(timeo);
 888                        break;
 889                }
 890
 891                sk_wait_data(sk, &timeo);
 892                continue;
 893        found_ok_skb:
 894                if (len > skb->len)
 895                        len = skb->len;
 896                else if (len < skb->len)
 897                        msg->msg_flags |= MSG_TRUNC;
 898
 899                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
 900                        /* Exception. Bailout! */
 901                        len = -EFAULT;
 902                        break;
 903                }
 904                if (flags & MSG_TRUNC)
 905                        len = skb->len;
 906        found_fin_ok:
 907                if (!(flags & MSG_PEEK))
 908                        sk_eat_skb(sk, skb, false);
 909                break;
 910        } while (1);
 911out:
 912        release_sock(sk);
 913        return len;
 914}
 915
 916EXPORT_SYMBOL_GPL(dccp_recvmsg);
 917
 918int inet_dccp_listen(struct socket *sock, int backlog)
 919{
 920        struct sock *sk = sock->sk;
 921        unsigned char old_state;
 922        int err;
 923
 924        lock_sock(sk);
 925
 926        err = -EINVAL;
 927        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
 928                goto out;
 929
 930        old_state = sk->sk_state;
 931        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 932                goto out;
 933
 934        /* Really, if the socket is already in listen state
 935         * we can only allow the backlog to be adjusted.
 936         */
 937        if (old_state != DCCP_LISTEN) {
 938                /*
 939                 * FIXME: here it probably should be sk->sk_prot->listen_start
 940                 * see tcp_listen_start
 941                 */
 942                err = dccp_listen_start(sk, backlog);
 943                if (err)
 944                        goto out;
 945        }
 946        sk->sk_max_ack_backlog = backlog;
 947        err = 0;
 948
 949out:
 950        release_sock(sk);
 951        return err;
 952}
 953
 954EXPORT_SYMBOL_GPL(inet_dccp_listen);
 955
 956static void dccp_terminate_connection(struct sock *sk)
 957{
 958        u8 next_state = DCCP_CLOSED;
 959
 960        switch (sk->sk_state) {
 961        case DCCP_PASSIVE_CLOSE:
 962        case DCCP_PASSIVE_CLOSEREQ:
 963                dccp_finish_passive_close(sk);
 964                break;
 965        case DCCP_PARTOPEN:
 966                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
 967                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 968                /* fall through */
 969        case DCCP_OPEN:
 970                dccp_send_close(sk, 1);
 971
 972                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
 973                    !dccp_sk(sk)->dccps_server_timewait)
 974                        next_state = DCCP_ACTIVE_CLOSEREQ;
 975                else
 976                        next_state = DCCP_CLOSING;
 977                /* fall through */
 978        default:
 979                dccp_set_state(sk, next_state);
 980        }
 981}
 982
 983void dccp_close(struct sock *sk, long timeout)
 984{
 985        struct dccp_sock *dp = dccp_sk(sk);
 986        struct sk_buff *skb;
 987        u32 data_was_unread = 0;
 988        int state;
 989
 990        lock_sock(sk);
 991
 992        sk->sk_shutdown = SHUTDOWN_MASK;
 993
 994        if (sk->sk_state == DCCP_LISTEN) {
 995                dccp_set_state(sk, DCCP_CLOSED);
 996
 997                /* Special case. */
 998                inet_csk_listen_stop(sk);
 999
1000                goto adjudge_to_death;
1001        }
1002
1003        sk_stop_timer(sk, &dp->dccps_xmit_timer);
1004
1005        /*
1006         * We need to flush the recv. buffs.  We do this only on the
1007         * descriptor close, not protocol-sourced closes, because the
1008          *reader process may not have drained the data yet!
1009         */
1010        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1011                data_was_unread += skb->len;
1012                __kfree_skb(skb);
1013        }
1014
1015        if (data_was_unread) {
1016                /* Unread data was tossed, send an appropriate Reset Code */
1017                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1018                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1019                dccp_set_state(sk, DCCP_CLOSED);
1020        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1021                /* Check zero linger _after_ checking for unread data. */
1022                sk->sk_prot->disconnect(sk, 0);
1023        } else if (sk->sk_state != DCCP_CLOSED) {
1024                /*
1025                 * Normal connection termination. May need to wait if there are
1026                 * still packets in the TX queue that are delayed by the CCID.
1027                 */
1028                dccp_flush_write_queue(sk, &timeout);
1029                dccp_terminate_connection(sk);
1030        }
1031
1032        /*
1033         * Flush write queue. This may be necessary in several cases:
1034         * - we have been closed by the peer but still have application data;
1035         * - abortive termination (unread data or zero linger time),
1036         * - normal termination but queue could not be flushed within time limit
1037         */
1038        __skb_queue_purge(&sk->sk_write_queue);
1039
1040        sk_stream_wait_close(sk, timeout);
1041
1042adjudge_to_death:
1043        state = sk->sk_state;
1044        sock_hold(sk);
1045        sock_orphan(sk);
1046
1047        /*
1048         * It is the last release_sock in its life. It will remove backlog.
1049         */
1050        release_sock(sk);
1051        /*
1052         * Now socket is owned by kernel and we acquire BH lock
1053         * to finish close. No need to check for user refs.
1054         */
1055        local_bh_disable();
1056        bh_lock_sock(sk);
1057        WARN_ON(sock_owned_by_user(sk));
1058
1059        percpu_counter_inc(sk->sk_prot->orphan_count);
1060
1061        /* Have we already been destroyed by a softirq or backlog? */
1062        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1063                goto out;
1064
1065        if (sk->sk_state == DCCP_CLOSED)
1066                inet_csk_destroy_sock(sk);
1067
1068        /* Otherwise, socket is reprieved until protocol close. */
1069
1070out:
1071        bh_unlock_sock(sk);
1072        local_bh_enable();
1073        sock_put(sk);
1074}
1075
1076EXPORT_SYMBOL_GPL(dccp_close);
1077
/*
 * dccp_shutdown  -  shutdown() hook for DCCP sockets
 * @sk:  socket the request refers to (not used here)
 * @how: shutdown mode requested by the caller
 *
 * Only emits a debug message with the requested mode; the socket itself
 * is left untouched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1084
1085static inline int dccp_mib_init(void)
1086{
1087        return snmp_mib_init((void __percpu **)dccp_statistics,
1088                             sizeof(struct dccp_mib),
1089                             __alignof__(struct dccp_mib));
1090}
1091
1092static inline void dccp_mib_exit(void)
1093{
1094        snmp_mib_free((void __percpu **)dccp_statistics);
1095}
1096
1097static int thash_entries;
1098module_param(thash_entries, int, 0444);
1099MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1100
1101#ifdef CONFIG_IP_DCCP_DEBUG
1102bool dccp_debug;
1103module_param(dccp_debug, bool, 0644);
1104MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1105
1106EXPORT_SYMBOL_GPL(dccp_debug);
1107#endif
1108
1109static int __init dccp_init(void)
1110{
1111        unsigned long goal;
1112        int ehash_order, bhash_order, i;
1113        int rc;
1114
1115        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1116                     FIELD_SIZEOF(struct sk_buff, cb));
1117        rc = percpu_counter_init(&dccp_orphan_count, 0);
1118        if (rc)
1119                goto out_fail;
1120        rc = -ENOBUFS;
1121        inet_hashinfo_init(&dccp_hashinfo);
1122        dccp_hashinfo.bind_bucket_cachep =
1123                kmem_cache_create("dccp_bind_bucket",
1124                                  sizeof(struct inet_bind_bucket), 0,
1125                                  SLAB_HWCACHE_ALIGN, NULL);
1126        if (!dccp_hashinfo.bind_bucket_cachep)
1127                goto out_free_percpu;
1128
1129        /*
1130         * Size and allocate the main established and bind bucket
1131         * hash tables.
1132         *
1133         * The methodology is similar to that of the buffer cache.
1134         */
1135        if (totalram_pages >= (128 * 1024))
1136                goal = totalram_pages >> (21 - PAGE_SHIFT);
1137        else
1138                goal = totalram_pages >> (23 - PAGE_SHIFT);
1139
1140        if (thash_entries)
1141                goal = (thash_entries *
1142                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1143        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1144                ;
1145        do {
1146                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1147                                        sizeof(struct inet_ehash_bucket);
1148
1149                while (hash_size & (hash_size - 1))
1150                        hash_size--;
1151                dccp_hashinfo.ehash_mask = hash_size - 1;
1152                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1153                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1154        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1155
1156        if (!dccp_hashinfo.ehash) {
1157                DCCP_CRIT("Failed to allocate DCCP established hash table");
1158                goto out_free_bind_bucket_cachep;
1159        }
1160
1161        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
1162                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1163                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
1164        }
1165
1166        if (inet_ehash_locks_alloc(&dccp_hashinfo))
1167                        goto out_free_dccp_ehash;
1168
1169        bhash_order = ehash_order;
1170
1171        do {
1172                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1173                                        sizeof(struct inet_bind_hashbucket);
1174                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1175                    bhash_order > 0)
1176                        continue;
1177                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1178                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1179        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1180
1181        if (!dccp_hashinfo.bhash) {
1182                DCCP_CRIT("Failed to allocate DCCP bind hash table");
1183                goto out_free_dccp_locks;
1184        }
1185
1186        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1187                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1188                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1189        }
1190
1191        rc = dccp_mib_init();
1192        if (rc)
1193                goto out_free_dccp_bhash;
1194
1195        rc = dccp_ackvec_init();
1196        if (rc)
1197                goto out_free_dccp_mib;
1198
1199        rc = dccp_sysctl_init();
1200        if (rc)
1201                goto out_ackvec_exit;
1202
1203        rc = ccid_initialize_builtins();
1204        if (rc)
1205                goto out_sysctl_exit;
1206
1207        dccp_timestamping_init();
1208
1209        return 0;
1210
1211out_sysctl_exit:
1212        dccp_sysctl_exit();
1213out_ackvec_exit:
1214        dccp_ackvec_exit();
1215out_free_dccp_mib:
1216        dccp_mib_exit();
1217out_free_dccp_bhash:
1218        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1219out_free_dccp_locks:
1220        inet_ehash_locks_free(&dccp_hashinfo);
1221out_free_dccp_ehash:
1222        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1223out_free_bind_bucket_cachep:
1224        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1225out_free_percpu:
1226        percpu_counter_destroy(&dccp_orphan_count);
1227out_fail:
1228        dccp_hashinfo.bhash = NULL;
1229        dccp_hashinfo.ehash = NULL;
1230        dccp_hashinfo.bind_bucket_cachep = NULL;
1231        return rc;
1232}
1233
1234static void __exit dccp_fini(void)
1235{
1236        ccid_cleanup_builtins();
1237        dccp_mib_exit();
1238        free_pages((unsigned long)dccp_hashinfo.bhash,
1239                   get_order(dccp_hashinfo.bhash_size *
1240                             sizeof(struct inet_bind_hashbucket)));
1241        free_pages((unsigned long)dccp_hashinfo.ehash,
1242                   get_order((dccp_hashinfo.ehash_mask + 1) *
1243                             sizeof(struct inet_ehash_bucket)));
1244        inet_ehash_locks_free(&dccp_hashinfo);
1245        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1246        dccp_ackvec_exit();
1247        dccp_sysctl_exit();
1248        percpu_counter_destroy(&dccp_orphan_count);
1249}
1250
1251module_init(dccp_init);
1252module_exit(dccp_fini);
1253
1254MODULE_LICENSE("GPL");
1255MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1256MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1257